diff options
author | Srikant Patnaik | 2015-01-11 12:28:04 +0530 |
---|---|---|
committer | Srikant Patnaik | 2015-01-11 12:28:04 +0530 |
commit | 871480933a1c28f8a9fed4c4d34d06c439a7a422 (patch) | |
tree | 8718f573808810c2a1e8cb8fb6ac469093ca2784 /arch/s390 | |
parent | 9d40ac5867b9aefe0722bc1f110b965ff294d30d (diff) | |
download | FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.tar.gz FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.tar.bz2 FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.zip |
Moved, renamed, and deleted files
The original directory structure was scattered and unorganized.
The changes reorganize the tree so that it follows the standard kernel source layout.
Diffstat (limited to 'arch/s390')
312 files changed, 66779 insertions, 0 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild new file mode 100644 index 00000000..9858476f --- /dev/null +++ b/arch/s390/Kbuild @@ -0,0 +1,7 @@ +obj-y += kernel/ +obj-y += mm/ +obj-$(CONFIG_KVM) += kvm/ +obj-$(CONFIG_CRYPTO_HW) += crypto/ +obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ +obj-$(CONFIG_APPLDATA_BASE) += appldata/ +obj-$(CONFIG_MATHEMU) += math-emu/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig new file mode 100644 index 00000000..90150609 --- /dev/null +++ b/arch/s390/Kconfig @@ -0,0 +1,648 @@ +config MMU + def_bool y + +config ZONE_DMA + def_bool y + +config LOCKDEP_SUPPORT + def_bool y + +config STACKTRACE_SUPPORT + def_bool y + +config HAVE_LATENCYTOP_SUPPORT + def_bool y + +config RWSEM_GENERIC_SPINLOCK + bool + +config RWSEM_XCHGADD_ALGORITHM + def_bool y + +config ARCH_HAS_ILOG2_U32 + def_bool n + +config ARCH_HAS_ILOG2_U64 + def_bool n + +config GENERIC_HWEIGHT + def_bool y + +config GENERIC_TIME_VSYSCALL + def_bool y + +config GENERIC_CLOCKEVENTS + def_bool y + +config GENERIC_BUG + def_bool y if BUG + +config GENERIC_BUG_RELATIVE_POINTERS + def_bool y + +config NO_IOMEM + def_bool y + +config NO_DMA + def_bool y + +config ARCH_DMA_ADDR_T_64BIT + def_bool 64BIT + +config GENERIC_LOCKBREAK + def_bool y if SMP && PREEMPT + +config PGSTE + def_bool y if KVM + +config VIRT_CPU_ACCOUNTING + def_bool y + +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + +config S390 + def_bool y + select USE_GENERIC_SMP_HELPERS if SMP + select GENERIC_CPU_DEVICES if !SMP + select HAVE_SYSCALL_WRAPPERS + select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_C_RECORDMCOUNT + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_DYNAMIC_FTRACE + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_OPROFILE + select HAVE_KPROBES + select HAVE_KRETPROBES + select HAVE_KVM if 64BIT + select HAVE_ARCH_TRACEHOOK + select INIT_ALL_POSSIBLE + select HAVE_IRQ_WORK + select 
HAVE_PERF_EVENTS + select ARCH_HAVE_NMI_SAFE_CMPXCHG + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_LZMA + select HAVE_KERNEL_LZO + select HAVE_KERNEL_XZ + select HAVE_ARCH_MUTEX_CPU_RELAX + select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 + select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select ARCH_DISCARD_MEMBLOCK + select ARCH_INLINE_SPIN_TRYLOCK + select ARCH_INLINE_SPIN_TRYLOCK_BH + select ARCH_INLINE_SPIN_LOCK + select ARCH_INLINE_SPIN_LOCK_BH + select ARCH_INLINE_SPIN_LOCK_IRQ + select ARCH_INLINE_SPIN_LOCK_IRQSAVE + select ARCH_INLINE_SPIN_UNLOCK + select ARCH_INLINE_SPIN_UNLOCK_BH + select ARCH_INLINE_SPIN_UNLOCK_IRQ + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + select ARCH_INLINE_READ_TRYLOCK + select ARCH_INLINE_READ_LOCK + select ARCH_INLINE_READ_LOCK_BH + select ARCH_INLINE_READ_LOCK_IRQ + select ARCH_INLINE_READ_LOCK_IRQSAVE + select ARCH_INLINE_READ_UNLOCK + select ARCH_INLINE_READ_UNLOCK_BH + select ARCH_INLINE_READ_UNLOCK_IRQ + select ARCH_INLINE_READ_UNLOCK_IRQRESTORE + select ARCH_INLINE_WRITE_TRYLOCK + select ARCH_INLINE_WRITE_LOCK + select ARCH_INLINE_WRITE_LOCK_BH + select ARCH_INLINE_WRITE_LOCK_IRQ + select ARCH_INLINE_WRITE_LOCK_IRQSAVE + select ARCH_INLINE_WRITE_UNLOCK + select ARCH_INLINE_WRITE_UNLOCK_BH + select ARCH_INLINE_WRITE_UNLOCK_IRQ + select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + +config SCHED_OMIT_FRAME_POINTER + def_bool y + +source "init/Kconfig" + +source "kernel/Kconfig.freezer" + +menu "Base setup" + +comment "Processor type and features" + +source "kernel/time/Kconfig" + +config 64BIT + def_bool y + prompt "64 bit kernel" + help + Select this option if you have an IBM z/Architecture machine + and want to use the 64 bit addressing mode. + +config 32BIT + def_bool y if !64BIT + +config KTIME_SCALAR + def_bool 32BIT + +config SMP + def_bool y + prompt "Symmetric multi-processing support" + ---help--- + This enables support for systems with more than one CPU. 
If you have + a system with only one CPU, like most personal computers, say N. If + you have a system with more than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on many, but not all, + singleprocessor machines. On a singleprocessor machine, the kernel + will run faster if you say N here. + + See also the SMP-HOWTO available at + <http://www.tldp.org/docs.html#howto>. + + Even if you don't know what to do here, say Y. + +config NR_CPUS + int "Maximum number of CPUs (2-64)" + range 2 64 + depends on SMP + default "32" if !64BIT + default "64" if 64BIT + help + This allows you to specify the maximum number of CPUs which this + kernel will support. The maximum supported value is 64 and the + minimum value which makes sense is 2. + + This is purely to save memory - each supported CPU adds + approximately sixteen kilobytes to the kernel image. + +config HOTPLUG_CPU + def_bool y + prompt "Support for hot-pluggable CPUs" + depends on SMP + select HOTPLUG + help + Say Y here to be able to turn CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu/cpu#. + Say N if you want to disable CPU hotplug. + +config SCHED_MC + def_bool n + +config SCHED_BOOK + def_bool y + prompt "Book scheduler support" + depends on SMP + select SCHED_MC + help + Book scheduler support improves the CPU scheduler's decision making + when dealing with machines that have several books. + +config MATHEMU + def_bool y + prompt "IEEE FPU emulation" + depends on MARCH_G5 + help + This option is required for IEEE compliant floating point arithmetic + on older ESA/390 machines. Say Y unless you know your machine doesn't + need this. 
+ +config COMPAT + def_bool y + prompt "Kernel support for 31 bit emulation" + depends on 64BIT + select COMPAT_BINFMT_ELF + select ARCH_WANT_OLD_COMPAT_IPC + help + Select this option if you want to enable your system kernel to + handle system-calls from ELF binaries for 31 bit ESA. This option + (and some other stuff like libraries and such) is needed for + executing 31 bit applications. It is safe to say "Y". + +config SYSVIPC_COMPAT + def_bool y if COMPAT && SYSVIPC + +config KEYS_COMPAT + def_bool y if COMPAT && KEYS + +config AUDIT_ARCH + def_bool y + +comment "Code generation options" + +choice + prompt "Processor type" + default MARCH_G5 + +config MARCH_G5 + bool "System/390 model G5 and G6" + depends on !64BIT + help + Select this to build a 31 bit kernel that works + on all ESA/390 and z/Architecture machines. + +config MARCH_Z900 + bool "IBM zSeries model z800 and z900" + help + Select this to enable optimizations for model z800/z900 (2064 and + 2066 series). This will enable some optimizations that are not + available on older ESA/390 (31 Bit) only CPUs. + +config MARCH_Z990 + bool "IBM zSeries model z890 and z990" + help + Select this to enable optimizations for model z890/z990 (2084 and + 2086 series). The kernel will be slightly faster but will not work + on older machines. + +config MARCH_Z9_109 + bool "IBM System z9" + help + Select this to enable optimizations for IBM System z9 (2094 and + 2096 series). The kernel will be slightly faster but will not work + on older machines. + +config MARCH_Z10 + bool "IBM System z10" + help + Select this to enable optimizations for IBM System z10 (2097 and + 2098 series). The kernel will be slightly faster but will not work + on older machines. + +config MARCH_Z196 + bool "IBM zEnterprise 114 and 196" + help + Select this to enable optimizations for IBM zEnterprise 114 and 196 + (2818 and 2817 series). The kernel will be slightly faster but will + not work on older machines. 
+ +endchoice + +config PACK_STACK + def_bool y + prompt "Pack kernel stack" + help + This option enables the compiler option -mkernel-backchain if it + is available. If the option is available the compiler supports + the new stack layout which dramatically reduces the minimum stack + frame size. With an old compiler a non-leaf function needs a + minimum of 96 bytes on 31 bit and 160 bytes on 64 bit. With + -mkernel-backchain the minimum size drops to 16 byte on 31 bit + and 24 byte on 64 bit. + + Say Y if you are unsure. + +config SMALL_STACK + def_bool n + prompt "Use 8kb for kernel stack instead of 16kb" + depends on PACK_STACK && 64BIT && !LOCKDEP + help + If you say Y here and the compiler supports the -mkernel-backchain + option the kernel will use a smaller kernel stack size. The reduced + size is 8kb instead of 16kb. This allows to run more threads on a + system and reduces the pressure on the memory management for higher + order page allocations. + + Say N if you are unsure. + +config CHECK_STACK + def_bool y + prompt "Detect kernel stack overflow" + help + This option enables the compiler option -mstack-guard and + -mstack-size if they are available. If the compiler supports them + it will emit additional code to each function prolog to trigger + an illegal operation if the kernel stack is about to overflow. + + Say N if you are unsure. + +config STACK_GUARD + int "Size of the guard area (128-1024)" + range 128 1024 + depends on CHECK_STACK + default "256" + help + This allows you to specify the size of the guard area at the lower + end of the kernel stack. If the kernel stack points into the guard + area on function entry an illegal operation is triggered. The size + needs to be a power of 2. Please keep in mind that the size of an + interrupt frame is 184 bytes for 31 bit and 328 bytes on 64 bit. + The minimum size for the stack guard should be 256 for 31 bit and + 512 for 64 bit. 
+ +config WARN_DYNAMIC_STACK + def_bool n + prompt "Emit compiler warnings for function with dynamic stack usage" + help + This option enables the compiler option -mwarn-dynamicstack. If the + compiler supports this options generates warnings for functions + that dynamically allocate stack space using alloca. + + Say N if you are unsure. + +comment "Kernel preemption" + +source "kernel/Kconfig.preempt" + +config ARCH_SPARSEMEM_ENABLE + def_bool y + select SPARSEMEM_VMEMMAP_ENABLE + select SPARSEMEM_VMEMMAP + select SPARSEMEM_STATIC if !64BIT + +config ARCH_SPARSEMEM_DEFAULT + def_bool y + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y if SPARSEMEM + +config ARCH_ENABLE_MEMORY_HOTREMOVE + def_bool y + +config ARCH_HIBERNATION_POSSIBLE + def_bool y if 64BIT + +source "mm/Kconfig" + +comment "I/O subsystem configuration" + +config QDIO + def_tristate y + prompt "QDIO support" + ---help--- + This driver provides the Queued Direct I/O base support for + IBM System z. + + To compile this driver as a module, choose M here: the + module will be called qdio. + + If unsure, say Y. + +config CHSC_SCH + def_tristate m + prompt "Support for CHSC subchannels" + help + This driver allows usage of CHSC subchannels. A CHSC subchannel + is usually present on LPAR only. + The driver creates a device /dev/chsc, which may be used to + obtain I/O configuration information about the machine and + to issue asynchronous chsc commands (DANGEROUS). + You will usually only want to use this interface on a special + LPAR designated for system management. + + To compile this driver as a module, choose M here: the + module will be called chsc_sch. + + If unsure, say N. + +comment "Misc" + +config IPL + def_bool y + prompt "Builtin IPL record support" + help + If you want to use the produced kernel to IPL directly from a + device, you have to merge a bootsector specific to the device + into the first bytes of the kernel. 
You will have to select the + IPL device. + +choice + prompt "IPL method generated into head.S" + depends on IPL + default IPL_VM + help + Select "tape" if you want to IPL the image from a Tape. + + Select "vm_reader" if you are running under VM/ESA and want + to IPL the image from the emulated card reader. + +config IPL_TAPE + bool "tape" + +config IPL_VM + bool "vm_reader" + +endchoice + +source "fs/Kconfig.binfmt" + +config FORCE_MAX_ZONEORDER + int + default "9" + +config PFAULT + def_bool y + prompt "Pseudo page fault support" + help + Select this option, if you want to use PFAULT pseudo page fault + handling under VM. If running native or in LPAR, this option + has no effect. If your VM does not support PFAULT, PAGEEX + pseudo page fault handling will be used. + Note that VM 4.2 supports PFAULT but has a bug in its + implementation that causes some problems. + Everybody who wants to run Linux under VM != VM4.2 should select + this option. + +config SHARED_KERNEL + def_bool y + prompt "VM shared kernel support" + help + Select this option, if you want to share the text segment of the + Linux kernel between different VM guests. This reduces memory + usage with lots of guests but greatly increases kernel size. + Also if a kernel was IPL'ed from a shared segment the kexec system + call will not work. + You should only select this option if you know what you are + doing and want to exploit this feature. + +config CMM + def_tristate n + prompt "Cooperative memory management" + help + Select this option, if you want to enable the kernel interface + to reduce the memory size of the system. This is accomplished + by allocating pages of memory and put them "on hold". This only + makes sense for a system running under VM where the unused pages + will be reused by VM for other guest systems. The interface + allows an external monitor to balance memory of many systems. + Everybody who wants to run Linux under VM should select this + option. 
+ +config CMM_IUCV + def_bool y + prompt "IUCV special message interface to cooperative memory management" + depends on CMM && (SMSGIUCV=y || CMM=SMSGIUCV) + help + Select this option to enable the special message interface to + the cooperative memory management. + +config APPLDATA_BASE + def_bool n + prompt "Linux - VM Monitor Stream, base infrastructure" + depends on PROC_FS + help + This provides a kernel interface for creating and updating z/VM APPLDATA + monitor records. The monitor records are updated at certain time + intervals, once the timer is started. + Writing 1 or 0 to /proc/appldata/timer starts(1) or stops(0) the timer, + i.e. enables or disables monitoring on the Linux side. + A custom interval value (in seconds) can be written to + /proc/appldata/interval. + + Defaults are 60 seconds interval and timer off. + The /proc entries can also be read from, showing the current settings. + +config APPLDATA_MEM + def_tristate m + prompt "Monitor memory management statistics" + depends on APPLDATA_BASE && VM_EVENT_COUNTERS + help + This provides memory management related data to the Linux - VM Monitor + Stream, like paging/swapping rate, memory utilisation, etc. + Writing 1 or 0 to /proc/appldata/memory creates(1) or removes(0) a z/VM + APPLDATA monitor record, i.e. enables or disables monitoring this record + on the z/VM side. + + Default is disabled. + The /proc entry can also be read from, showing the current settings. + + This can also be compiled as a module, which will be called + appldata_mem.o. + +config APPLDATA_OS + def_tristate m + prompt "Monitor OS statistics" + depends on APPLDATA_BASE + help + This provides OS related data to the Linux - VM Monitor Stream, like + CPU utilisation, etc. + Writing 1 or 0 to /proc/appldata/os creates(1) or removes(0) a z/VM + APPLDATA monitor record, i.e. enables or disables monitoring this record + on the z/VM side. + + Default is disabled. 
+ This can also be compiled as a module, which will be called + appldata_os.o. + +config APPLDATA_NET_SUM + def_tristate m + prompt "Monitor overall network statistics" + depends on APPLDATA_BASE && NET + help + This provides network related data to the Linux - VM Monitor Stream, + currently there is only a total sum of network I/O statistics, no + per-interface data. + Writing 1 or 0 to /proc/appldata/net_sum creates(1) or removes(0) a z/VM + APPLDATA monitor record, i.e. enables or disables monitoring this record + on the z/VM side. + + Default is disabled. + This can also be compiled as a module, which will be called + appldata_net_sum.o. + +source kernel/Kconfig.hz + +config S390_HYPFS_FS + def_bool y + prompt "s390 hypervisor file system support" + select SYS_HYPERVISOR + help + This is a virtual file system intended to provide accounting + information in an s390 hypervisor environment. + +config KEXEC + def_bool n + prompt "kexec system call" + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but is independent of hardware/microcode support. + +config CRASH_DUMP + bool "kernel crash dumps" + depends on 64BIT + select KEXEC + help + Generate crash dump after being started by kexec. + Crash dump kernels are loaded in the main kernel with kexec-tools + into a specially reserved region and then later executed after + a crash by kdump/kexec. + For more details see Documentation/kdump/kdump.txt + +config ZFCPDUMP + def_bool n + prompt "zfcpdump support" + select SMP + help + Select this option if you want to build an zfcpdump enabled kernel. + Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. 
+ +config S390_GUEST + def_bool y + prompt "s390 guest support for KVM (EXPERIMENTAL)" + depends on 64BIT && EXPERIMENTAL + select VIRTUALIZATION + select VIRTIO + select VIRTIO_RING + select VIRTIO_CONSOLE + help + Select this option if you want to run the kernel as a guest under + the KVM hypervisor. This will add detection for KVM as well as a + virtio transport. If KVM is detected, the virtio console will be + the default console. + +config SECCOMP + def_bool y + prompt "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc/<pid>/seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. + +endmenu + +menu "Power Management" + +source "kernel/power/Kconfig" + +endmenu + +source "net/Kconfig" + +config PCMCIA + def_bool n + +config CCW + def_bool y + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "arch/s390/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" + +source "arch/s390/kvm/Kconfig" diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug new file mode 100644 index 00000000..d76cef3f --- /dev/null +++ b/arch/s390/Kconfig.debug @@ -0,0 +1,37 @@ +menu "Kernel hacking" + +config TRACE_IRQFLAGS_SUPPORT + def_bool y + +source "lib/Kconfig.debug" + +config STRICT_DEVMEM + def_bool y + prompt "Filter access to /dev/mem" + ---help--- + This option restricts access to /dev/mem. If this option is + disabled, you allow userspace access to all memory, including + kernel and userspace memory. 
Accidental memory access is likely + to be disastrous. + Memory access is required for experts who want to debug the kernel. + + If you are unsure, say Y. + +config DEBUG_STRICT_USER_COPY_CHECKS + def_bool n + prompt "Strict user copy size checks" + ---help--- + Enabling this option turns a certain set of sanity checks for user + copy operations into compile time warnings. + + The copy_from_user() etc checks are there to help test if there + are sufficient security checks on the length argument of + the copy operation, by having gcc prove that the argument is + within bounds. + + If unsure, or if you run an older (pre 4.4) gcc, say N. + +config DEBUG_SET_MODULE_RONX + def_bool y + depends on MODULES +endmenu diff --git a/arch/s390/Makefile b/arch/s390/Makefile new file mode 100644 index 00000000..0ad2f1e1 --- /dev/null +++ b/arch/s390/Makefile @@ -0,0 +1,131 @@ +# +# s390/Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. Remember to do have actions +# for "archclean" and "archdep" for cleaning up and making dependencies for +# this architecture +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. 
+# +# Copyright (C) 1994 by Linus Torvalds +# + +ifndef CONFIG_64BIT +LD_BFD := elf32-s390 +LDFLAGS := -m elf_s390 +KBUILD_CFLAGS += -m31 +KBUILD_AFLAGS += -m31 +UTS_MACHINE := s390 +STACK_SIZE := 8192 +CHECKFLAGS += -D__s390__ -msize-long +else +LD_BFD := elf64-s390 +LDFLAGS := -m elf64_s390 +KBUILD_AFLAGS_MODULE += -fpic -D__PIC__ +KBUILD_CFLAGS_MODULE += -fpic -D__PIC__ +KBUILD_CFLAGS += -m64 +KBUILD_AFLAGS += -m64 +UTS_MACHINE := s390x +STACK_SIZE := 16384 +CHECKFLAGS += -D__s390__ -D__s390x__ +endif + +export LD_BFD + +cflags-$(CONFIG_MARCH_G5) += $(call cc-option,-march=g5) +cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900) +cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990) +cflags-$(CONFIG_MARCH_Z9_109) += $(call cc-option,-march=z9-109) +cflags-$(CONFIG_MARCH_Z10) += $(call cc-option,-march=z10) +cflags-$(CONFIG_MARCH_Z196) += $(call cc-option,-march=z196) + +#KBUILD_IMAGE is necessary for make rpm +KBUILD_IMAGE :=arch/s390/boot/image + +# +# Prevent tail-call optimizations, to get clearer backtraces: +# +cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls + +# old style option for packed stacks +ifeq ($(call cc-option-yn,-mkernel-backchain),y) +cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK +aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK +cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK +aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK +ifdef CONFIG_SMALL_STACK +STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) ) +endif +endif + +# new style option for packed stacks +ifeq ($(call cc-option-yn,-mpacked-stack),y) +cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK +aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK +cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK +aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK +ifdef CONFIG_SMALL_STACK +STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) ) +endif +endif + +ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y) +cflags-$(CONFIG_CHECK_STACK) += 
-mstack-size=$(STACK_SIZE) +ifneq ($(call cc-option-yn,-mstack-size=8192),y) +cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD) +endif +endif + +ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) +cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack +endif + +KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) +KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare +KBUILD_AFLAGS += $(aflags-y) + +OBJCOPYFLAGS := -O binary + +head-y := arch/s390/kernel/head.o +head-y += arch/s390/kernel/$(if $(CONFIG_64BIT),head64.o,head31.o) +head-y += arch/s390/kernel/init_task.o + +# See arch/s390/Kbuild for content of core part of the kernel +core-y += arch/s390/ + +libs-y += arch/s390/lib/ +drivers-y += drivers/s390/ + +# must be linked after kernel +drivers-$(CONFIG_OPROFILE) += arch/s390/oprofile/ + +boot := arch/s390/boot + +all: image bzImage + +install: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ + +image bzImage: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +zfcpdump: + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +vdso_install: +ifeq ($(CONFIG_64BIT),y) + $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ +endif + $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@ + +archclean: + $(Q)$(MAKE) $(clean)=$(boot) + +# Don't use tabs in echo arguments +define archhelp + echo '* image - Kernel image for IPL ($(boot)/image)' + echo '* bzImage - Compressed kernel image for IPL ($(boot)/bzImage)' +endef diff --git a/arch/s390/appldata/Makefile b/arch/s390/appldata/Makefile new file mode 100644 index 00000000..99f1cf07 --- /dev/null +++ b/arch/s390/appldata/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the Linux - z/VM Monitor Stream. 
+# + +obj-$(CONFIG_APPLDATA_BASE) += appldata_base.o +obj-$(CONFIG_APPLDATA_MEM) += appldata_mem.o +obj-$(CONFIG_APPLDATA_OS) += appldata_os.o +obj-$(CONFIG_APPLDATA_NET_SUM) += appldata_net_sum.o diff --git a/arch/s390/appldata/appldata.h b/arch/s390/appldata/appldata.h new file mode 100644 index 00000000..f0b23fc7 --- /dev/null +++ b/arch/s390/appldata/appldata.h @@ -0,0 +1,49 @@ +/* + * arch/s390/appldata/appldata.h + * + * Definitions and interface for Linux - z/VM Monitor Stream. + * + * Copyright IBM Corp. 2003, 2008 + * + * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#define APPLDATA_MAX_REC_SIZE 4024 /* Maximum size of the */ + /* data buffer */ +#define APPLDATA_MAX_PROCS 100 + +#define APPLDATA_PROC_NAME_LENGTH 16 /* Max. length of /proc name */ + +#define APPLDATA_RECORD_MEM_ID 0x01 /* IDs to identify the */ +#define APPLDATA_RECORD_OS_ID 0x02 /* individual records, */ +#define APPLDATA_RECORD_NET_SUM_ID 0x03 /* must be < 256 ! */ +#define APPLDATA_RECORD_PROC_ID 0x04 + +#define CTL_APPLDATA_TIMER 2121 /* sysctl IDs, must be unique */ +#define CTL_APPLDATA_INTERVAL 2122 +#define CTL_APPLDATA_MEM 2123 +#define CTL_APPLDATA_OS 2124 +#define CTL_APPLDATA_NET_SUM 2125 +#define CTL_APPLDATA_PROC 2126 + +struct appldata_ops { + struct list_head list; + struct ctl_table_header *sysctl_header; + struct ctl_table *ctl_table; + int active; /* monitoring status */ + + /* fill in from here */ + char name[APPLDATA_PROC_NAME_LENGTH]; /* name of /proc fs node */ + unsigned char record_nr; /* Record Nr. 
for Product ID */ + void (*callback)(void *data); /* callback function */ + void *data; /* record data */ + unsigned int size; /* size of record */ + struct module *owner; /* THIS_MODULE */ + char mod_lvl[2]; /* modification level, EBCDIC */ +}; + +extern int appldata_register_ops(struct appldata_ops *ops); +extern void appldata_unregister_ops(struct appldata_ops *ops); +extern int appldata_diag(char record_nr, u16 function, unsigned long buffer, + u16 length, char *mod_lvl); + diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c new file mode 100644 index 00000000..24bff4f1 --- /dev/null +++ b/arch/s390/appldata/appldata_base.c @@ -0,0 +1,673 @@ +/* + * arch/s390/appldata/appldata_base.c + * + * Base infrastructure for Linux-z/VM Monitor Stream, Stage 1. + * Exports appldata_register_ops() and appldata_unregister_ops() for the + * data gathering modules. + * + * Copyright IBM Corp. 2003, 2009 + * + * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#define KMSG_COMPONENT "appldata" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/proc_fs.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/pagemap.h> +#include <linux/sysctl.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/workqueue.h> +#include <linux/suspend.h> +#include <linux/platform_device.h> +#include <asm/appldata.h> +#include <asm/timer.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/smp.h> + +#include "appldata.h" + + +#define APPLDATA_CPU_INTERVAL 10000 /* default (CPU) time for + sampling interval in + milliseconds */ + +#define TOD_MICRO 0x01000 /* nr. 
of TOD clock units + for 1 microsecond */ + +static struct platform_device *appldata_pdev; + +/* + * /proc entries (sysctl) + */ +static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; +static int appldata_timer_handler(ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +static int appldata_interval_handler(ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); + +static struct ctl_table_header *appldata_sysctl_header; +static struct ctl_table appldata_table[] = { + { + .procname = "timer", + .mode = S_IRUGO | S_IWUSR, + .proc_handler = appldata_timer_handler, + }, + { + .procname = "interval", + .mode = S_IRUGO | S_IWUSR, + .proc_handler = appldata_interval_handler, + }, + { }, +}; + +static struct ctl_table appldata_dir_table[] = { + { + .procname = appldata_proc_name, + .maxlen = 0, + .mode = S_IRUGO | S_IXUGO, + .child = appldata_table, + }, + { }, +}; + +/* + * Timer + */ +static DEFINE_PER_CPU(struct vtimer_list, appldata_timer); +static atomic_t appldata_expire_count = ATOMIC_INIT(0); + +static DEFINE_SPINLOCK(appldata_timer_lock); +static int appldata_interval = APPLDATA_CPU_INTERVAL; +static int appldata_timer_active; +static int appldata_timer_suspended = 0; + +/* + * Work queue + */ +static struct workqueue_struct *appldata_wq; +static void appldata_work_fn(struct work_struct *work); +static DECLARE_WORK(appldata_work, appldata_work_fn); + + +/* + * Ops list + */ +static DEFINE_MUTEX(appldata_ops_mutex); +static LIST_HEAD(appldata_ops_list); + + +/*************************** timer, work, DIAG *******************************/ +/* + * appldata_timer_function() + * + * schedule work and reschedule timer + */ +static void appldata_timer_function(unsigned long data) +{ + if (atomic_dec_and_test(&appldata_expire_count)) { + atomic_set(&appldata_expire_count, num_online_cpus()); + queue_work(appldata_wq, (struct work_struct *) data); + } +} + +/* + * appldata_work_fn() + * + * call 
data gathering function for each (active) module + */ +static void appldata_work_fn(struct work_struct *work) +{ + struct list_head *lh; + struct appldata_ops *ops; + + get_online_cpus(); + mutex_lock(&appldata_ops_mutex); + list_for_each(lh, &appldata_ops_list) { + ops = list_entry(lh, struct appldata_ops, list); + if (ops->active == 1) { + ops->callback(ops->data); + } + } + mutex_unlock(&appldata_ops_mutex); + put_online_cpus(); +} + +/* + * appldata_diag() + * + * prepare parameter list, issue DIAG 0xDC + */ +int appldata_diag(char record_nr, u16 function, unsigned long buffer, + u16 length, char *mod_lvl) +{ + struct appldata_product_id id = { + .prod_nr = {0xD3, 0xC9, 0xD5, 0xE4, + 0xE7, 0xD2, 0xD9}, /* "LINUXKR" */ + .prod_fn = 0xD5D3, /* "NL" */ + .version_nr = 0xF2F6, /* "26" */ + .release_nr = 0xF0F1, /* "01" */ + }; + + id.record_nr = record_nr; + id.mod_lvl = (mod_lvl[0]) << 8 | mod_lvl[1]; + return appldata_asm(&id, function, (void *) buffer, length); +} +/************************ timer, work, DIAG <END> ****************************/ + + +/****************************** /proc stuff **********************************/ + +/* + * appldata_mod_vtimer_wrap() + * + * wrapper function for mod_virt_timer(), because smp_call_function_single() + * accepts only one parameter. + */ +static void __appldata_mod_vtimer_wrap(void *p) { + struct { + struct vtimer_list *timer; + u64 expires; + } *args = p; + mod_virt_timer_periodic(args->timer, args->expires); +} + +#define APPLDATA_ADD_TIMER 0 +#define APPLDATA_DEL_TIMER 1 +#define APPLDATA_MOD_TIMER 2 + +/* + * __appldata_vtimer_setup() + * + * Add, delete or modify virtual timers on all online cpus. + * The caller needs to get the appldata_timer_lock spinlock. 
+ */ +static void +__appldata_vtimer_setup(int cmd) +{ + u64 per_cpu_interval; + int i; + + switch (cmd) { + case APPLDATA_ADD_TIMER: + if (appldata_timer_active) + break; + per_cpu_interval = (u64) (appldata_interval*1000 / + num_online_cpus()) * TOD_MICRO; + for_each_online_cpu(i) { + per_cpu(appldata_timer, i).expires = per_cpu_interval; + smp_call_function_single(i, add_virt_timer_periodic, + &per_cpu(appldata_timer, i), + 1); + } + appldata_timer_active = 1; + break; + case APPLDATA_DEL_TIMER: + for_each_online_cpu(i) + del_virt_timer(&per_cpu(appldata_timer, i)); + if (!appldata_timer_active) + break; + appldata_timer_active = 0; + atomic_set(&appldata_expire_count, num_online_cpus()); + break; + case APPLDATA_MOD_TIMER: + per_cpu_interval = (u64) (appldata_interval*1000 / + num_online_cpus()) * TOD_MICRO; + if (!appldata_timer_active) + break; + for_each_online_cpu(i) { + struct { + struct vtimer_list *timer; + u64 expires; + } args; + args.timer = &per_cpu(appldata_timer, i); + args.expires = per_cpu_interval; + smp_call_function_single(i, __appldata_mod_vtimer_wrap, + &args, 1); + } + } +} + +/* + * appldata_timer_handler() + * + * Start/Stop timer, show status of timer (0 = not active, 1 = active) + */ +static int +appldata_timer_handler(ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int len; + char buf[2]; + + if (!*lenp || *ppos) { + *lenp = 0; + return 0; + } + if (!write) { + len = sprintf(buf, appldata_timer_active ? "1\n" : "0\n"); + if (len > *lenp) + len = *lenp; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + goto out; + } + len = *lenp; + if (copy_from_user(buf, buffer, len > sizeof(buf) ? 
sizeof(buf) : len)) + return -EFAULT; + get_online_cpus(); + spin_lock(&appldata_timer_lock); + if (buf[0] == '1') + __appldata_vtimer_setup(APPLDATA_ADD_TIMER); + else if (buf[0] == '0') + __appldata_vtimer_setup(APPLDATA_DEL_TIMER); + spin_unlock(&appldata_timer_lock); + put_online_cpus(); +out: + *lenp = len; + *ppos += len; + return 0; +} + +/* + * appldata_interval_handler() + * + * Set (CPU) timer interval for collection of data (in milliseconds), show + * current timer interval. + */ +static int +appldata_interval_handler(ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int len, interval; + char buf[16]; + + if (!*lenp || *ppos) { + *lenp = 0; + return 0; + } + if (!write) { + len = sprintf(buf, "%i\n", appldata_interval); + if (len > *lenp) + len = *lenp; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + goto out; + } + len = *lenp; + if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) { + return -EFAULT; + } + interval = 0; + sscanf(buf, "%i", &interval); + if (interval <= 0) + return -EINVAL; + + get_online_cpus(); + spin_lock(&appldata_timer_lock); + appldata_interval = interval; + __appldata_vtimer_setup(APPLDATA_MOD_TIMER); + spin_unlock(&appldata_timer_lock); + put_online_cpus(); +out: + *lenp = len; + *ppos += len; + return 0; +} + +/* + * appldata_generic_handler() + * + * Generic start/stop monitoring and DIAG, show status of + * monitoring (0 = not in process, 1 = in process) + */ +static int +appldata_generic_handler(ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct appldata_ops *ops = NULL, *tmp_ops; + int rc, len, found; + char buf[2]; + struct list_head *lh; + + found = 0; + mutex_lock(&appldata_ops_mutex); + list_for_each(lh, &appldata_ops_list) { + tmp_ops = list_entry(lh, struct appldata_ops, list); + if (&tmp_ops->ctl_table[2] == ctl) { + found = 1; + } + } + if (!found) { + mutex_unlock(&appldata_ops_mutex); + return -ENODEV; + } + ops = 
ctl->data; + if (!try_module_get(ops->owner)) { // protect this function + mutex_unlock(&appldata_ops_mutex); + return -ENODEV; + } + mutex_unlock(&appldata_ops_mutex); + + if (!*lenp || *ppos) { + *lenp = 0; + module_put(ops->owner); + return 0; + } + if (!write) { + len = sprintf(buf, ops->active ? "1\n" : "0\n"); + if (len > *lenp) + len = *lenp; + if (copy_to_user(buffer, buf, len)) { + module_put(ops->owner); + return -EFAULT; + } + goto out; + } + len = *lenp; + if (copy_from_user(buf, buffer, + len > sizeof(buf) ? sizeof(buf) : len)) { + module_put(ops->owner); + return -EFAULT; + } + + mutex_lock(&appldata_ops_mutex); + if ((buf[0] == '1') && (ops->active == 0)) { + // protect work queue callback + if (!try_module_get(ops->owner)) { + mutex_unlock(&appldata_ops_mutex); + module_put(ops->owner); + return -ENODEV; + } + ops->callback(ops->data); // init record + rc = appldata_diag(ops->record_nr, + APPLDATA_START_INTERVAL_REC, + (unsigned long) ops->data, ops->size, + ops->mod_lvl); + if (rc != 0) { + pr_err("Starting the data collection for %s " + "failed with rc=%d\n", ops->name, rc); + module_put(ops->owner); + } else + ops->active = 1; + } else if ((buf[0] == '0') && (ops->active == 1)) { + ops->active = 0; + rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, + (unsigned long) ops->data, ops->size, + ops->mod_lvl); + if (rc != 0) + pr_err("Stopping the data collection for %s " + "failed with rc=%d\n", ops->name, rc); + module_put(ops->owner); + } + mutex_unlock(&appldata_ops_mutex); +out: + *lenp = len; + *ppos += len; + module_put(ops->owner); + return 0; +} + +/*************************** /proc stuff <END> *******************************/ + + +/************************* module-ops management *****************************/ +/* + * appldata_register_ops() + * + * update ops list, register /proc/sys entries + */ +int appldata_register_ops(struct appldata_ops *ops) +{ + if (ops->size > APPLDATA_MAX_REC_SIZE) + return -EINVAL; + + ops->ctl_table = 
kzalloc(4 * sizeof(struct ctl_table), GFP_KERNEL); + if (!ops->ctl_table) + return -ENOMEM; + + mutex_lock(&appldata_ops_mutex); + list_add(&ops->list, &appldata_ops_list); + mutex_unlock(&appldata_ops_mutex); + + ops->ctl_table[0].procname = appldata_proc_name; + ops->ctl_table[0].maxlen = 0; + ops->ctl_table[0].mode = S_IRUGO | S_IXUGO; + ops->ctl_table[0].child = &ops->ctl_table[2]; + + ops->ctl_table[2].procname = ops->name; + ops->ctl_table[2].mode = S_IRUGO | S_IWUSR; + ops->ctl_table[2].proc_handler = appldata_generic_handler; + ops->ctl_table[2].data = ops; + + ops->sysctl_header = register_sysctl_table(ops->ctl_table); + if (!ops->sysctl_header) + goto out; + return 0; +out: + mutex_lock(&appldata_ops_mutex); + list_del(&ops->list); + mutex_unlock(&appldata_ops_mutex); + kfree(ops->ctl_table); + return -ENOMEM; +} + +/* + * appldata_unregister_ops() + * + * update ops list, unregister /proc entries, stop DIAG if necessary + */ +void appldata_unregister_ops(struct appldata_ops *ops) +{ + mutex_lock(&appldata_ops_mutex); + list_del(&ops->list); + mutex_unlock(&appldata_ops_mutex); + unregister_sysctl_table(ops->sysctl_header); + kfree(ops->ctl_table); +} +/********************** module-ops management <END> **************************/ + + +/**************************** suspend / resume *******************************/ +static int appldata_freeze(struct device *dev) +{ + struct appldata_ops *ops; + int rc; + struct list_head *lh; + + get_online_cpus(); + spin_lock(&appldata_timer_lock); + if (appldata_timer_active) { + __appldata_vtimer_setup(APPLDATA_DEL_TIMER); + appldata_timer_suspended = 1; + } + spin_unlock(&appldata_timer_lock); + put_online_cpus(); + + mutex_lock(&appldata_ops_mutex); + list_for_each(lh, &appldata_ops_list) { + ops = list_entry(lh, struct appldata_ops, list); + if (ops->active == 1) { + rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, + (unsigned long) ops->data, ops->size, + ops->mod_lvl); + if (rc != 0) + pr_err("Stopping the 
data collection for %s " + "failed with rc=%d\n", ops->name, rc); + } + } + mutex_unlock(&appldata_ops_mutex); + return 0; +} + +static int appldata_restore(struct device *dev) +{ + struct appldata_ops *ops; + int rc; + struct list_head *lh; + + get_online_cpus(); + spin_lock(&appldata_timer_lock); + if (appldata_timer_suspended) { + __appldata_vtimer_setup(APPLDATA_ADD_TIMER); + appldata_timer_suspended = 0; + } + spin_unlock(&appldata_timer_lock); + put_online_cpus(); + + mutex_lock(&appldata_ops_mutex); + list_for_each(lh, &appldata_ops_list) { + ops = list_entry(lh, struct appldata_ops, list); + if (ops->active == 1) { + ops->callback(ops->data); // init record + rc = appldata_diag(ops->record_nr, + APPLDATA_START_INTERVAL_REC, + (unsigned long) ops->data, ops->size, + ops->mod_lvl); + if (rc != 0) { + pr_err("Starting the data collection for %s " + "failed with rc=%d\n", ops->name, rc); + } + } + } + mutex_unlock(&appldata_ops_mutex); + return 0; +} + +static int appldata_thaw(struct device *dev) +{ + return appldata_restore(dev); +} + +static const struct dev_pm_ops appldata_pm_ops = { + .freeze = appldata_freeze, + .thaw = appldata_thaw, + .restore = appldata_restore, +}; + +static struct platform_driver appldata_pdrv = { + .driver = { + .name = "appldata", + .owner = THIS_MODULE, + .pm = &appldata_pm_ops, + }, +}; +/************************* suspend / resume <END> ****************************/ + + +/******************************* init / exit *********************************/ + +static void __cpuinit appldata_online_cpu(int cpu) +{ + init_virt_timer(&per_cpu(appldata_timer, cpu)); + per_cpu(appldata_timer, cpu).function = appldata_timer_function; + per_cpu(appldata_timer, cpu).data = (unsigned long) + &appldata_work; + atomic_inc(&appldata_expire_count); + spin_lock(&appldata_timer_lock); + __appldata_vtimer_setup(APPLDATA_MOD_TIMER); + spin_unlock(&appldata_timer_lock); +} + +static void __cpuinit appldata_offline_cpu(int cpu) +{ + 
del_virt_timer(&per_cpu(appldata_timer, cpu)); + if (atomic_dec_and_test(&appldata_expire_count)) { + atomic_set(&appldata_expire_count, num_online_cpus()); + queue_work(appldata_wq, &appldata_work); + } + spin_lock(&appldata_timer_lock); + __appldata_vtimer_setup(APPLDATA_MOD_TIMER); + spin_unlock(&appldata_timer_lock); +} + +static int __cpuinit appldata_cpu_notify(struct notifier_block *self, + unsigned long action, + void *hcpu) +{ + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + appldata_online_cpu((long) hcpu); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + appldata_offline_cpu((long) hcpu); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata appldata_nb = { + .notifier_call = appldata_cpu_notify, +}; + +/* + * appldata_init() + * + * init timer, register /proc entries + */ +static int __init appldata_init(void) +{ + int i, rc; + + rc = platform_driver_register(&appldata_pdrv); + if (rc) + return rc; + + appldata_pdev = platform_device_register_simple("appldata", -1, NULL, + 0); + if (IS_ERR(appldata_pdev)) { + rc = PTR_ERR(appldata_pdev); + goto out_driver; + } + appldata_wq = create_singlethread_workqueue("appldata"); + if (!appldata_wq) { + rc = -ENOMEM; + goto out_device; + } + + get_online_cpus(); + for_each_online_cpu(i) + appldata_online_cpu(i); + put_online_cpus(); + + /* Register cpu hotplug notifier */ + register_hotcpu_notifier(&appldata_nb); + + appldata_sysctl_header = register_sysctl_table(appldata_dir_table); + return 0; + +out_device: + platform_device_unregister(appldata_pdev); +out_driver: + platform_driver_unregister(&appldata_pdrv); + return rc; +} + +__initcall(appldata_init); + +/**************************** init / exit <END> ******************************/ + +EXPORT_SYMBOL_GPL(appldata_register_ops); +EXPORT_SYMBOL_GPL(appldata_unregister_ops); +EXPORT_SYMBOL_GPL(appldata_diag); + +#ifdef CONFIG_SWAP +EXPORT_SYMBOL_GPL(si_swapinfo); +#endif 
+EXPORT_SYMBOL_GPL(nr_threads); +EXPORT_SYMBOL_GPL(nr_running); +EXPORT_SYMBOL_GPL(nr_iowait); diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c new file mode 100644 index 00000000..f7d3dc55 --- /dev/null +++ b/arch/s390/appldata/appldata_mem.c @@ -0,0 +1,155 @@ +/* + * arch/s390/appldata/appldata_mem.c + * + * Data gathering module for Linux-VM Monitor Stream, Stage 1. + * Collects data related to memory management. + * + * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * + * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/kernel_stat.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <asm/io.h> + +#include "appldata.h" + + +#define P2K(x) ((x) << (PAGE_SHIFT - 10)) /* Converts #Pages to KB */ + +/* + * Memory data + * + * This is accessed as binary data by z/VM. If changes to it can't be avoided, + * the structure version (product ID, see appldata_base.c) needs to be changed + * as well and all documentation and z/VM applications using it must be + * updated. + * + * The record layout is documented in the Linux for zSeries Device Drivers + * book: + * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml + */ +static struct appldata_mem_data { + u64 timestamp; + u32 sync_count_1; /* after VM collected the record data, */ + u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the + same. 
If not, the record has been updated on + the Linux side while VM was collecting the + (possibly corrupt) data */ + + u64 pgpgin; /* data read from disk */ + u64 pgpgout; /* data written to disk */ + u64 pswpin; /* pages swapped in */ + u64 pswpout; /* pages swapped out */ + + u64 sharedram; /* sharedram is currently set to 0 */ + + u64 totalram; /* total main memory size */ + u64 freeram; /* free main memory size */ + u64 totalhigh; /* total high memory size */ + u64 freehigh; /* free high memory size */ + + u64 bufferram; /* memory reserved for buffers, free cache */ + u64 cached; /* size of (used) cache, w/o buffers */ + u64 totalswap; /* total swap space size */ + u64 freeswap; /* free swap space */ + +// New in 2.6 --> + u64 pgalloc; /* page allocations */ + u64 pgfault; /* page faults (major+minor) */ + u64 pgmajfault; /* page faults (major only) */ +// <-- New in 2.6 + +} __attribute__((packed)) appldata_mem_data; + + +/* + * appldata_get_mem_data() + * + * gather memory data + */ +static void appldata_get_mem_data(void *data) +{ + /* + * don't put large structures on the stack, we are + * serialized through the appldata_ops_mutex and can use static + */ + static struct sysinfo val; + unsigned long ev[NR_VM_EVENT_ITEMS]; + struct appldata_mem_data *mem_data; + + mem_data = data; + mem_data->sync_count_1++; + + all_vm_events(ev); + mem_data->pgpgin = ev[PGPGIN] >> 1; + mem_data->pgpgout = ev[PGPGOUT] >> 1; + mem_data->pswpin = ev[PSWPIN]; + mem_data->pswpout = ev[PSWPOUT]; + mem_data->pgalloc = ev[PGALLOC_NORMAL]; + mem_data->pgalloc += ev[PGALLOC_DMA]; + mem_data->pgfault = ev[PGFAULT]; + mem_data->pgmajfault = ev[PGMAJFAULT]; + + si_meminfo(&val); + mem_data->sharedram = val.sharedram; + mem_data->totalram = P2K(val.totalram); + mem_data->freeram = P2K(val.freeram); + mem_data->totalhigh = P2K(val.totalhigh); + mem_data->freehigh = P2K(val.freehigh); + mem_data->bufferram = P2K(val.bufferram); + mem_data->cached = P2K(global_page_state(NR_FILE_PAGES) + - 
val.bufferram); + + si_swapinfo(&val); + mem_data->totalswap = P2K(val.totalswap); + mem_data->freeswap = P2K(val.freeswap); + + mem_data->timestamp = get_clock(); + mem_data->sync_count_2++; +} + + +static struct appldata_ops ops = { + .name = "mem", + .record_nr = APPLDATA_RECORD_MEM_ID, + .size = sizeof(struct appldata_mem_data), + .callback = &appldata_get_mem_data, + .data = &appldata_mem_data, + .owner = THIS_MODULE, + .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ +}; + + +/* + * appldata_mem_init() + * + * init_data, register ops + */ +static int __init appldata_mem_init(void) +{ + return appldata_register_ops(&ops); +} + +/* + * appldata_mem_exit() + * + * unregister ops + */ +static void __exit appldata_mem_exit(void) +{ + appldata_unregister_ops(&ops); +} + + +module_init(appldata_mem_init); +module_exit(appldata_mem_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gerald Schaefer"); +MODULE_DESCRIPTION("Linux-VM Monitor Stream, MEMORY statistics"); diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c new file mode 100644 index 00000000..5da7c562 --- /dev/null +++ b/arch/s390/appldata/appldata_net_sum.c @@ -0,0 +1,158 @@ +/* + * arch/s390/appldata/appldata_net_sum.c + * + * Data gathering module for Linux-VM Monitor Stream, Stage 1. + * Collects accumulated network statistics (Packets received/transmitted, + * dropped, errors, ...). + * + * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * + * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/kernel_stat.h> +#include <linux/netdevice.h> +#include <net/net_namespace.h> + +#include "appldata.h" + + +/* + * Network data + * + * This is accessed as binary data by z/VM. 
If changes to it can't be avoided, + * the structure version (product ID, see appldata_base.c) needs to be changed + * as well and all documentation and z/VM applications using it must be updated. + * + * The record layout is documented in the Linux for zSeries Device Drivers + * book: + * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml + */ +static struct appldata_net_sum_data { + u64 timestamp; + u32 sync_count_1; /* after VM collected the record data, */ + u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the + same. If not, the record has been updated on + the Linux side while VM was collecting the + (possibly corrupt) data */ + + u32 nr_interfaces; /* nr. of network interfaces being monitored */ + + u32 padding; /* next value is 64-bit aligned, so these */ + /* 4 byte would be padded out by compiler */ + + u64 rx_packets; /* total packets received */ + u64 tx_packets; /* total packets transmitted */ + u64 rx_bytes; /* total bytes received */ + u64 tx_bytes; /* total bytes transmitted */ + u64 rx_errors; /* bad packets received */ + u64 tx_errors; /* packet transmit problems */ + u64 rx_dropped; /* no space in linux buffers */ + u64 tx_dropped; /* no space available in linux */ + u64 collisions; /* collisions while transmitting */ +} __attribute__((packed)) appldata_net_sum_data; + + +/* + * appldata_get_net_sum_data() + * + * gather accumulated network statistics + */ +static void appldata_get_net_sum_data(void *data) +{ + int i; + struct appldata_net_sum_data *net_data; + struct net_device *dev; + unsigned long rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors, + tx_errors, rx_dropped, tx_dropped, collisions; + + net_data = data; + net_data->sync_count_1++; + + i = 0; + rx_packets = 0; + tx_packets = 0; + rx_bytes = 0; + tx_bytes = 0; + rx_errors = 0; + tx_errors = 0; + rx_dropped = 0; + tx_dropped = 0; + collisions = 0; + + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { + const struct rtnl_link_stats64 
*stats; + struct rtnl_link_stats64 temp; + + stats = dev_get_stats(dev, &temp); + rx_packets += stats->rx_packets; + tx_packets += stats->tx_packets; + rx_bytes += stats->rx_bytes; + tx_bytes += stats->tx_bytes; + rx_errors += stats->rx_errors; + tx_errors += stats->tx_errors; + rx_dropped += stats->rx_dropped; + tx_dropped += stats->tx_dropped; + collisions += stats->collisions; + i++; + } + rcu_read_unlock(); + + net_data->nr_interfaces = i; + net_data->rx_packets = rx_packets; + net_data->tx_packets = tx_packets; + net_data->rx_bytes = rx_bytes; + net_data->tx_bytes = tx_bytes; + net_data->rx_errors = rx_errors; + net_data->tx_errors = tx_errors; + net_data->rx_dropped = rx_dropped; + net_data->tx_dropped = tx_dropped; + net_data->collisions = collisions; + + net_data->timestamp = get_clock(); + net_data->sync_count_2++; +} + + +static struct appldata_ops ops = { + .name = "net_sum", + .record_nr = APPLDATA_RECORD_NET_SUM_ID, + .size = sizeof(struct appldata_net_sum_data), + .callback = &appldata_get_net_sum_data, + .data = &appldata_net_sum_data, + .owner = THIS_MODULE, + .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ +}; + + +/* + * appldata_net_init() + * + * init data, register ops + */ +static int __init appldata_net_init(void) +{ + return appldata_register_ops(&ops); +} + +/* + * appldata_net_exit() + * + * unregister ops + */ +static void __exit appldata_net_exit(void) +{ + appldata_unregister_ops(&ops); +} + + +module_init(appldata_net_init); +module_exit(appldata_net_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gerald Schaefer"); +MODULE_DESCRIPTION("Linux-VM Monitor Stream, accumulated network statistics"); diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c new file mode 100644 index 00000000..4de031d6 --- /dev/null +++ b/arch/s390/appldata/appldata_os.c @@ -0,0 +1,220 @@ +/* + * arch/s390/appldata/appldata_os.c + * + * Data gathering module for Linux-VM Monitor Stream, Stage 1. + * Collects misc. 
OS related data (CPU utilization, running processes). + * + * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * + * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#define KMSG_COMPONENT "appldata" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/kernel_stat.h> +#include <linux/netdevice.h> +#include <linux/sched.h> +#include <asm/appldata.h> +#include <asm/smp.h> + +#include "appldata.h" + + +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + +/* + * OS data + * + * This is accessed as binary data by z/VM. If changes to it can't be avoided, + * the structure version (product ID, see appldata_base.c) needs to be changed + * as well and all documentation and z/VM applications using it must be + * updated. + * + * The record layout is documented in the Linux for zSeries Device Drivers + * book: + * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml + */ +struct appldata_os_per_cpu { + u32 per_cpu_user; /* timer ticks spent in user mode */ + u32 per_cpu_nice; /* ... spent with modified priority */ + u32 per_cpu_system; /* ... spent in kernel mode */ + u32 per_cpu_idle; /* ... spent in idle mode */ + + /* New in 2.6 */ + u32 per_cpu_irq; /* ... spent in interrupts */ + u32 per_cpu_softirq; /* ... spent in softirqs */ + u32 per_cpu_iowait; /* ... spent while waiting for I/O */ + + /* New in modification level 01 */ + u32 per_cpu_steal; /* ... stolen by hypervisor */ + u32 cpu_id; /* number of this CPU */ +} __attribute__((packed)); + +struct appldata_os_data { + u64 timestamp; + u32 sync_count_1; /* after VM collected the record data, */ + u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the + same. 
If not, the record has been updated on + the Linux side while VM was collecting the + (possibly corrupt) data */ + + u32 nr_cpus; /* number of (virtual) CPUs */ + u32 per_cpu_size; /* size of the per-cpu data struct */ + u32 cpu_offset; /* offset of the first per-cpu data struct */ + + u32 nr_running; /* number of runnable threads */ + u32 nr_threads; /* number of threads */ + u32 avenrun[3]; /* average nr. of running processes during */ + /* the last 1, 5 and 15 minutes */ + + /* New in 2.6 */ + u32 nr_iowait; /* number of blocked threads + (waiting for I/O) */ + + /* per cpu data */ + struct appldata_os_per_cpu os_cpu[0]; +} __attribute__((packed)); + +static struct appldata_os_data *appldata_os_data; + +static struct appldata_ops ops = { + .name = "os", + .record_nr = APPLDATA_RECORD_OS_ID, + .owner = THIS_MODULE, + .mod_lvl = {0xF0, 0xF1}, /* EBCDIC "01" */ +}; + + +/* + * appldata_get_os_data() + * + * gather OS data + */ +static void appldata_get_os_data(void *data) +{ + int i, j, rc; + struct appldata_os_data *os_data; + unsigned int new_size; + + os_data = data; + os_data->sync_count_1++; + + os_data->nr_threads = nr_threads; + os_data->nr_running = nr_running(); + os_data->nr_iowait = nr_iowait(); + os_data->avenrun[0] = avenrun[0] + (FIXED_1/200); + os_data->avenrun[1] = avenrun[1] + (FIXED_1/200); + os_data->avenrun[2] = avenrun[2] + (FIXED_1/200); + + j = 0; + for_each_online_cpu(i) { + os_data->os_cpu[j].per_cpu_user = + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]); + os_data->os_cpu[j].per_cpu_nice = + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]); + os_data->os_cpu[j].per_cpu_system = + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]); + os_data->os_cpu[j].per_cpu_idle = + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]); + os_data->os_cpu[j].per_cpu_irq = + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]); + os_data->os_cpu[j].per_cpu_softirq = + 
cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]); + os_data->os_cpu[j].per_cpu_iowait = + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]); + os_data->os_cpu[j].per_cpu_steal = + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]); + os_data->os_cpu[j].cpu_id = i; + j++; + } + + os_data->nr_cpus = j; + + new_size = sizeof(struct appldata_os_data) + + (os_data->nr_cpus * sizeof(struct appldata_os_per_cpu)); + if (ops.size != new_size) { + if (ops.active) { + rc = appldata_diag(APPLDATA_RECORD_OS_ID, + APPLDATA_START_INTERVAL_REC, + (unsigned long) ops.data, new_size, + ops.mod_lvl); + if (rc != 0) + pr_err("Starting a new OS data collection " + "failed with rc=%d\n", rc); + + rc = appldata_diag(APPLDATA_RECORD_OS_ID, + APPLDATA_STOP_REC, + (unsigned long) ops.data, ops.size, + ops.mod_lvl); + if (rc != 0) + pr_err("Stopping a faulty OS data " + "collection failed with rc=%d\n", rc); + } + ops.size = new_size; + } + os_data->timestamp = get_clock(); + os_data->sync_count_2++; +} + + +/* + * appldata_os_init() + * + * init data, register ops + */ +static int __init appldata_os_init(void) +{ + int rc, max_size; + + max_size = sizeof(struct appldata_os_data) + + (NR_CPUS * sizeof(struct appldata_os_per_cpu)); + if (max_size > APPLDATA_MAX_REC_SIZE) { + pr_err("Maximum OS record size %i exceeds the maximum " + "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE); + rc = -ENOMEM; + goto out; + } + + appldata_os_data = kzalloc(max_size, GFP_KERNEL | GFP_DMA); + if (appldata_os_data == NULL) { + rc = -ENOMEM; + goto out; + } + + appldata_os_data->per_cpu_size = sizeof(struct appldata_os_per_cpu); + appldata_os_data->cpu_offset = offsetof(struct appldata_os_data, + os_cpu); + + ops.data = appldata_os_data; + ops.callback = &appldata_get_os_data; + rc = appldata_register_ops(&ops); + if (rc != 0) + kfree(appldata_os_data); +out: + return rc; +} + +/* + * appldata_os_exit() + * + * unregister ops + */ +static void __exit appldata_os_exit(void) +{ 
+ appldata_unregister_ops(&ops); + kfree(appldata_os_data); +} + + +module_init(appldata_os_init); +module_exit(appldata_os_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Gerald Schaefer"); +MODULE_DESCRIPTION("Linux-VM Monitor Stream, OS statistics"); diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile new file mode 100644 index 00000000..f2737a00 --- /dev/null +++ b/arch/s390/boot/Makefile @@ -0,0 +1,26 @@ +# +# Makefile for the linux s390-specific parts of the memory manager. +# + +COMPILE_VERSION := __linux_compile_version_id__`hostname | \ + tr -c '[0-9A-Za-z]' '_'`__`date | \ + tr -c '[0-9A-Za-z]' '_'`_t + +ccflags-y := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I. + +targets := image +targets += bzImage +subdir- := compressed + +$(obj)/image: vmlinux FORCE + $(call if_changed,objcopy) + +$(obj)/bzImage: $(obj)/compressed/vmlinux FORCE + $(call if_changed,objcopy) + +$(obj)/compressed/vmlinux: FORCE + $(Q)$(MAKE) $(build)=$(obj)/compressed $@ + +install: $(CONFIGURE) $(obj)/image + sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/image \ + System.map "$(INSTALL_PATH)" diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile new file mode 100644 index 00000000..10e22c4e --- /dev/null +++ b/arch/s390/boot/compressed/Makefile @@ -0,0 +1,67 @@ +# +# linux/arch/s390/boot/compressed/Makefile +# +# create a compressed vmlinux image from the original vmlinux +# + +BITS := $(if $(CONFIG_64BIT),64,31) + +targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \ + vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo misc.o piggy.o \ + sizes.h head$(BITS).o + +KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 +KBUILD_CFLAGS += $(cflags-y) +KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) +KBUILD_CFLAGS += $(call cc-option,-ffreestanding) + +GCOV_PROFILE := n + +OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o sclp.o ebcdic.o) +OBJECTS += $(obj)/head$(BITS).o $(obj)/misc.o 
$(obj)/piggy.o + +LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T +$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) + $(call if_changed,ld) + @: + +sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 0x\1/p' + +quiet_cmd_sizes = GEN $@ + cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@ + +$(obj)/sizes.h: vmlinux + $(call if_changed,sizes) + +AFLAGS_head$(BITS).o += -I$(obj) +$(obj)/head$(BITS).o: $(obj)/sizes.h + +CFLAGS_misc.o += -I$(obj) +$(obj)/misc.o: $(obj)/sizes.h + +OBJCOPYFLAGS_vmlinux.bin := -R .comment -S +$(obj)/vmlinux.bin: vmlinux + $(call if_changed,objcopy) + +vmlinux.bin.all-y := $(obj)/vmlinux.bin + +suffix-$(CONFIG_KERNEL_GZIP) := gz +suffix-$(CONFIG_KERNEL_BZIP2) := bz2 +suffix-$(CONFIG_KERNEL_LZMA) := lzma +suffix-$(CONFIG_KERNEL_LZO) := lzo +suffix-$(CONFIG_KERNEL_XZ) := xz + +$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) + $(call if_changed,gzip) +$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) + $(call if_changed,bzip2) +$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) + $(call if_changed,lzma) +$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) + $(call if_changed,lzo) +$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) + $(call if_changed,xzkern) + +LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T +$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) + $(call if_changed,ld) diff --git a/arch/s390/boot/compressed/head31.S b/arch/s390/boot/compressed/head31.S new file mode 100644 index 00000000..e8c9e18b --- /dev/null +++ b/arch/s390/boot/compressed/head31.S @@ -0,0 +1,51 @@ +/* + * Startup glue code to uncompress the kernel + * + * Copyright IBM Corp. 
2010 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page.h> +#include "sizes.h" + +__HEAD +ENTRY(startup_continue) + basr %r13,0 # get base +.LPG1: + # setup stack + l %r15,.Lstack-.LPG1(%r13) + ahi %r15,-96 + l %r1,.Ldecompress-.LPG1(%r13) + basr %r14,%r1 + # setup registers for memory mover & branch to target + lr %r4,%r2 + l %r2,.Loffset-.LPG1(%r13) + la %r4,0(%r2,%r4) + l %r3,.Lmvsize-.LPG1(%r13) + lr %r5,%r3 + # move the memory mover someplace safe + la %r1,0x200 + mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13) + # decompress image is started at 0x11000 + lr %r6,%r2 + br %r1 +mover: + mvcle %r2,%r4,0 + jo mover + br %r6 +mover_end: + + .align 8 +.Lstack: + .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER)) +.Ldecompress: + .long decompress_kernel +.Loffset: + .long 0x11000 +.Lmvsize: + .long SZ__bss_start diff --git a/arch/s390/boot/compressed/head64.S b/arch/s390/boot/compressed/head64.S new file mode 100644 index 00000000..f86a4eef --- /dev/null +++ b/arch/s390/boot/compressed/head64.S @@ -0,0 +1,48 @@ +/* + * Startup glue code to uncompress the kernel + * + * Copyright IBM Corp. 
2010 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page.h> +#include "sizes.h" + +__HEAD +ENTRY(startup_continue) + basr %r13,0 # get base +.LPG1: + # setup stack + lg %r15,.Lstack-.LPG1(%r13) + aghi %r15,-160 + brasl %r14,decompress_kernel + # setup registers for memory mover & branch to target + lgr %r4,%r2 + lg %r2,.Loffset-.LPG1(%r13) + la %r4,0(%r2,%r4) + lg %r3,.Lmvsize-.LPG1(%r13) + lgr %r5,%r3 + # move the memory mover someplace safe + la %r1,0x200 + mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13) + # decompress image is started at 0x11000 + lgr %r6,%r2 + br %r1 +mover: + mvcle %r2,%r4,0 + jo mover + br %r6 +mover_end: + + .align 8 +.Lstack: + .quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER)) +.Loffset: + .quad 0x11000 +.Lmvsize: + .quad SZ__bss_start diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c new file mode 100644 index 00000000..465eca75 --- /dev/null +++ b/arch/s390/boot/compressed/misc.c @@ -0,0 +1,168 @@ +/* + * Definitions and wrapper functions for kernel decompressor + * + * Copyright IBM Corp. 
2010 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <asm/uaccess.h> +#include <asm/page.h> +#include <asm/ipl.h> +#include "sizes.h" + +/* + * gzip declarations + */ +#define STATIC static + +#undef memset +#undef memcpy +#undef memmove +#define memmove memmove +#define memzero(s, n) memset((s), 0, (n)) + +/* Symbols defined by linker scripts */ +extern char input_data[]; +extern int input_len; +extern char _text, _end; +extern char _bss, _ebss; + +static void error(char *m); + +static unsigned long free_mem_ptr; +static unsigned long free_mem_end_ptr; + +#ifdef CONFIG_HAVE_KERNEL_BZIP2 +#define HEAP_SIZE 0x400000 +#else +#define HEAP_SIZE 0x10000 +#endif + +#ifdef CONFIG_KERNEL_GZIP +#include "../../../../lib/decompress_inflate.c" +#endif + +#ifdef CONFIG_KERNEL_BZIP2 +#include "../../../../lib/decompress_bunzip2.c" +#endif + +#ifdef CONFIG_KERNEL_LZMA +#include "../../../../lib/decompress_unlzma.c" +#endif + +#ifdef CONFIG_KERNEL_LZO +#include "../../../../lib/decompress_unlzo.c" +#endif + +#ifdef CONFIG_KERNEL_XZ +#include "../../../../lib/decompress_unxz.c" +#endif + +extern _sclp_print_early(const char *); + +static int puts(const char *s) +{ + _sclp_print_early(s); + return 0; +} + +void *memset(void *s, int c, size_t n) +{ + char *xs; + + if (c == 0) + return __builtin_memset(s, 0, n); + + xs = (char *) s; + if (n > 0) + do { + *xs++ = c; + } while (--n > 0); + return s; +} + +void *memcpy(void *__dest, __const void *__src, size_t __n) +{ + return __builtin_memcpy(__dest, __src, __n); +} + +void *memmove(void *__dest, __const void *__src, size_t __n) +{ + char *d; + const char *s; + + if (__dest <= __src) + return __builtin_memcpy(__dest, __src, __n); + d = __dest + __n; + s = __src + __n; + while (__n--) + *--d = *--s; + return __dest; +} + +static void error(char *x) +{ + unsigned long long psw = 0x000a0000deadbeefULL; + + puts("\n\n"); + puts(x); + puts("\n\n -- System halted"); + + asm volatile("lpsw %0" : : "Q" (psw)); 
+} + +/* + * Safe guard the ipl parameter block against a memory area that will be + * overwritten. The validity check for the ipl parameter block is complex + * (see cio_get_iplinfo and ipl_save_parameters) but if the pointer to + * the ipl parameter block intersects with the passed memory area we can + * safely assume that we can read from that memory. In that case just copy + * the memory to IPL_PARMBLOCK_ORIGIN even if there is no ipl parameter + * block. + */ +static void check_ipl_parmblock(void *start, unsigned long size) +{ + void *src, *dst; + + src = (void *)(unsigned long) S390_lowcore.ipl_parmblock_ptr; + if (src + PAGE_SIZE <= start || src >= start + size) + return; + dst = (void *) IPL_PARMBLOCK_ORIGIN; + memmove(dst, src, PAGE_SIZE); + S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN; +} + +unsigned long decompress_kernel(void) +{ + unsigned long output_addr; + unsigned char *output; + + output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL; + check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start); + memset(&_bss, 0, &_ebss - &_bss); + free_mem_ptr = (unsigned long)&_end; + free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; + output = (unsigned char *) output_addr; + +#ifdef CONFIG_BLK_DEV_INITRD + /* + * Move the initrd right behind the end of the decompressed + * kernel image. + */ + if (INITRD_START && INITRD_SIZE && + INITRD_START < (unsigned long) output + SZ__bss_start) { + check_ipl_parmblock(output + SZ__bss_start, + INITRD_START + INITRD_SIZE); + memmove(output + SZ__bss_start, + (void *) INITRD_START, INITRD_SIZE); + INITRD_START = (unsigned long) output + SZ__bss_start; + } +#endif + + puts("Uncompressing Linux... 
"); + decompress(input_data, input_len, NULL, NULL, output, NULL, error); + puts("Ok, booting the kernel.\n"); + return (unsigned long) output; +} + diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S new file mode 100644 index 00000000..d80f79d8 --- /dev/null +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -0,0 +1,55 @@ +#include <asm-generic/vmlinux.lds.h> + +#ifdef CONFIG_64BIT +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) +#else +OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") +OUTPUT_ARCH(s390) +#endif + +ENTRY(startup) + +SECTIONS +{ + /* Be careful parts of head_64.S assume startup_32 is at + * address 0. + */ + . = 0; + .head.text : { + _head = . ; + HEAD_TEXT + _ehead = . ; + } + .rodata.compressed : { + *(.rodata.compressed) + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + .data : { + _data = . ; + *(.data) + *(.data.*) + _edata = . ; + } + . = ALIGN(256); + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(8); /* For convenience during zeroing */ + _ebss = .; + } + _end = .; +} diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr new file mode 100644 index 00000000..f02382ae --- /dev/null +++ b/arch/s390/boot/compressed/vmlinux.scr @@ -0,0 +1,10 @@ +SECTIONS +{ + .rodata.compressed : { + input_len = .; + LONG(input_data_end - input_data) input_data = .; + *(.data) + output_len = . - 4; + input_data_end = .; + } +} diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh new file mode 100644 index 00000000..aed30696 --- /dev/null +++ b/arch/s390/boot/install.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# +# arch/s390x/boot/install.sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. 
See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# +# "make install" script for s390 architecture +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) +# + +# User may have a custom install script + +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi + +# Default install - same as make zlilo + +if [ -f $4/vmlinuz ]; then + mv $4/vmlinuz $4/vmlinuz.old +fi + +if [ -f $4/System.map ]; then + mv $4/System.map $4/System.old +fi + +cat $2 > $4/vmlinuz +cp $3 $4/System.map diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile new file mode 100644 index 00000000..7f0b7cda --- /dev/null +++ b/arch/s390/crypto/Makefile @@ -0,0 +1,11 @@ +# +# Cryptographic API +# + +obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o +obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o +obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o +obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o +obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o +obj-$(CONFIG_S390_PRNG) += prng.o +obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c new file mode 100644 index 00000000..a9ce1358 --- /dev/null +++ b/arch/s390/crypto/aes_s390.c @@ -0,0 +1,937 @@ +/* + * Cryptographic API. + * + * s390 implementation of the AES Cipher Algorithm. + * + * s390 Version: + * Copyright IBM Corp. 
2005,2007 + * Author(s): Jan Glauber (jang@de.ibm.com) + * Sebastian Siewior (sebastian@breakpoint.cc> SW-Fallback + * + * Derived from "crypto/aes_generic.c" + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#define KMSG_COMPONENT "aes_s390" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/init.h> +#include "crypt_s390.h" + +#define AES_KEYLEN_128 1 +#define AES_KEYLEN_192 2 +#define AES_KEYLEN_256 4 + +static u8 *ctrblk; +static char keylen_flag; + +struct s390_aes_ctx { + u8 iv[AES_BLOCK_SIZE]; + u8 key[AES_MAX_KEY_SIZE]; + long enc; + long dec; + int key_len; + union { + struct crypto_blkcipher *blk; + struct crypto_cipher *cip; + } fallback; +}; + +struct pcc_param { + u8 key[32]; + u8 tweak[16]; + u8 block[16]; + u8 bit[16]; + u8 xts[16]; +}; + +struct s390_xts_ctx { + u8 key[32]; + u8 xts_param[16]; + struct pcc_param pcc; + long enc; + long dec; + int key_len; + struct crypto_blkcipher *fallback; +}; + +/* + * Check if the key_len is supported by the HW. 
+ * Returns 0 if it is, a positive number if it is not and software fallback is + * required or a negative number in case the key size is not valid + */ +static int need_fallback(unsigned int key_len) +{ + switch (key_len) { + case 16: + if (!(keylen_flag & AES_KEYLEN_128)) + return 1; + break; + case 24: + if (!(keylen_flag & AES_KEYLEN_192)) + return 1; + break; + case 32: + if (!(keylen_flag & AES_KEYLEN_256)) + return 1; + break; + default: + return -1; + break; + } + return 0; +} + +static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + int ret; + + sctx->fallback.cip->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + sctx->fallback.cip->base.crt_flags |= (tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len); + if (ret) { + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= (sctx->fallback.cip->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + return ret; +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int ret; + + ret = need_fallback(key_len); + if (ret < 0) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + sctx->key_len = key_len; + if (!ret) { + memcpy(sctx->key, in_key, key_len); + return 0; + } + + return setkey_fallback_cip(tfm, in_key, key_len); +} + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + if (unlikely(need_fallback(sctx->key_len))) { + crypto_cipher_encrypt_one(sctx->fallback.cip, out, in); + return; + } + + switch (sctx->key_len) { + case 16: + crypt_s390_km(KM_AES_128_ENCRYPT, &sctx->key, out, in, + AES_BLOCK_SIZE); + break; + case 24: + crypt_s390_km(KM_AES_192_ENCRYPT, &sctx->key, out, in, + AES_BLOCK_SIZE); + break; + case 32: + 
crypt_s390_km(KM_AES_256_ENCRYPT, &sctx->key, out, in, + AES_BLOCK_SIZE); + break; + } +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + if (unlikely(need_fallback(sctx->key_len))) { + crypto_cipher_decrypt_one(sctx->fallback.cip, out, in); + return; + } + + switch (sctx->key_len) { + case 16: + crypt_s390_km(KM_AES_128_DECRYPT, &sctx->key, out, in, + AES_BLOCK_SIZE); + break; + case 24: + crypt_s390_km(KM_AES_192_DECRYPT, &sctx->key, out, in, + AES_BLOCK_SIZE); + break; + case 32: + crypt_s390_km(KM_AES_256_DECRYPT, &sctx->key, out, in, + AES_BLOCK_SIZE); + break; + } +} + +static int fallback_init_cip(struct crypto_tfm *tfm) +{ + const char *name = tfm->__crt_alg->cra_name; + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + sctx->fallback.cip = crypto_alloc_cipher(name, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(sctx->fallback.cip)) { + pr_err("Allocating AES fallback algorithm %s failed\n", + name); + return PTR_ERR(sctx->fallback.cip); + } + + return 0; +} + +static void fallback_exit_cip(struct crypto_tfm *tfm) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + crypto_free_cipher(sctx->fallback.cip); + sctx->fallback.cip = NULL; +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), + .cra_init = fallback_init_cip, + .cra_exit = fallback_exit_cip, + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt, + } + } +}; + +static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key, + unsigned int 
len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + unsigned int ret; + + sctx->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + sctx->fallback.blk->base.crt_flags |= (tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_blkcipher_setkey(sctx->fallback.blk, key, len); + if (ret) { + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= (sctx->fallback.blk->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + return ret; +} + +static int fallback_blk_dec(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + unsigned int ret; + struct crypto_blkcipher *tfm; + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + + tfm = desc->tfm; + desc->tfm = sctx->fallback.blk; + + ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int fallback_blk_enc(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + unsigned int ret; + struct crypto_blkcipher *tfm; + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + + tfm = desc->tfm; + desc->tfm = sctx->fallback.blk; + + ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + int ret; + + ret = need_fallback(key_len); + if (ret > 0) { + sctx->key_len = key_len; + return setkey_fallback_blk(tfm, in_key, key_len); + } + + switch (key_len) { + case 16: + sctx->enc = KM_AES_128_ENCRYPT; + sctx->dec = KM_AES_128_DECRYPT; + break; + case 24: + sctx->enc = KM_AES_192_ENCRYPT; + sctx->dec = KM_AES_192_DECRYPT; + break; + case 32: + sctx->enc = KM_AES_256_ENCRYPT; + sctx->dec = KM_AES_256_DECRYPT; + break; + } + + return aes_set_key(tfm, in_key, key_len); +} + +static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param, + struct 
blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes; + + while ((nbytes = walk->nbytes)) { + /* only use complete blocks */ + unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1); + u8 *out = walk->dst.virt.addr; + u8 *in = walk->src.virt.addr; + + ret = crypt_s390_km(func, param, out, in, n); + BUG_ON((ret < 0) || (ret != n)); + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } + + return ret; +} + +static int ecb_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(need_fallback(sctx->key_len))) + return fallback_blk_enc(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_aes_crypt(desc, sctx->enc, sctx->key, &walk); +} + +static int ecb_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(need_fallback(sctx->key_len))) + return fallback_blk_dec(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_aes_crypt(desc, sctx->dec, sctx->key, &walk); +} + +static int fallback_init_blk(struct crypto_tfm *tfm) +{ + const char *name = tfm->__crt_alg->cra_name; + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + sctx->fallback.blk = crypto_alloc_blkcipher(name, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(sctx->fallback.blk)) { + pr_err("Allocating AES fallback algorithm %s failed\n", + name); + return PTR_ERR(sctx->fallback.blk); + } + + return 0; +} + +static void fallback_exit_blk(struct crypto_tfm *tfm) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + crypto_free_blkcipher(sctx->fallback.blk); + sctx->fallback.blk = NULL; +} + +static struct crypto_alg 
ecb_aes_alg = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ecb_aes_alg.cra_list), + .cra_init = fallback_init_blk, + .cra_exit = fallback_exit_blk, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = ecb_aes_set_key, + .encrypt = ecb_aes_encrypt, + .decrypt = ecb_aes_decrypt, + } + } +}; + +static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + int ret; + + ret = need_fallback(key_len); + if (ret > 0) { + sctx->key_len = key_len; + return setkey_fallback_blk(tfm, in_key, key_len); + } + + switch (key_len) { + case 16: + sctx->enc = KMC_AES_128_ENCRYPT; + sctx->dec = KMC_AES_128_DECRYPT; + break; + case 24: + sctx->enc = KMC_AES_192_ENCRYPT; + sctx->dec = KMC_AES_192_DECRYPT; + break; + case 32: + sctx->enc = KMC_AES_256_ENCRYPT; + sctx->dec = KMC_AES_256_DECRYPT; + break; + } + + return aes_set_key(tfm, in_key, key_len); +} + +static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, void *param, + struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes = walk->nbytes; + + if (!nbytes) + goto out; + + memcpy(param, walk->iv, AES_BLOCK_SIZE); + do { + /* only use complete blocks */ + unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1); + u8 *out = walk->dst.virt.addr; + u8 *in = walk->src.virt.addr; + + ret = crypt_s390_kmc(func, param, out, in, n); + BUG_ON((ret < 0) || (ret != n)); + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } while ((nbytes = walk->nbytes)); + memcpy(walk->iv, param, AES_BLOCK_SIZE); + +out: + 
return ret; +} + +static int cbc_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(need_fallback(sctx->key_len))) + return fallback_blk_enc(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_aes_crypt(desc, sctx->enc, sctx->iv, &walk); +} + +static int cbc_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(need_fallback(sctx->key_len))) + return fallback_blk_dec(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_aes_crypt(desc, sctx->dec, sctx->iv, &walk); +} + +static struct crypto_alg cbc_aes_alg = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(cbc_aes_alg.cra_list), + .cra_init = fallback_init_blk, + .cra_exit = fallback_exit_blk, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = cbc_aes_set_key, + .encrypt = cbc_aes_encrypt, + .decrypt = cbc_aes_decrypt, + } + } +}; + +static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int len) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + unsigned int ret; + + xts_ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + xts_ctx->fallback->base.crt_flags |= (tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_blkcipher_setkey(xts_ctx->fallback, key, len); + if 
(ret) { + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= (xts_ctx->fallback->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + return ret; +} + +static int xts_fallback_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm; + unsigned int ret; + + tfm = desc->tfm; + desc->tfm = xts_ctx->fallback; + + ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int xts_fallback_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm; + unsigned int ret; + + tfm = desc->tfm; + desc->tfm = xts_ctx->fallback; + + ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + + switch (key_len) { + case 32: + xts_ctx->enc = KM_XTS_128_ENCRYPT; + xts_ctx->dec = KM_XTS_128_DECRYPT; + memcpy(xts_ctx->key + 16, in_key, 16); + memcpy(xts_ctx->pcc.key + 16, in_key + 16, 16); + break; + case 48: + xts_ctx->enc = 0; + xts_ctx->dec = 0; + xts_fallback_setkey(tfm, in_key, key_len); + break; + case 64: + xts_ctx->enc = KM_XTS_256_ENCRYPT; + xts_ctx->dec = KM_XTS_256_DECRYPT; + memcpy(xts_ctx->key, in_key, 32); + memcpy(xts_ctx->pcc.key, in_key + 32, 32); + break; + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + xts_ctx->key_len = key_len; + return 0; +} + +static int xts_aes_crypt(struct blkcipher_desc *desc, long func, + struct s390_xts_ctx *xts_ctx, + struct blkcipher_walk *walk) +{ + unsigned int offset = (xts_ctx->key_len >> 1) & 0x10; + int ret = 
blkcipher_walk_virt(desc, walk); + unsigned int nbytes = walk->nbytes; + unsigned int n; + u8 *in, *out; + void *param; + + if (!nbytes) + goto out; + + memset(xts_ctx->pcc.block, 0, sizeof(xts_ctx->pcc.block)); + memset(xts_ctx->pcc.bit, 0, sizeof(xts_ctx->pcc.bit)); + memset(xts_ctx->pcc.xts, 0, sizeof(xts_ctx->pcc.xts)); + memcpy(xts_ctx->pcc.tweak, walk->iv, sizeof(xts_ctx->pcc.tweak)); + param = xts_ctx->pcc.key + offset; + ret = crypt_s390_pcc(func, param); + BUG_ON(ret < 0); + + memcpy(xts_ctx->xts_param, xts_ctx->pcc.xts, 16); + param = xts_ctx->key + offset; + do { + /* only use complete blocks */ + n = nbytes & ~(AES_BLOCK_SIZE - 1); + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + + ret = crypt_s390_km(func, param, out, in, n); + BUG_ON(ret < 0 || ret != n); + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } while ((nbytes = walk->nbytes)); +out: + return ret; +} + +static int xts_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(xts_ctx->key_len == 48)) + return xts_fallback_encrypt(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return xts_aes_crypt(desc, xts_ctx->enc, xts_ctx, &walk); +} + +static int xts_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(xts_ctx->key_len == 48)) + return xts_fallback_decrypt(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return xts_aes_crypt(desc, xts_ctx->dec, xts_ctx, &walk); +} + +static int xts_fallback_init(struct crypto_tfm *tfm) +{ + const char *name = tfm->__crt_alg->cra_name; + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + + 
xts_ctx->fallback = crypto_alloc_blkcipher(name, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(xts_ctx->fallback)) { + pr_err("Allocating XTS fallback algorithm %s failed\n", + name); + return PTR_ERR(xts_ctx->fallback); + } + return 0; +} + +static void xts_fallback_exit(struct crypto_tfm *tfm) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + + crypto_free_blkcipher(xts_ctx->fallback); + xts_ctx->fallback = NULL; +} + +static struct crypto_alg xts_aes_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_xts_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(xts_aes_alg.cra_list), + .cra_init = xts_fallback_init, + .cra_exit = xts_fallback_exit, + .cra_u = { + .blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_aes_set_key, + .encrypt = xts_aes_encrypt, + .decrypt = xts_aes_decrypt, + } + } +}; + +static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + switch (key_len) { + case 16: + sctx->enc = KMCTR_AES_128_ENCRYPT; + sctx->dec = KMCTR_AES_128_DECRYPT; + break; + case 24: + sctx->enc = KMCTR_AES_192_ENCRYPT; + sctx->dec = KMCTR_AES_192_DECRYPT; + break; + case 32: + sctx->enc = KMCTR_AES_256_ENCRYPT; + sctx->dec = KMCTR_AES_256_DECRYPT; + break; + } + + return aes_set_key(tfm, in_key, key_len); +} + +static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, + struct s390_aes_ctx *sctx, struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); + unsigned int i, n, nbytes; + u8 buf[AES_BLOCK_SIZE]; + u8 *out, *in; + + if (!walk->nbytes) + return ret; + + 
memcpy(ctrblk, walk->iv, AES_BLOCK_SIZE); + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + while (nbytes >= AES_BLOCK_SIZE) { + /* only use complete blocks, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : + nbytes & ~(AES_BLOCK_SIZE - 1); + for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) { + memcpy(ctrblk + i, ctrblk + i - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrblk + i, AES_BLOCK_SIZE); + } + ret = crypt_s390_kmctr(func, sctx->key, out, in, n, ctrblk); + BUG_ON(ret < 0 || ret != n); + if (n > AES_BLOCK_SIZE) + memcpy(ctrblk, ctrblk + n - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrblk, AES_BLOCK_SIZE); + out += n; + in += n; + nbytes -= n; + } + ret = blkcipher_walk_done(desc, walk, nbytes); + } + /* + * final block may be < AES_BLOCK_SIZE, copy only nbytes + */ + if (nbytes) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + ret = crypt_s390_kmctr(func, sctx->key, buf, in, + AES_BLOCK_SIZE, ctrblk); + BUG_ON(ret < 0 || ret != AES_BLOCK_SIZE); + memcpy(out, buf, nbytes); + crypto_inc(ctrblk, AES_BLOCK_SIZE); + ret = blkcipher_walk_done(desc, walk, 0); + } + memcpy(walk->iv, ctrblk, AES_BLOCK_SIZE); + return ret; +} + +static int ctr_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_aes_crypt(desc, sctx->enc, sctx, &walk); +} + +static int ctr_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_aes_crypt(desc, sctx->dec, sctx, &walk); +} + +static struct crypto_alg ctr_aes_alg = { + .cra_name = "ctr(aes)", + 
.cra_driver_name = "ctr-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ctr_aes_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ctr_aes_set_key, + .encrypt = ctr_aes_encrypt, + .decrypt = ctr_aes_decrypt, + } + } +}; + +static int __init aes_s390_init(void) +{ + int ret; + + if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA)) + keylen_flag |= AES_KEYLEN_128; + if (crypt_s390_func_available(KM_AES_192_ENCRYPT, CRYPT_S390_MSA)) + keylen_flag |= AES_KEYLEN_192; + if (crypt_s390_func_available(KM_AES_256_ENCRYPT, CRYPT_S390_MSA)) + keylen_flag |= AES_KEYLEN_256; + + if (!keylen_flag) + return -EOPNOTSUPP; + + /* z9 109 and z9 BC/EC only support 128 bit key length */ + if (keylen_flag == AES_KEYLEN_128) + pr_info("AES hardware acceleration is only available for" + " 128-bit keys\n"); + + ret = crypto_register_alg(&aes_alg); + if (ret) + goto aes_err; + + ret = crypto_register_alg(&ecb_aes_alg); + if (ret) + goto ecb_aes_err; + + ret = crypto_register_alg(&cbc_aes_alg); + if (ret) + goto cbc_aes_err; + + if (crypt_s390_func_available(KM_XTS_128_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KM_XTS_256_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ret = crypto_register_alg(&xts_aes_alg); + if (ret) + goto xts_aes_err; + } + + if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_AES_192_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_AES_256_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + if (!ctrblk) { + ret = -ENOMEM; + goto ctr_aes_err; + } + ret = 
crypto_register_alg(&ctr_aes_alg); + if (ret) { + free_page((unsigned long) ctrblk); + goto ctr_aes_err; + } + } + +out: + return ret; + +ctr_aes_err: + crypto_unregister_alg(&xts_aes_alg); +xts_aes_err: + crypto_unregister_alg(&cbc_aes_alg); +cbc_aes_err: + crypto_unregister_alg(&ecb_aes_alg); +ecb_aes_err: + crypto_unregister_alg(&aes_alg); +aes_err: + goto out; +} + +static void __exit aes_s390_fini(void) +{ + crypto_unregister_alg(&ctr_aes_alg); + free_page((unsigned long) ctrblk); + crypto_unregister_alg(&xts_aes_alg); + crypto_unregister_alg(&cbc_aes_alg); + crypto_unregister_alg(&ecb_aes_alg); + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_s390_init); +module_exit(aes_s390_fini); + +MODULE_ALIAS("aes-all"); + +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); +MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h new file mode 100644 index 00000000..9178db6d --- /dev/null +++ b/arch/s390/crypto/crypt_s390.h @@ -0,0 +1,437 @@ +/* + * Cryptographic API. + * + * Support for s390 cryptographic instructions. + * + * Copyright IBM Corp. 2003,2007 + * Author(s): Thomas Spatzier + * Jan Glauber (jan.glauber@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + */ +#ifndef _CRYPTO_ARCH_S390_CRYPT_S390_H +#define _CRYPTO_ARCH_S390_CRYPT_S390_H + +#include <asm/errno.h> +#include <asm/facility.h> +/* A function constant is an operation (high byte, CRYPT_S390_OP_MASK) OR-ed with a hardware function code (low byte, CRYPT_S390_FUNC_MASK). */ +#define CRYPT_S390_OP_MASK 0xFF00 +#define CRYPT_S390_FUNC_MASK 0x00FF +/* cra_priority values for the s390 algorithm definitions. */ +#define CRYPT_S390_PRIORITY 300 +#define CRYPT_S390_COMPOSITE_PRIORITY 400 +/* facility_mask bits for crypt_s390_func_available(): MSA requires facility 17, MSA3 facilities 2 and 76, MSA4 facilities 2 and 77. */ +#define CRYPT_S390_MSA 0x1 +#define CRYPT_S390_MSA3 0x2 +#define CRYPT_S390_MSA4 0x4 + +/* s390 cryptographic operations */ +enum crypt_s390_operations { + CRYPT_S390_KM = 0x0100, + CRYPT_S390_KMC = 0x0200, + CRYPT_S390_KIMD = 0x0300, + CRYPT_S390_KLMD = 0x0400, + CRYPT_S390_KMAC = 0x0500, + CRYPT_S390_KMCTR = 0x0600 +}; + +/* + * function codes for KM (CIPHER MESSAGE) instruction + * 0x80 is the decipher modifier bit + */ +enum crypt_s390_km_func { + KM_QUERY = CRYPT_S390_KM | 0x0, + KM_DEA_ENCRYPT = CRYPT_S390_KM | 0x1, + KM_DEA_DECRYPT = CRYPT_S390_KM | 0x1 | 0x80, + KM_TDEA_128_ENCRYPT = CRYPT_S390_KM | 0x2, + KM_TDEA_128_DECRYPT = CRYPT_S390_KM | 0x2 | 0x80, + KM_TDEA_192_ENCRYPT = CRYPT_S390_KM | 0x3, + KM_TDEA_192_DECRYPT = CRYPT_S390_KM | 0x3 | 0x80, + KM_AES_128_ENCRYPT = CRYPT_S390_KM | 0x12, + KM_AES_128_DECRYPT = CRYPT_S390_KM | 0x12 | 0x80, + KM_AES_192_ENCRYPT = CRYPT_S390_KM | 0x13, + KM_AES_192_DECRYPT = CRYPT_S390_KM | 0x13 | 0x80, + KM_AES_256_ENCRYPT = CRYPT_S390_KM | 0x14, + KM_AES_256_DECRYPT = CRYPT_S390_KM | 0x14 | 0x80, + KM_XTS_128_ENCRYPT = CRYPT_S390_KM | 0x32, + KM_XTS_128_DECRYPT = CRYPT_S390_KM | 0x32 | 0x80, + KM_XTS_256_ENCRYPT = CRYPT_S390_KM | 0x34, + KM_XTS_256_DECRYPT = CRYPT_S390_KM | 0x34 | 0x80, +}; + +/* + * function codes for KMC (CIPHER MESSAGE WITH CHAINING) + * instruction + * as for KM, every DECRYPT code is the ENCRYPT code with 0x80 set + */ +enum crypt_s390_kmc_func { + KMC_QUERY = CRYPT_S390_KMC | 0x0, + KMC_DEA_ENCRYPT = CRYPT_S390_KMC | 0x1, + KMC_DEA_DECRYPT = CRYPT_S390_KMC | 0x1 | 0x80, + KMC_TDEA_128_ENCRYPT = CRYPT_S390_KMC | 0x2, + KMC_TDEA_128_DECRYPT = CRYPT_S390_KMC | 0x2 | 0x80, + KMC_TDEA_192_ENCRYPT = CRYPT_S390_KMC | 0x3, + KMC_TDEA_192_DECRYPT = CRYPT_S390_KMC | 0x3 | 0x80, +
KMC_AES_128_ENCRYPT = CRYPT_S390_KMC | 0x12, + KMC_AES_128_DECRYPT = CRYPT_S390_KMC | 0x12 | 0x80, + KMC_AES_192_ENCRYPT = CRYPT_S390_KMC | 0x13, + KMC_AES_192_DECRYPT = CRYPT_S390_KMC | 0x13 | 0x80, + KMC_AES_256_ENCRYPT = CRYPT_S390_KMC | 0x14, + KMC_AES_256_DECRYPT = CRYPT_S390_KMC | 0x14 | 0x80, + KMC_PRNG = CRYPT_S390_KMC | 0x43, +}; + +/* + * function codes for KMCTR (CIPHER MESSAGE WITH COUNTER) + * instruction + * as for KM, every DECRYPT code is the ENCRYPT code with 0x80 set + */ +enum crypt_s390_kmctr_func { + KMCTR_QUERY = CRYPT_S390_KMCTR | 0x0, + KMCTR_DEA_ENCRYPT = CRYPT_S390_KMCTR | 0x1, + KMCTR_DEA_DECRYPT = CRYPT_S390_KMCTR | 0x1 | 0x80, + KMCTR_TDEA_128_ENCRYPT = CRYPT_S390_KMCTR | 0x2, + KMCTR_TDEA_128_DECRYPT = CRYPT_S390_KMCTR | 0x2 | 0x80, + KMCTR_TDEA_192_ENCRYPT = CRYPT_S390_KMCTR | 0x3, + KMCTR_TDEA_192_DECRYPT = CRYPT_S390_KMCTR | 0x3 | 0x80, + KMCTR_AES_128_ENCRYPT = CRYPT_S390_KMCTR | 0x12, + KMCTR_AES_128_DECRYPT = CRYPT_S390_KMCTR | 0x12 | 0x80, + KMCTR_AES_192_ENCRYPT = CRYPT_S390_KMCTR | 0x13, + KMCTR_AES_192_DECRYPT = CRYPT_S390_KMCTR | 0x13 | 0x80, + KMCTR_AES_256_ENCRYPT = CRYPT_S390_KMCTR | 0x14, + KMCTR_AES_256_DECRYPT = CRYPT_S390_KMCTR | 0x14 | 0x80, +}; + +/* + * function codes for KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) + * instruction + */ +enum crypt_s390_kimd_func { + KIMD_QUERY = CRYPT_S390_KIMD | 0, + KIMD_SHA_1 = CRYPT_S390_KIMD | 1, + KIMD_SHA_256 = CRYPT_S390_KIMD | 2, + KIMD_SHA_512 = CRYPT_S390_KIMD | 3, + KIMD_GHASH = CRYPT_S390_KIMD | 65, +}; + +/* + * function codes for KLMD (COMPUTE LAST MESSAGE DIGEST) + * instruction + */ +enum crypt_s390_klmd_func { + KLMD_QUERY = CRYPT_S390_KLMD | 0, + KLMD_SHA_1 = CRYPT_S390_KLMD | 1, + KLMD_SHA_256 = CRYPT_S390_KLMD | 2, + KLMD_SHA_512 = CRYPT_S390_KLMD | 3, +}; + +/* + * function codes for KMAC (COMPUTE MESSAGE AUTHENTICATION CODE) + * instruction + */ +enum crypt_s390_kmac_func { + KMAC_QUERY = CRYPT_S390_KMAC | 0, + KMAC_DEA = CRYPT_S390_KMAC | 1, + KMAC_TDEA_128 = CRYPT_S390_KMAC | 2, + KMAC_TDEA_192 = CRYPT_S390_KMAC | 3 +}; +
+/** + * crypt_s390_km: + * @func: the function code passed to KM; see crypt_s390_km_func + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * + * Executes the KM (CIPHER MESSAGE) operation of the CPU. + * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for encryption/decryption funcs + * + * ret is preloaded with -1 and only cleared by the la that follows the + * instruction; the EX_TABLE(0b,2b) and EX_TABLE(1b,2b) entries presumably + * redirect a fault at label 0 or 1 to label 2, skipping that la, so a + * failed instruction leaves -1 in ret (confirm against the EX_TABLE + * definition). + */ +static inline int crypt_s390_km(long func, void *param, + u8 *dest, const u8 *src, long src_len) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; + register long __src_len asm("3") = src_len; + register u8 *__dest asm("4") = dest; + int ret; + + asm volatile( + "0: .insn rre,0xb92e0000,%3,%1 \n" /* KM opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest) + : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; +} + +/** + * crypt_s390_kmc: + * @func: the function code passed to KMC; see crypt_s390_kmc_func + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * + * Executes the KMC (CIPHER MESSAGE WITH CHAINING) operation of the CPU.
+ * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for encryption/decryption funcs + */ +static inline int crypt_s390_kmc(long func, void *param, + u8 *dest, const u8 *src, long src_len) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; + register long __src_len asm("3") = src_len; + register u8 *__dest asm("4") = dest; + int ret; + + asm volatile( + "0: .insn rre,0xb92f0000,%3,%1 \n" /* KMC opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest) + : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; +} + +/** + * crypt_s390_kimd: + * @func: the function code passed to KIMD; see crypt_s390_kimd_func + * @param: address of parameter block; see POP for details on each func + * @src: address of source memory area + * @src_len: length of src operand in bytes + * + * Executes the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) operation + * of the CPU. + * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for digest funcs + */ +static inline int crypt_s390_kimd(long func, void *param, + const u8 *src, long src_len) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; + register long __src_len asm("3") = src_len; + int ret; + + asm volatile( + "0: .insn rre,0xb93e0000,%1,%1 \n" /* KIMD opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+a" (__src), "+d" (__src_len) + : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ?
src_len - __src_len : __src_len; +} + +/** + * crypt_s390_klmd: + * @func: the function code passed to KLMD; see crypt_s390_klmd_func + * @param: address of parameter block; see POP for details on each func + * @src: address of source memory area + * @src_len: length of src operand in bytes + * + * Executes the KLMD (COMPUTE LAST MESSAGE DIGEST) operation of the CPU. + * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for digest funcs + */ +static inline int crypt_s390_klmd(long func, void *param, + const u8 *src, long src_len) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; + register long __src_len asm("3") = src_len; + int ret; + + asm volatile( + "0: .insn rre,0xb93f0000,%1,%1 \n" /* KLMD opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+a" (__src), "+d" (__src_len) + : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; +} + +/** + * crypt_s390_kmac: + * @func: the function code passed to KMAC; see crypt_s390_kmac_func + * @param: address of parameter block; see POP for details on each func + * @src: address of source memory area + * @src_len: length of src operand in bytes + * + * Executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE) operation + * of the CPU.
+ * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for digest funcs + */ +static inline int crypt_s390_kmac(long func, void *param, + const u8 *src, long src_len) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; + register long __src_len asm("3") = src_len; + int ret; + + asm volatile( + "0: .insn rre,0xb91e0000,%1,%1 \n" /* KMAC opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+a" (__src), "+d" (__src_len) + : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; +} + +/** + * crypt_s390_kmctr: + * @func: the function code passed to KMCTR; see crypt_s390_kmctr_func + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * @counter: address of counter value + * + * Executes the KMCTR (CIPHER MESSAGE WITH COUNTER) operation of the CPU.
+ * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for encryption/decryption funcs + */ +static inline int crypt_s390_kmctr(long func, void *param, u8 *dest, + const u8 *src, long src_len, u8 *counter) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; + register long __src_len asm("3") = src_len; + register u8 *__dest asm("4") = dest; + register u8 *__ctr asm("6") = counter; + int ret = -1; + + asm volatile( + "0: .insn rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest), + "+a" (__ctr) + : "d" (__func), "a" (__param) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; +} + +/** + * crypt_s390_func_available: + * @func: the function code of the specific function; 0 if op in general + * @facility_mask: OR of CRYPT_S390_MSA, CRYPT_S390_MSA3 and CRYPT_S390_MSA4 + * naming the facilities that must be installed + * + * Tests if a specific crypto function is implemented on the machine: checks + * the facility bits selected by @facility_mask, runs the query function of + * the instruction named by the operation byte of @func, and tests the bit + * for the function code (modifier bit 0x80 masked off) in the 16-byte + * status block the query filled in.
+ * + * Returns 1 if func available; 0 if func or op in general not available + */ +static inline int crypt_s390_func_available(int func, + unsigned int facility_mask) +{ + unsigned char status[16]; + int ret; + + if (facility_mask & CRYPT_S390_MSA && !test_facility(17)) + return 0; + + if (facility_mask & CRYPT_S390_MSA3 && + (!test_facility(2) || !test_facility(76))) + return 0; + if (facility_mask & CRYPT_S390_MSA4 && + (!test_facility(2) || !test_facility(77))) + return 0; + + switch (func & CRYPT_S390_OP_MASK) { + case CRYPT_S390_KM: + ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0); + break; + case CRYPT_S390_KMC: + ret = crypt_s390_kmc(KMC_QUERY, &status, NULL, NULL, 0); + break; + case CRYPT_S390_KIMD: + ret = crypt_s390_kimd(KIMD_QUERY, &status, NULL, 0); + break; + case CRYPT_S390_KLMD: + ret = crypt_s390_klmd(KLMD_QUERY, &status, NULL, 0); + break; + case CRYPT_S390_KMAC: + ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0); + break; + case CRYPT_S390_KMCTR: + ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0, + NULL); + break; + default: + return 0; + } + if (ret < 0) + return 0; + func &= CRYPT_S390_FUNC_MASK; + func &= 0x7f; /* mask modifier bit */ + return (status[func >> 3] & (0x80 >> (func & 7))) != 0; +} + +/** + * crypt_s390_pcc: + * @func: the function code passed to PCC; only the low 7 bits are used + * @param: address of parameter block; see POP for details on each func + * + * Executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) operation of the CPU. + * + * Returns -1 for failure, 0 for success.
+ */ +static inline int crypt_s390_pcc(long func, void *param) +{ + register long __func asm("0") = func & 0x7f; /* encrypt or decrypt */ + register void *__param asm("1") = param; + int ret = -1; + + asm volatile( + "0: .insn rre,0xb92c0000,0,0 \n" /* PCC opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "+d" (ret) + : "d" (__func), "a" (__param) : "cc", "memory"); + return ret; +} + + +#endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */ diff --git a/arch/s390/crypto/crypto_des.h b/arch/s390/crypto/crypto_des.h new file mode 100644 index 00000000..6210457c --- /dev/null +++ b/arch/s390/crypto/crypto_des.h @@ -0,0 +1,18 @@ +/* + * Cryptographic API. + * + * Function for checking keys for the DES and Triple DES Encryption + * algorithms. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#ifndef __CRYPTO_DES_H__ +#define __CRYPTO_DES_H__ + +extern int crypto_des_check_key(const u8*, unsigned int, u32*); + +#endif /*__CRYPTO_DES_H__*/ diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c new file mode 100644 index 00000000..a52bfd12 --- /dev/null +++ b/arch/s390/crypto/des_s390.c @@ -0,0 +1,600 @@ +/* + * Cryptographic API. + * + * s390 implementation of the DES Cipher Algorithm. + * + * Copyright IBM Corp. 2003,2011 + * Author(s): Thomas Spatzier + * Jan Glauber (jan.glauber@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/des.h> + +#include "crypt_s390.h" + +#define DES3_KEY_SIZE (3 * DES_KEY_SIZE) +/* Page of consecutive counter blocks used by the CTR helper; allocated in des_s390_init(). NOTE(review): one file-scope buffer with no locking visible in this file - confirm concurrent CTR requests cannot interleave. */ +static u8 *ctrblk; +/* Per-tfm context. iv[] sits directly before key[]; the CBC helper is handed ctx->iv as the KMC parameter block, so presumably the hardware expects IV followed by key (confirm against the KMC parameter block layout). */ +struct s390_des_ctx { + u8 iv[DES_BLOCK_SIZE]; + u8 key[DES3_KEY_SIZE]; +}; +/* Store a single-DES key. If des_ekey() returns 0 and the caller set CRYPTO_TFM_REQ_WEAK_KEY, flag CRYPTO_TFM_RES_WEAK_KEY and return -EINVAL; otherwise copy key_len bytes into the context and return 0. */ +static int des_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) +{ + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + u32 tmp[DES_EXPKEY_WORDS]; + + /* check for weak keys */ + if (!des_ekey(tmp, key) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + + memcpy(ctx->key, key, key_len); + return 0; +} +/* Single-block cipher entry points: one DES_BLOCK_SIZE block through KM with the stored key; the KM return value is not checked. */ +static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); + + crypt_s390_km(KM_DEA_ENCRYPT, ctx->key, out, in, DES_BLOCK_SIZE); +} + +static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); + + crypt_s390_km(KM_DEA_DECRYPT, ctx->key, out, in, DES_BLOCK_SIZE); +} + +static struct crypto_alg des_alg = { + .cra_name = "des", + .cra_driver_name = "des-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(des_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = DES_KEY_SIZE, + .cia_max_keysize = DES_KEY_SIZE, + .cia_setkey = des_setkey, + .cia_encrypt = des_encrypt, + .cia_decrypt = des_decrypt, + } + } +}; +/* ECB helper shared by DES and 3DES: walks the request and runs KM function func with key over the whole blocks of each chunk; the remainder of a chunk is handed back to blkcipher_walk_done(). Returns the last walk result. */ +static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, + u8 *key, struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes; + + while ((nbytes = walk->nbytes)) { + /* only use complete blocks */ + unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); + u8 *out = walk->dst.virt.addr; + u8 *in =
walk->src.virt.addr; + + ret = crypt_s390_km(func, key, out, in, n); + BUG_ON((ret < 0) || (ret != n)); + + nbytes &= DES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } + + return ret; +} +/* CBC helper shared by DES and 3DES: copies walk->iv into iv, runs KMC function func with iv as the parameter block over the whole blocks of each chunk, then copies iv back to walk->iv. NOTE(review): callers pass ctx->iv, which lives in the per-tfm context - confirm requests on one tfm cannot run concurrently. */ +static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, + u8 *iv, struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes = walk->nbytes; + + if (!nbytes) + goto out; + + memcpy(iv, walk->iv, DES_BLOCK_SIZE); + do { + /* only use complete blocks */ + unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); + u8 *out = walk->dst.virt.addr; + u8 *in = walk->src.virt.addr; + + ret = crypt_s390_kmc(func, iv, out, in, n); + BUG_ON((ret < 0) || (ret != n)); + + nbytes &= DES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } while ((nbytes = walk->nbytes)); + memcpy(walk->iv, iv, DES_BLOCK_SIZE); + +out: + return ret; +} +/* ecb(des) entry points: set up a walk over src/dst and run the ECB helper with the matching KM function code. */ +static int ecb_des_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, ctx->key, &walk); +} + +static int ecb_des_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_DEA_DECRYPT, ctx->key, &walk); +} + +static struct crypto_alg ecb_des_alg = { + .cra_name = "ecb(des)", + .cra_driver_name = "ecb-des-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ecb_des_alg.cra_list), +
.cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_setkey, + .encrypt = ecb_des_encrypt, + .decrypt = ecb_des_decrypt, + } + } +}; +/* cbc(des) entry points: set up a walk over src/dst and run the CBC helper with ctx->iv and the matching KMC function code. */ +static int cbc_des_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, ctx->iv, &walk); +} + +static int cbc_des_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, ctx->iv, &walk); +} + +static struct crypto_alg cbc_des_alg = { + .cra_name = "cbc(des)", + .cra_driver_name = "cbc-des-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(cbc_des_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_setkey, + .encrypt = cbc_des_encrypt, + .decrypt = cbc_des_decrypt, + } + } +}; + +/* + * RFC2451: + * + * For DES-EDE3, there is no known need to reject weak or + * complementation keys. Any weakness is obviated by the use of + * multiple keys. + * + * However, if the first two or last two independent 64-bit keys are + * equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the + * same as DES. Implementers MUST reject keys that exhibit this + * property.
+ * + * Note: des3_setkey() below only rejects such keys (-EINVAL with + * CRYPTO_TFM_RES_WEAK_KEY set) when the caller has set + * CRYPTO_TFM_REQ_WEAK_KEY in the tfm flags; otherwise the key is + * copied into the context unchecked. + * + */ +static int des3_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) +{ + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + + if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && + memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], + DES_KEY_SIZE)) && + (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + memcpy(ctx->key, key, key_len); + return 0; +} +/* Single-block 3DES entry points: one DES_BLOCK_SIZE block through KM with the TDEA-192 function codes; the KM return value is not checked. */ +static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); + + crypt_s390_km(KM_TDEA_192_ENCRYPT, ctx->key, dst, src, DES_BLOCK_SIZE); +} + +static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); + + crypt_s390_km(KM_TDEA_192_DECRYPT, ctx->key, dst, src, DES_BLOCK_SIZE); +} + +static struct crypto_alg des3_alg = { + .cra_name = "des3_ede", + .cra_driver_name = "des3_ede-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(des3_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = DES3_KEY_SIZE, + .cia_max_keysize = DES3_KEY_SIZE, + .cia_setkey = des3_setkey, + .cia_encrypt = des3_encrypt, + .cia_decrypt = des3_decrypt, + } + } +}; +/* ecb(des3_ede) entry points: same shape as the ecb(des) ones, with the TDEA-192 KM function codes. */ +static int ecb_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, ctx->key, &walk); +} + +static int ecb_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk
walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, ctx->key, &walk); +} + +static struct crypto_alg ecb_des3_alg = { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "ecb-des3_ede-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT( + ecb_des3_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .setkey = des3_setkey, + .encrypt = ecb_des3_encrypt, + .decrypt = ecb_des3_decrypt, + } + } +}; +/* cbc(des3_ede) entry points: same shape as the cbc(des) ones, with the TDEA-192 KMC function codes. */ +static int cbc_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, ctx->iv, &walk); +} + +static int cbc_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, ctx->iv, &walk); +} + +static struct crypto_alg cbc_des3_alg = { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cbc-des3_ede-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT( + cbc_des3_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey =
des3_setkey, + .encrypt = cbc_des3_encrypt, + .decrypt = cbc_des3_decrypt, + } + } +}; +/* CTR helper shared by ctr(des) and ctr(des3_ede). Copies walk->iv into the ctrblk page, fills in one incremented counter block per data block (at most PAGE_SIZE bytes per KMCTR call) and writes the next counter back to walk->iv. A trailing chunk shorter than DES_BLOCK_SIZE goes through a zeroed bounce buffer so the instruction never reads source bytes beyond the request. Returns the last blkcipher_walk result. NOTE(review): ctrblk is a single file-scope buffer with no locking visible here - confirm callers cannot run concurrently. */ +static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, + struct s390_des_ctx *ctx, struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); + unsigned int i, n, nbytes; + u8 buf[DES_BLOCK_SIZE] = { 0 }; + u8 *out, *in; + + memcpy(ctrblk, walk->iv, DES_BLOCK_SIZE); + while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + while (nbytes >= DES_BLOCK_SIZE) { + /* align to block size, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : + nbytes & ~(DES_BLOCK_SIZE - 1); + for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) { + memcpy(ctrblk + i, ctrblk + i - DES_BLOCK_SIZE, + DES_BLOCK_SIZE); + crypto_inc(ctrblk + i, DES_BLOCK_SIZE); + } + ret = crypt_s390_kmctr(func, ctx->key, out, in, n, ctrblk); + BUG_ON((ret < 0) || (ret != n)); + if (n > DES_BLOCK_SIZE) + memcpy(ctrblk, ctrblk + n - DES_BLOCK_SIZE, + DES_BLOCK_SIZE); + crypto_inc(ctrblk, DES_BLOCK_SIZE); + out += n; + in += n; + nbytes -= n; + } + ret = blkcipher_walk_done(desc, walk, nbytes); + } + + /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ + if (nbytes) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + /* + * Only nbytes of the source are valid: stage them in the + * zeroed bounce buffer and run KMCTR in place on it instead + * of letting the instruction read a full block from in. + */ + memcpy(buf, in, nbytes); + ret = crypt_s390_kmctr(func, ctx->key, buf, buf, + DES_BLOCK_SIZE, ctrblk); + BUG_ON(ret < 0 || ret != DES_BLOCK_SIZE); + memcpy(out, buf, nbytes); + crypto_inc(ctrblk, DES_BLOCK_SIZE); + ret = blkcipher_walk_done(desc, walk, 0); + } + memcpy(walk->iv, ctrblk, DES_BLOCK_SIZE); + return ret; +} + +static int ctr_des_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_DEA_ENCRYPT, ctx, &walk); +} + +static int ctr_des_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_DEA_DECRYPT, ctx, &walk); +} +/* ctr(des): cra_blocksize is 1; ctr_desall_crypt() handles a final chunk shorter than DES_BLOCK_SIZE. */ +static struct crypto_alg ctr_des_alg = { + .cra_name = "ctr(des)", + .cra_driver_name = "ctr-des-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ctr_des_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_setkey, + .encrypt = ctr_des_encrypt, + .decrypt = ctr_des_decrypt, + } + } +}; +/* ctr(des3_ede) entry points: set up a walk over src/dst and run the CTR helper with the TDEA-192 KMCTR function codes. */ +static int ctr_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_TDEA_192_ENCRYPT, ctx, &walk); +} + +static int ctr_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_TDEA_192_DECRYPT, ctx, &walk); +} + +static struct crypto_alg ctr_des3_alg = { + .cra_name = "ctr(des3_ede)", + .cra_driver_name = "ctr-des3_ede-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ctr_des3_alg.cra_list), + .cra_u =
{ + .blkcipher = { + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des3_setkey, + .encrypt = ctr_des3_encrypt, + .decrypt = ctr_des3_decrypt, + } + } +}; + +static int __init des_s390_init(void) +{ + int ret; + + if (!crypt_s390_func_available(KM_DEA_ENCRYPT, CRYPT_S390_MSA) || + !crypt_s390_func_available(KM_TDEA_192_ENCRYPT, CRYPT_S390_MSA)) + return -EOPNOTSUPP; + + ret = crypto_register_alg(&des_alg); + if (ret) + goto des_err; + ret = crypto_register_alg(&ecb_des_alg); + if (ret) + goto ecb_des_err; + ret = crypto_register_alg(&cbc_des_alg); + if (ret) + goto cbc_des_err; + ret = crypto_register_alg(&des3_alg); + if (ret) + goto des3_err; + ret = crypto_register_alg(&ecb_des3_alg); + if (ret) + goto ecb_des3_err; + ret = crypto_register_alg(&cbc_des3_alg); + if (ret) + goto cbc_des3_err; + + if (crypt_s390_func_available(KMCTR_DEA_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_TDEA_192_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ret = crypto_register_alg(&ctr_des_alg); + if (ret) + goto ctr_des_err; + ret = crypto_register_alg(&ctr_des3_alg); + if (ret) + goto ctr_des3_err; + ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + if (!ctrblk) { + ret = -ENOMEM; + goto ctr_mem_err; + } + } +out: + return ret; + +ctr_mem_err: + crypto_unregister_alg(&ctr_des3_alg); +ctr_des3_err: + crypto_unregister_alg(&ctr_des_alg); +ctr_des_err: + crypto_unregister_alg(&cbc_des3_alg); +cbc_des3_err: + crypto_unregister_alg(&ecb_des3_alg); +ecb_des3_err: + crypto_unregister_alg(&des3_alg); +des3_err: + crypto_unregister_alg(&cbc_des_alg); +cbc_des_err: + crypto_unregister_alg(&ecb_des_alg); +ecb_des_err: + crypto_unregister_alg(&des_alg); +des_err: + goto out; +} + +static void __exit des_s390_exit(void) +{ + if (ctrblk) { + crypto_unregister_alg(&ctr_des_alg); + crypto_unregister_alg(&ctr_des3_alg); + free_page((unsigned long) ctrblk); + } + crypto_unregister_alg(&cbc_des3_alg); + 
crypto_unregister_alg(&ecb_des3_alg); + crypto_unregister_alg(&des3_alg); + crypto_unregister_alg(&cbc_des_alg); + crypto_unregister_alg(&ecb_des_alg); + crypto_unregister_alg(&des_alg); +} + +module_init(des_s390_init); +module_exit(des_s390_exit); + +MODULE_ALIAS("des"); +MODULE_ALIAS("des3_ede"); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms"); diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c new file mode 100644 index 00000000..b1bd170f --- /dev/null +++ b/arch/s390/crypto/ghash_s390.c @@ -0,0 +1,162 @@ +/* + * Cryptographic API. + * + * s390 implementation of the GHASH algorithm for GCM (Galois/Counter Mode). + * + * Copyright IBM Corp. 2011 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#include <crypto/internal/hash.h> +#include <linux/module.h> + +#include "crypt_s390.h" + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 +/* Per-tfm context, handed to KIMD as the parameter block (icv first, key second). NOTE(review): icv is zeroed only in ghash_setkey(), not in ghash_init(), and is shared by every desc of the tfm - confirm that a second message under the same key, or concurrent descs, cannot see a stale icv. */ +struct ghash_ctx { + u8 icv[16]; + u8 key[16]; +}; +/* Per-request state: buffer holds a partial block; bytes is how many bytes are still missing to fill it (0 means the buffer is empty). */ +struct ghash_desc_ctx { + u8 buffer[GHASH_BLOCK_SIZE]; + u32 bytes; +}; +/* Resets the per-request buffer state; always returns 0. */ +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memset(dctx, 0, sizeof(*dctx)); + + return 0; +} +/* Stores the key and zeroes icv. A keylen other than GHASH_BLOCK_SIZE sets CRYPTO_TFM_RES_BAD_KEY_LEN and returns -EINVAL. */ +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->key, key, GHASH_BLOCK_SIZE); + memset(ctx->icv, 0, GHASH_BLOCK_SIZE); + + return 0; +} +/* Feeds srclen bytes into the hash: first tops up a pending partial block and hashes it once full, then hashes all whole blocks straight from src, and finally buffers any remainder. Always returns 0; a short KIMD result triggers BUG_ON. */ +static int ghash_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + unsigned int n; + u8 *buf = dctx->buffer; + int ret; + + if (dctx->bytes) { + u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); + + n = min(srclen, dctx->bytes); + dctx->bytes -= n; + srclen -= n; + +
memcpy(pos, src, n); + src += n; + + if (!dctx->bytes) { + ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, + GHASH_BLOCK_SIZE); + BUG_ON(ret != GHASH_BLOCK_SIZE); + } + } + + n = srclen & ~(GHASH_BLOCK_SIZE - 1); + if (n) { + ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n); + BUG_ON(ret != n); + src += n; + srclen -= n; + } + + if (srclen) { + dctx->bytes = GHASH_BLOCK_SIZE - srclen; + memcpy(buf, src, srclen); + } + + return 0; +} +/* Hashes a pending partial block, if any, after zero-filling its missing tail, then marks the buffer empty. */ +static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +{ + u8 *buf = dctx->buffer; + int ret; + + if (dctx->bytes) { + u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); + + memset(pos, 0, dctx->bytes); + + ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE); + BUG_ON(ret != GHASH_BLOCK_SIZE); + } + + dctx->bytes = 0; +} +/* Flushes any buffered partial block and copies the GHASH_BLOCK_SIZE bytes of ctx->icv to dst; always returns 0. */ +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + + ghash_flush(ctx, dctx); + memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), + }, +}; +/* Module init: registers the shash only when KIMD reports the GHASH function with the MSA and MSA4 facilities; otherwise returns -EOPNOTSUPP. */ +static int __init ghash_mod_init(void) +{ + if (!crypt_s390_func_available(KIMD_GHASH, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) + return -EOPNOTSUPP; + + return crypto_register_shash(&ghash_alg); +} + +static void __exit ghash_mod_exit(void) +{ + crypto_unregister_shash(&ghash_alg); +} + +module_init(ghash_mod_init); +module_exit(ghash_mod_exit); + +MODULE_ALIAS("ghash"); + +MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation"); diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c new file mode 100644 index 00000000..0808fbf0 --- /dev/null +++ b/arch/s390/crypto/prng.c @@ -0,0 +1,211 @@ +/* + * Copyright IBM Corp. 2006,2007 + * Author(s): Jan Glauber <jan.glauber@de.ibm.com> + * Driver for the s390 pseudo random number generator + */ +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <asm/debug.h> +#include <asm/uaccess.h> + +#include "crypt_s390.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jan Glauber <jan.glauber@de.ibm.com>"); +MODULE_DESCRIPTION("s390 PRNG interface"); + +static int prng_chunk_size = 256; +module_param(prng_chunk_size, int, S_IRUSR | S_IRGRP | S_IROTH); +MODULE_PARM_DESC(prng_chunk_size, "PRNG read chunk size in bytes"); + +static int prng_entropy_limit = 4096; +module_param(prng_entropy_limit, int, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); +MODULE_PARM_DESC(prng_entropy_limit, + "PRNG add entropy after that much bytes were produced"); + +/* + * Any one who considers arithmetical methods of producing random digits is, + * of course, in a state of sin. 
-- John von Neumann + */ + +struct s390_prng_data { + unsigned long count; /* how many bytes were produced */ + char *buf; +}; + +static struct s390_prng_data *p; + +/* copied from libica, use a non-zero initial parameter block */ +static unsigned char parm_block[32] = { +0x0F,0x2B,0x8E,0x63,0x8C,0x8E,0xD2,0x52,0x64,0xB7,0xA0,0x7B,0x75,0x28,0xB8,0xF4, +0x75,0x5F,0xD2,0xA6,0x8D,0x97,0x11,0xFF,0x49,0xD8,0x23,0xF3,0x7E,0x21,0xEC,0xA0, +}; + +static int prng_open(struct inode *inode, struct file *file) +{ + return nonseekable_open(inode, file); +} + +static void prng_add_entropy(void) +{ + __u64 entropy[4]; + unsigned int i; + int ret; + + for (i = 0; i < 16; i++) { + ret = crypt_s390_kmc(KMC_PRNG, parm_block, (char *)entropy, + (char *)entropy, sizeof(entropy)); + BUG_ON(ret < 0 || ret != sizeof(entropy)); + memcpy(parm_block, entropy, sizeof(entropy)); + } +} + +static void prng_seed(int nbytes) +{ + char buf[16]; + int i = 0; + + BUG_ON(nbytes > 16); + get_random_bytes(buf, nbytes); + + /* Add the entropy */ + while (nbytes >= 8) { + *((__u64 *)parm_block) ^= *((__u64 *)(buf+i)); + prng_add_entropy(); + i += 8; + nbytes -= 8; + } + prng_add_entropy(); +} + +static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes, + loff_t *ppos) +{ + int chunk, n; + int ret = 0; + int tmp; + + /* nbytes can be arbitrary length, we split it into chunks */ + while (nbytes) { + /* same as in extract_entropy_user in random.c */ + if (need_resched()) { + if (signal_pending(current)) { + if (ret == 0) + ret = -ERESTARTSYS; + break; + } + schedule(); + } + + /* + * we lose some random bytes if an attacker issues + * reads < 8 bytes, but we don't care + */ + chunk = min_t(int, nbytes, prng_chunk_size); + + /* PRNG only likes multiples of 8 bytes */ + n = (chunk + 7) & -8; + + if (p->count > prng_entropy_limit) + prng_seed(8); + + /* if the CPU supports PRNG stckf is present too */ + asm volatile(".insn s,0xb27c0000,%0" + : "=m" (*((unsigned long long *)p->buf)) : : 
"cc"); + + /* + * Beside the STCKF the input for the TDES-EDE is the output + * of the last operation. We differ here from X9.17 since we + * only store one timestamp into the buffer. Padding the whole + * buffer with timestamps does not improve security, since + * successive stckf have nearly constant offsets. + * If an attacker knows the first timestamp it would be + * trivial to guess the additional values. One timestamp + * is therefore enough and still guarantees unique input values. + * + * Note: you can still get strict X9.17 conformity by setting + * prng_chunk_size to 8 bytes. + */ + tmp = crypt_s390_kmc(KMC_PRNG, parm_block, p->buf, p->buf, n); + BUG_ON((tmp < 0) || (tmp != n)); + + p->count += n; + + if (copy_to_user(ubuf, p->buf, chunk)) + return -EFAULT; + + nbytes -= chunk; + ret += chunk; + ubuf += chunk; + } + return ret; +} + +static const struct file_operations prng_fops = { + .owner = THIS_MODULE, + .open = &prng_open, + .release = NULL, + .read = &prng_read, + .llseek = noop_llseek, +}; + +static struct miscdevice prng_dev = { + .name = "prandom", + .minor = MISC_DYNAMIC_MINOR, + .fops = &prng_fops, +}; + +static int __init prng_init(void) +{ + int ret; + + /* check if the CPU has a PRNG */ + if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA)) + return -EOPNOTSUPP; + + if (prng_chunk_size < 8) + return -EINVAL; + + p = kmalloc(sizeof(struct s390_prng_data), GFP_KERNEL); + if (!p) + return -ENOMEM; + p->count = 0; + + p->buf = kmalloc(prng_chunk_size, GFP_KERNEL); + if (!p->buf) { + ret = -ENOMEM; + goto out_free; + } + + /* initialize the PRNG, add 128 bits of entropy */ + prng_seed(16); + + ret = misc_register(&prng_dev); + if (ret) + goto out_buf; + return 0; + +out_buf: + kfree(p->buf); +out_free: + kfree(p); + return ret; +} + +static void __exit prng_exit(void) +{ + /* wipe me */ + kzfree(p->buf); + kfree(p); + + misc_deregister(&prng_dev); +} + +module_init(prng_init); +module_exit(prng_exit); diff --git a/arch/s390/crypto/sha.h 
b/arch/s390/crypto/sha.h new file mode 100644 index 00000000..f4e9dc71 --- /dev/null +++ b/arch/s390/crypto/sha.h @@ -0,0 +1,37 @@ +/* + * Cryptographic API. + * + * s390 generic implementation of the SHA Secure Hash Algorithms. + * + * Copyright IBM Corp. 2007 + * Author(s): Jan Glauber (jang@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#ifndef _CRYPTO_ARCH_S390_SHA_H +#define _CRYPTO_ARCH_S390_SHA_H + +#include <linux/crypto.h> +#include <crypto/sha.h> + +/* must be big enough for the largest SHA variant */ +#define SHA_MAX_STATE_SIZE 16 +#define SHA_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE + +struct s390_sha_ctx { + u64 count; /* message length in bytes */ + u32 state[SHA_MAX_STATE_SIZE]; + u8 buf[2 * SHA_MAX_BLOCK_SIZE]; + int func; /* KIMD function to use */ +}; + +struct shash_desc; + +int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len); +int s390_sha_final(struct shash_desc *desc, u8 *out); + +#endif diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c new file mode 100644 index 00000000..e9868c6e --- /dev/null +++ b/arch/s390/crypto/sha1_s390.c @@ -0,0 +1,108 @@ +/* + * Cryptographic API. + * + * s390 implementation of the SHA1 Secure Hash Algorithm. + * + * Derived from cryptoapi implementation, adapted for in-place + * scatterlist interface. Originally based on the public domain + * implementation written by Steve Reid. + * + * s390 Version: + * Copyright IBM Corp. 2003,2007 + * Author(s): Thomas Spatzier + * Jan Glauber (jan.glauber@de.ibm.com) + * + * Derived from "crypto/sha1_generic.c" + * Copyright (c) Alan Smithee. 
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <crypto/sha.h> + +#include "crypt_s390.h" +#include "sha.h" + +static int sha1_init(struct shash_desc *desc) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA1_H0; + sctx->state[1] = SHA1_H1; + sctx->state[2] = SHA1_H2; + sctx->state[3] = SHA1_H3; + sctx->state[4] = SHA1_H4; + sctx->count = 0; + sctx->func = KIMD_SHA_1; + + return 0; +} + +static int sha1_export(struct shash_desc *desc, void *out) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha1_state *octx = out; + + octx->count = sctx->count; + memcpy(octx->state, sctx->state, sizeof(octx->state)); + memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer)); + return 0; +} + +static int sha1_import(struct shash_desc *desc, const void *in) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + const struct sha1_state *ictx = in; + + sctx->count = ictx->count; + memcpy(sctx->state, ictx->state, sizeof(ictx->state)); + memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer)); + sctx->func = KIMD_SHA_1; + return 0; +} + +static struct shash_alg alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_init, + .update = s390_sha_update, + .final = s390_sha_final, + .export = sha1_export, + .import = sha1_import, + .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name= "sha1-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } 
+}; + +static int __init sha1_s390_init(void) +{ + if (!crypt_s390_func_available(KIMD_SHA_1, CRYPT_S390_MSA)) + return -EOPNOTSUPP; + return crypto_register_shash(&alg); +} + +static void __exit sha1_s390_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(sha1_s390_init); +module_exit(sha1_s390_fini); + +MODULE_ALIAS("sha1"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c new file mode 100644 index 00000000..0317a354 --- /dev/null +++ b/arch/s390/crypto/sha256_s390.c @@ -0,0 +1,149 @@ +/* + * Cryptographic API. + * + * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm. + * + * s390 Version: + * Copyright IBM Corp. 2005,2011 + * Author(s): Jan Glauber (jang@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + */ +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <crypto/sha.h> + +#include "crypt_s390.h" +#include "sha.h" + +static int sha256_init(struct shash_desc *desc) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; + sctx->func = KIMD_SHA_256; + + return 0; +} + +static int sha256_export(struct shash_desc *desc, void *out) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha256_state *octx = out; + + octx->count = sctx->count; + memcpy(octx->state, sctx->state, sizeof(octx->state)); + memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); + return 0; +} + +static int sha256_import(struct shash_desc *desc, const void *in) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + const struct sha256_state *ictx = in; + + sctx->count = ictx->count; + memcpy(sctx->state, ictx->state, sizeof(ictx->state)); + memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); + sctx->func = KIMD_SHA_256; + return 0; +} + +static struct shash_alg sha256_alg = { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_init, + .update = s390_sha_update, + .final = s390_sha_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name= "sha256-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int sha224_init(struct shash_desc *desc) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] 
= SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; + sctx->func = KIMD_SHA_256; + + return 0; +} + +static struct shash_alg sha224_alg = { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_init, + .update = s390_sha_update, + .final = s390_sha_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name= "sha224-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sha256_s390_init(void) +{ + int ret; + + if (!crypt_s390_func_available(KIMD_SHA_256, CRYPT_S390_MSA)) + return -EOPNOTSUPP; + ret = crypto_register_shash(&sha256_alg); + if (ret < 0) + goto out; + ret = crypto_register_shash(&sha224_alg); + if (ret < 0) + crypto_unregister_shash(&sha256_alg); +out: + return ret; +} + +static void __exit sha256_s390_fini(void) +{ + crypto_unregister_shash(&sha224_alg); + crypto_unregister_shash(&sha256_alg); +} + +module_init(sha256_s390_init); +module_exit(sha256_s390_fini); + +MODULE_ALIAS("sha256"); +MODULE_ALIAS("sha224"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c new file mode 100644 index 00000000..32a81383 --- /dev/null +++ b/arch/s390/crypto/sha512_s390.c @@ -0,0 +1,155 @@ +/* + * Cryptographic API. + * + * s390 implementation of the SHA512 and SHA384 Secure Hash Algorithm. + * + * Copyright IBM Corp. 2007 + * Author(s): Jan Glauber (jang@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + */ +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include "sha.h" +#include "crypt_s390.h" + +static int sha512_init(struct shash_desc *desc) +{ + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); + + *(__u64 *)&ctx->state[0] = 0x6a09e667f3bcc908ULL; + *(__u64 *)&ctx->state[2] = 0xbb67ae8584caa73bULL; + *(__u64 *)&ctx->state[4] = 0x3c6ef372fe94f82bULL; + *(__u64 *)&ctx->state[6] = 0xa54ff53a5f1d36f1ULL; + *(__u64 *)&ctx->state[8] = 0x510e527fade682d1ULL; + *(__u64 *)&ctx->state[10] = 0x9b05688c2b3e6c1fULL; + *(__u64 *)&ctx->state[12] = 0x1f83d9abfb41bd6bULL; + *(__u64 *)&ctx->state[14] = 0x5be0cd19137e2179ULL; + ctx->count = 0; + ctx->func = KIMD_SHA_512; + + return 0; +} + +static int sha512_export(struct shash_desc *desc, void *out) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha512_state *octx = out; + + octx->count[0] = sctx->count; + octx->count[1] = 0; + memcpy(octx->state, sctx->state, sizeof(octx->state)); + memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); + return 0; +} + +static int sha512_import(struct shash_desc *desc, const void *in) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + const struct sha512_state *ictx = in; + + if (unlikely(ictx->count[1])) + return -ERANGE; + sctx->count = ictx->count[0]; + + memcpy(sctx->state, ictx->state, sizeof(ictx->state)); + memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); + sctx->func = KIMD_SHA_512; + return 0; +} + +static struct shash_alg sha512_alg = { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_init, + .update = s390_sha_update, + .final = s390_sha_final, + .export = sha512_export, + .import = sha512_import, + .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name= "sha512-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + 
.cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +MODULE_ALIAS("sha512"); + +static int sha384_init(struct shash_desc *desc) +{ + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); + + *(__u64 *)&ctx->state[0] = 0xcbbb9d5dc1059ed8ULL; + *(__u64 *)&ctx->state[2] = 0x629a292a367cd507ULL; + *(__u64 *)&ctx->state[4] = 0x9159015a3070dd17ULL; + *(__u64 *)&ctx->state[6] = 0x152fecd8f70e5939ULL; + *(__u64 *)&ctx->state[8] = 0x67332667ffc00b31ULL; + *(__u64 *)&ctx->state[10] = 0x8eb44a8768581511ULL; + *(__u64 *)&ctx->state[12] = 0xdb0c2e0d64f98fa7ULL; + *(__u64 *)&ctx->state[14] = 0x47b5481dbefa4fa4ULL; + ctx->count = 0; + ctx->func = KIMD_SHA_512; + + return 0; +} + +static struct shash_alg sha384_alg = { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_init, + .update = s390_sha_update, + .final = s390_sha_final, + .export = sha512_export, + .import = sha512_import, + .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name= "sha384-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_sha_ctx), + .cra_module = THIS_MODULE, + } +}; + +MODULE_ALIAS("sha384"); + +static int __init init(void) +{ + int ret; + + if (!crypt_s390_func_available(KIMD_SHA_512, CRYPT_S390_MSA)) + return -EOPNOTSUPP; + if ((ret = crypto_register_shash(&sha512_alg)) < 0) + goto out; + if ((ret = crypto_register_shash(&sha384_alg)) < 0) + crypto_unregister_shash(&sha512_alg); +out: + return ret; +} + +static void __exit fini(void) +{ + crypto_unregister_shash(&sha512_alg); + crypto_unregister_shash(&sha384_alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA512 and SHA-384 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c new file mode 100644 index 00000000..bd37d09b --- /dev/null +++ 
b/arch/s390/crypto/sha_common.c @@ -0,0 +1,103 @@ +/* + * Cryptographic API. + * + * s390 generic implementation of the SHA Secure Hash Algorithms. + * + * Copyright IBM Corp. 2007 + * Author(s): Jan Glauber (jang@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/module.h> +#include "sha.h" +#include "crypt_s390.h" + +int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) +{ + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); + unsigned int bsize = crypto_shash_blocksize(desc->tfm); + unsigned int index; + int ret; + + /* how much is already in the buffer? */ + index = ctx->count & (bsize - 1); + ctx->count += len; + + if ((index + len) < bsize) + goto store; + + /* process one stored block */ + if (index) { + memcpy(ctx->buf + index, data, bsize - index); + ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, bsize); + BUG_ON(ret != bsize); + data += bsize - index; + len -= bsize - index; + index = 0; + } + + /* process as many blocks as possible */ + if (len >= bsize) { + ret = crypt_s390_kimd(ctx->func, ctx->state, data, + len & ~(bsize - 1)); + BUG_ON(ret != (len & ~(bsize - 1))); + data += ret; + len -= ret; + } +store: + if (len) + memcpy(ctx->buf + index , data, len); + + return 0; +} +EXPORT_SYMBOL_GPL(s390_sha_update); + +int s390_sha_final(struct shash_desc *desc, u8 *out) +{ + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); + unsigned int bsize = crypto_shash_blocksize(desc->tfm); + u64 bits; + unsigned int index, end, plen; + int ret; + + /* SHA-512 uses 128 bit padding length */ + plen = (bsize > SHA256_BLOCK_SIZE) ? 16 : 8; + + /* must perform manual padding */ + index = ctx->count & (bsize - 1); + end = (index < bsize - plen) ? 
bsize : (2 * bsize); + + /* start pad with 1 */ + ctx->buf[index] = 0x80; + index++; + + /* pad with zeros */ + memset(ctx->buf + index, 0x00, end - index - 8); + + /* + * Append message length. Well, SHA-512 wants a 128 bit length value, + * nevertheless we use u64, should be enough for now... + */ + bits = ctx->count * 8; + memcpy(ctx->buf + end - 8, &bits, sizeof(bits)); + + ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, end); + BUG_ON(ret != end); + + /* copy digest to out */ + memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm)); + /* wipe context */ + memset(ctx, 0, sizeof *ctx); + + return 0; +} +EXPORT_SYMBOL_GPL(s390_sha_final); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("s390 SHA cipher common functions"); diff --git a/arch/s390/defconfig b/arch/s390/defconfig new file mode 100644 index 00000000..1957a9dd --- /dev/null +++ b/arch/s390/defconfig @@ -0,0 +1,175 @@ +CONFIG_EXPERIMENTAL=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_AUDIT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_CGROUP=y +CONFIG_NAMESPACES=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y +CONFIG_RD_XZ=y +CONFIG_RD_LZO=y +CONFIG_EXPERT=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y +CONFIG_DEFAULT_DEADLINE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_BINFMT_MISC=m +CONFIG_CMM=m +CONFIG_HZ_100=y +CONFIG_CRASH_DUMP=y +CONFIG_HIBERNATION=y +CONFIG_PACKET=y +CONFIG_UNIX=y 
+CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_INET_LRO is not set +CONFIG_IPV6=y +CONFIG_L2TP=m +CONFIG_L2TP_DEBUGFS=m +CONFIG_VLAN_8021Q=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_VIRTIO_BLK=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SCAN_ASYNC=y +CONFIG_ZFCP=y +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_EQUALIZER=m +CONFIG_TUN=m +CONFIG_VIRTIO_NET=y +CONFIG_RAW_DRIVER=m +CONFIG_VIRTIO_BALLOON=y +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_TIMER_STATS=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_LOCK_STAT=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_RCU_TRACE=y +CONFIG_KPROBES_SANITY_TEST=y +CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y +CONFIG_CPU_NOTIFIER_ERROR_INJECT=m +CONFIG_LATENCYTOP=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_BLK_DEV_IO_TRACE=y +# CONFIG_STRICT_DEVMEM is not set +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_AUTHENC=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_ECB=m 
+CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_DEFLATE=m +CONFIG_CRYPTO_ZLIB=m +CONFIG_CRYPTO_LZO=m +CONFIG_ZCRYPT=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRC7=m diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile new file mode 100644 index 00000000..2e671d50 --- /dev/null +++ b/arch/s390/hypfs/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the linux hypfs filesystem routines. +# + +obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o + +s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h new file mode 100644 index 00000000..d9df5a06 --- /dev/null +++ b/arch/s390/hypfs/hypfs.h @@ -0,0 +1,72 @@ +/* + * arch/s390/hypfs/hypfs.h + * Hypervisor filesystem for Linux on s390. + * + * Copyright (C) IBM Corp. 
2006 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#ifndef _HYPFS_H_ +#define _HYPFS_H_ + +#include <linux/fs.h> +#include <linux/types.h> +#include <linux/debugfs.h> +#include <linux/workqueue.h> +#include <linux/kref.h> + +#define REG_FILE_MODE 0440 +#define UPDATE_FILE_MODE 0220 +#define DIR_MODE 0550 + +extern struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent, + const char *name); + +extern struct dentry *hypfs_create_u64(struct super_block *sb, + struct dentry *dir, const char *name, + __u64 value); + +extern struct dentry *hypfs_create_str(struct super_block *sb, + struct dentry *dir, const char *name, + char *string); + +/* LPAR Hypervisor */ +extern int hypfs_diag_init(void); +extern void hypfs_diag_exit(void); +extern int hypfs_diag_create_files(struct super_block *sb, struct dentry *root); + +/* VM Hypervisor */ +extern int hypfs_vm_init(void); +extern void hypfs_vm_exit(void); +extern int hypfs_vm_create_files(struct super_block *sb, struct dentry *root); + +/* debugfs interface */ +struct hypfs_dbfs_file; + +struct hypfs_dbfs_data { + void *buf; + void *buf_free_ptr; + size_t size; + struct hypfs_dbfs_file *dbfs_file; + struct kref kref; +}; + +struct hypfs_dbfs_file { + const char *name; + int (*data_create)(void **data, void **data_free_ptr, + size_t *size); + void (*data_free)(const void *buf_free_ptr); + + /* Private data for hypfs_dbfs.c */ + struct hypfs_dbfs_data *data; + struct delayed_work data_free_work; + struct mutex lock; + struct dentry *dentry; +}; + +extern int hypfs_dbfs_init(void); +extern void hypfs_dbfs_exit(void); +extern int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df); +extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df); + +#endif /* _HYPFS_H_ */ diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c new file mode 100644 index 00000000..b478013b --- /dev/null +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -0,0 +1,116 @@ +/* + * Hypervisor filesystem for Linux on s390 
- debugfs interface + * + * Copyright (C) IBM Corp. 2010 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/slab.h> +#include "hypfs.h" + +static struct dentry *dbfs_dir; + +static struct hypfs_dbfs_data *hypfs_dbfs_data_alloc(struct hypfs_dbfs_file *f) +{ + struct hypfs_dbfs_data *data; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + kref_init(&data->kref); + data->dbfs_file = f; + return data; +} + +static void hypfs_dbfs_data_free(struct kref *kref) +{ + struct hypfs_dbfs_data *data; + + data = container_of(kref, struct hypfs_dbfs_data, kref); + data->dbfs_file->data_free(data->buf_free_ptr); + kfree(data); +} + +static void data_free_delayed(struct work_struct *work) +{ + struct hypfs_dbfs_data *data; + struct hypfs_dbfs_file *df; + + df = container_of(work, struct hypfs_dbfs_file, data_free_work.work); + mutex_lock(&df->lock); + data = df->data; + df->data = NULL; + mutex_unlock(&df->lock); + kref_put(&data->kref, hypfs_dbfs_data_free); +} + +static ssize_t dbfs_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct hypfs_dbfs_data *data; + struct hypfs_dbfs_file *df; + ssize_t rc; + + if (*ppos != 0) + return 0; + + df = file->f_path.dentry->d_inode->i_private; + mutex_lock(&df->lock); + if (!df->data) { + data = hypfs_dbfs_data_alloc(df); + if (!data) { + mutex_unlock(&df->lock); + return -ENOMEM; + } + rc = df->data_create(&data->buf, &data->buf_free_ptr, + &data->size); + if (rc) { + mutex_unlock(&df->lock); + kfree(data); + return rc; + } + df->data = data; + schedule_delayed_work(&df->data_free_work, HZ); + } + data = df->data; + kref_get(&data->kref); + mutex_unlock(&df->lock); + + rc = simple_read_from_buffer(buf, size, ppos, data->buf, data->size); + kref_put(&data->kref, hypfs_dbfs_data_free); + return rc; +} + +static const struct file_operations dbfs_ops = { + .read = dbfs_read, + .llseek = no_llseek, +}; + +int hypfs_dbfs_create_file(struct hypfs_dbfs_file 
*df) +{ + df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, + &dbfs_ops); + if (IS_ERR(df->dentry)) + return PTR_ERR(df->dentry); + mutex_init(&df->lock); + INIT_DELAYED_WORK(&df->data_free_work, data_free_delayed); + return 0; +} + +void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) +{ + debugfs_remove(df->dentry); +} + +int hypfs_dbfs_init(void) +{ + dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); + if (IS_ERR(dbfs_dir)) + return PTR_ERR(dbfs_dir); + return 0; +} + +void hypfs_dbfs_exit(void) +{ + debugfs_remove(dbfs_dir); +} diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c new file mode 100644 index 00000000..74c8f5e7 --- /dev/null +++ b/arch/s390/hypfs/hypfs_diag.c @@ -0,0 +1,778 @@ +/* + * arch/s390/hypfs/hypfs_diag.c + * Hypervisor filesystem for Linux on s390. Diag 204 and 224 + * implementation. + * + * Copyright IBM Corp. 2006, 2008 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#define KMSG_COMPONENT "hypfs" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <asm/ebcdic.h> +#include "hypfs.h" + +#define LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */ +#define CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */ +#define TMP_SIZE 64 /* size of temporary buffers */ + +#define DBFS_D204_HDR_VERSION 0 + +/* diag 204 subcodes */ +enum diag204_sc { + SUBC_STIB4 = 4, + SUBC_RSI = 5, + SUBC_STIB6 = 6, + SUBC_STIB7 = 7 +}; + +/* The two available diag 204 data formats */ +enum diag204_format { + INFO_SIMPLE = 0, + INFO_EXT = 0x00010000 +}; + +/* bit is set in flags, when physical cpu info is included in diag 204 data */ +#define LPAR_PHYS_FLG 0x80 + +static char *diag224_cpu_names; /* diag 224 name table */ +static enum diag204_sc diag204_store_sc; /* used subcode for store */ +static enum diag204_format diag204_info_type; /* used 
diag 204 data format */ + +static void *diag204_buf; /* 4K aligned buffer for diag204 data */ +static void *diag204_buf_vmalloc; /* vmalloc pointer for diag204 data */ +static int diag204_buf_pages; /* number of pages for diag204 data */ + +static struct dentry *dbfs_d204_file; + +/* + * DIAG 204 data structures and member access functions. + * + * Since we have two different diag 204 data formats for old and new s390 + * machines, we do not access the structs directly, but use getter functions for + * each struct member instead. This should make the code more readable. + */ + +/* Time information block */ + +struct info_blk_hdr { + __u8 npar; + __u8 flags; + __u16 tslice; + __u16 phys_cpus; + __u16 this_part; + __u64 curtod; +} __attribute__ ((packed)); + +struct x_info_blk_hdr { + __u8 npar; + __u8 flags; + __u16 tslice; + __u16 phys_cpus; + __u16 this_part; + __u64 curtod1; + __u64 curtod2; + char reserved[40]; +} __attribute__ ((packed)); + +static inline int info_blk_hdr__size(enum diag204_format type) +{ + if (type == INFO_SIMPLE) + return sizeof(struct info_blk_hdr); + else /* INFO_EXT */ + return sizeof(struct x_info_blk_hdr); +} + +static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct info_blk_hdr *)hdr)->npar; + else /* INFO_EXT */ + return ((struct x_info_blk_hdr *)hdr)->npar; +} + +static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct info_blk_hdr *)hdr)->flags; + else /* INFO_EXT */ + return ((struct x_info_blk_hdr *)hdr)->flags; +} + +static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct info_blk_hdr *)hdr)->phys_cpus; + else /* INFO_EXT */ + return ((struct x_info_blk_hdr *)hdr)->phys_cpus; +} + +/* Partition header */ + +struct part_hdr { + __u8 pn; + __u8 cpus; + char reserved[6]; + char part_name[LPAR_NAME_LEN]; +} __attribute__ ((packed)); 
+ +struct x_part_hdr { + __u8 pn; + __u8 cpus; + __u8 rcpus; + __u8 pflag; + __u32 mlu; + char part_name[LPAR_NAME_LEN]; + char lpc_name[8]; + char os_name[8]; + __u64 online_cs; + __u64 online_es; + __u8 upid; + char reserved1[3]; + __u32 group_mlu; + char group_name[8]; + char reserved2[32]; +} __attribute__ ((packed)); + +static inline int part_hdr__size(enum diag204_format type) +{ + if (type == INFO_SIMPLE) + return sizeof(struct part_hdr); + else /* INFO_EXT */ + return sizeof(struct x_part_hdr); +} + +static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct part_hdr *)hdr)->cpus; + else /* INFO_EXT */ + return ((struct x_part_hdr *)hdr)->rcpus; +} + +static inline void part_hdr__part_name(enum diag204_format type, void *hdr, + char *name) +{ + if (type == INFO_SIMPLE) + memcpy(name, ((struct part_hdr *)hdr)->part_name, + LPAR_NAME_LEN); + else /* INFO_EXT */ + memcpy(name, ((struct x_part_hdr *)hdr)->part_name, + LPAR_NAME_LEN); + EBCASC(name, LPAR_NAME_LEN); + name[LPAR_NAME_LEN] = 0; + strim(name); +} + +struct cpu_info { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + __u8 cflag; + __u16 weight; + __u64 acc_time; + __u64 lp_time; +} __attribute__ ((packed)); + +struct x_cpu_info { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + __u8 cflag; + __u16 weight; + __u64 acc_time; + __u64 lp_time; + __u16 min_weight; + __u16 cur_weight; + __u16 max_weight; + char reseved2[2]; + __u64 online_time; + __u64 wait_time; + __u32 pma_weight; + __u32 polar_weight; + char reserved3[40]; +} __attribute__ ((packed)); + +/* CPU info block */ + +static inline int cpu_info__size(enum diag204_format type) +{ + if (type == INFO_SIMPLE) + return sizeof(struct cpu_info); + else /* INFO_EXT */ + return sizeof(struct x_cpu_info); +} + +static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct cpu_info *)hdr)->ctidx; + else /* INFO_EXT */ + return 
((struct x_cpu_info *)hdr)->ctidx; +} + +static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct cpu_info *)hdr)->cpu_addr; + else /* INFO_EXT */ + return ((struct x_cpu_info *)hdr)->cpu_addr; +} + +static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct cpu_info *)hdr)->acc_time; + else /* INFO_EXT */ + return ((struct x_cpu_info *)hdr)->acc_time; +} + +static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct cpu_info *)hdr)->lp_time; + else /* INFO_EXT */ + return ((struct x_cpu_info *)hdr)->lp_time; +} + +static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return 0; /* online_time not available in simple info */ + else /* INFO_EXT */ + return ((struct x_cpu_info *)hdr)->online_time; +} + +/* Physical header */ + +struct phys_hdr { + char reserved1[1]; + __u8 cpus; + char reserved2[6]; + char mgm_name[8]; +} __attribute__ ((packed)); + +struct x_phys_hdr { + char reserved1[1]; + __u8 cpus; + char reserved2[6]; + char mgm_name[8]; + char reserved3[80]; +} __attribute__ ((packed)); + +static inline int phys_hdr__size(enum diag204_format type) +{ + if (type == INFO_SIMPLE) + return sizeof(struct phys_hdr); + else /* INFO_EXT */ + return sizeof(struct x_phys_hdr); +} + +static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct phys_hdr *)hdr)->cpus; + else /* INFO_EXT */ + return ((struct x_phys_hdr *)hdr)->cpus; +} + +/* Physical CPU info block */ + +struct phys_cpu { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + char reserved2[3]; + __u64 mgm_time; + char reserved3[8]; +} __attribute__ ((packed)); + +struct x_phys_cpu { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + char reserved2[3]; + __u64 mgm_time; + char reserved3[80]; +} 
__attribute__ ((packed)); + +static inline int phys_cpu__size(enum diag204_format type) +{ + if (type == INFO_SIMPLE) + return sizeof(struct phys_cpu); + else /* INFO_EXT */ + return sizeof(struct x_phys_cpu); +} + +static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct phys_cpu *)hdr)->cpu_addr; + else /* INFO_EXT */ + return ((struct x_phys_cpu *)hdr)->cpu_addr; +} + +static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct phys_cpu *)hdr)->mgm_time; + else /* INFO_EXT */ + return ((struct x_phys_cpu *)hdr)->mgm_time; +} + +static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct phys_cpu *)hdr)->ctidx; + else /* INFO_EXT */ + return ((struct x_phys_cpu *)hdr)->ctidx; +} + +/* Diagnose 204 functions */ + +static int diag204(unsigned long subcode, unsigned long size, void *addr) +{ + register unsigned long _subcode asm("0") = subcode; + register unsigned long _size asm("1") = size; + + asm volatile( + " diag %2,%0,0x204\n" + "0:\n" + EX_TABLE(0b,0b) + : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); + if (_subcode) + return -1; + return _size; +} + +/* + * For the old diag subcode 4 with simple data format we have to use real + * memory. If we use subcode 6 or 7 with extended data format, we can (and + * should) use vmalloc, since we need a lot of memory in that case. Currently + * up to 93 pages! + */ + +static void diag204_free_buffer(void) +{ + if (!diag204_buf) + return; + if (diag204_buf_vmalloc) { + vfree(diag204_buf_vmalloc); + diag204_buf_vmalloc = NULL; + } else { + free_pages((unsigned long) diag204_buf, 0); + } + diag204_buf = NULL; +} + +static void *page_align_ptr(void *ptr) +{ + return (void *) PAGE_ALIGN((unsigned long) ptr); +} + +static void *diag204_alloc_vbuf(int pages) +{ + /* The buffer has to be page aligned! 
*/ + diag204_buf_vmalloc = vmalloc(PAGE_SIZE * (pages + 1)); + if (!diag204_buf_vmalloc) + return ERR_PTR(-ENOMEM); + diag204_buf = page_align_ptr(diag204_buf_vmalloc); + diag204_buf_pages = pages; + return diag204_buf; +} + +static void *diag204_alloc_rbuf(void) +{ + diag204_buf = (void*)__get_free_pages(GFP_KERNEL,0); + if (!diag204_buf) + return ERR_PTR(-ENOMEM); + diag204_buf_pages = 1; + return diag204_buf; +} + +static void *diag204_get_buffer(enum diag204_format fmt, int *pages) +{ + if (diag204_buf) { + *pages = diag204_buf_pages; + return diag204_buf; + } + if (fmt == INFO_SIMPLE) { + *pages = 1; + return diag204_alloc_rbuf(); + } else {/* INFO_EXT */ + *pages = diag204((unsigned long)SUBC_RSI | + (unsigned long)INFO_EXT, 0, NULL); + if (*pages <= 0) + return ERR_PTR(-ENOSYS); + else + return diag204_alloc_vbuf(*pages); + } +} + +/* + * diag204_probe() has to find out, which type of diagnose 204 implementation + * we have on our machine. Currently there are three possible scanarios: + * - subcode 4 + simple data format (only one page) + * - subcode 4-6 + extended data format + * - subcode 4-7 + extended data format + * + * Subcode 5 is used to retrieve the size of the data, provided by subcodes + * 6 and 7. Subcode 7 basically has the same function as subcode 6. In addition + * to subcode 6 it provides also information about secondary cpus. + * In order to get as much information as possible, we first try + * subcode 7, then 6 and if both fail, we use subcode 4. 
+ */ + +static int diag204_probe(void) +{ + void *buf; + int pages, rc; + + buf = diag204_get_buffer(INFO_EXT, &pages); + if (!IS_ERR(buf)) { + if (diag204((unsigned long)SUBC_STIB7 | + (unsigned long)INFO_EXT, pages, buf) >= 0) { + diag204_store_sc = SUBC_STIB7; + diag204_info_type = INFO_EXT; + goto out; + } + if (diag204((unsigned long)SUBC_STIB6 | + (unsigned long)INFO_EXT, pages, buf) >= 0) { + diag204_store_sc = SUBC_STIB6; + diag204_info_type = INFO_EXT; + goto out; + } + diag204_free_buffer(); + } + + /* subcodes 6 and 7 failed, now try subcode 4 */ + + buf = diag204_get_buffer(INFO_SIMPLE, &pages); + if (IS_ERR(buf)) { + rc = PTR_ERR(buf); + goto fail_alloc; + } + if (diag204((unsigned long)SUBC_STIB4 | + (unsigned long)INFO_SIMPLE, pages, buf) >= 0) { + diag204_store_sc = SUBC_STIB4; + diag204_info_type = INFO_SIMPLE; + goto out; + } else { + rc = -ENOSYS; + goto fail_store; + } +out: + rc = 0; +fail_store: + diag204_free_buffer(); +fail_alloc: + return rc; +} + +static int diag204_do_store(void *buf, int pages) +{ + int rc; + + rc = diag204((unsigned long) diag204_store_sc | + (unsigned long) diag204_info_type, pages, buf); + return rc < 0 ? 
-ENOSYS : 0; +} + +static void *diag204_store(void) +{ + void *buf; + int pages, rc; + + buf = diag204_get_buffer(diag204_info_type, &pages); + if (IS_ERR(buf)) + goto out; + rc = diag204_do_store(buf, pages); + if (rc) + return ERR_PTR(rc); +out: + return buf; +} + +/* Diagnose 224 functions */ + +static int diag224(void *ptr) +{ + int rc = -EOPNOTSUPP; + + asm volatile( + " diag %1,%2,0x224\n" + "0: lhi %0,0x0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) :"d" (0), "d" (ptr) : "memory"); + return rc; +} + +static int diag224_get_name_table(void) +{ + /* memory must be below 2GB */ + diag224_cpu_names = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA); + if (!diag224_cpu_names) + return -ENOMEM; + if (diag224(diag224_cpu_names)) { + kfree(diag224_cpu_names); + return -EOPNOTSUPP; + } + EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16); + return 0; +} + +static void diag224_delete_name_table(void) +{ + kfree(diag224_cpu_names); +} + +static int diag224_idx2name(int index, char *name) +{ + memcpy(name, diag224_cpu_names + ((index + 1) * CPU_NAME_LEN), + CPU_NAME_LEN); + name[CPU_NAME_LEN] = 0; + strim(name); + return 0; +} + +struct dbfs_d204_hdr { + u64 len; /* Length of d204 buffer without header */ + u16 version; /* Version of header */ + u8 sc; /* Used subcode */ + char reserved[53]; +} __attribute__ ((packed)); + +struct dbfs_d204 { + struct dbfs_d204_hdr hdr; /* 64 byte header */ + char buf[]; /* d204 buffer */ +} __attribute__ ((packed)); + +static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size) +{ + struct dbfs_d204 *d204; + int rc, buf_size; + void *base; + + buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr); + base = vzalloc(buf_size); + if (!base) + return -ENOMEM; + d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr); + rc = diag204_do_store(d204->buf, diag204_buf_pages); + if (rc) { + vfree(base); + return rc; + } + d204->hdr.version = DBFS_D204_HDR_VERSION; + d204->hdr.len = PAGE_SIZE * 
diag204_buf_pages; + d204->hdr.sc = diag204_store_sc; + *data = d204; + *data_free_ptr = base; + *size = d204->hdr.len + sizeof(struct dbfs_d204_hdr); + return 0; +} + +static struct hypfs_dbfs_file dbfs_file_d204 = { + .name = "diag_204", + .data_create = dbfs_d204_create, + .data_free = vfree, +}; + +__init int hypfs_diag_init(void) +{ + int rc; + + if (diag204_probe()) { + pr_err("The hardware system does not support hypfs\n"); + return -ENODATA; + } + if (diag204_info_type == INFO_EXT) { + rc = hypfs_dbfs_create_file(&dbfs_file_d204); + if (rc) + return rc; + } + if (MACHINE_IS_LPAR) { + rc = diag224_get_name_table(); + if (rc) { + pr_err("The hardware system does not provide all " + "functions required by hypfs\n"); + debugfs_remove(dbfs_d204_file); + return rc; + } + } + return 0; +} + +void hypfs_diag_exit(void) +{ + debugfs_remove(dbfs_d204_file); + diag224_delete_name_table(); + diag204_free_buffer(); + hypfs_dbfs_remove_file(&dbfs_file_d204); +} + +/* + * Functions to create the directory structure + * ******************************************* + */ + +static int hypfs_create_cpu_files(struct super_block *sb, + struct dentry *cpus_dir, void *cpu_info) +{ + struct dentry *cpu_dir; + char buffer[TMP_SIZE]; + void *rc; + + snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type, + cpu_info)); + cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer); + rc = hypfs_create_u64(sb, cpu_dir, "mgmtime", + cpu_info__acc_time(diag204_info_type, cpu_info) - + cpu_info__lp_time(diag204_info_type, cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + rc = hypfs_create_u64(sb, cpu_dir, "cputime", + cpu_info__lp_time(diag204_info_type, cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + if (diag204_info_type == INFO_EXT) { + rc = hypfs_create_u64(sb, cpu_dir, "onlinetime", + cpu_info__online_time(diag204_info_type, + cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + } + diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); + rc = 
hypfs_create_str(sb, cpu_dir, "type", buffer); + if (IS_ERR(rc)) + return PTR_ERR(rc); + return 0; +} + +static void *hypfs_create_lpar_files(struct super_block *sb, + struct dentry *systems_dir, void *part_hdr) +{ + struct dentry *cpus_dir; + struct dentry *lpar_dir; + char lpar_name[LPAR_NAME_LEN + 1]; + void *cpu_info; + int i; + + part_hdr__part_name(diag204_info_type, part_hdr, lpar_name); + lpar_name[LPAR_NAME_LEN] = 0; + lpar_dir = hypfs_mkdir(sb, systems_dir, lpar_name); + if (IS_ERR(lpar_dir)) + return lpar_dir; + cpus_dir = hypfs_mkdir(sb, lpar_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return cpus_dir; + cpu_info = part_hdr + part_hdr__size(diag204_info_type); + for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) { + int rc; + rc = hypfs_create_cpu_files(sb, cpus_dir, cpu_info); + if (rc) + return ERR_PTR(rc); + cpu_info += cpu_info__size(diag204_info_type); + } + return cpu_info; +} + +static int hypfs_create_phys_cpu_files(struct super_block *sb, + struct dentry *cpus_dir, void *cpu_info) +{ + struct dentry *cpu_dir; + char buffer[TMP_SIZE]; + void *rc; + + snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type, + cpu_info)); + cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer); + if (IS_ERR(cpu_dir)) + return PTR_ERR(cpu_dir); + rc = hypfs_create_u64(sb, cpu_dir, "mgmtime", + phys_cpu__mgm_time(diag204_info_type, cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); + rc = hypfs_create_str(sb, cpu_dir, "type", buffer); + if (IS_ERR(rc)) + return PTR_ERR(rc); + return 0; +} + +static void *hypfs_create_phys_files(struct super_block *sb, + struct dentry *parent_dir, void *phys_hdr) +{ + int i; + void *cpu_info; + struct dentry *cpus_dir; + + cpus_dir = hypfs_mkdir(sb, parent_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return cpus_dir; + cpu_info = phys_hdr + phys_hdr__size(diag204_info_type); + for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) { + int 
rc; + rc = hypfs_create_phys_cpu_files(sb, cpus_dir, cpu_info); + if (rc) + return ERR_PTR(rc); + cpu_info += phys_cpu__size(diag204_info_type); + } + return cpu_info; +} + +int hypfs_diag_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *systems_dir, *hyp_dir; + void *time_hdr, *part_hdr; + int i, rc; + void *buffer, *ptr; + + buffer = diag204_store(); + if (IS_ERR(buffer)) + return PTR_ERR(buffer); + + systems_dir = hypfs_mkdir(sb, root, "systems"); + if (IS_ERR(systems_dir)) { + rc = PTR_ERR(systems_dir); + goto err_out; + } + time_hdr = (struct x_info_blk_hdr *)buffer; + part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type); + for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) { + part_hdr = hypfs_create_lpar_files(sb, systems_dir, part_hdr); + if (IS_ERR(part_hdr)) { + rc = PTR_ERR(part_hdr); + goto err_out; + } + } + if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) { + ptr = hypfs_create_phys_files(sb, root, part_hdr); + if (IS_ERR(ptr)) { + rc = PTR_ERR(ptr); + goto err_out; + } + } + hyp_dir = hypfs_mkdir(sb, root, "hyp"); + if (IS_ERR(hyp_dir)) { + rc = PTR_ERR(hyp_dir); + goto err_out; + } + ptr = hypfs_create_str(sb, hyp_dir, "type", "LPAR Hypervisor"); + if (IS_ERR(ptr)) { + rc = PTR_ERR(ptr); + goto err_out; + } + rc = 0; + +err_out: + return rc; +} diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c new file mode 100644 index 00000000..e5479600 --- /dev/null +++ b/arch/s390/hypfs/hypfs_vm.c @@ -0,0 +1,283 @@ +/* + * Hypervisor filesystem for Linux on s390. z/VM implementation. + * + * Copyright (C) IBM Corp. 
2006 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/vmalloc.h> +#include <asm/ebcdic.h> +#include <asm/timex.h> +#include "hypfs.h" + +#define NAME_LEN 8 +#define DBFS_D2FC_HDR_VERSION 0 + +static char local_guest[] = " "; +static char all_guests[] = "* "; +static char *guest_query; + +struct diag2fc_data { + __u32 version; + __u32 flags; + __u64 used_cpu; + __u64 el_time; + __u64 mem_min_kb; + __u64 mem_max_kb; + __u64 mem_share_kb; + __u64 mem_used_kb; + __u32 pcpus; + __u32 lcpus; + __u32 vcpus; + __u32 cpu_min; + __u32 cpu_max; + __u32 cpu_shares; + __u32 cpu_use_samp; + __u32 cpu_delay_samp; + __u32 page_wait_samp; + __u32 idle_samp; + __u32 other_samp; + __u32 total_samp; + char guest_name[NAME_LEN]; +}; + +struct diag2fc_parm_list { + char userid[NAME_LEN]; + char aci_grp[NAME_LEN]; + __u64 addr; + __u32 size; + __u32 fmt; +}; + +static int diag2fc(int size, char* query, void *addr) +{ + unsigned long residual_cnt; + unsigned long rc; + struct diag2fc_parm_list parm_list; + + memcpy(parm_list.userid, query, NAME_LEN); + ASCEBC(parm_list.userid, NAME_LEN); + parm_list.addr = (unsigned long) addr ; + parm_list.size = size; + parm_list.fmt = 0x02; + memset(parm_list.aci_grp, 0x40, NAME_LEN); + rc = -1; + + asm volatile( + " diag %0,%1,0x2fc\n" + "0:\n" + EX_TABLE(0b,0b) + : "=d" (residual_cnt), "+d" (rc) : "0" (&parm_list) : "memory"); + + if ((rc != 0 ) && (rc != -2)) + return rc; + else + return -residual_cnt; +} + +/* + * Allocate buffer for "query" and store diag 2fc at "offset" + */ +static void *diag2fc_store(char *query, unsigned int *count, int offset) +{ + void *data; + int size; + + do { + size = diag2fc(0, query, NULL); + if (size < 0) + return ERR_PTR(-EACCES); + data = vmalloc(size + offset); + if (!data) + return ERR_PTR(-ENOMEM); + if (diag2fc(size, query, data + offset) == 0) + break; + vfree(data); + } while (1); + *count = (size / 
sizeof(struct diag2fc_data)); + + return data; +} + +static void diag2fc_free(const void *data) +{ + vfree(data); +} + +#define ATTRIBUTE(sb, dir, name, member) \ +do { \ + void *rc; \ + rc = hypfs_create_u64(sb, dir, name, member); \ + if (IS_ERR(rc)) \ + return PTR_ERR(rc); \ +} while(0) + +static int hpyfs_vm_create_guest(struct super_block *sb, + struct dentry *systems_dir, + struct diag2fc_data *data) +{ + char guest_name[NAME_LEN + 1] = {}; + struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir; + int dedicated_flag, capped_value; + + capped_value = (data->flags & 0x00000006) >> 1; + dedicated_flag = (data->flags & 0x00000008) >> 3; + + /* guest dir */ + memcpy(guest_name, data->guest_name, NAME_LEN); + EBCASC(guest_name, NAME_LEN); + strim(guest_name); + guest_dir = hypfs_mkdir(sb, systems_dir, guest_name); + if (IS_ERR(guest_dir)) + return PTR_ERR(guest_dir); + ATTRIBUTE(sb, guest_dir, "onlinetime_us", data->el_time); + + /* logical cpu information */ + cpus_dir = hypfs_mkdir(sb, guest_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return PTR_ERR(cpus_dir); + ATTRIBUTE(sb, cpus_dir, "cputime_us", data->used_cpu); + ATTRIBUTE(sb, cpus_dir, "capped", capped_value); + ATTRIBUTE(sb, cpus_dir, "dedicated", dedicated_flag); + ATTRIBUTE(sb, cpus_dir, "count", data->vcpus); + ATTRIBUTE(sb, cpus_dir, "weight_min", data->cpu_min); + ATTRIBUTE(sb, cpus_dir, "weight_max", data->cpu_max); + ATTRIBUTE(sb, cpus_dir, "weight_cur", data->cpu_shares); + + /* memory information */ + mem_dir = hypfs_mkdir(sb, guest_dir, "mem"); + if (IS_ERR(mem_dir)) + return PTR_ERR(mem_dir); + ATTRIBUTE(sb, mem_dir, "min_KiB", data->mem_min_kb); + ATTRIBUTE(sb, mem_dir, "max_KiB", data->mem_max_kb); + ATTRIBUTE(sb, mem_dir, "used_KiB", data->mem_used_kb); + ATTRIBUTE(sb, mem_dir, "share_KiB", data->mem_share_kb); + + /* samples */ + samples_dir = hypfs_mkdir(sb, guest_dir, "samples"); + if (IS_ERR(samples_dir)) + return PTR_ERR(samples_dir); + ATTRIBUTE(sb, samples_dir, "cpu_using", 
data->cpu_use_samp); + ATTRIBUTE(sb, samples_dir, "cpu_delay", data->cpu_delay_samp); + ATTRIBUTE(sb, samples_dir, "mem_delay", data->page_wait_samp); + ATTRIBUTE(sb, samples_dir, "idle", data->idle_samp); + ATTRIBUTE(sb, samples_dir, "other", data->other_samp); + ATTRIBUTE(sb, samples_dir, "total", data->total_samp); + return 0; +} + +int hypfs_vm_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir, *file; + struct diag2fc_data *data; + unsigned int count = 0; + int rc, i; + + data = diag2fc_store(guest_query, &count, 0); + if (IS_ERR(data)) + return PTR_ERR(data); + + /* Hpervisor Info */ + dir = hypfs_mkdir(sb, root, "hyp"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + file = hypfs_create_str(sb, dir, "type", "z/VM Hypervisor"); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + goto failed; + } + + /* physical cpus */ + dir = hypfs_mkdir(sb, root, "cpus"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + file = hypfs_create_u64(sb, dir, "count", data->lcpus); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + goto failed; + } + + /* guests */ + dir = hypfs_mkdir(sb, root, "systems"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + + for (i = 0; i < count; i++) { + rc = hpyfs_vm_create_guest(sb, dir, &(data[i])); + if (rc) + goto failed; + } + diag2fc_free(data); + return 0; + +failed: + diag2fc_free(data); + return rc; +} + +struct dbfs_d2fc_hdr { + u64 len; /* Length of d2fc buffer without header */ + u16 version; /* Version of header */ + char tod_ext[16]; /* TOD clock for d2fc */ + u64 count; /* Number of VM guests in d2fc buffer */ + char reserved[30]; +} __attribute__ ((packed)); + +struct dbfs_d2fc { + struct dbfs_d2fc_hdr hdr; /* 64 byte header */ + char buf[]; /* d2fc buffer */ +} __attribute__ ((packed)); + +static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size) +{ + struct dbfs_d2fc *d2fc; + unsigned int count; + + d2fc = diag2fc_store(guest_query, &count, 
sizeof(d2fc->hdr)); + if (IS_ERR(d2fc)) + return PTR_ERR(d2fc); + get_clock_ext(d2fc->hdr.tod_ext); + d2fc->hdr.len = count * sizeof(struct diag2fc_data); + d2fc->hdr.version = DBFS_D2FC_HDR_VERSION; + d2fc->hdr.count = count; + memset(&d2fc->hdr.reserved, 0, sizeof(d2fc->hdr.reserved)); + *data = d2fc; + *data_free_ptr = d2fc; + *size = d2fc->hdr.len + sizeof(struct dbfs_d2fc_hdr); + return 0; +} + +static struct hypfs_dbfs_file dbfs_file_2fc = { + .name = "diag_2fc", + .data_create = dbfs_diag2fc_create, + .data_free = diag2fc_free, +}; + +int hypfs_vm_init(void) +{ + if (!MACHINE_IS_VM) + return 0; + if (diag2fc(0, all_guests, NULL) > 0) + guest_query = all_guests; + else if (diag2fc(0, local_guest, NULL) > 0) + guest_query = local_guest; + else + return -EACCES; + return hypfs_dbfs_create_file(&dbfs_file_2fc); +} + +void hypfs_vm_exit(void) +{ + if (!MACHINE_IS_VM) + return; + hypfs_dbfs_remove_file(&dbfs_file_2fc); +} diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c new file mode 100644 index 00000000..6a2cb560 --- /dev/null +++ b/arch/s390/hypfs/inode.c @@ -0,0 +1,515 @@ +/* + * arch/s390/hypfs/inode.c + * Hypervisor filesystem for Linux on s390. + * + * Copyright IBM Corp. 
2006, 2008 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#define KMSG_COMPONENT "hypfs" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/vfs.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/time.h> +#include <linux/parser.h> +#include <linux/sysfs.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/mount.h> +#include <asm/ebcdic.h> +#include "hypfs.h" + +#define HYPFS_MAGIC 0x687970 /* ASCII 'hyp' */ +#define TMP_SIZE 64 /* size of temporary buffers */ + +static struct dentry *hypfs_create_update_file(struct super_block *sb, + struct dentry *dir); + +struct hypfs_sb_info { + uid_t uid; /* uid used for files and dirs */ + gid_t gid; /* gid used for files and dirs */ + struct dentry *update_file; /* file to trigger update */ + time_t last_update; /* last update time in secs since 1970 */ + struct mutex lock; /* lock to protect update process */ +}; + +static const struct file_operations hypfs_file_ops; +static struct file_system_type hypfs_type; +static const struct super_operations hypfs_s_ops; + +/* start of list of all dentries, which have to be deleted on update */ +static struct dentry *hypfs_last_dentry; + +static void hypfs_update_update(struct super_block *sb) +{ + struct hypfs_sb_info *sb_info = sb->s_fs_info; + struct inode *inode = sb_info->update_file->d_inode; + + sb_info->last_update = get_seconds(); + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +} + +/* directory tree removal functions */ + +static void hypfs_add_dentry(struct dentry *dentry) +{ + dentry->d_fsdata = hypfs_last_dentry; + hypfs_last_dentry = dentry; +} + +static inline int hypfs_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + +static void hypfs_remove(struct dentry *dentry) +{ + struct dentry *parent; + + parent = dentry->d_parent; + if (!parent || 
!parent->d_inode) + return; + mutex_lock(&parent->d_inode->i_mutex); + if (hypfs_positive(dentry)) { + if (S_ISDIR(dentry->d_inode->i_mode)) + simple_rmdir(parent->d_inode, dentry); + else + simple_unlink(parent->d_inode, dentry); + } + d_delete(dentry); + dput(dentry); + mutex_unlock(&parent->d_inode->i_mutex); +} + +static void hypfs_delete_tree(struct dentry *root) +{ + while (hypfs_last_dentry) { + struct dentry *next_dentry; + next_dentry = hypfs_last_dentry->d_fsdata; + hypfs_remove(hypfs_last_dentry); + hypfs_last_dentry = next_dentry; + } +} + +static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode) +{ + struct inode *ret = new_inode(sb); + + if (ret) { + struct hypfs_sb_info *hypfs_info = sb->s_fs_info; + ret->i_mode = mode; + ret->i_uid = hypfs_info->uid; + ret->i_gid = hypfs_info->gid; + ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; + if (S_ISDIR(mode)) + set_nlink(ret, 2); + } + return ret; +} + +static void hypfs_evict_inode(struct inode *inode) +{ + end_writeback(inode); + kfree(inode->i_private); +} + +static int hypfs_open(struct inode *inode, struct file *filp) +{ + char *data = filp->f_path.dentry->d_inode->i_private; + struct hypfs_sb_info *fs_info; + + if (filp->f_mode & FMODE_WRITE) { + if (!(inode->i_mode & S_IWUGO)) + return -EACCES; + } + if (filp->f_mode & FMODE_READ) { + if (!(inode->i_mode & S_IRUGO)) + return -EACCES; + } + + fs_info = inode->i_sb->s_fs_info; + if(data) { + mutex_lock(&fs_info->lock); + filp->private_data = kstrdup(data, GFP_KERNEL); + if (!filp->private_data) { + mutex_unlock(&fs_info->lock); + return -ENOMEM; + } + mutex_unlock(&fs_info->lock); + } + return nonseekable_open(inode, filp); +} + +static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t offset) +{ + char *data; + ssize_t ret; + struct file *filp = iocb->ki_filp; + /* XXX: temporary */ + char __user *buf = iov[0].iov_base; + size_t count = iov[0].iov_len; + + if (nr_segs != 
1) + return -EINVAL; + + data = filp->private_data; + ret = simple_read_from_buffer(buf, count, &offset, data, strlen(data)); + if (ret <= 0) + return ret; + + iocb->ki_pos += ret; + file_accessed(filp); + + return ret; +} +static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t offset) +{ + int rc; + struct super_block *sb; + struct hypfs_sb_info *fs_info; + size_t count = iov_length(iov, nr_segs); + + sb = iocb->ki_filp->f_path.dentry->d_inode->i_sb; + fs_info = sb->s_fs_info; + /* + * Currently we only allow one update per second for two reasons: + * 1. diag 204 is VERY expensive + * 2. If several processes do updates in parallel and then read the + * hypfs data, the likelihood of collisions is reduced, if we restrict + * the minimum update interval. A collision occurs, if during the + * data gathering of one process another process triggers an update + * If the first process wants to ensure consistent data, it has + * to restart data collection in this case. 
+ */ + mutex_lock(&fs_info->lock); + if (fs_info->last_update == get_seconds()) { + rc = -EBUSY; + goto out; + } + hypfs_delete_tree(sb->s_root); + if (MACHINE_IS_VM) + rc = hypfs_vm_create_files(sb, sb->s_root); + else + rc = hypfs_diag_create_files(sb, sb->s_root); + if (rc) { + pr_err("Updating the hypfs tree failed\n"); + hypfs_delete_tree(sb->s_root); + goto out; + } + hypfs_update_update(sb); + rc = count; +out: + mutex_unlock(&fs_info->lock); + return rc; +} + +static int hypfs_release(struct inode *inode, struct file *filp) +{ + kfree(filp->private_data); + return 0; +} + +enum { opt_uid, opt_gid, opt_err }; + +static const match_table_t hypfs_tokens = { + {opt_uid, "uid=%u"}, + {opt_gid, "gid=%u"}, + {opt_err, NULL} +}; + +static int hypfs_parse_options(char *options, struct super_block *sb) +{ + char *str; + substring_t args[MAX_OPT_ARGS]; + + if (!options) + return 0; + while ((str = strsep(&options, ",")) != NULL) { + int token, option; + struct hypfs_sb_info *hypfs_info = sb->s_fs_info; + + if (!*str) + continue; + token = match_token(str, hypfs_tokens, args); + switch (token) { + case opt_uid: + if (match_int(&args[0], &option)) + return -EINVAL; + hypfs_info->uid = option; + break; + case opt_gid: + if (match_int(&args[0], &option)) + return -EINVAL; + hypfs_info->gid = option; + break; + case opt_err: + default: + pr_err("%s is not a valid mount option\n", str); + return -EINVAL; + } + } + return 0; +} + +static int hypfs_show_options(struct seq_file *s, struct dentry *root) +{ + struct hypfs_sb_info *hypfs_info = root->d_sb->s_fs_info; + + seq_printf(s, ",uid=%u", hypfs_info->uid); + seq_printf(s, ",gid=%u", hypfs_info->gid); + return 0; +} + +static int hypfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *root_inode; + struct dentry *root_dentry; + int rc = 0; + struct hypfs_sb_info *sbi; + + sbi = kzalloc(sizeof(struct hypfs_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + mutex_init(&sbi->lock); + sbi->uid = 
current_uid(); + sbi->gid = current_gid(); + sb->s_fs_info = sbi; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = HYPFS_MAGIC; + sb->s_op = &hypfs_s_ops; + if (hypfs_parse_options(data, sb)) + return -EINVAL; + root_inode = hypfs_make_inode(sb, S_IFDIR | 0755); + if (!root_inode) + return -ENOMEM; + root_inode->i_op = &simple_dir_inode_operations; + root_inode->i_fop = &simple_dir_operations; + sb->s_root = root_dentry = d_make_root(root_inode); + if (!root_dentry) + return -ENOMEM; + if (MACHINE_IS_VM) + rc = hypfs_vm_create_files(sb, root_dentry); + else + rc = hypfs_diag_create_files(sb, root_dentry); + if (rc) + return rc; + sbi->update_file = hypfs_create_update_file(sb, root_dentry); + if (IS_ERR(sbi->update_file)) + return PTR_ERR(sbi->update_file); + hypfs_update_update(sb); + pr_info("Hypervisor filesystem mounted\n"); + return 0; +} + +static struct dentry *hypfs_mount(struct file_system_type *fst, int flags, + const char *devname, void *data) +{ + return mount_single(fst, flags, data, hypfs_fill_super); +} + +static void hypfs_kill_super(struct super_block *sb) +{ + struct hypfs_sb_info *sb_info = sb->s_fs_info; + + if (sb->s_root) + hypfs_delete_tree(sb->s_root); + if (sb_info->update_file) + hypfs_remove(sb_info->update_file); + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; + kill_litter_super(sb); +} + +static struct dentry *hypfs_create_file(struct super_block *sb, + struct dentry *parent, const char *name, + char *data, umode_t mode) +{ + struct dentry *dentry; + struct inode *inode; + + mutex_lock(&parent->d_inode->i_mutex); + dentry = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(dentry)) { + dentry = ERR_PTR(-ENOMEM); + goto fail; + } + inode = hypfs_make_inode(sb, mode); + if (!inode) { + dput(dentry); + dentry = ERR_PTR(-ENOMEM); + goto fail; + } + if (S_ISREG(mode)) { + inode->i_fop = &hypfs_file_ops; + if (data) + inode->i_size = strlen(data); + else + inode->i_size = 0; + } else if 
(S_ISDIR(mode)) { + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + inc_nlink(parent->d_inode); + } else + BUG(); + inode->i_private = data; + d_instantiate(dentry, inode); + dget(dentry); +fail: + mutex_unlock(&parent->d_inode->i_mutex); + return dentry; +} + +struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent, + const char *name) +{ + struct dentry *dentry; + + dentry = hypfs_create_file(sb, parent, name, NULL, S_IFDIR | DIR_MODE); + if (IS_ERR(dentry)) + return dentry; + hypfs_add_dentry(dentry); + return dentry; +} + +static struct dentry *hypfs_create_update_file(struct super_block *sb, + struct dentry *dir) +{ + struct dentry *dentry; + + dentry = hypfs_create_file(sb, dir, "update", NULL, + S_IFREG | UPDATE_FILE_MODE); + /* + * We do not put the update file on the 'delete' list with + * hypfs_add_dentry(), since it should not be removed when the tree + * is updated. + */ + return dentry; +} + +struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir, + const char *name, __u64 value) +{ + char *buffer; + char tmp[TMP_SIZE]; + struct dentry *dentry; + + snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value); + buffer = kstrdup(tmp, GFP_KERNEL); + if (!buffer) + return ERR_PTR(-ENOMEM); + dentry = + hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE); + if (IS_ERR(dentry)) { + kfree(buffer); + return ERR_PTR(-ENOMEM); + } + hypfs_add_dentry(dentry); + return dentry; +} + +struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir, + const char *name, char *string) +{ + char *buffer; + struct dentry *dentry; + + buffer = kmalloc(strlen(string) + 2, GFP_KERNEL); + if (!buffer) + return ERR_PTR(-ENOMEM); + sprintf(buffer, "%s\n", string); + dentry = + hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE); + if (IS_ERR(dentry)) { + kfree(buffer); + return ERR_PTR(-ENOMEM); + } + hypfs_add_dentry(dentry); + return dentry; +} + +static 
const struct file_operations hypfs_file_ops = { + .open = hypfs_open, + .release = hypfs_release, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = hypfs_aio_read, + .aio_write = hypfs_aio_write, + .llseek = no_llseek, +}; + +static struct file_system_type hypfs_type = { + .owner = THIS_MODULE, + .name = "s390_hypfs", + .mount = hypfs_mount, + .kill_sb = hypfs_kill_super +}; + +static const struct super_operations hypfs_s_ops = { + .statfs = simple_statfs, + .evict_inode = hypfs_evict_inode, + .show_options = hypfs_show_options, +}; + +static struct kobject *s390_kobj; + +static int __init hypfs_init(void) +{ + int rc; + + rc = hypfs_dbfs_init(); + if (rc) + return rc; + if (hypfs_diag_init()) { + rc = -ENODATA; + goto fail_dbfs_exit; + } + if (hypfs_vm_init()) { + rc = -ENODATA; + goto fail_hypfs_diag_exit; + } + s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); + if (!s390_kobj) { + rc = -ENOMEM; + goto fail_hypfs_vm_exit; + } + rc = register_filesystem(&hypfs_type); + if (rc) + goto fail_filesystem; + return 0; + +fail_filesystem: + kobject_put(s390_kobj); +fail_hypfs_vm_exit: + hypfs_vm_exit(); +fail_hypfs_diag_exit: + hypfs_diag_exit(); +fail_dbfs_exit: + hypfs_dbfs_exit(); + pr_err("Initialization of hypfs failed with rc=%i\n", rc); + return rc; +} + +static void __exit hypfs_exit(void) +{ + hypfs_diag_exit(); + hypfs_vm_exit(); + hypfs_dbfs_exit(); + unregister_filesystem(&hypfs_type); + kobject_put(s390_kobj); +} + +module_init(hypfs_init) +module_exit(hypfs_exit) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michael Holzheu <holzheu@de.ibm.com>"); +MODULE_DESCRIPTION("s390 Hypervisor Filesystem"); diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild new file mode 100644 index 00000000..287d7bbb --- /dev/null +++ b/arch/s390/include/asm/Kbuild @@ -0,0 +1,15 @@ +include include/asm-generic/Kbuild.asm + +header-y += chpid.h +header-y += chsc.h +header-y += cmb.h +header-y += dasd.h +header-y += debug.h +header-y += 
kvm_virtio.h +header-y += monwriter.h +header-y += qeth.h +header-y += schid.h +header-y += tape390.h +header-y += ucontext.h +header-y += vtoc.h +header-y += zcrypt.h diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h new file mode 100644 index 00000000..1ac80d6b --- /dev/null +++ b/arch/s390/include/asm/airq.h @@ -0,0 +1,19 @@ +/* + * include/asm-s390/airq.h + * + * Copyright IBM Corp. 2002,2007 + * Author(s): Ingo Adlung <adlung@de.ibm.com> + * Cornelia Huck <cornelia.huck@de.ibm.com> + * Arnd Bergmann <arndb@de.ibm.com> + * Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef _ASM_S390_AIRQ_H +#define _ASM_S390_AIRQ_H + +typedef void (*adapter_int_handler_t)(void *, void *); + +void *s390_register_adapter_interrupt(adapter_int_handler_t, void *, u8); +void s390_unregister_adapter_interrupt(void *, u8); + +#endif /* _ASM_S390_AIRQ_H */ diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h new file mode 100644 index 00000000..79283dac --- /dev/null +++ b/arch/s390/include/asm/appldata.h @@ -0,0 +1,90 @@ +/* + * include/asm-s390/appldata.h + * + * Copyright (C) IBM Corp. 
2006 + * + * Author(s): Melissa Howland <melissah@us.ibm.com> + */ + +#ifndef _ASM_S390_APPLDATA_H +#define _ASM_S390_APPLDATA_H + +#include <asm/io.h> + +#ifndef CONFIG_64BIT + +#define APPLDATA_START_INTERVAL_REC 0x00 /* Function codes for */ +#define APPLDATA_STOP_REC 0x01 /* DIAG 0xDC */ +#define APPLDATA_GEN_EVENT_REC 0x02 +#define APPLDATA_START_CONFIG_REC 0x03 + +/* + * Parameter list for DIAGNOSE X'DC' + */ +struct appldata_parameter_list { + u16 diag; /* The DIAGNOSE code X'00DC' */ + u8 function; /* The function code for the DIAGNOSE */ + u8 parlist_length; /* Length of the parameter list */ + u32 product_id_addr; /* Address of the 16-byte product ID */ + u16 reserved; + u16 buffer_length; /* Length of the application data buffer */ + u32 buffer_addr; /* Address of the application data buffer */ +} __attribute__ ((packed)); + +#else /* CONFIG_64BIT */ + +#define APPLDATA_START_INTERVAL_REC 0x80 +#define APPLDATA_STOP_REC 0x81 +#define APPLDATA_GEN_EVENT_REC 0x82 +#define APPLDATA_START_CONFIG_REC 0x83 + +/* + * Parameter list for DIAGNOSE X'DC' + */ +struct appldata_parameter_list { + u16 diag; + u8 function; + u8 parlist_length; + u32 unused01; + u16 reserved; + u16 buffer_length; + u32 unused02; + u64 product_id_addr; + u64 buffer_addr; +} __attribute__ ((packed)); + +#endif /* CONFIG_64BIT */ + +struct appldata_product_id { + char prod_nr[7]; /* product number */ + u16 prod_fn; /* product function */ + u8 record_nr; /* record number */ + u16 version_nr; /* version */ + u16 release_nr; /* release */ + u16 mod_lvl; /* modification level */ +} __attribute__ ((packed)); + +static inline int appldata_asm(struct appldata_product_id *id, + unsigned short fn, void *buffer, + unsigned short length) +{ + struct appldata_parameter_list parm_list; + int ry; + + if (!MACHINE_IS_VM) + return -ENOSYS; + parm_list.diag = 0xdc; + parm_list.function = fn; + parm_list.parlist_length = sizeof(parm_list); + parm_list.buffer_length = length; + parm_list.product_id_addr = 
(unsigned long) id; + parm_list.buffer_addr = virt_to_phys(buffer); + asm volatile( + " diag %1,%0,0xdc" + : "=d" (ry) + : "d" (&parm_list), "m" (parm_list), "m" (*id) + : "cc"); + return ry; +} + +#endif /* _ASM_S390_APPLDATA_H */ diff --git a/arch/s390/include/asm/asm-offsets.h b/arch/s390/include/asm/asm-offsets.h new file mode 100644 index 00000000..d370ee36 --- /dev/null +++ b/arch/s390/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include <generated/asm-offsets.h> diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h new file mode 100644 index 00000000..748347ba --- /dev/null +++ b/arch/s390/include/asm/atomic.h @@ -0,0 +1,334 @@ +#ifndef __ARCH_S390_ATOMIC__ +#define __ARCH_S390_ATOMIC__ + +/* + * Copyright 1999,2009 IBM Corp. + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Denis Joseph Barrow, + * Arnd Bergmann <arndb@de.ibm.com>, + * + * Atomic operations that C can't guarantee us. + * Useful for resource counting etc. + * s390 uses 'Compare And Swap' for atomicity in SMP environment. 
+ * + */ + +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/cmpxchg.h> + +#define ATOMIC_INIT(i) { (i) } + +#define __CS_LOOP(ptr, op_val, op_string) ({ \ + int old_val, new_val; \ + asm volatile( \ + " l %0,%2\n" \ + "0: lr %1,%0\n" \ + op_string " %1,%3\n" \ + " cs %0,%1,%2\n" \ + " jl 0b" \ + : "=&d" (old_val), "=&d" (new_val), \ + "=Q" (((atomic_t *)(ptr))->counter) \ + : "d" (op_val), "Q" (((atomic_t *)(ptr))->counter) \ + : "cc", "memory"); \ + new_val; \ +}) + +static inline int atomic_read(const atomic_t *v) +{ + int c; + + asm volatile( + " l %0,%1\n" + : "=d" (c) : "Q" (v->counter)); + return c; +} + +static inline void atomic_set(atomic_t *v, int i) +{ + asm volatile( + " st %1,%0\n" + : "=Q" (v->counter) : "d" (i)); +} + +static inline int atomic_add_return(int i, atomic_t *v) +{ + return __CS_LOOP(v, i, "ar"); +} +#define atomic_add(_i, _v) atomic_add_return(_i, _v) +#define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0) +#define atomic_inc(_v) atomic_add_return(1, _v) +#define atomic_inc_return(_v) atomic_add_return(1, _v) +#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) + +static inline int atomic_sub_return(int i, atomic_t *v) +{ + return __CS_LOOP(v, i, "sr"); +} +#define atomic_sub(_i, _v) atomic_sub_return(_i, _v) +#define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0) +#define atomic_dec(_v) atomic_sub_return(1, _v) +#define atomic_dec_return(_v) atomic_sub_return(1, _v) +#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0) + +static inline void atomic_clear_mask(unsigned long mask, atomic_t *v) +{ + __CS_LOOP(v, ~mask, "nr"); +} + +static inline void atomic_set_mask(unsigned long mask, atomic_t *v) +{ + __CS_LOOP(v, mask, "or"); +} + +#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) + +static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +{ + asm volatile( + " cs %0,%2,%1" + : "+d" (old), "=Q" (v->counter) + : "d" (new), "Q" (v->counter) + : "cc", 
"memory"); + return old; +} + +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + for (;;) { + if (unlikely(c == u)) + break; + old = atomic_cmpxchg(v, c, c + a); + if (likely(old == c)) + break; + c = old; + } + return c; +} + + +#undef __CS_LOOP + +#define ATOMIC64_INIT(i) { (i) } + +#ifdef CONFIG_64BIT + +#define __CSG_LOOP(ptr, op_val, op_string) ({ \ + long long old_val, new_val; \ + asm volatile( \ + " lg %0,%2\n" \ + "0: lgr %1,%0\n" \ + op_string " %1,%3\n" \ + " csg %0,%1,%2\n" \ + " jl 0b" \ + : "=&d" (old_val), "=&d" (new_val), \ + "=Q" (((atomic_t *)(ptr))->counter) \ + : "d" (op_val), "Q" (((atomic_t *)(ptr))->counter) \ + : "cc", "memory"); \ + new_val; \ +}) + +static inline long long atomic64_read(const atomic64_t *v) +{ + long long c; + + asm volatile( + " lg %0,%1\n" + : "=d" (c) : "Q" (v->counter)); + return c; +} + +static inline void atomic64_set(atomic64_t *v, long long i) +{ + asm volatile( + " stg %1,%0\n" + : "=Q" (v->counter) : "d" (i)); +} + +static inline long long atomic64_add_return(long long i, atomic64_t *v) +{ + return __CSG_LOOP(v, i, "agr"); +} + +static inline long long atomic64_sub_return(long long i, atomic64_t *v) +{ + return __CSG_LOOP(v, i, "sgr"); +} + +static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v) +{ + __CSG_LOOP(v, ~mask, "ngr"); +} + +static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v) +{ + __CSG_LOOP(v, mask, "ogr"); +} + +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +static inline long long atomic64_cmpxchg(atomic64_t *v, + long long old, long long new) +{ + asm volatile( + " csg %0,%2,%1" + : "+d" (old), "=Q" (v->counter) + : "d" (new), "Q" (v->counter) + : "cc", "memory"); + return old; +} + +#undef __CSG_LOOP + +#else /* CONFIG_64BIT */ + +typedef struct { + long long counter; +} atomic64_t; + +static inline long long atomic64_read(const atomic64_t *v) +{ + register_pair rp; + + asm volatile( + " lm 
%0,%N0,%1" + : "=&d" (rp) : "Q" (v->counter) ); + return rp.pair; +} + +static inline void atomic64_set(atomic64_t *v, long long i) +{ + register_pair rp = {.pair = i}; + + asm volatile( + " stm %1,%N1,%0" + : "=Q" (v->counter) : "d" (rp) ); +} + +static inline long long atomic64_xchg(atomic64_t *v, long long new) +{ + register_pair rp_new = {.pair = new}; + register_pair rp_old; + + asm volatile( + " lm %0,%N0,%1\n" + "0: cds %0,%2,%1\n" + " jl 0b\n" + : "=&d" (rp_old), "=Q" (v->counter) + : "d" (rp_new), "Q" (v->counter) + : "cc"); + return rp_old.pair; +} + +static inline long long atomic64_cmpxchg(atomic64_t *v, + long long old, long long new) +{ + register_pair rp_old = {.pair = old}; + register_pair rp_new = {.pair = new}; + + asm volatile( + " cds %0,%2,%1" + : "+&d" (rp_old), "=Q" (v->counter) + : "d" (rp_new), "Q" (v->counter) + : "cc"); + return rp_old.pair; +} + + +static inline long long atomic64_add_return(long long i, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old + i; + } while (atomic64_cmpxchg(v, old, new) != old); + return new; +} + +static inline long long atomic64_sub_return(long long i, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old - i; + } while (atomic64_cmpxchg(v, old, new) != old); + return new; +} + +static inline void atomic64_set_mask(unsigned long long mask, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old | mask; + } while (atomic64_cmpxchg(v, old, new) != old); +} + +/* + * atomic64_clear_mask - atomically clear the bits of @mask in @v + * @mask: bits to clear + * @v: counter to modify + * + * Reads the counter, computes the new value and retries the + * compare-and-swap until it succeeds against an unchanged old value. + */ +static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + /* AND with the inverted mask, like the CONFIG_64BIT variant */ + new = old & ~mask; + } while (atomic64_cmpxchg(v, old, new) != old); +} + +#endif /* CONFIG_64BIT */ + +static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) +{ + long long c, old; + + c = atomic64_read(v); + for (;;) { + if (unlikely(c == u)) + break; + old = atomic64_cmpxchg(v, c, c + a); + 
if (likely(old == c)) + break; + c = old; + } + return c != u; +} + +static inline long long atomic64_dec_if_positive(atomic64_t *v) +{ + long long c, old, dec; + + c = atomic64_read(v); + for (;;) { + dec = c - 1; + if (unlikely(dec < 0)) + break; + old = atomic64_cmpxchg((v), c, dec); + if (likely(old == c)) + break; + c = old; + } + return dec; +} + +#define atomic64_add(_i, _v) atomic64_add_return(_i, _v) +#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0) +#define atomic64_inc(_v) atomic64_add_return(1, _v) +#define atomic64_inc_return(_v) atomic64_add_return(1, _v) +#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) +#define atomic64_sub(_i, _v) atomic64_sub_return(_i, _v) +#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) +#define atomic64_dec(_v) atomic64_sub_return(1, _v) +#define atomic64_dec_return(_v) atomic64_sub_return(1, _v) +#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + +#define smp_mb__before_atomic_dec() smp_mb() +#define smp_mb__after_atomic_dec() smp_mb() +#define smp_mb__before_atomic_inc() smp_mb() +#define smp_mb__after_atomic_inc() smp_mb() + +#endif /* __ARCH_S390_ATOMIC__ */ diff --git a/arch/s390/include/asm/auxvec.h b/arch/s390/include/asm/auxvec.h new file mode 100644 index 00000000..a1f153e8 --- /dev/null +++ b/arch/s390/include/asm/auxvec.h @@ -0,0 +1,6 @@ +#ifndef __ASMS390_AUXVEC_H +#define __ASMS390_AUXVEC_H + +#define AT_SYSINFO_EHDR 33 + +#endif diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h new file mode 100644 index 00000000..451273ad --- /dev/null +++ b/arch/s390/include/asm/barrier.h @@ -0,0 +1,35 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +/* + * Force strict CPU ordering. 
+ * And yes, this is required on UP too when we're talking + * to devices. + * + * This is very similar to the ppc eieio/sync instruction in that it + * does a checkpoint synchronisation & makes sure that + * all memory ops have completed wrt other CPUs ( see 7-15 POP DJB ). + */ + +#define eieio() asm volatile("bcr 15,0" : : : "memory") +#define SYNC_OTHER_CORES(x) eieio() +#define mb() eieio() +#define rmb() eieio() +#define wmb() eieio() +#define read_barrier_depends() do { } while(0) +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_read_barrier_depends() read_barrier_depends() +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() + +#define set_mb(var, value) do { var = value; mb(); } while (0) + +#endif /* __ASM_BARRIER_H */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h new file mode 100644 index 00000000..e5beb490 --- /dev/null +++ b/arch/s390/include/asm/bitops.h @@ -0,0 +1,840 @@ +#ifndef _S390_BITOPS_H +#define _S390_BITOPS_H + +/* + * include/asm-s390/bitops.h + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/bitops.h" + * Copyright (C) 1992, Linus Torvalds + * + */ + +#ifdef __KERNEL__ + +#ifndef _LINUX_BITOPS_H +#error only <linux/bitops.h> can be included directly +#endif + +#include <linux/compiler.h> + +/* + * 32 bit bitops format: + * bit 0 is the LSB of *addr; bit 31 is the MSB of *addr; + * bit 32 is the LSB of *(addr+4). 
That combined with the + * big endian byte order on S390 give the following bit + * order in memory: + * 1f 1e 1d 1c 1b 1a 19 18 17 16 15 14 13 12 11 10 \ + * 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 + * after that follows the next long with bit numbers + * 3f 3e 3d 3c 3b 3a 39 38 37 36 35 34 33 32 31 30 + * 2f 2e 2d 2c 2b 2a 29 28 27 26 25 24 23 22 21 20 + * The reason for this bit ordering is the fact that + * in the architecture independent code bits operations + * of the form "flags |= (1 << bitnr)" are used INTERMIXED + * with operation of the form "set_bit(bitnr, flags)". + * + * 64 bit bitops format: + * bit 0 is the LSB of *addr; bit 63 is the MSB of *addr; + * bit 64 is the LSB of *(addr+8). That combined with the + * big endian byte order on S390 give the following bit + * order in memory: + * 3f 3e 3d 3c 3b 3a 39 38 37 36 35 34 33 32 31 30 + * 2f 2e 2d 2c 2b 2a 29 28 27 26 25 24 23 22 21 20 + * 1f 1e 1d 1c 1b 1a 19 18 17 16 15 14 13 12 11 10 + * 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 + * after that follows the next long with bit numbers + * 7f 7e 7d 7c 7b 7a 79 78 77 76 75 74 73 72 71 70 + * 6f 6e 6d 6c 6b 6a 69 68 67 66 65 64 63 62 61 60 + * 5f 5e 5d 5c 5b 5a 59 58 57 56 55 54 53 52 51 50 + * 4f 4e 4d 4c 4b 4a 49 48 47 46 45 44 43 42 41 40 + * The reason for this bit ordering is the fact that + * in the architecture independent code bits operations + * of the form "flags |= (1 << bitnr)" are used INTERMIXED + * with operation of the form "set_bit(bitnr, flags)". 
+ */ + +/* bitmap tables from arch/s390/kernel/bitmap.c */ +extern const char _oi_bitmap[]; +extern const char _ni_bitmap[]; +extern const char _zb_findmap[]; +extern const char _sb_findmap[]; + +#ifndef __s390x__ + +#define __BITOPS_ALIGN 3 +#define __BITOPS_WORDSIZE 32 +#define __BITOPS_OR "or" +#define __BITOPS_AND "nr" +#define __BITOPS_XOR "xr" + +#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string) \ + asm volatile( \ + " l %0,%2\n" \ + "0: lr %1,%0\n" \ + __op_string " %1,%3\n" \ + " cs %0,%1,%2\n" \ + " jl 0b" \ + : "=&d" (__old), "=&d" (__new), \ + "=Q" (*(unsigned long *) __addr) \ + : "d" (__val), "Q" (*(unsigned long *) __addr) \ + : "cc"); + +#else /* __s390x__ */ + +#define __BITOPS_ALIGN 7 +#define __BITOPS_WORDSIZE 64 +#define __BITOPS_OR "ogr" +#define __BITOPS_AND "ngr" +#define __BITOPS_XOR "xgr" + +#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string) \ + asm volatile( \ + " lg %0,%2\n" \ + "0: lgr %1,%0\n" \ + __op_string " %1,%3\n" \ + " csg %0,%1,%2\n" \ + " jl 0b" \ + : "=&d" (__old), "=&d" (__new), \ + "=Q" (*(unsigned long *) __addr) \ + : "d" (__val), "Q" (*(unsigned long *) __addr) \ + : "cc"); + +#endif /* __s390x__ */ + +#define __BITOPS_WORDS(bits) (((bits)+__BITOPS_WORDSIZE-1)/__BITOPS_WORDSIZE) +#define __BITOPS_BARRIER() asm volatile("" : : : "memory") + +#ifdef CONFIG_SMP +/* + * SMP safe set_bit routine based on compare and swap (CS) + */ +static inline void set_bit_cs(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; + /* calculate address for CS */ + addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + /* make OR mask */ + mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); + /* Do the atomic update. 
*/ + __BITOPS_LOOP(old, new, addr, mask, __BITOPS_OR); +} + +/* + * SMP safe clear_bit routine based on compare and swap (CS) + */ +static inline void clear_bit_cs(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; + /* calculate address for CS */ + addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + /* make AND mask */ + mask = ~(1UL << (nr & (__BITOPS_WORDSIZE - 1))); + /* Do the atomic update. */ + __BITOPS_LOOP(old, new, addr, mask, __BITOPS_AND); +} + +/* + * SMP safe change_bit routine based on compare and swap (CS) + */ +static inline void change_bit_cs(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; + /* calculate address for CS */ + addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + /* make XOR mask */ + mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); + /* Do the atomic update. */ + __BITOPS_LOOP(old, new, addr, mask, __BITOPS_XOR); +} + +/* + * SMP safe test_and_set_bit routine based on compare and swap (CS) + */ +static inline int +test_and_set_bit_cs(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; + /* calculate address for CS */ + addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + /* make OR/test mask */ + mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); + /* Do the atomic update. */ + __BITOPS_LOOP(old, new, addr, mask, __BITOPS_OR); + __BITOPS_BARRIER(); + return (old & mask) != 0; +} + +/* + * SMP safe test_and_clear_bit routine based on compare and swap (CS) + */ +static inline int +test_and_clear_bit_cs(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; + /* calculate address for CS */ + addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + /* make AND/test mask */ + mask = ~(1UL << (nr & (__BITOPS_WORDSIZE - 1))); + /* Do the atomic update. 
*/ + __BITOPS_LOOP(old, new, addr, mask, __BITOPS_AND); + __BITOPS_BARRIER(); + return (old ^ new) != 0; +} + +/* + * SMP safe test_and_change_bit routine based on compare and swap (CS) + */ +static inline int +test_and_change_bit_cs(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr, old, new, mask; + + addr = (unsigned long) ptr; + /* calculate address for CS */ + addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; + /* make XOR/test mask */ + mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); + /* Do the atomic update. */ + __BITOPS_LOOP(old, new, addr, mask, __BITOPS_XOR); + __BITOPS_BARRIER(); + return (old & mask) != 0; +} +#endif /* CONFIG_SMP */ + +/* + * fast, non-SMP set_bit routine + */ +static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr; + + addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + asm volatile( + " oc %O0(1,%R0),%1" + : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); +} + +static inline void +__constant_set_bit(const unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr; + + addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + *(unsigned char *) addr |= 1 << (nr & 7); +} + +#define set_bit_simple(nr,addr) \ +(__builtin_constant_p((nr)) ? 
\ + __constant_set_bit((nr),(addr)) : \ + __set_bit((nr),(addr)) ) + +/* + * fast, non-SMP clear_bit routine + */ +static inline void +__clear_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr; + + addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + asm volatile( + " nc %O0(1,%R0),%1" + : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" ); +} + +static inline void +__constant_clear_bit(const unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr; + + addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + *(unsigned char *) addr &= ~(1 << (nr & 7)); +} + +#define clear_bit_simple(nr,addr) \ +(__builtin_constant_p((nr)) ? \ + __constant_clear_bit((nr),(addr)) : \ + __clear_bit((nr),(addr)) ) + +/* + * fast, non-SMP change_bit routine + */ +static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr; + + addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + asm volatile( + " xc %O0(1,%R0),%1" + : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); +} + +static inline void +__constant_change_bit(const unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr; + + addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + *(unsigned char *) addr ^= 1 << (nr & 7); +} + +#define change_bit_simple(nr,addr) \ +(__builtin_constant_p((nr)) ? 
\ + __constant_change_bit((nr),(addr)) : \ + __change_bit((nr),(addr)) ) + +/* + * fast, non-SMP test_and_set_bit routine + */ +static inline int +test_and_set_bit_simple(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr; + unsigned char ch; + + addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + ch = *(unsigned char *) addr; + asm volatile( + " oc %O0(1,%R0),%1" + : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) + : "cc", "memory"); + return (ch >> (nr & 7)) & 1; +} +#define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y) + +/* + * fast, non-SMP test_and_clear_bit routine + */ +static inline int +test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr; + unsigned char ch; + + addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + ch = *(unsigned char *) addr; + asm volatile( + " nc %O0(1,%R0),%1" + : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) + : "cc", "memory"); + return (ch >> (nr & 7)) & 1; +} +#define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y) + +/* + * fast, non-SMP test_and_change_bit routine + */ +static inline int +test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr; + unsigned char ch; + + addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + ch = *(unsigned char *) addr; + asm volatile( + " xc %O0(1,%R0),%1" + : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) + : "cc", "memory"); + return (ch >> (nr & 7)) & 1; +} +#define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y) + +#ifdef CONFIG_SMP +#define set_bit set_bit_cs +#define clear_bit clear_bit_cs +#define change_bit change_bit_cs +#define test_and_set_bit test_and_set_bit_cs +#define test_and_clear_bit test_and_clear_bit_cs +#define test_and_change_bit test_and_change_bit_cs +#else +#define set_bit set_bit_simple +#define clear_bit clear_bit_simple +#define change_bit change_bit_simple +#define 
test_and_set_bit test_and_set_bit_simple +#define test_and_clear_bit test_and_clear_bit_simple +#define test_and_change_bit test_and_change_bit_simple +#endif + + +/* + * This routine doesn't need to be atomic. + */ + +static inline int __test_bit(unsigned long nr, const volatile unsigned long *ptr) +{ + unsigned long addr; + unsigned char ch; + + addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); + ch = *(volatile unsigned char *) addr; + return (ch >> (nr & 7)) & 1; +} + +static inline int +__constant_test_bit(unsigned long nr, const volatile unsigned long *addr) { + return (((volatile char *) addr) + [(nr^(__BITOPS_WORDSIZE-8))>>3] & (1<<(nr&7))) != 0; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p((nr)) ? \ + __constant_test_bit((nr),(addr)) : \ + __test_bit((nr),(addr)) ) + +/* + * Optimized find bit helper functions. + */ + +/** + * __ffz_word_loop - find byte offset of first long != -1UL + * @addr: pointer to array of unsigned long + * @size: size of the array in bits + */ +static inline unsigned long __ffz_word_loop(const unsigned long *addr, + unsigned long size) +{ + typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype; + unsigned long bytes = 0; + + asm volatile( +#ifndef __s390x__ + " ahi %1,-1\n" + " sra %1,5\n" + " jz 1f\n" + "0: c %2,0(%0,%3)\n" + " jne 1f\n" + " la %0,4(%0)\n" + " brct %1,0b\n" + "1:\n" +#else + " aghi %1,-1\n" + " srag %1,%1,6\n" + " jz 1f\n" + "0: cg %2,0(%0,%3)\n" + " jne 1f\n" + " la %0,8(%0)\n" + " brct %1,0b\n" + "1:\n" +#endif + : "+&a" (bytes), "+&d" (size) + : "d" (-1UL), "a" (addr), "m" (*(addrtype *) addr) + : "cc" ); + return bytes; +} + +/** + * __ffs_word_loop - find byte offset of first long != 0UL + * @addr: pointer to array of unsigned long + * @size: size of the array in bits + */ +static inline unsigned long __ffs_word_loop(const unsigned long *addr, + unsigned long size) +{ + typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype; + unsigned long bytes = 0; + + asm volatile( 
+#ifndef __s390x__ + " ahi %1,-1\n" + " sra %1,5\n" + " jz 1f\n" + "0: c %2,0(%0,%3)\n" + " jne 1f\n" + " la %0,4(%0)\n" + " brct %1,0b\n" + "1:\n" +#else + " aghi %1,-1\n" + " srag %1,%1,6\n" + " jz 1f\n" + "0: cg %2,0(%0,%3)\n" + " jne 1f\n" + " la %0,8(%0)\n" + " brct %1,0b\n" + "1:\n" +#endif + : "+&a" (bytes), "+&a" (size) + : "d" (0UL), "a" (addr), "m" (*(addrtype *) addr) + : "cc" ); + return bytes; +} + +/** + * __ffz_word - add number of the first unset bit + * @nr: base value the bit number is added to + * @word: the word that is searched for unset bits + */ +static inline unsigned long __ffz_word(unsigned long nr, unsigned long word) +{ +#ifdef __s390x__ + if ((word & 0xffffffff) == 0xffffffff) { + word >>= 32; + nr += 32; + } +#endif + if ((word & 0xffff) == 0xffff) { + word >>= 16; + nr += 16; + } + if ((word & 0xff) == 0xff) { + word >>= 8; + nr += 8; + } + return nr + _zb_findmap[(unsigned char) word]; +} + +/** + * __ffs_word - add number of the first set bit + * @nr: base value the bit number is added to + * @word: the word that is searched for set bits + */ +static inline unsigned long __ffs_word(unsigned long nr, unsigned long word) +{ +#ifdef __s390x__ + if ((word & 0xffffffff) == 0) { + word >>= 32; + nr += 32; + } +#endif + if ((word & 0xffff) == 0) { + word >>= 16; + nr += 16; + } + if ((word & 0xff) == 0) { + word >>= 8; + nr += 8; + } + return nr + _sb_findmap[(unsigned char) word]; +} + + +/** + * __load_ulong_be - load big endian unsigned long + * @p: pointer to array of unsigned long + * @offset: byte offset of source value in the array + */ +static inline unsigned long __load_ulong_be(const unsigned long *p, + unsigned long offset) +{ + p = (unsigned long *)((unsigned long) p + offset); + return *p; +} + +/** + * __load_ulong_le - load little endian unsigned long + * @p: pointer to array of unsigned long + * @offset: byte offset of source value in the array + */ +static inline unsigned long __load_ulong_le(const unsigned long *p, + 
unsigned long offset) +{ + unsigned long word; + + p = (unsigned long *)((unsigned long) p + offset); +#ifndef __s390x__ + asm volatile( + " ic %0,%O1(%R1)\n" + " icm %0,2,%O1+1(%R1)\n" + " icm %0,4,%O1+2(%R1)\n" + " icm %0,8,%O1+3(%R1)" + : "=&d" (word) : "Q" (*p) : "cc"); +#else + asm volatile( + " lrvg %0,%1" + : "=d" (word) : "m" (*p) ); +#endif + return word; +} + +/* + * The various find bit functions. + */ + +/* + * ffz - find first zero in word. + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. + */ +static inline unsigned long ffz(unsigned long word) +{ + return __ffz_word(0, word); +} + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static inline unsigned long __ffs (unsigned long word) +{ + return __ffs_word(0, word); +} + +/** + * ffs - find first bit set + * @x: the word to search + * + * This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above ffz (man ffs). + */ +static inline int ffs(int x) +{ + if (!x) + return 0; + return __ffs_word(1, x); +} + +/** + * find_first_zero_bit - find the first zero bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit-number of the first zero bit, not the number of the byte + * containing a bit. + */ +static inline unsigned long find_first_zero_bit(const unsigned long *addr, + unsigned long size) +{ + unsigned long bytes, bits; + + if (!size) + return 0; + bytes = __ffz_word_loop(addr, size); + bits = __ffz_word(bytes*8, __load_ulong_be(addr, bytes)); + return (bits < size) ? 
bits : size; +} +#define find_first_zero_bit find_first_zero_bit + +/** + * find_first_bit - find the first set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit-number of the first set bit, not the number of the byte + * containing a bit. + */ +static inline unsigned long find_first_bit(const unsigned long * addr, + unsigned long size) +{ + unsigned long bytes, bits; + + if (!size) + return 0; + bytes = __ffs_word_loop(addr, size); + bits = __ffs_word(bytes*8, __load_ulong_be(addr, bytes)); + return (bits < size) ? bits : size; +} +#define find_first_bit find_first_bit + +/** + * find_next_zero_bit - find the next zero bit in a memory region + * @addr: The address to base the search on + * @size: The maximum size to search + * @offset: The bitnumber to start searching at + */ +static inline int find_next_zero_bit (const unsigned long * addr, + unsigned long size, + unsigned long offset) +{ + const unsigned long *p; + unsigned long bit, set; + + if (offset >= size) + return size; + bit = offset & (__BITOPS_WORDSIZE - 1); + offset -= bit; + size -= offset; + p = addr + offset / __BITOPS_WORDSIZE; + if (bit) { + /* + * __ffz_word returns __BITOPS_WORDSIZE + * if no zero bit is present in the word. 
+ */ + set = __ffz_word(bit, *p >> bit); + if (set >= size) + return size + offset; + if (set < __BITOPS_WORDSIZE) + return set + offset; + offset += __BITOPS_WORDSIZE; + size -= __BITOPS_WORDSIZE; + p++; + } + return offset + find_first_zero_bit(p, size); +} +#define find_next_zero_bit find_next_zero_bit + +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @size: The maximum size to search + * @offset: The bitnumber to start searching at + */ +static inline int find_next_bit (const unsigned long * addr, + unsigned long size, + unsigned long offset) +{ + const unsigned long *p; + unsigned long bit, set; + + if (offset >= size) + return size; + bit = offset & (__BITOPS_WORDSIZE - 1); + offset -= bit; + size -= offset; + p = addr + offset / __BITOPS_WORDSIZE; + if (bit) { + /* + * __ffs_word returns __BITOPS_WORDSIZE + * if no one bit is present in the word. + */ + set = __ffs_word(0, *p & (~0UL << bit)); + if (set >= size) + return size + offset; + if (set < __BITOPS_WORDSIZE) + return set + offset; + offset += __BITOPS_WORDSIZE; + size -= __BITOPS_WORDSIZE; + p++; + } + return offset + find_first_bit(p, size); +} +#define find_next_bit find_next_bit + +/* + * Every architecture must define this function. It's the fastest + * way of searching a 140-bit bitmap where the first 100 bits are + * unlikely to be set. It's guaranteed that at least one of the 140 + * bits is cleared. + */ +static inline int sched_find_first_bit(unsigned long *b) +{ + return find_first_bit(b, 140); +} + +#include <asm-generic/bitops/fls.h> +#include <asm-generic/bitops/__fls.h> +#include <asm-generic/bitops/fls64.h> + +#include <asm-generic/bitops/hweight.h> +#include <asm-generic/bitops/lock.h> + +/* + * ATTENTION: intel byte ordering convention for ext2 and minix !! + * bit 0 is the LSB of addr; bit 31 is the MSB of addr; + * bit 32 is the LSB of (addr+4). 
+ * That combined with the little endian byte order of Intel gives the + * following bit order in memory: + * 07 06 05 04 03 02 01 00 15 14 13 12 11 10 09 08 \ + * 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24 + */ + +static inline int find_first_zero_bit_le(void *vaddr, unsigned int size) +{ + unsigned long bytes, bits; + + if (!size) + return 0; + bytes = __ffz_word_loop(vaddr, size); + bits = __ffz_word(bytes*8, __load_ulong_le(vaddr, bytes)); + return (bits < size) ? bits : size; +} +#define find_first_zero_bit_le find_first_zero_bit_le + +static inline int find_next_zero_bit_le(void *vaddr, unsigned long size, + unsigned long offset) +{ + unsigned long *addr = vaddr, *p; + unsigned long bit, set; + + if (offset >= size) + return size; + bit = offset & (__BITOPS_WORDSIZE - 1); + offset -= bit; + size -= offset; + p = addr + offset / __BITOPS_WORDSIZE; + if (bit) { + /* + * s390 version of ffz returns __BITOPS_WORDSIZE + * if no zero bit is present in the word. + */ + set = __ffz_word(bit, __load_ulong_le(p, 0) >> bit); + if (set >= size) + return size + offset; + if (set < __BITOPS_WORDSIZE) + return set + offset; + offset += __BITOPS_WORDSIZE; + size -= __BITOPS_WORDSIZE; + p++; + } + return offset + find_first_zero_bit_le(p, size); +} +#define find_next_zero_bit_le find_next_zero_bit_le + +static inline unsigned long find_first_bit_le(void *vaddr, unsigned long size) +{ + unsigned long bytes, bits; + + if (!size) + return 0; + bytes = __ffs_word_loop(vaddr, size); + bits = __ffs_word(bytes*8, __load_ulong_le(vaddr, bytes)); + return (bits < size) ? 
bits : size; +} +#define find_first_bit_le find_first_bit_le + +static inline int find_next_bit_le(void *vaddr, unsigned long size, + unsigned long offset) +{ + unsigned long *addr = vaddr, *p; + unsigned long bit, set; + + if (offset >= size) + return size; + bit = offset & (__BITOPS_WORDSIZE - 1); + offset -= bit; + size -= offset; + p = addr + offset / __BITOPS_WORDSIZE; + if (bit) { + /* + * __ffs_word returns __BITOPS_WORDSIZE + * if no one bit is present in the word. + */ + set = __ffs_word(0, __load_ulong_le(p, 0) & (~0UL << bit)); + if (set >= size) + return size + offset; + if (set < __BITOPS_WORDSIZE) + return set + offset; + offset += __BITOPS_WORDSIZE; + size -= __BITOPS_WORDSIZE; + p++; + } + return offset + find_first_bit_le(p, size); +} +#define find_next_bit_le find_next_bit_le + +#include <asm-generic/bitops/le.h> + +#include <asm-generic/bitops/ext2-atomic-setbit.h> + + +#endif /* __KERNEL__ */ + +#endif /* _S390_BITOPS_H */ diff --git a/arch/s390/include/asm/bitsperlong.h b/arch/s390/include/asm/bitsperlong.h new file mode 100644 index 00000000..6b235aea --- /dev/null +++ b/arch/s390/include/asm/bitsperlong.h @@ -0,0 +1,13 @@ +#ifndef __ASM_S390_BITSPERLONG_H +#define __ASM_S390_BITSPERLONG_H + +#ifndef __s390x__ +#define __BITS_PER_LONG 32 +#else +#define __BITS_PER_LONG 64 +#endif + +#include <asm-generic/bitsperlong.h> + +#endif /* __ASM_S390_BITSPERLONG_H */ + diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h new file mode 100644 index 00000000..bf90d1fd --- /dev/null +++ b/arch/s390/include/asm/bug.h @@ -0,0 +1,71 @@ +#ifndef _ASM_S390_BUG_H +#define _ASM_S390_BUG_H + +#include <linux/kernel.h> + +#ifdef CONFIG_BUG + +#ifdef CONFIG_DEBUG_BUGVERBOSE + +#define __EMIT_BUG(x) do { \ + asm volatile( \ + "0: j 0b+2\n" \ + "1:\n" \ + ".section .rodata.str,\"aMS\",@progbits,1\n" \ + "2: .asciz \""__FILE__"\"\n" \ + ".previous\n" \ + ".section __bug_table,\"a\"\n" \ + "3: .long 1b-3b,2b-3b\n" \ + " .short %0,%1\n" \ + 
" .org 3b+%2\n" \ + ".previous\n" \ + : : "i" (__LINE__), \ + "i" (x), \ + "i" (sizeof(struct bug_entry))); \ +} while (0) + +#else /* CONFIG_DEBUG_BUGVERBOSE */ + +#define __EMIT_BUG(x) do { \ + asm volatile( \ + "0: j 0b+2\n" \ + "1:\n" \ + ".section __bug_table,\"a\"\n" \ + "2: .long 1b-2b\n" \ + " .short %0\n" \ + " .org 2b+%1\n" \ + ".previous\n" \ + : : "i" (x), \ + "i" (sizeof(struct bug_entry))); \ +} while (0) + +#endif /* CONFIG_DEBUG_BUGVERBOSE */ + +#define BUG() do { \ + __EMIT_BUG(0); \ + unreachable(); \ +} while (0) + +#define __WARN_TAINT(taint) do { \ + __EMIT_BUG(BUGFLAG_TAINT(taint)); \ +} while (0) + +#define WARN_ON(x) ({ \ + int __ret_warn_on = !!(x); \ + if (__builtin_constant_p(__ret_warn_on)) { \ + if (__ret_warn_on) \ + __WARN(); \ + } else { \ + if (unlikely(__ret_warn_on)) \ + __WARN(); \ + } \ + unlikely(__ret_warn_on); \ +}) + +#define HAVE_ARCH_BUG +#define HAVE_ARCH_WARN_ON +#endif /* CONFIG_BUG */ + +#include <asm-generic/bug.h> + +#endif /* _ASM_S390_BUG_H */ diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h new file mode 100644 index 00000000..011f1e6a --- /dev/null +++ b/arch/s390/include/asm/bugs.h @@ -0,0 +1,22 @@ +/* + * include/asm-s390/bugs.h + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/bugs.h" + * Copyright (C) 1994 Linus Torvalds + */ + +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Needs: + * void check_bugs(void); + */ + +static inline void check_bugs(void) +{ + /* s390 has no bugs ... 
*/ +} diff --git a/arch/s390/include/asm/byteorder.h b/arch/s390/include/asm/byteorder.h new file mode 100644 index 00000000..a332e59e --- /dev/null +++ b/arch/s390/include/asm/byteorder.h @@ -0,0 +1,6 @@ +#ifndef _S390_BYTEORDER_H +#define _S390_BYTEORDER_H + +#include <linux/byteorder/big_endian.h> + +#endif /* _S390_BYTEORDER_H */ diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h new file mode 100644 index 00000000..2a30d5ac --- /dev/null +++ b/arch/s390/include/asm/cache.h @@ -0,0 +1,20 @@ +/* + * include/asm-s390/cache.h + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * + * Derived from "include/asm-i386/cache.h" + * Copyright (C) 1992, Linus Torvalds + */ + +#ifndef __ARCH_S390_CACHE_H +#define __ARCH_S390_CACHE_H + +#define L1_CACHE_BYTES 256 +#define L1_CACHE_SHIFT 8 +#define NET_SKB_PAD 32 + +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) + +#endif diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h new file mode 100644 index 00000000..3e20383d --- /dev/null +++ b/arch/s390/include/asm/cacheflush.h @@ -0,0 +1,16 @@ +#ifndef _S390_CACHEFLUSH_H +#define _S390_CACHEFLUSH_H + +/* Caches aren't brain-dead on the s390. */ +#include <asm-generic/cacheflush.h> + +#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable); +#endif + +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); + +#endif /* _S390_CACHEFLUSH_H */ diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h new file mode 100644 index 00000000..9381c92c --- /dev/null +++ b/arch/s390/include/asm/ccwdev.h @@ -0,0 +1,230 @@ +/* + * Copyright IBM Corp. 
2002, 2009 + * + * Author(s): Arnd Bergmann <arndb@de.ibm.com> + * + * Interface for CCW device drivers + */ +#ifndef _S390_CCWDEV_H_ +#define _S390_CCWDEV_H_ + +#include <linux/device.h> +#include <linux/mod_devicetable.h> +#include <asm/fcx.h> +#include <asm/irq.h> + +/* structs from asm/cio.h */ +struct irb; +struct ccw1; +struct ccw_dev_id; + +/* simplified initializers for struct ccw_device_id: + * CCW_DEVICE and CCW_DEVICE_DEVTYPE initialize one + * entry in your MODULE_DEVICE_TABLE and set the match_flags correctly */ +#define CCW_DEVICE(cu, cum) \ + .cu_type=(cu), .cu_model=(cum), \ + .match_flags=(CCW_DEVICE_ID_MATCH_CU_TYPE \ + | (cum ? CCW_DEVICE_ID_MATCH_CU_MODEL : 0)) + +#define CCW_DEVICE_DEVTYPE(cu, cum, dev, devm) \ + .cu_type=(cu), .cu_model=(cum), .dev_type=(dev), .dev_model=(devm),\ + .match_flags=CCW_DEVICE_ID_MATCH_CU_TYPE \ + | ((cum) ? CCW_DEVICE_ID_MATCH_CU_MODEL : 0) \ + | CCW_DEVICE_ID_MATCH_DEVICE_TYPE \ + | ((devm) ? CCW_DEVICE_ID_MATCH_DEVICE_MODEL : 0) + +/* scan through an array of device ids and return the first + * entry that matches the device. 
+ * + * the array must end with an entry containing zero match_flags + */ +static inline const struct ccw_device_id * +ccw_device_id_match(const struct ccw_device_id *array, + const struct ccw_device_id *match) +{ + const struct ccw_device_id *id = array; + + for (id = array; id->match_flags; id++) { + if ((id->match_flags & CCW_DEVICE_ID_MATCH_CU_TYPE) + && (id->cu_type != match->cu_type)) + continue; + + if ((id->match_flags & CCW_DEVICE_ID_MATCH_CU_MODEL) + && (id->cu_model != match->cu_model)) + continue; + + if ((id->match_flags & CCW_DEVICE_ID_MATCH_DEVICE_TYPE) + && (id->dev_type != match->dev_type)) + continue; + + if ((id->match_flags & CCW_DEVICE_ID_MATCH_DEVICE_MODEL) + && (id->dev_model != match->dev_model)) + continue; + + return id; + } + + return NULL; +} + +/** + * struct ccw_device - channel attached device + * @ccwlock: pointer to device lock + * @id: id of this device + * @drv: ccw driver for this device + * @dev: embedded device structure + * @online: online status of device + * @handler: interrupt handler + * + * @handler is a member of the device rather than the driver since a driver + * can have different interrupt handlers for different ccw devices + * (multi-subchannel drivers). + */ +struct ccw_device { + spinlock_t *ccwlock; +/* private: */ + struct ccw_device_private *private; /* cio private information */ +/* public: */ + struct ccw_device_id id; + struct ccw_driver *drv; + struct device dev; + int online; + void (*handler) (struct ccw_device *, unsigned long, struct irb *); +}; + +/* + * Possible events used by the path_event notifier. + */ +#define PE_NONE 0x0 +#define PE_PATH_GONE 0x1 /* A path is no longer available. */ +#define PE_PATH_AVAILABLE 0x2 /* A path has become available and + was successfully verified. */ +#define PE_PATHGROUP_ESTABLISHED 0x4 /* A pathgroup was reset and had + to be established again. */ + +/* + * Possible CIO actions triggered by the unit check handler. 
+ */ +enum uc_todo { + UC_TODO_RETRY, + UC_TODO_RETRY_ON_NEW_PATH, + UC_TODO_STOP +}; + +/** + * struct ccw_driver - device driver for channel attached devices + * @ids: ids supported by this driver + * @probe: function called on probe + * @remove: function called on remove + * @set_online: called when setting device online + * @set_offline: called when setting device offline + * @notify: notify driver of device state changes + * @path_event: notify driver of channel path events + * @shutdown: called at device shutdown + * @prepare: prepare for pm state transition + * @complete: undo work done in @prepare + * @freeze: callback for freezing during hibernation snapshotting + * @thaw: undo work done in @freeze + * @restore: callback for restoring after hibernation + * @uc_handler: callback for unit check handler + * @driver: embedded device driver structure + * @int_class: interruption class to use for accounting interrupts + */ +struct ccw_driver { + struct ccw_device_id *ids; + int (*probe) (struct ccw_device *); + void (*remove) (struct ccw_device *); + int (*set_online) (struct ccw_device *); + int (*set_offline) (struct ccw_device *); + int (*notify) (struct ccw_device *, int); + void (*path_event) (struct ccw_device *, int *); + void (*shutdown) (struct ccw_device *); + int (*prepare) (struct ccw_device *); + void (*complete) (struct ccw_device *); + int (*freeze)(struct ccw_device *); + int (*thaw) (struct ccw_device *); + int (*restore)(struct ccw_device *); + enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *); + struct device_driver driver; + enum interruption_class int_class; +}; + +extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv, + const char *bus_id); + +/* devices drivers call these during module load and unload. 
+ * When a driver is registered, its probe method is called + * when new devices for its type pop up */ +extern int ccw_driver_register (struct ccw_driver *driver); +extern void ccw_driver_unregister (struct ccw_driver *driver); + +struct ccw1; + +extern int ccw_device_set_options_mask(struct ccw_device *, unsigned long); +extern int ccw_device_set_options(struct ccw_device *, unsigned long); +extern void ccw_device_clear_options(struct ccw_device *, unsigned long); +int ccw_device_is_pathgroup(struct ccw_device *cdev); +int ccw_device_is_multipath(struct ccw_device *cdev); + +/* Allow for i/o completion notification after primary interrupt status. */ +#define CCWDEV_EARLY_NOTIFICATION 0x0001 +/* Report all interrupt conditions. */ +#define CCWDEV_REPORT_ALL 0x0002 +/* Try to perform path grouping. */ +#define CCWDEV_DO_PATHGROUP 0x0004 +/* Allow forced onlining of boxed devices. */ +#define CCWDEV_ALLOW_FORCE 0x0008 +/* Try to use multipath mode. */ +#define CCWDEV_DO_MULTIPATH 0x0010 + +extern int ccw_device_start(struct ccw_device *, struct ccw1 *, + unsigned long, __u8, unsigned long); +extern int ccw_device_start_timeout(struct ccw_device *, struct ccw1 *, + unsigned long, __u8, unsigned long, int); +extern int ccw_device_start_key(struct ccw_device *, struct ccw1 *, + unsigned long, __u8, __u8, unsigned long); +extern int ccw_device_start_timeout_key(struct ccw_device *, struct ccw1 *, + unsigned long, __u8, __u8, + unsigned long, int); + + +extern int ccw_device_resume(struct ccw_device *); +extern int ccw_device_halt(struct ccw_device *, unsigned long); +extern int ccw_device_clear(struct ccw_device *, unsigned long); +int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw, + unsigned long intparm, u8 lpm, u8 key); +int ccw_device_tm_start_key(struct ccw_device *, struct tcw *, + unsigned long, u8, u8); +int ccw_device_tm_start_timeout_key(struct ccw_device *, struct tcw *, + unsigned long, u8, u8, int); +int ccw_device_tm_start(struct 
ccw_device *, struct tcw *, + unsigned long, u8); +int ccw_device_tm_start_timeout(struct ccw_device *, struct tcw *, + unsigned long, u8, int); +int ccw_device_tm_intrg(struct ccw_device *cdev); + +int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask); + +extern int ccw_device_set_online(struct ccw_device *cdev); +extern int ccw_device_set_offline(struct ccw_device *cdev); + + +extern struct ciw *ccw_device_get_ciw(struct ccw_device *, __u32 cmd); +extern __u8 ccw_device_get_path_mask(struct ccw_device *); +extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *); + +#define get_ccwdev_lock(x) (x)->ccwlock + +#define to_ccwdev(n) container_of(n, struct ccw_device, dev) +#define to_ccwdrv(n) container_of(n, struct ccw_driver, driver) + +extern struct ccw_device *ccw_device_probe_console(void); +extern int ccw_device_force_console(void); + +int ccw_device_siosl(struct ccw_device *); + +// FIXME: these have to go +extern int _ccw_device_get_subchannel_number(struct ccw_device *); + +extern void *ccw_device_get_chp_desc(struct ccw_device *, int); +#endif /* _S390_CCWDEV_H_ */ diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h new file mode 100644 index 00000000..f2ea2c56 --- /dev/null +++ b/arch/s390/include/asm/ccwgroup.h @@ -0,0 +1,75 @@ +#ifndef S390_CCWGROUP_H +#define S390_CCWGROUP_H + +struct ccw_device; +struct ccw_driver; + +/** + * struct ccwgroup_device - ccw group device + * @creator_id: unique number of the driver + * @state: online/offline state + * @count: number of attached slave devices + * @dev: embedded device structure + * @cdev: variable number of slave devices, allocated as needed + */ +struct ccwgroup_device { + unsigned long creator_id; + enum { + CCWGROUP_OFFLINE, + CCWGROUP_ONLINE, + } state; +/* private: */ + atomic_t onoff; + struct mutex reg_mutex; +/* public: */ + unsigned int count; + struct device dev; + struct ccw_device *cdev[0]; +}; + +/** + * struct ccwgroup_driver - driver for ccw 
group devices + * @max_slaves: maximum number of slave devices + * @driver_id: unique id + * @probe: function called on probe + * @remove: function called on remove + * @set_online: function called when device is set online + * @set_offline: function called when device is set offline + * @shutdown: function called when device is shut down + * @prepare: prepare for pm state transition + * @complete: undo work done in @prepare + * @freeze: callback for freezing during hibernation snapshotting + * @thaw: undo work done in @freeze + * @restore: callback for restoring after hibernation + * @driver: embedded driver structure + */ +struct ccwgroup_driver { + int max_slaves; + unsigned long driver_id; + + int (*probe) (struct ccwgroup_device *); + void (*remove) (struct ccwgroup_device *); + int (*set_online) (struct ccwgroup_device *); + int (*set_offline) (struct ccwgroup_device *); + void (*shutdown)(struct ccwgroup_device *); + int (*prepare) (struct ccwgroup_device *); + void (*complete) (struct ccwgroup_device *); + int (*freeze)(struct ccwgroup_device *); + int (*thaw) (struct ccwgroup_device *); + int (*restore)(struct ccwgroup_device *); + + struct device_driver driver; +}; + +extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver); +extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver); +int ccwgroup_create_from_string(struct device *root, unsigned int creator_id, + struct ccw_driver *cdrv, int num_devices, + const char *buf); + +extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev); +extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev); + +#define to_ccwgroupdev(x) container_of((x), struct ccwgroup_device, dev) +#define to_ccwgroupdrv(x) container_of((x), struct ccwgroup_driver, driver) +#endif diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h new file mode 100644 index 00000000..6c00f680 --- /dev/null +++ b/arch/s390/include/asm/checksum.h @@ -0,0 +1,149 @@ +#ifndef _S390_CHECKSUM_H 
+#define _S390_CHECKSUM_H + +/* + * include/asm-s390/checksum.h + * S390 fast network checksum routines + * see also arch/S390/lib/checksum.c + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Ulrich Hild (first version) + * Martin Schwidefsky (heavily optimized CKSM version) + * D.J. Barrow (third attempt) + */ + +#include <asm/uaccess.h> + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +static inline __wsum +csum_partial(const void *buff, int len, __wsum sum) +{ + register unsigned long reg2 asm("2") = (unsigned long) buff; + register unsigned long reg3 asm("3") = (unsigned long) len; + + asm volatile( + "0: cksm %0,%1\n" /* do checksum on longs */ + " jo 0b\n" + : "+d" (sum), "+d" (reg2), "+d" (reg3) : : "cc", "memory"); + return sum; +} + +/* + * the same as csum_partial_copy, but copies from user space. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + * + * Copy from userspace and compute checksum. If we catch an exception + * then zero the rest of the buffer. 
+ */ +static inline __wsum +csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, + int *err_ptr) +{ + int missing; + + missing = copy_from_user(dst, src, len); + if (missing) { + memset(dst + len - missing, 0, missing); + *err_ptr = -EFAULT; + } + + return csum_partial(dst, len, sum); +} + + +static inline __wsum +csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum) +{ + memcpy(dst,src,len); + return csum_partial(dst, len, sum); +} + +/* + * Fold a partial checksum without adding pseudo headers + */ +static inline __sum16 csum_fold(__wsum sum) +{ + u32 csum = (__force u32) sum; + + csum += (csum >> 16) + (csum << 16); + csum >>= 16; + return (__force __sum16) ~csum; +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + return csum_fold(csum_partial(iph, ihl*4, 0)); +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 32-bit checksum + */ +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, unsigned short proto, + __wsum sum) +{ + __u32 csum = (__force __u32)sum; + + csum += (__force __u32)saddr; + if (csum < (__force __u32)saddr) + csum++; + + csum += (__force __u32)daddr; + if (csum < (__force __u32)daddr) + csum++; + + csum += len + proto; + if (csum < len + proto) + csum++; + + return (__force __wsum)csum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ + +static inline __sum16 +csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline __sum16 ip_compute_csum(const void *buff, int len) +{ + 
return csum_fold(csum_partial(buff, len, 0)); +} + +#endif /* _S390_CHECKSUM_H */ + + diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h new file mode 100644 index 00000000..8e88e222 --- /dev/null +++ b/arch/s390/include/asm/chpid.h @@ -0,0 +1,56 @@ +/* + * arch/s390/include/asm/chpid.h + * + * Copyright IBM Corp. 2007 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef _ASM_S390_CHPID_H +#define _ASM_S390_CHPID_H + +#include <linux/string.h> +#include <linux/types.h> + +#define __MAX_CHPID 255 + +struct chp_id { + u8 reserved1; + u8 cssid; + u8 reserved2; + u8 id; +} __attribute__((packed)); + +#ifdef __KERNEL__ +#include <asm/cio.h> + +static inline void chp_id_init(struct chp_id *chpid) +{ + memset(chpid, 0, sizeof(struct chp_id)); +} + +static inline int chp_id_is_equal(struct chp_id *a, struct chp_id *b) +{ + return (a->id == b->id) && (a->cssid == b->cssid); +} + +static inline void chp_id_next(struct chp_id *chpid) +{ + if (chpid->id < __MAX_CHPID) + chpid->id++; + else { + chpid->id = 0; + chpid->cssid++; + } +} + +static inline int chp_id_is_valid(struct chp_id *chpid) +{ + return (chpid->cssid <= __MAX_CSSID); +} + + +#define chp_id_for_each(c) \ + for (chp_id_init(c); chp_id_is_valid(c); chp_id_next(c)) +#endif /* __KERNEL__ */ + +#endif /* _ASM_S390_CHPID_H */ diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h new file mode 100644 index 00000000..4943654e --- /dev/null +++ b/arch/s390/include/asm/chsc.h @@ -0,0 +1,156 @@ +/* + * ioctl interface for /dev/chsc + * + * Copyright 2008 IBM Corp. 
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + */ + +#ifndef _ASM_CHSC_H +#define _ASM_CHSC_H + +#include <linux/types.h> +#include <asm/chpid.h> +#include <asm/schid.h> + +struct chsc_async_header { + __u16 length; + __u16 code; + __u32 cmd_dependend; + __u32 key : 4; + __u32 : 28; + struct subchannel_id sid; +} __attribute__ ((packed)); + +struct chsc_async_area { + struct chsc_async_header header; + __u8 data[PAGE_SIZE - 16 /* size of chsc_async_header */]; +} __attribute__ ((packed)); + + +struct chsc_response_struct { + __u16 length; + __u16 code; + __u32 parms; + __u8 data[PAGE_SIZE - 8]; +} __attribute__ ((packed)); + +struct chsc_chp_cd { + struct chp_id chpid; + int m; + int fmt; + struct chsc_response_struct cpcb; +}; + +struct chsc_cu_cd { + __u16 cun; + __u8 cssid; + int m; + int fmt; + struct chsc_response_struct cucb; +}; + +struct chsc_sch_cud { + struct subchannel_id schid; + int fmt; + struct chsc_response_struct scub; +}; + +struct conf_id { + int m; + __u8 cssid; + __u8 ssid; +}; + +struct chsc_conf_info { + struct conf_id id; + int fmt; + struct chsc_response_struct scid; +}; + +struct ccl_parm_chpid { + int m; + struct chp_id chp; +}; + +struct ccl_parm_cssids { + __u8 f_cssid; + __u8 l_cssid; +}; + +struct chsc_comp_list { + struct { + enum { + CCL_CU_ON_CHP = 1, + CCL_CHP_TYPE_CAP = 2, + CCL_CSS_IMG = 4, + CCL_CSS_IMG_CONF_CHAR = 5, + CCL_IOP_CHP = 6, + } ctype; + int fmt; + struct ccl_parm_chpid chpid; + struct ccl_parm_cssids cssids; + } req; + struct chsc_response_struct sccl; +}; + +struct chsc_dcal { + struct { + enum { + DCAL_CSS_IID_PN = 4, + } atype; + __u32 list_parm[2]; + int fmt; + } req; + struct chsc_response_struct sdcal; +}; + +struct chsc_cpd_info { + struct chp_id chpid; + int m; + int fmt; + int rfmt; + int c; + struct chsc_response_struct chpdb; +}; + +#define CHSC_IOCTL_MAGIC 'c' + +#define CHSC_START _IOWR(CHSC_IOCTL_MAGIC, 0x81, struct chsc_async_area) +#define CHSC_INFO_CHANNEL_PATH _IOWR(CHSC_IOCTL_MAGIC, 
0x82, \ + struct chsc_chp_cd) +#define CHSC_INFO_CU _IOWR(CHSC_IOCTL_MAGIC, 0x83, struct chsc_cu_cd) +#define CHSC_INFO_SCH_CU _IOWR(CHSC_IOCTL_MAGIC, 0x84, struct chsc_sch_cud) +#define CHSC_INFO_CI _IOWR(CHSC_IOCTL_MAGIC, 0x85, struct chsc_conf_info) +#define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list) +#define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info) +#define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal) + +#ifdef __KERNEL__ + +struct css_general_char { + u64 : 12; + u32 dynio : 1; /* bit 12 */ + u32 : 28; + u32 aif : 1; /* bit 41 */ + u32 : 3; + u32 mcss : 1; /* bit 45 */ + u32 fcs : 1; /* bit 46 */ + u32 : 1; + u32 ext_mb : 1; /* bit 48 */ + u32 : 7; + u32 aif_tdd : 1; /* bit 56 */ + u32 : 1; + u32 qebsm : 1; /* bit 58 */ + u32 : 8; + u32 aif_osa : 1; /* bit 67 */ + u32 : 14; + u32 cib : 1; /* bit 82 */ + u32 : 5; + u32 fcx : 1; /* bit 88 */ + u32 : 7; +}__attribute__((packed)); + +extern struct css_general_char css_general_characteristics; + +#endif /* __KERNEL__ */ +#endif diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h new file mode 100644 index 00000000..fc50a334 --- /dev/null +++ b/arch/s390/include/asm/cio.h @@ -0,0 +1,295 @@ +/* + * include/asm-s390/cio.h + * include/asm-s390x/cio.h + * + * Common interface for I/O on S/390 + */ +#ifndef _ASM_S390_CIO_H_ +#define _ASM_S390_CIO_H_ + +#include <linux/spinlock.h> +#include <asm/types.h> + +#ifdef __KERNEL__ + +#define LPM_ANYPATH 0xff +#define __MAX_CSSID 0 + +#include <asm/scsw.h> + +/** + * struct ccw1 - channel command word + * @cmd_code: command code + * @flags: flags, like IDA addressing, etc. + * @count: byte count + * @cda: data address + * + * The ccw is the basic structure to build channel programs that perform + * operations with the device or the control unit. Only Format-1 channel + * command words are supported. 
+ */ +struct ccw1 { + __u8 cmd_code; + __u8 flags; + __u16 count; + __u32 cda; +} __attribute__ ((packed,aligned(8))); + +#define CCW_FLAG_DC 0x80 +#define CCW_FLAG_CC 0x40 +#define CCW_FLAG_SLI 0x20 +#define CCW_FLAG_SKIP 0x10 +#define CCW_FLAG_PCI 0x08 +#define CCW_FLAG_IDA 0x04 +#define CCW_FLAG_SUSPEND 0x02 + +#define CCW_CMD_READ_IPL 0x02 +#define CCW_CMD_NOOP 0x03 +#define CCW_CMD_BASIC_SENSE 0x04 +#define CCW_CMD_TIC 0x08 +#define CCW_CMD_STLCK 0x14 +#define CCW_CMD_SENSE_PGID 0x34 +#define CCW_CMD_SUSPEND_RECONN 0x5B +#define CCW_CMD_RDC 0x64 +#define CCW_CMD_RELEASE 0x94 +#define CCW_CMD_SET_PGID 0xAF +#define CCW_CMD_SENSE_ID 0xE4 +#define CCW_CMD_DCTL 0xF3 + +#define SENSE_MAX_COUNT 0x20 + +/** + * struct erw - extended report word + * @res0: reserved + * @auth: authorization check + * @pvrf: path-verification-required flag + * @cpt: channel-path timeout + * @fsavf: failing storage address validity flag + * @cons: concurrent sense + * @scavf: secondary ccw address validity flag + * @fsaf: failing storage address format + * @scnt: sense count, if @cons == %1 + * @res16: reserved + */ +struct erw { + __u32 res0 : 3; + __u32 auth : 1; + __u32 pvrf : 1; + __u32 cpt : 1; + __u32 fsavf : 1; + __u32 cons : 1; + __u32 scavf : 1; + __u32 fsaf : 1; + __u32 scnt : 6; + __u32 res16 : 16; +} __attribute__ ((packed)); + +/** + * struct sublog - subchannel logout area + * @res0: reserved + * @esf: extended status flags + * @lpum: last path used mask + * @arep: ancillary report + * @fvf: field-validity flags + * @sacc: storage access code + * @termc: termination code + * @devsc: device-status check + * @serr: secondary error + * @ioerr: i/o-error alert + * @seqc: sequence code + */ +struct sublog { + __u32 res0 : 1; + __u32 esf : 7; + __u32 lpum : 8; + __u32 arep : 1; + __u32 fvf : 5; + __u32 sacc : 2; + __u32 termc : 2; + __u32 devsc : 1; + __u32 serr : 1; + __u32 ioerr : 1; + __u32 seqc : 3; +} __attribute__ ((packed)); + +/** + * struct esw0 - Format 0 Extended 
/**
 * struct irb - interruption response block
 * @scsw: subchannel status word
 * @esw: extended status word, 4 formats
 * @ecw: extended control word
 *
 * The irb that is handed to the device driver when an interrupt occurs. For
 * solicited interrupts, the common I/O layer already performs checks whether
 * a field is valid; a field not being valid is always passed as %0.
 * If a unit check occurred, @ecw may contain sense data; this is retrieved
 * by the common I/O layer itself if the device doesn't support concurrent
 * sense (so that the device driver never needs to perform basic sense itself).
 * For unsolicited interrupts, the irb is passed as-is (except for sense data,
 * if applicable).
 */
struct irb {
	union scsw scsw;
	union {
		struct esw0 esw0;
		struct esw1 esw1;
		struct esw2 esw2;
		struct esw3 esw3;
	} esw;
	__u8   ecw[32];
} __attribute__ ((packed,aligned(4)));
+ */ +struct ccw_dev_id { + u8 ssid; + u16 devno; +}; + +/** + * ccw_device_id_is_equal() - compare two ccw_dev_ids + * @dev_id1: a ccw_dev_id + * @dev_id2: another ccw_dev_id + * Returns: + * %1 if the two structures are equal field-by-field, + * %0 if not. + * Context: + * any + */ +static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, + struct ccw_dev_id *dev_id2) +{ + if ((dev_id1->ssid == dev_id2->ssid) && + (dev_id1->devno == dev_id2->devno)) + return 1; + return 0; +} + +extern void wait_cons_dev(void); + +extern void css_schedule_reprobe(void); + +extern void reipl_ccw_dev(struct ccw_dev_id *id); + +struct cio_iplinfo { + u16 devno; + int is_qdio; +}; + +extern int cio_get_iplinfo(struct cio_iplinfo *iplinfo); + +/* Function from drivers/s390/cio/chsc.c */ +int chsc_sstpc(void *page, unsigned int op, u16 ctrl); +int chsc_sstpi(void *page, void *result, size_t size); + +#endif + +#endif diff --git a/arch/s390/include/asm/cmb.h b/arch/s390/include/asm/cmb.h new file mode 100644 index 00000000..39ae0329 --- /dev/null +++ b/arch/s390/include/asm/cmb.h @@ -0,0 +1,61 @@ +#ifndef S390_CMB_H +#define S390_CMB_H + +#include <linux/types.h> + +/** + * struct cmbdata - channel measurement block data for user space + * @size: size of the stored data + * @elapsed_time: time since last sampling + * @ssch_rsch_count: number of ssch and rsch + * @sample_count: number of samples + * @device_connect_time: time of device connect + * @function_pending_time: time of function pending + * @device_disconnect_time: time of device disconnect + * @control_unit_queuing_time: time of control unit queuing + * @device_active_only_time: time of device active only + * @device_busy_time: time of device busy (ext. format) + * @initial_command_response_time: initial command response time (ext. format) + * + * All values are stored as 64 bit for simplicity, especially + * in 32 bit emulation mode. All time values are normalized to + * nanoseconds. 
struct cmbdata {
	__u64 size;
	__u64 elapsed_time;
 /* basic and extended format: */
	__u64 ssch_rsch_count;
	__u64 sample_count;
	__u64 device_connect_time;
	__u64 function_pending_time;
	__u64 device_disconnect_time;
	__u64 control_unit_queuing_time;
	__u64 device_active_only_time;
 /* extended format only: */
	__u64 device_busy_time;
	__u64 initial_command_response_time;
};

/* enable channel measurement */
#define BIODASDCMFENABLE	_IO(DASD_IOCTL_LETTER, 32)
/* disable channel measurement */
#define BIODASDCMFDISABLE	_IO(DASD_IOCTL_LETTER, 33)
/*
 * read channel measurement data
 * NOTE(review): uses the same command number (33) as BIODASDCMFDISABLE;
 * presumably the two stay distinct because this one is built with _IOWR and
 * a payload type instead of _IO - confirm against the ioctl encoding macros.
 */
#define BIODASDREADALLCMB	_IOWR(DASD_IOCTL_LETTER, 33, struct cmbdata)
1999, 2011 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + */ + +#ifndef __ASM_CMPXCHG_H +#define __ASM_CMPXCHG_H + +#include <linux/types.h> + +extern void __xchg_called_with_bad_pointer(void); + +static inline unsigned long __xchg(unsigned long x, void *ptr, int size) +{ + unsigned long addr, old; + int shift; + + switch (size) { + case 1: + addr = (unsigned long) ptr; + shift = (3 ^ (addr & 3)) << 3; + addr ^= addr & 3; + asm volatile( + " l %0,%4\n" + "0: lr 0,%0\n" + " nr 0,%3\n" + " or 0,%2\n" + " cs %0,0,%4\n" + " jl 0b\n" + : "=&d" (old), "=Q" (*(int *) addr) + : "d" (x << shift), "d" (~(255 << shift)), + "Q" (*(int *) addr) : "memory", "cc", "0"); + return old >> shift; + case 2: + addr = (unsigned long) ptr; + shift = (2 ^ (addr & 2)) << 3; + addr ^= addr & 2; + asm volatile( + " l %0,%4\n" + "0: lr 0,%0\n" + " nr 0,%3\n" + " or 0,%2\n" + " cs %0,0,%4\n" + " jl 0b\n" + : "=&d" (old), "=Q" (*(int *) addr) + : "d" (x << shift), "d" (~(65535 << shift)), + "Q" (*(int *) addr) : "memory", "cc", "0"); + return old >> shift; + case 4: + asm volatile( + " l %0,%3\n" + "0: cs %0,%2,%3\n" + " jl 0b\n" + : "=&d" (old), "=Q" (*(int *) ptr) + : "d" (x), "Q" (*(int *) ptr) + : "memory", "cc"); + return old; +#ifdef CONFIG_64BIT + case 8: + asm volatile( + " lg %0,%3\n" + "0: csg %0,%2,%3\n" + " jl 0b\n" + : "=&d" (old), "=m" (*(long *) ptr) + : "d" (x), "Q" (*(long *) ptr) + : "memory", "cc"); + return old; +#endif /* CONFIG_64BIT */ + } + __xchg_called_with_bad_pointer(); + return x; +} + +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (void *)(ptr), sizeof(*(ptr)));\ + __ret; \ +}) + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. 
+ */ + +#define __HAVE_ARCH_CMPXCHG + +extern void __cmpxchg_called_with_bad_pointer(void); + +static inline unsigned long __cmpxchg(void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long addr, prev, tmp; + int shift; + + switch (size) { + case 1: + addr = (unsigned long) ptr; + shift = (3 ^ (addr & 3)) << 3; + addr ^= addr & 3; + asm volatile( + " l %0,%2\n" + "0: nr %0,%5\n" + " lr %1,%0\n" + " or %0,%3\n" + " or %1,%4\n" + " cs %0,%1,%2\n" + " jnl 1f\n" + " xr %1,%0\n" + " nr %1,%5\n" + " jnz 0b\n" + "1:" + : "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr) + : "d" (old << shift), "d" (new << shift), + "d" (~(255 << shift)), "Q" (*(int *) ptr) + : "memory", "cc"); + return prev >> shift; + case 2: + addr = (unsigned long) ptr; + shift = (2 ^ (addr & 2)) << 3; + addr ^= addr & 2; + asm volatile( + " l %0,%2\n" + "0: nr %0,%5\n" + " lr %1,%0\n" + " or %0,%3\n" + " or %1,%4\n" + " cs %0,%1,%2\n" + " jnl 1f\n" + " xr %1,%0\n" + " nr %1,%5\n" + " jnz 0b\n" + "1:" + : "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr) + : "d" (old << shift), "d" (new << shift), + "d" (~(65535 << shift)), "Q" (*(int *) ptr) + : "memory", "cc"); + return prev >> shift; + case 4: + asm volatile( + " cs %0,%3,%1\n" + : "=&d" (prev), "=Q" (*(int *) ptr) + : "0" (old), "d" (new), "Q" (*(int *) ptr) + : "memory", "cc"); + return prev; +#ifdef CONFIG_64BIT + case 8: + asm volatile( + " csg %0,%3,%1\n" + : "=&d" (prev), "=Q" (*(long *) ptr) + : "0" (old), "d" (new), "Q" (*(long *) ptr) + : "memory", "cc"); + return prev; +#endif /* CONFIG_64BIT */ + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr)))) + +#ifdef CONFIG_64BIT +#define cmpxchg64(ptr, o, n) \ +({ \ + cmpxchg((ptr), (o), (n)); \ +}) +#else /* CONFIG_64BIT */ +static inline unsigned long long __cmpxchg64(void *ptr, + unsigned long long old, + unsigned long long new) +{ + 
register_pair rp_old = {.pair = old}; + register_pair rp_new = {.pair = new}; + + asm volatile( + " cds %0,%2,%1" + : "+&d" (rp_old), "=Q" (ptr) + : "d" (rp_new), "Q" (ptr) + : "cc"); + return rp_old.pair; +} +#define cmpxchg64(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg64((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n))) +#endif /* CONFIG_64BIT */ + +#include <asm-generic/cmpxchg-local.h> + +static inline unsigned long __cmpxchg_local(void *ptr, + unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + return __cmpxchg(ptr, old, new, size); + default: + return __cmpxchg_local_generic(ptr, old, new, size); + } + + return old; +} + +/* + * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make + * them available. + */ +#define cmpxchg_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr)))) + +#define cmpxchg64_local(ptr, o, n) cmpxchg64((ptr), (o), (n)) + +#endif /* __ASM_CMPXCHG_H */ diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h new file mode 100644 index 00000000..234f1d85 --- /dev/null +++ b/arch/s390/include/asm/compat.h @@ -0,0 +1,244 @@ +#ifndef _ASM_S390X_COMPAT_H +#define _ASM_S390X_COMPAT_H +/* + * Architecture specific compatibility types + */ +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/thread_info.h> + +#define PSW32_MASK_PER 0x40000000UL +#define PSW32_MASK_DAT 0x04000000UL +#define PSW32_MASK_IO 0x02000000UL +#define PSW32_MASK_EXT 0x01000000UL +#define PSW32_MASK_KEY 0x00F00000UL +#define PSW32_MASK_BASE 0x00080000UL /* Always one */ +#define PSW32_MASK_MCHECK 0x00040000UL +#define PSW32_MASK_WAIT 0x00020000UL +#define PSW32_MASK_PSTATE 0x00010000UL +#define PSW32_MASK_ASC 0x0000C000UL +#define PSW32_MASK_CC 0x00003000UL +#define PSW32_MASK_PM 0x00000f00UL + +#define PSW32_MASK_USER 0x00003F00UL + +#define 
PSW32_ADDR_AMODE 0x80000000UL +#define PSW32_ADDR_INSN 0x7FFFFFFFUL + +#define PSW32_DEFAULT_KEY (((u32) PAGE_DEFAULT_ACC) << 20) + +#define PSW32_ASC_PRIMARY 0x00000000UL +#define PSW32_ASC_ACCREG 0x00004000UL +#define PSW32_ASC_SECONDARY 0x00008000UL +#define PSW32_ASC_HOME 0x0000C000UL + +extern u32 psw32_user_bits; + +#define COMPAT_USER_HZ 100 +#define COMPAT_UTS_MACHINE "s390\0\0\0\0" + +typedef u32 compat_size_t; +typedef s32 compat_ssize_t; +typedef s32 compat_time_t; +typedef s32 compat_clock_t; +typedef s32 compat_pid_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; +typedef u16 compat_mode_t; +typedef u32 compat_ino_t; +typedef u16 compat_dev_t; +typedef s32 compat_off_t; +typedef s64 compat_loff_t; +typedef u16 compat_nlink_t; +typedef u16 compat_ipc_pid_t; +typedef s32 compat_daddr_t; +typedef u32 compat_caddr_t; +typedef __kernel_fsid_t compat_fsid_t; +typedef s32 compat_key_t; +typedef s32 compat_timer_t; + +typedef s32 compat_int_t; +typedef s32 compat_long_t; +typedef s64 compat_s64; +typedef u32 compat_uint_t; +typedef u32 compat_ulong_t; +typedef u64 compat_u64; + +struct compat_timespec { + compat_time_t tv_sec; + s32 tv_nsec; +}; + +struct compat_timeval { + compat_time_t tv_sec; + s32 tv_usec; +}; + +struct compat_stat { + compat_dev_t st_dev; + u16 __pad1; + compat_ino_t st_ino; + compat_mode_t st_mode; + compat_nlink_t st_nlink; + __compat_uid_t st_uid; + __compat_gid_t st_gid; + compat_dev_t st_rdev; + u16 __pad2; + u32 st_size; + u32 st_blksize; + u32 st_blocks; + u32 st_atime; + u32 st_atime_nsec; + u32 st_mtime; + u32 st_mtime_nsec; + u32 st_ctime; + u32 st_ctime_nsec; + u32 __unused4; + u32 __unused5; +}; + +struct compat_flock { + short l_type; + short l_whence; + compat_off_t l_start; + compat_off_t l_len; + compat_pid_t l_pid; +}; + +#define F_GETLK64 12 +#define F_SETLK64 13 +#define F_SETLKW64 14 + +struct compat_flock64 { + short l_type; + short l_whence; + 
compat_loff_t l_start; + compat_loff_t l_len; + compat_pid_t l_pid; +}; + +struct compat_statfs { + s32 f_type; + s32 f_bsize; + s32 f_blocks; + s32 f_bfree; + s32 f_bavail; + s32 f_files; + s32 f_ffree; + compat_fsid_t f_fsid; + s32 f_namelen; + s32 f_frsize; + s32 f_flags; + s32 f_spare[5]; +}; + +#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff +#define COMPAT_RLIM_INFINITY 0xffffffff + +typedef u32 compat_old_sigset_t; /* at least 32 bits */ + +#define _COMPAT_NSIG 64 +#define _COMPAT_NSIG_BPW 32 + +typedef u32 compat_sigset_word; + +#define COMPAT_OFF_T_MAX 0x7fffffff +#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL + +/* + * A pointer passed in from user mode. This should not + * be used for syscall parameters, just declare them + * as pointers because the syscall entry code will have + * appropriately converted them already. + */ +typedef u32 compat_uptr_t; + +static inline void __user *compat_ptr(compat_uptr_t uptr) +{ + return (void __user *)(unsigned long)(uptr & 0x7fffffffUL); +} + +static inline compat_uptr_t ptr_to_compat(void __user *uptr) +{ + return (u32)(unsigned long)uptr; +} + +#ifdef CONFIG_COMPAT + +static inline int is_compat_task(void) +{ + return is_32bit_task(); +} + +#endif + +static inline void __user *arch_compat_alloc_user_space(long len) +{ + unsigned long stack; + + stack = KSTK_ESP(current); + if (is_compat_task()) + stack &= 0x7fffffffUL; + return (void __user *) (stack - len); +} + +struct compat_ipc64_perm { + compat_key_t key; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; + compat_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned short __pad2; + unsigned int __unused1; + unsigned int __unused2; +}; + +struct compat_semid64_ds { + struct compat_ipc64_perm sem_perm; + compat_time_t sem_otime; + compat_ulong_t __pad1; + compat_time_t sem_ctime; + compat_ulong_t __pad2; + compat_ulong_t sem_nsems; + compat_ulong_t __unused1; + compat_ulong_t __unused2; +}; + +struct 
compat_msqid64_ds { + struct compat_ipc64_perm msg_perm; + compat_time_t msg_stime; + compat_ulong_t __pad1; + compat_time_t msg_rtime; + compat_ulong_t __pad2; + compat_time_t msg_ctime; + compat_ulong_t __pad3; + compat_ulong_t msg_cbytes; + compat_ulong_t msg_qnum; + compat_ulong_t msg_qbytes; + compat_pid_t msg_lspid; + compat_pid_t msg_lrpid; + compat_ulong_t __unused1; + compat_ulong_t __unused2; +}; + +struct compat_shmid64_ds { + struct compat_ipc64_perm shm_perm; + compat_size_t shm_segsz; + compat_time_t shm_atime; + compat_ulong_t __pad1; + compat_time_t shm_dtime; + compat_ulong_t __pad2; + compat_time_t shm_ctime; + compat_ulong_t __pad3; + compat_pid_t shm_cpid; + compat_pid_t shm_lpid; + compat_ulong_t shm_nattch; + compat_ulong_t __unused1; + compat_ulong_t __unused2; +}; +#endif /* _ASM_S390X_COMPAT_H */ diff --git a/arch/s390/include/asm/cpcmd.h b/arch/s390/include/asm/cpcmd.h new file mode 100644 index 00000000..48a9eab1 --- /dev/null +++ b/arch/s390/include/asm/cpcmd.h @@ -0,0 +1,34 @@ +/* + * arch/s390/kernel/cpcmd.h + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Christian Borntraeger (cborntra@de.ibm.com), + */ + +#ifndef _ASM_S390_CPCMD_H +#define _ASM_S390_CPCMD_H + +/* + * the lowlevel function for cpcmd + * the caller of __cpcmd has to ensure that the response buffer is below 2 GB + */ +extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code); + +/* + * cpcmd is the in-kernel interface for issuing CP commands + * + * cmd: null-terminated command string, max 240 characters + * response: response buffer for VM's textual response + * rlen: size of the response buffer, cpcmd will not exceed this size + * but will cap the output, if its too large. 
Everything that + * did not fit into the buffer will be silently dropped + * response_code: return pointer for VM's error code + * return value: the size of the response. The caller can check if the buffer + * was large enough by comparing the return value and rlen + * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep + */ +extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code); + +#endif /* _ASM_S390_CPCMD_H */ diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h new file mode 100644 index 00000000..e0b69540 --- /dev/null +++ b/arch/s390/include/asm/cpu.h @@ -0,0 +1,26 @@ +/* + * Copyright IBM Corp. 2000,2009 + * Author(s): Hartmut Penner <hp@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Christian Ehrhardt <ehrhardt@de.ibm.com>, + */ + +#ifndef _ASM_S390_CPU_H +#define _ASM_S390_CPU_H + +#define MAX_CPU_ADDRESS 255 + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> + +struct cpuid +{ + unsigned int version : 8; + unsigned int ident : 24; + unsigned int machine : 16; + unsigned int unused : 16; +} __attribute__ ((packed, aligned(8))); + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_S390_CPU_H */ diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h new file mode 100644 index 00000000..a3afecda --- /dev/null +++ b/arch/s390/include/asm/cpu_mf.h @@ -0,0 +1,97 @@ +/* + * CPU-measurement facilities + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * Jan Glauber <jang@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */ +#ifndef _ASM_S390_CPU_MF_H +#define _ASM_S390_CPU_MF_H + +#include <asm/facility.h> + +#define CPU_MF_INT_SF_IAE (1 << 31) /* invalid entry address */ +#define CPU_MF_INT_SF_ISE (1 << 30) /* incorrect SDBT entry */ +#define CPU_MF_INT_SF_PRA (1 << 29) /* program request alert */ +#define CPU_MF_INT_SF_SACA (1 << 23) /* sampler auth. change alert */ +#define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */ +#define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */ +#define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */ + +#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA) +#define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \ + CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ + CPU_MF_INT_SF_LSDA) + +/* CPU measurement facility support */ +static inline int cpum_cf_avail(void) +{ + return MACHINE_HAS_SPP && test_facility(67); +} + +static inline int cpum_sf_avail(void) +{ + return MACHINE_HAS_SPP && test_facility(68); +} + + +struct cpumf_ctr_info { + u16 cfvn; + u16 auth_ctl; + u16 enable_ctl; + u16 act_ctl; + u16 max_cpu; + u16 csvn; + u16 max_cg; + u16 reserved1; + u32 reserved2[12]; +} __packed; + +/* Query counter information */ +static inline int qctri(struct cpumf_ctr_info *info) +{ + int rc = -EINVAL; + + asm volatile ( + "0: .insn s,0xb28e0000,%1\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(1b, 2b) + : "+d" (rc), "=Q" (*info)); + return rc; +} + +/* Load CPU-counter-set controls */ +static inline int lcctl(u64 ctl) +{ + int cc; + + asm volatile ( + " .insn s,0xb2840000,%1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) : "m" (ctl) : "cc"); + return cc; +} + +/* Extract CPU counter */ +static inline int ecctr(u64 ctr, u64 *val) +{ + register u64 content asm("4") = 0; + int cc; + + asm volatile ( + " .insn rre,0xb2e40000,%0,%2\n" + " ipm %1\n" + " srl %1,28\n" + : "=d" (content), "=d" (cc) : "d" (ctr) : "cc"); + if (!cc) + *val = content; + return cc; +} + +#endif /* _ASM_S390_CPU_MF_H */ diff --git 
/*
 * __div - divide a 64-bit value by an unsigned long
 * @n:    dividend
 * @base: divisor
 *
 * With __s390x__ defined this is a plain C division. Otherwise both
 * operands are halved, the division is done with the "dr" instruction on a
 * register pair, and the result is read from the odd register of the pair.
 *
 * NOTE(review): the halving presumably keeps both operands inside the
 * range "dr" can handle; it also drops the low bit of @n and @base, and
 * the result must fit into an unsigned long - confirm against the
 * instruction definition and the callers.
 */
static inline unsigned long __div(unsigned long long n, unsigned long base)
{
#ifndef __s390x__
	register_pair rp;

	rp.pair = n >> 1;
	asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1));
	return rp.subreg.odd;
#else /* __s390x__ */
	return n / base;
#endif /* __s390x__ */
}
/*
 * Convert cputime to seconds and back.
 */
static inline unsigned int cputime_to_secs(const cputime_t cputime)
{
	/*
	 * Dividing by 2048000000 and then halving the quotient is a
	 * division by 4096000000, the factor secs_to_cputime() applies.
	 * NOTE(review): the split is presumably there to keep the divisor
	 * within what the non-s390x __div() path accepts - confirm.
	 */
	return __div((__force unsigned long long) cputime, 2048000000) >> 1;
}

static inline cputime_t secs_to_cputime(const unsigned int s)
{
	return (__force cputime_t)(s * 4096000000ULL);
}
+ */ +static inline cputime_t timeval_to_cputime(const struct timeval *value) +{ + unsigned long long ret = value->tv_sec * 4096000000ULL; + return (__force cputime_t)(ret + value->tv_usec * 4096ULL); +} + +static inline void cputime_to_timeval(const cputime_t cputime, + struct timeval *value) +{ + unsigned long long __cputime = (__force unsigned long long) cputime; +#ifndef __s390x__ + register_pair rp; + + rp.pair = __cputime >> 1; + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); + value->tv_usec = rp.subreg.even / 4096; + value->tv_sec = rp.subreg.odd; +#else + value->tv_usec = (__cputime % 4096000000ULL) / 4096; + value->tv_sec = __cputime / 4096000000ULL; +#endif +} + +/* + * Convert cputime to clock and back. + */ +static inline clock_t cputime_to_clock_t(cputime_t cputime) +{ + unsigned long long clock = (__force unsigned long long) cputime; + do_div(clock, 4096000000ULL / USER_HZ); + return clock; +} + +static inline cputime_t clock_t_to_cputime(unsigned long x) +{ + return (__force cputime_t)(x * (4096000000ULL / USER_HZ)); +} + +/* + * Convert cputime64 to clock. 
+ */ +static inline clock_t cputime64_to_clock_t(cputime64_t cputime) +{ + unsigned long long clock = (__force unsigned long long) cputime; + do_div(clock, 4096000000ULL / USER_HZ); + return clock; +} + +struct s390_idle_data { + unsigned int sequence; + unsigned long long idle_count; + unsigned long long idle_enter; + unsigned long long idle_exit; + unsigned long long idle_time; + int nohz_delay; +}; + +DECLARE_PER_CPU(struct s390_idle_data, s390_idle); + +cputime64_t s390_get_idle_time(int cpu); + +#define arch_idle_time(cpu) s390_get_idle_time(cpu) + +static inline int s390_nohz_delay(int cpu) +{ + return __get_cpu_var(s390_idle).nohz_delay != 0; +} + +#define arch_needs_cpu(cpu) s390_nohz_delay(cpu) + +#endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h new file mode 100644 index 00000000..749a97e6 --- /dev/null +++ b/arch/s390/include/asm/crw.h @@ -0,0 +1,69 @@ +/* + * Data definitions for channel report processing + * Copyright IBM Corp. 
2000,2009 + * Author(s): Ingo Adlung <adlung@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Cornelia Huck <cornelia.huck@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ + +#ifndef _ASM_S390_CRW_H +#define _ASM_S390_CRW_H + +#include <linux/types.h> + +/* + * Channel Report Word + */ +struct crw { + __u32 res1 : 1; /* reserved zero */ + __u32 slct : 1; /* solicited */ + __u32 oflw : 1; /* overflow */ + __u32 chn : 1; /* chained */ + __u32 rsc : 4; /* reporting source code */ + __u32 anc : 1; /* ancillary report */ + __u32 res2 : 1; /* reserved zero */ + __u32 erc : 6; /* error-recovery code */ + __u32 rsid : 16; /* reporting-source ID */ +} __attribute__ ((packed)); + +typedef void (*crw_handler_t)(struct crw *, struct crw *, int); + +extern int crw_register_handler(int rsc, crw_handler_t handler); +extern void crw_unregister_handler(int rsc); +extern void crw_handle_channel_report(void); +void crw_wait_for_channel_report(void); + +#define NR_RSCS 16 + +#define CRW_RSC_MONITOR 0x2 /* monitoring facility */ +#define CRW_RSC_SCH 0x3 /* subchannel */ +#define CRW_RSC_CPATH 0x4 /* channel path */ +#define CRW_RSC_CONFIG 0x9 /* configuration-alert facility */ +#define CRW_RSC_CSS 0xB /* channel subsystem */ + +#define CRW_ERC_EVENT 0x00 /* event information pending */ +#define CRW_ERC_AVAIL 0x01 /* available */ +#define CRW_ERC_INIT 0x02 /* initialized */ +#define CRW_ERC_TERROR 0x03 /* temporary error */ +#define CRW_ERC_IPARM 0x04 /* installed parm initialized */ +#define CRW_ERC_TERM 0x05 /* terminal */ +#define CRW_ERC_PERRN 0x06 /* perm. error, fac. not init */ +#define CRW_ERC_PERRI 0x07 /* perm. 
error, facility init */ +#define CRW_ERC_PMOD 0x08 /* installed parameters modified */ + +static inline int stcrw(struct crw *pcrw) +{ + int ccode; + + asm volatile( + " stcrw 0(%2)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (ccode), "=m" (*pcrw) + : "a" (pcrw) + : "cc" ); + return ccode; +} + +#endif /* _ASM_S390_CRW_H */ diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h new file mode 100644 index 00000000..ecde9417 --- /dev/null +++ b/arch/s390/include/asm/ctl_reg.h @@ -0,0 +1,76 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_CTL_REG_H +#define __ASM_CTL_REG_H + +#ifdef __s390x__ + +#define __ctl_load(array, low, high) ({ \ + typedef struct { char _[sizeof(array)]; } addrtype; \ + asm volatile( \ + " lctlg %1,%2,%0\n" \ + : : "Q" (*(addrtype *)(&array)), \ + "i" (low), "i" (high)); \ + }) + +#define __ctl_store(array, low, high) ({ \ + typedef struct { char _[sizeof(array)]; } addrtype; \ + asm volatile( \ + " stctg %1,%2,%0\n" \ + : "=Q" (*(addrtype *)(&array)) \ + : "i" (low), "i" (high)); \ + }) + +#else /* __s390x__ */ + +#define __ctl_load(array, low, high) ({ \ + typedef struct { char _[sizeof(array)]; } addrtype; \ + asm volatile( \ + " lctl %1,%2,%0\n" \ + : : "Q" (*(addrtype *)(&array)), \ + "i" (low), "i" (high)); \ +}) + +#define __ctl_store(array, low, high) ({ \ + typedef struct { char _[sizeof(array)]; } addrtype; \ + asm volatile( \ + " stctl %1,%2,%0\n" \ + : "=Q" (*(addrtype *)(&array)) \ + : "i" (low), "i" (high)); \ + }) + +#endif /* __s390x__ */ + +#define __ctl_set_bit(cr, bit) ({ \ + unsigned long __dummy; \ + __ctl_store(__dummy, cr, cr); \ + __dummy |= 1UL << (bit); \ + __ctl_load(__dummy, cr, cr); \ +}) + +#define __ctl_clear_bit(cr, bit) ({ \ + unsigned long __dummy; \ + __ctl_store(__dummy, cr, cr); \ + __dummy &= ~(1UL << (bit)); \ + __ctl_load(__dummy, cr, cr); \ +}) + +#ifdef CONFIG_SMP + +extern void smp_ctl_set_bit(int cr, 
int bit); +extern void smp_ctl_clear_bit(int cr, int bit); +#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) +#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit) + +#else + +#define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit) +#define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit) + +#endif /* CONFIG_SMP */ + +#endif /* __ASM_CTL_REG_H */ diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h new file mode 100644 index 00000000..83cf36cd --- /dev/null +++ b/arch/s390/include/asm/current.h @@ -0,0 +1,23 @@ +/* + * include/asm-s390/current.h + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/current.h" + */ + +#ifndef _S390_CURRENT_H +#define _S390_CURRENT_H + +#ifdef __KERNEL__ +#include <asm/lowcore.h> + +struct task_struct; + +#define current ((struct task_struct *const)S390_lowcore.current_task) + +#endif + +#endif /* !(_S390_CURRENT_H) */ diff --git a/arch/s390/include/asm/dasd.h b/arch/s390/include/asm/dasd.h new file mode 100644 index 00000000..0be28efe --- /dev/null +++ b/arch/s390/include/asm/dasd.h @@ -0,0 +1,292 @@ +/* + * File...........: linux/drivers/s390/block/dasd.c + * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> + * Bugreports.to..: <Linux390@de.ibm.com> + * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 + * EMC Symmetrix ioctl Copyright EMC Corporation, 2008 + * Author.........: Nigel Hislop <hislop_nigel@emc.com> + * + * This file is the interface of the DASD device driver, which is exported to user space + * any future changes wrt the API will result in a change of the APIVERSION reported + * to userspace by the DASDAPIVER-ioctl + * + */ + +#ifndef DASD_H +#define DASD_H +#include <linux/types.h> +#include <linux/ioctl.h> + +#define DASD_IOCTL_LETTER 'D' + +#define DASD_API_VERSION 6 + +/* + * struct dasd_information2_t + * represents 
any data about the device, which is visible to userspace. + * including format and features. + */ +typedef struct dasd_information2_t { + unsigned int devno; /* S/390 devno */ + unsigned int real_devno; /* for aliases */ + unsigned int schid; /* S/390 subchannel identifier */ + unsigned int cu_type : 16; /* from SenseID */ + unsigned int cu_model : 8; /* from SenseID */ + unsigned int dev_type : 16; /* from SenseID */ + unsigned int dev_model : 8; /* from SenseID */ + unsigned int open_count; + unsigned int req_queue_len; + unsigned int chanq_len; /* length of chanq */ + char type[4]; /* from discipline.name, 'none' for unknown */ + unsigned int status; /* current device level */ + unsigned int label_block; /* where to find the VOLSER */ + unsigned int FBA_layout; /* fixed block size (like AIXVOL) */ + unsigned int characteristics_size; + unsigned int confdata_size; + char characteristics[64]; /* from read_device_characteristics */ + char configuration_data[256]; /* from read_configuration_data */ + unsigned int format; /* format info like formatted/cdl/ldl/... */ + unsigned int features; /* dasd features like 'ro',... */ + unsigned int reserved0; /* reserved for further use ,... */ + unsigned int reserved1; /* reserved for further use ,... */ + unsigned int reserved2; /* reserved for further use ,... */ + unsigned int reserved3; /* reserved for further use ,... */ + unsigned int reserved4; /* reserved for further use ,... */ + unsigned int reserved5; /* reserved for further use ,... */ + unsigned int reserved6; /* reserved for further use ,... */ + unsigned int reserved7; /* reserved for further use ,... 
*/ +} dasd_information2_t; + +/* + * values to be used for dasd_information2_t.format + * 0x00: NOT formatted + * 0x01: Linux disc layout + * 0x02: Common disc layout + */ +#define DASD_FORMAT_NONE 0 +#define DASD_FORMAT_LDL 1 +#define DASD_FORMAT_CDL 2 +/* + * values to be used for dasd_information2_t.features + * 0x00: default features + * 0x01: readonly (ro) + * 0x02: use diag discipline (diag) + * 0x04: set the device initially online (internal use only) + * 0x08: enable ERP related logging + * 0x40: give access to raw eckd data + */ +#define DASD_FEATURE_DEFAULT 0x00 +#define DASD_FEATURE_READONLY 0x01 +#define DASD_FEATURE_USEDIAG 0x02 +#define DASD_FEATURE_INITIAL_ONLINE 0x04 +#define DASD_FEATURE_ERPLOG 0x08 +#define DASD_FEATURE_FAILFAST 0x10 +#define DASD_FEATURE_FAILONSLCK 0x20 +#define DASD_FEATURE_USERAW 0x40 + +#define DASD_PARTN_BITS 2 + +/* + * struct dasd_information_t + * represents any data about the device, which is visible to userspace + */ +typedef struct dasd_information_t { + unsigned int devno; /* S/390 devno */ + unsigned int real_devno; /* for aliases */ + unsigned int schid; /* S/390 subchannel identifier */ + unsigned int cu_type : 16; /* from SenseID */ + unsigned int cu_model : 8; /* from SenseID */ + unsigned int dev_type : 16; /* from SenseID */ + unsigned int dev_model : 8; /* from SenseID */ + unsigned int open_count; + unsigned int req_queue_len; + unsigned int chanq_len; /* length of chanq */ + char type[4]; /* from discipline.name, 'none' for unknown */ + unsigned int status; /* current device level */ + unsigned int label_block; /* where to find the VOLSER */ + unsigned int FBA_layout; /* fixed block size (like AIXVOL) */ + unsigned int characteristics_size; + unsigned int confdata_size; + char characteristics[64]; /* from read_device_characteristics */ + char configuration_data[256]; /* from read_configuration_data */ +} dasd_information_t; + +/* + * Read Subsystem Data - Performance Statistics + */ +typedef struct 
dasd_rssd_perf_stats_t { + unsigned char invalid:1; + unsigned char format:3; + unsigned char data_format:4; + unsigned char unit_address; + unsigned short device_status; + unsigned int nr_read_normal; + unsigned int nr_read_normal_hits; + unsigned int nr_write_normal; + unsigned int nr_write_fast_normal_hits; + unsigned int nr_read_seq; + unsigned int nr_read_seq_hits; + unsigned int nr_write_seq; + unsigned int nr_write_fast_seq_hits; + unsigned int nr_read_cache; + unsigned int nr_read_cache_hits; + unsigned int nr_write_cache; + unsigned int nr_write_fast_cache_hits; + unsigned int nr_inhibit_cache; + unsigned int nr_bybass_cache; + unsigned int nr_seq_dasd_to_cache; + unsigned int nr_dasd_to_cache; + unsigned int nr_cache_to_dasd; + unsigned int nr_delayed_fast_write; + unsigned int nr_normal_fast_write; + unsigned int nr_seq_fast_write; + unsigned int nr_cache_miss; + unsigned char status2; + unsigned int nr_quick_write_promotes; + unsigned char reserved; + unsigned short ssid; + unsigned char reseved2[96]; +} __attribute__((packed)) dasd_rssd_perf_stats_t; + +/* + * struct dasd_profile_info_t + * holds the profiling information + */ +typedef struct dasd_profile_info_t { + unsigned int dasd_io_reqs; /* number of requests processed at all */ + unsigned int dasd_io_sects; /* number of sectors processed at all */ + unsigned int dasd_io_secs[32]; /* histogram of request's sizes */ + unsigned int dasd_io_times[32]; /* histogram of request's times */ + unsigned int dasd_io_timps[32]; /* histogram of request's times per sector */ + unsigned int dasd_io_time1[32]; /* histogram of time from build to start */ + unsigned int dasd_io_time2[32]; /* histogram of time from start to irq */ + unsigned int dasd_io_time2ps[32]; /* histogram of time from start to irq (presumably per sector; TODO confirm) */ + unsigned int dasd_io_time3[32]; /* histogram of time from irq to end */ + unsigned int dasd_io_nr_req[32]; /* histogram of # of requests in chanq */ +} dasd_profile_info_t; + +/* + * struct format_data_t + * 
represents all data necessary to format a dasd + */ +typedef struct format_data_t { + unsigned int start_unit; /* from track */ + unsigned int stop_unit; /* to track */ + unsigned int blksize; /* sectorsize */ + unsigned int intensity; +} format_data_t; + +/* + * values to be used for format_data_t.intensity + * 0/8: normal format + * 1/9: also write record zero + * 3/11: also write home address + * 4/12: invalidate track + */ +#define DASD_FMT_INT_FMT_R0 1 /* write record zero */ +#define DASD_FMT_INT_FMT_HA 2 /* write home address, also set FMT_R0 ! */ +#define DASD_FMT_INT_INVAL 4 /* invalidate tracks */ +#define DASD_FMT_INT_COMPAT 8 /* use OS/390 compatible disk layout */ + + +/* + * struct attrib_data_t + * represents the operation (cache) bits for the device. + * Used in DE to influence caching of the DASD. + */ +typedef struct attrib_data_t { + unsigned char operation:3; /* cache operation mode */ + unsigned char reserved:5; /* reserved bits */ + __u16 nr_cyl; /* no of cylinders for read ahead */ + __u8 reserved2[29]; /* for future use */ +} __attribute__ ((packed)) attrib_data_t; + +/* definition of operation (cache) bits within attributes of DE */ +#define DASD_NORMAL_CACHE 0x0 +#define DASD_BYPASS_CACHE 0x1 +#define DASD_INHIBIT_LOAD 0x2 +#define DASD_SEQ_ACCESS 0x3 +#define DASD_SEQ_PRESTAGE 0x4 +#define DASD_REC_ACCESS 0x5 + +/* + * Perform EMC Symmetrix I/O + */ +typedef struct dasd_symmio_parms { + unsigned char reserved[8]; /* compat with older releases */ + unsigned long long psf_data; /* char * cast to u64 */ + unsigned long long rssd_result; /* char * cast to u64 */ + int psf_data_len; + int rssd_result_len; +} __attribute__ ((packed)) dasd_symmio_parms_t; + +/* + * Data returned by Sense Path Group ID (SNID) + */ +struct dasd_snid_data { + struct { + __u8 group:2; + __u8 reserve:2; + __u8 mode:1; + __u8 res:3; + } __attribute__ ((packed)) path_state; + __u8 pgid[11]; +} __attribute__ ((packed)); + +struct dasd_snid_ioctl_data { + struct 
dasd_snid_data data; + __u8 path_mask; +} __attribute__ ((packed)); + + +/******************************************************************************** + * SECTION: Definition of IOCTLs + * + * Here is how the ioctl-nr should be used: + * 0 - 31 DASD driver itself + * 32 - 239 still open + * 240 - 255 reserved for EMC + *******************************************************************************/ + +/* Disable the volume (for Linux) */ +#define BIODASDDISABLE _IO(DASD_IOCTL_LETTER,0) +/* Enable the volume (for Linux) */ +#define BIODASDENABLE _IO(DASD_IOCTL_LETTER,1) +/* Issue a reserve/release command, rsp. */ +#define BIODASDRSRV _IO(DASD_IOCTL_LETTER,2) /* reserve */ +#define BIODASDRLSE _IO(DASD_IOCTL_LETTER,3) /* release */ +#define BIODASDSLCK _IO(DASD_IOCTL_LETTER,4) /* steal lock */ +/* reset profiling information of a device */ +#define BIODASDPRRST _IO(DASD_IOCTL_LETTER,5) +/* Quiesce IO on device */ +#define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6) +/* Resume IO on device */ +#define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7) + + +/* retrieve API version number */ +#define DASDAPIVER _IOR(DASD_IOCTL_LETTER,0,int) +/* Get information on a dasd device */ +#define BIODASDINFO _IOR(DASD_IOCTL_LETTER,1,dasd_information_t) +/* retrieve profiling information of a device */ +#define BIODASDPRRD _IOR(DASD_IOCTL_LETTER,2,dasd_profile_info_t) +/* Get information on a dasd device (enhanced) */ +#define BIODASDINFO2 _IOR(DASD_IOCTL_LETTER,3,dasd_information2_t) +/* Performance Statistics Read */ +#define BIODASDPSRD _IOR(DASD_IOCTL_LETTER,4,dasd_rssd_perf_stats_t) +/* Get Attributes (cache operations) */ +#define BIODASDGATTR _IOR(DASD_IOCTL_LETTER,5,attrib_data_t) + + +/* #define BIODASDFORMAT _IOW(IOCTL_LETTER,0,format_data_t) , deprecated */ +#define BIODASDFMT _IOW(DASD_IOCTL_LETTER,1,format_data_t) +/* Set Attributes (cache operations) */ +#define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t) + +/* Get Sense Path Group ID (SNID) data */ +#define 
BIODASDSNID _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data) + +#define BIODASDSYMMIO _IOWR(DASD_IOCTL_LETTER, 240, dasd_symmio_parms_t) + +#endif /* DASD_H */ + diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h new file mode 100644 index 00000000..8a8245ed --- /dev/null +++ b/arch/s390/include/asm/debug.h @@ -0,0 +1,260 @@ +/* + * include/asm-s390/debug.h + * S/390 debug facility + * + * Copyright (C) 1999, 2000 IBM Deutschland Entwicklung GmbH, + * IBM Corporation + */ + +#ifndef DEBUG_H +#define DEBUG_H + +#include <linux/fs.h> + +/* Note: + * struct __debug_entry must be defined outside of #ifdef __KERNEL__ + * in order to allow a user program to analyze the 'raw'-view. + */ + +struct __debug_entry{ + union { + struct { + unsigned long long clock:52; + unsigned long long exception:1; + unsigned long long level:3; + unsigned long long cpuid:8; + } fields; + + unsigned long long stck; + } id; + void* caller; +} __attribute__((packed)); + + +#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ + +#ifdef __KERNEL__ +#include <linux/string.h> +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/time.h> + +#define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */ +#define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */ +#define DEBUG_FLUSH_ALL -1 /* parameter to flush all areas */ +#define DEBUG_MAX_VIEWS 10 /* max number of views in proc fs */ +#define DEBUG_MAX_NAME_LEN 64 /* max length for a debugfs file name */ +#define DEBUG_DEFAULT_LEVEL 3 /* initial debug level */ + +#define DEBUG_DIR_ROOT "s390dbf" /* name of debug root directory in proc fs */ + +#define DEBUG_DATA(entry) (char*)(entry + 1) /* data is stored behind */ + /* the entry information */ + +typedef struct __debug_entry debug_entry_t; + +struct debug_view; + +typedef struct debug_info { + struct debug_info* next; + struct debug_info* prev; + atomic_t ref_count; + spinlock_t lock; + int level; + int nr_areas; + int 
pages_per_area; + int buf_size; + int entry_size; + debug_entry_t*** areas; + int active_area; + int *active_pages; + int *active_entries; + struct dentry* debugfs_root_entry; + struct dentry* debugfs_entries[DEBUG_MAX_VIEWS]; + struct debug_view* views[DEBUG_MAX_VIEWS]; + char name[DEBUG_MAX_NAME_LEN]; + umode_t mode; +} debug_info_t; + +typedef int (debug_header_proc_t) (debug_info_t* id, + struct debug_view* view, + int area, + debug_entry_t* entry, + char* out_buf); + +typedef int (debug_format_proc_t) (debug_info_t* id, + struct debug_view* view, char* out_buf, + const char* in_buf); +typedef int (debug_prolog_proc_t) (debug_info_t* id, + struct debug_view* view, + char* out_buf); +typedef int (debug_input_proc_t) (debug_info_t* id, + struct debug_view* view, + struct file* file, + const char __user *user_buf, + size_t in_buf_size, loff_t* offset); + +int debug_dflt_header_fn(debug_info_t* id, struct debug_view* view, + int area, debug_entry_t* entry, char* out_buf); + +struct debug_view { + char name[DEBUG_MAX_NAME_LEN]; + debug_prolog_proc_t* prolog_proc; + debug_header_proc_t* header_proc; + debug_format_proc_t* format_proc; + debug_input_proc_t* input_proc; + void* private_data; +}; + +extern struct debug_view debug_hex_ascii_view; +extern struct debug_view debug_raw_view; +extern struct debug_view debug_sprintf_view; + +/* do NOT use the _common functions */ + +debug_entry_t* debug_event_common(debug_info_t* id, int level, + const void* data, int length); + +debug_entry_t* debug_exception_common(debug_info_t* id, int level, + const void* data, int length); + +/* Debug Feature API: */ + +debug_info_t *debug_register(const char *name, int pages, int nr_areas, + int buf_size); + +debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, + int buf_size, umode_t mode, uid_t uid, + gid_t gid); + +void debug_unregister(debug_info_t* id); + +void debug_set_level(debug_info_t* id, int new_level); + +void debug_set_critical(void); +void 
debug_stop_all(void); + +static inline debug_entry_t* +debug_event(debug_info_t* id, int level, void* data, int length) +{ + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_event_common(id,level,data,length); +} + +static inline debug_entry_t* +debug_int_event(debug_info_t* id, int level, unsigned int tag) +{ + unsigned int t=tag; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_event_common(id,level,&t,sizeof(unsigned int)); +} + +static inline debug_entry_t * +debug_long_event (debug_info_t* id, int level, unsigned long tag) +{ + unsigned long t=tag; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_event_common(id,level,&t,sizeof(unsigned long)); +} + +static inline debug_entry_t* +debug_text_event(debug_info_t* id, int level, const char* txt) +{ + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_event_common(id,level,txt,strlen(txt)); +} + +/* + * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are + * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! + */ +extern debug_entry_t * +debug_sprintf_event(debug_info_t* id,int level,char *string,...) 
+ __attribute__ ((format(printf, 3, 4))); + + +static inline debug_entry_t* +debug_exception(debug_info_t* id, int level, void* data, int length) +{ + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_exception_common(id,level,data,length); +} + +static inline debug_entry_t* +debug_int_exception(debug_info_t* id, int level, unsigned int tag) +{ + unsigned int t=tag; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_exception_common(id,level,&t,sizeof(unsigned int)); +} + +static inline debug_entry_t * +debug_long_exception (debug_info_t* id, int level, unsigned long tag) +{ + unsigned long t=tag; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_exception_common(id,level,&t,sizeof(unsigned long)); +} + +static inline debug_entry_t* +debug_text_exception(debug_info_t* id, int level, const char* txt) +{ + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_exception_common(id,level,txt,strlen(txt)); +} + +/* + * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are + * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! + */ +extern debug_entry_t * +debug_sprintf_exception(debug_info_t* id,int level,char *string,...) 
+ __attribute__ ((format(printf, 3, 4))); + +int debug_register_view(debug_info_t* id, struct debug_view* view); +int debug_unregister_view(debug_info_t* id, struct debug_view* view); + +/* + define the debug levels: + - 0 No debugging output to console or syslog + - 1 Log internal errors to syslog, ignore check conditions + - 2 Log internal errors and check conditions to syslog + - 3 Log internal errors to console, log check conditions to syslog + - 4 Log internal errors and check conditions to console + - 5 panic on internal errors, log check conditions to console + - 6 panic on both, internal errors and check conditions + */ + +#ifndef DEBUG_LEVEL +#define DEBUG_LEVEL 4 +#endif + +#define INTERNAL_ERRMSG(x,y...) "E" __FILE__ "%d: " x, __LINE__, y +#define INTERNAL_WRNMSG(x,y...) "W" __FILE__ "%d: " x, __LINE__, y +#define INTERNAL_INFMSG(x,y...) "I" __FILE__ "%d: " x, __LINE__, y +#define INTERNAL_DEBMSG(x,y...) "D" __FILE__ "%d: " x, __LINE__, y + +#if DEBUG_LEVEL > 0 +#define PRINT_DEBUG(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) +#define PRINT_INFO(x...) printk ( KERN_INFO PRINTK_HEADER x ) +#define PRINT_WARN(x...) printk ( KERN_WARNING PRINTK_HEADER x ) +#define PRINT_ERR(x...) printk ( KERN_ERR PRINTK_HEADER x ) +#define PRINT_FATAL(x...) panic ( PRINTK_HEADER x ) +#else +#define PRINT_DEBUG(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) +#define PRINT_INFO(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) +#define PRINT_WARN(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) +#define PRINT_ERR(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) +#define PRINT_FATAL(x...) 
printk ( KERN_DEBUG PRINTK_HEADER x ) +#endif /* DEBUG_LEVEL > 0 */ + +#endif /* __KERNEL__ */ +#endif /* DEBUG_H */ diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h new file mode 100644 index 00000000..0e3b35f9 --- /dev/null +++ b/arch/s390/include/asm/delay.h @@ -0,0 +1,26 @@ +/* + * include/asm-s390/delay.h + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/delay.h" + * Copyright (C) 1993 Linus Torvalds + * + * Delay routines calling functions in arch/s390/lib/delay.c + */ + +#ifndef _S390_DELAY_H +#define _S390_DELAY_H + +void __ndelay(unsigned long long nsecs); +void __udelay(unsigned long long usecs); +void udelay_simple(unsigned long long usecs); +void __delay(unsigned long loops); + +#define ndelay(n) __ndelay((unsigned long long) (n)) +#define udelay(n) __udelay((unsigned long long) (n)) +#define mdelay(n) __udelay((unsigned long long) (n) * 1000) + +#endif /* defined(_S390_DELAY_H) */ diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h new file mode 100644 index 00000000..d8f9872b --- /dev/null +++ b/arch/s390/include/asm/device.h @@ -0,0 +1,7 @@ +/* + * Arch specific extensions to struct device + * + * This file is released under the GPLv2 + */ +#include <asm-generic/device.h> + diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h new file mode 100644 index 00000000..7e91c580 --- /dev/null +++ b/arch/s390/include/asm/diag.h @@ -0,0 +1,52 @@ +/* + * s390 diagnose functions + * + * Copyright IBM Corp. 
2007 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#ifndef _ASM_S390_DIAG_H +#define _ASM_S390_DIAG_H + +/* + * Diagnose 10: Release page range + */ +static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) +{ + unsigned long start_addr, end_addr; + + start_addr = start_pfn << PAGE_SHIFT; + end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT; + + asm volatile( + "0: diag %0,%1,0x10\n" + "1:\n" + EX_TABLE(0b, 1b) + EX_TABLE(1b, 1b) + : : "a" (start_addr), "a" (end_addr)); +} + +/* + * Diagnose 14: Input spool file manipulation + */ +extern int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode); + +/* + * Diagnose 210: Get information about a virtual device + */ +struct diag210 { + u16 vrdcdvno; /* device number (input) */ + u16 vrdclen; /* data block length (input) */ + u8 vrdcvcla; /* virtual device class (output) */ + u8 vrdcvtyp; /* virtual device type (output) */ + u8 vrdcvsta; /* virtual device status (output) */ + u8 vrdcvfla; /* virtual device flags (output) */ + u8 vrdcrccl; /* real device class (output) */ + u8 vrdccrty; /* real device type (output) */ + u8 vrdccrmd; /* real device model (output) */ + u8 vrdccrft; /* real device feature (output) */ +} __attribute__((packed, aligned(4))); + +extern int diag210(struct diag210 *addr); + +#endif /* _ASM_S390_DIAG_H */ diff --git a/arch/s390/include/asm/div64.h b/arch/s390/include/asm/div64.h new file mode 100644 index 00000000..6cd978ce --- /dev/null +++ b/arch/s390/include/asm/div64.h @@ -0,0 +1 @@ +#include <asm-generic/div64.h> diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h new file mode 100644 index 00000000..7425c6af --- /dev/null +++ b/arch/s390/include/asm/dma.h @@ -0,0 +1,16 @@ +/* + * include/asm-s390/dma.h + * + * S390 version + */ + +#ifndef _ASM_DMA_H +#define _ASM_DMA_H + +#include <asm/io.h> /* need byte IO */ + +#define MAX_DMA_ADDRESS 0x80000000 + +#define free_dma(x) do { } while (0) + +#endif /* _ASM_DMA_H */ diff 
--git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h new file mode 100644 index 00000000..7f6f641d --- /dev/null +++ b/arch/s390/include/asm/ebcdic.h @@ -0,0 +1,49 @@ +/* + * include/asm-s390/ebcdic.h + * EBCDIC -> ASCII, ASCII -> EBCDIC conversion routines. + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _EBCDIC_H +#define _EBCDIC_H + +#ifndef _S390_TYPES_H +#include <types.h> +#endif + +extern __u8 _ascebc_500[256]; /* ASCII -> EBCDIC 500 conversion table */ +extern __u8 _ebcasc_500[256]; /* EBCDIC 500 -> ASCII conversion table */ +extern __u8 _ascebc[256]; /* ASCII -> EBCDIC conversion table */ +extern __u8 _ebcasc[256]; /* EBCDIC -> ASCII conversion table */ +extern __u8 _ebc_tolower[256]; /* EBCDIC -> lowercase */ +extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */ + +static inline void +codepage_convert(const __u8 *codepage, volatile __u8 * addr, unsigned long nr) +{ + if (nr-- <= 0) + return; + asm volatile( + " bras 1,1f\n" + " tr 0(1,%0),0(%2)\n" + "0: tr 0(256,%0),0(%2)\n" + " la %0,256(%0)\n" + "1: ahi %1,-256\n" + " jnm 0b\n" + " ex %1,0(1)" + : "+&a" (addr), "+&a" (nr) + : "a" (codepage) : "cc", "memory", "1"); +} + +#define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr) +#define EBCASC(addr,nr) codepage_convert(_ebcasc, addr, nr) +#define ASCEBC_500(addr,nr) codepage_convert(_ascebc_500, addr, nr) +#define EBCASC_500(addr,nr) codepage_convert(_ebcasc_500, addr, nr) +#define EBC_TOLOWER(addr,nr) codepage_convert(_ebc_tolower, addr, nr) +#define EBC_TOUPPER(addr,nr) codepage_convert(_ebc_toupper, addr, nr) + +#endif + diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h new file mode 100644 index 00000000..c4ee39f7 --- /dev/null +++ b/arch/s390/include/asm/elf.h @@ -0,0 +1,216 @@ +/* + * include/asm-s390/elf.h + * + * S390 version + * + * Derived from "include/asm-i386/elf.h" + */ + 
+#ifndef __ASMS390_ELF_H +#define __ASMS390_ELF_H + +/* s390 relocations defined by the ABIs */ +#define R_390_NONE 0 /* No reloc. */ +#define R_390_8 1 /* Direct 8 bit. */ +#define R_390_12 2 /* Direct 12 bit. */ +#define R_390_16 3 /* Direct 16 bit. */ +#define R_390_32 4 /* Direct 32 bit. */ +#define R_390_PC32 5 /* PC relative 32 bit. */ +#define R_390_GOT12 6 /* 12 bit GOT offset. */ +#define R_390_GOT32 7 /* 32 bit GOT offset. */ +#define R_390_PLT32 8 /* 32 bit PC relative PLT address. */ +#define R_390_COPY 9 /* Copy symbol at runtime. */ +#define R_390_GLOB_DAT 10 /* Create GOT entry. */ +#define R_390_JMP_SLOT 11 /* Create PLT entry. */ +#define R_390_RELATIVE 12 /* Adjust by program base. */ +#define R_390_GOTOFF32 13 /* 32 bit offset to GOT. */ +#define R_390_GOTPC 14 /* 32 bit PC rel. offset to GOT. */ +#define R_390_GOT16 15 /* 16 bit GOT offset. */ +#define R_390_PC16 16 /* PC relative 16 bit. */ +#define R_390_PC16DBL 17 /* PC relative 16 bit shifted by 1. */ +#define R_390_PLT16DBL 18 /* 16 bit PC rel. PLT shifted by 1. */ +#define R_390_PC32DBL 19 /* PC relative 32 bit shifted by 1. */ +#define R_390_PLT32DBL 20 /* 32 bit PC rel. PLT shifted by 1. */ +#define R_390_GOTPCDBL 21 /* 32 bit PC rel. GOT shifted by 1. */ +#define R_390_64 22 /* Direct 64 bit. */ +#define R_390_PC64 23 /* PC relative 64 bit. */ +#define R_390_GOT64 24 /* 64 bit GOT offset. */ +#define R_390_PLT64 25 /* 64 bit PC relative PLT address. */ +#define R_390_GOTENT 26 /* 32 bit PC rel. to GOT entry >> 1. */ +#define R_390_GOTOFF16 27 /* 16 bit offset to GOT. */ +#define R_390_GOTOFF64 28 /* 64 bit offset to GOT. */ +#define R_390_GOTPLT12 29 /* 12 bit offset to jump slot. */ +#define R_390_GOTPLT16 30 /* 16 bit offset to jump slot. */ +#define R_390_GOTPLT32 31 /* 32 bit offset to jump slot. */ +#define R_390_GOTPLT64 32 /* 64 bit offset to jump slot. */ +#define R_390_GOTPLTENT 33 /* 32 bit rel. offset to jump slot. 
*/ +#define R_390_PLTOFF16 34 /* 16 bit offset from GOT to PLT. */ +#define R_390_PLTOFF32 35 /* 32 bit offset from GOT to PLT. */ +#define R_390_PLTOFF64 36 /* 64 bit offset from GOT to PLT. */ +#define R_390_TLS_LOAD 37 /* Tag for load insn in TLS code. */ +#define R_390_TLS_GDCALL 38 /* Tag for function call in general + dynamic TLS code. */ +#define R_390_TLS_LDCALL 39 /* Tag for function call in local + dynamic TLS code. */ +#define R_390_TLS_GD32 40 /* Direct 32 bit for general dynamic + thread local data. */ +#define R_390_TLS_GD64 41 /* Direct 64 bit for general dynamic + thread local data. */ +#define R_390_TLS_GOTIE12 42 /* 12 bit GOT offset for static TLS + block offset. */ +#define R_390_TLS_GOTIE32 43 /* 32 bit GOT offset for static TLS + block offset. */ +#define R_390_TLS_GOTIE64 44 /* 64 bit GOT offset for static TLS + block offset. */ +#define R_390_TLS_LDM32 45 /* Direct 32 bit for local dynamic + thread local data in LD code. */ +#define R_390_TLS_LDM64 46 /* Direct 64 bit for local dynamic + thread local data in LD code. */ +#define R_390_TLS_IE32 47 /* 32 bit address of GOT entry for + negated static TLS block offset. */ +#define R_390_TLS_IE64 48 /* 64 bit address of GOT entry for + negated static TLS block offset. */ +#define R_390_TLS_IEENT 49 /* 32 bit rel. offset to GOT entry for + negated static TLS block offset. */ +#define R_390_TLS_LE32 50 /* 32 bit negated offset relative to + static TLS block. */ +#define R_390_TLS_LE64 51 /* 64 bit negated offset relative to + static TLS block. */ +#define R_390_TLS_LDO32 52 /* 32 bit offset relative to TLS + block. */ +#define R_390_TLS_LDO64 53 /* 64 bit offset relative to TLS + block. */ +#define R_390_TLS_DTPMOD 54 /* ID of module containing symbol. */ +#define R_390_TLS_DTPOFF 55 /* Offset in TLS block. */ +#define R_390_TLS_TPOFF 56 /* Negate offset in static TLS + block. */ +#define R_390_20 57 /* Direct 20 bit. */ +#define R_390_GOT20 58 /* 20 bit GOT offset. 
*/ +#define R_390_GOTPLT20 59 /* 20 bit offset to jump slot. */ +#define R_390_TLS_GOTIE20 60 /* 20 bit GOT offset for static TLS + block offset. */ +/* Keep this the last entry. */ +#define R_390_NUM 61 + +/* Bits present in AT_HWCAP. */ +#define HWCAP_S390_ESAN3 1 +#define HWCAP_S390_ZARCH 2 +#define HWCAP_S390_STFLE 4 +#define HWCAP_S390_MSA 8 +#define HWCAP_S390_LDISP 16 +#define HWCAP_S390_EIMM 32 +#define HWCAP_S390_DFP 64 +#define HWCAP_S390_HPAGE 128 +#define HWCAP_S390_ETF3EH 256 +#define HWCAP_S390_HIGH_GPRS 512 + +/* + * These are used to set parameters in the core dumps. + */ +#ifndef __s390x__ +#define ELF_CLASS ELFCLASS32 +#else /* __s390x__ */ +#define ELF_CLASS ELFCLASS64 +#endif /* __s390x__ */ +#define ELF_DATA ELFDATA2MSB +#define ELF_ARCH EM_S390 + +/* + * ELF register definitions.. + */ + +#include <asm/ptrace.h> +#include <asm/user.h> + +typedef s390_fp_regs elf_fpregset_t; +typedef s390_regs elf_gregset_t; + +typedef s390_fp_regs compat_elf_fpregset_t; +typedef s390_compat_regs compat_elf_gregset_t; + +#include <linux/sched.h> /* for task_struct */ +#include <asm/mmu_context.h> + +#include <asm/vdso.h> + +extern unsigned int vdso_enabled; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \ + && (x)->e_ident[EI_CLASS] == ELF_CLASS) +#define compat_elf_check_arch(x) \ + (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \ + && (x)->e_ident[EI_CLASS] == ELF_CLASS) +#define compat_start_thread start_thread31 + +/* For SVR4/S390 the function pointer to be registered with `atexit` is + passed in R14. */ +#define ELF_PLAT_INIT(_r, load_addr) \ + do { \ + _r->gprs[14] = 0; \ + } while (0) + +#define CORE_DUMP_USE_REGSET +#define ELF_EXEC_PAGESIZE 4096 + +/* This is the location that an ET_DYN program is loaded if exec'ed. 
Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +extern unsigned long randomize_et_dyn(unsigned long base); +#define ELF_ET_DYN_BASE (randomize_et_dyn(STACK_TOP / 3 * 2)) + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. */ + +extern unsigned long elf_hwcap; +#define ELF_HWCAP (elf_hwcap) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. + + For the moment, we have only optimizations for the Intel generations, + but that could change... */ + +#define ELF_PLATFORM_SIZE 8 +extern char elf_platform[]; +#define ELF_PLATFORM (elf_platform) + +#ifndef __s390x__ +#define SET_PERSONALITY(ex) set_personality(PER_LINUX) +#else /* __s390x__ */ +#define SET_PERSONALITY(ex) \ +do { \ + if (personality(current->personality) != PER_LINUX32) \ + set_personality(PER_LINUX | \ + (current->personality & ~PER_MASK)); \ + if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ + set_thread_flag(TIF_31BIT); \ + else \ + clear_thread_flag(TIF_31BIT); \ +} while (0) +#endif /* __s390x__ */ + +#define STACK_RND_MASK 0x7ffUL + +#define ARCH_DLINFO \ +do { \ + if (vdso_enabled) \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long)current->mm->context.vdso_base); \ +} while (0) + +struct linux_binprm; + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +int arch_setup_additional_pages(struct linux_binprm *, int); + +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +#define arch_randomize_brk arch_randomize_brk + +#endif diff --git a/arch/s390/include/asm/emergency-restart.h b/arch/s390/include/asm/emergency-restart.h new file mode 100644 index 00000000..108d8c48 --- /dev/null +++ b/arch/s390/include/asm/emergency-restart.h @@ 
-0,0 +1,6 @@ +#ifndef _ASM_EMERGENCY_RESTART_H +#define _ASM_EMERGENCY_RESTART_H + +#include <asm-generic/emergency-restart.h> + +#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/s390/include/asm/errno.h b/arch/s390/include/asm/errno.h new file mode 100644 index 00000000..e41d5b37 --- /dev/null +++ b/arch/s390/include/asm/errno.h @@ -0,0 +1,13 @@ +/* + * include/asm-s390/errno.h + * + * S390 version + * + */ + +#ifndef _S390_ERRNO_H +#define _S390_ERRNO_H + +#include <asm-generic/errno.h> + +#endif diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h new file mode 100644 index 00000000..538e1b36 --- /dev/null +++ b/arch/s390/include/asm/etr.h @@ -0,0 +1,258 @@ +/* + * include/asm-s390/etr.h + * + * Copyright IBM Corp. 2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ +#ifndef __S390_ETR_H +#define __S390_ETR_H + +/* ETR attachment control register */ +struct etr_eacr { + unsigned int e0 : 1; /* port 0 stepping control */ + unsigned int e1 : 1; /* port 1 stepping control */ + unsigned int _pad0 : 5; /* must be 00100 */ + unsigned int dp : 1; /* data port control */ + unsigned int p0 : 1; /* port 0 change recognition control */ + unsigned int p1 : 1; /* port 1 change recognition control */ + unsigned int _pad1 : 3; /* must be 000 */ + unsigned int ea : 1; /* ETR alert control */ + unsigned int es : 1; /* ETR sync check control */ + unsigned int sl : 1; /* switch to local control */ +} __attribute__ ((packed)); + +/* Port state returned by steai */ +enum etr_psc { + etr_psc_operational = 0, + etr_psc_semi_operational = 1, + etr_psc_protocol_error = 4, + etr_psc_no_symbols = 8, + etr_psc_no_signal = 12, + etr_psc_pps_mode = 13 +}; + +/* Logical port state returned by stetr */ +enum etr_lpsc { + etr_lpsc_operational_step = 0, + etr_lpsc_operational_alt = 1, + etr_lpsc_semi_operational = 2, + etr_lpsc_protocol_error = 4, + etr_lpsc_no_symbol_sync = 8, + etr_lpsc_no_signal = 12, + etr_lpsc_pps_mode = 13 +}; + +/* ETR status 
words */ +struct etr_esw { + struct etr_eacr eacr; /* attachment control register */ + unsigned int y : 1; /* stepping mode */ + unsigned int _pad0 : 5; /* must be 00000 */ + unsigned int p : 1; /* stepping port number */ + unsigned int q : 1; /* data port number */ + unsigned int psc0 : 4; /* port 0 state code */ + unsigned int psc1 : 4; /* port 1 state code */ +} __attribute__ ((packed)); + +/* Second level data register status word */ +struct etr_slsw { + unsigned int vv1 : 1; /* copy of validity bit data frame 1 */ + unsigned int vv2 : 1; /* copy of validity bit data frame 2 */ + unsigned int vv3 : 1; /* copy of validity bit data frame 3 */ + unsigned int vv4 : 1; /* copy of validity bit data frame 4 */ + unsigned int _pad0 : 19; /* must be all zeroes */ + unsigned int n : 1; /* EAF port number */ + unsigned int v1 : 1; /* validity bit ETR data frame 1 */ + unsigned int v2 : 1; /* validity bit ETR data frame 2 */ + unsigned int v3 : 1; /* validity bit ETR data frame 3 */ + unsigned int v4 : 1; /* validity bit ETR data frame 4 */ + unsigned int _pad1 : 4; /* must be 0000 */ +} __attribute__ ((packed)); + +/* ETR data frames */ +struct etr_edf1 { + unsigned int u : 1; /* untuned bit */ + unsigned int _pad0 : 1; /* must be 0 */ + unsigned int r : 1; /* service request bit */ + unsigned int _pad1 : 4; /* must be 0000 */ + unsigned int a : 1; /* time adjustment bit */ + unsigned int net_id : 8; /* ETR network id */ + unsigned int etr_id : 8; /* id of ETR which sends data frames */ + unsigned int etr_pn : 8; /* port number of ETR output port */ +} __attribute__ ((packed)); + +struct etr_edf2 { + unsigned int etv : 32; /* Upper 32 bits of TOD.
*/ +} __attribute__ ((packed)); + +struct etr_edf3 { + unsigned int rc : 8; /* failure reason code */ + unsigned int _pad0 : 3; /* must be 000 */ + unsigned int c : 1; /* ETR coupled bit */ + unsigned int tc : 4; /* ETR type code */ + unsigned int blto : 8; /* biased local time offset */ + /* (blto - 128) * 15 = minutes */ + unsigned int buo : 8; /* biased utc offset */ + /* (buo - 128) = leap seconds */ +} __attribute__ ((packed)); + +struct etr_edf4 { + unsigned int ed : 8; /* ETS device dependent data */ + unsigned int _pad0 : 1; /* must be 0 */ + unsigned int buc : 5; /* biased ut1 correction */ + /* (buc - 16) * 0.1 seconds */ + unsigned int em : 6; /* ETS error magnitude */ + unsigned int dc : 6; /* ETS drift code */ + unsigned int sc : 6; /* ETS steering code */ +} __attribute__ ((packed)); + +/* + * ETR attachment information block, two formats + * format 1 has 4 reserved words with a size of 64 bytes + * format 2 has 16 reserved words with a size of 96 bytes + */ +struct etr_aib { + struct etr_esw esw; + struct etr_slsw slsw; + unsigned long long tsp; + struct etr_edf1 edf1; + struct etr_edf2 edf2; + struct etr_edf3 edf3; + struct etr_edf4 edf4; + unsigned int reserved[16]; +} __attribute__ ((packed,aligned(8))); + +/* ETR interruption parameter */ +struct etr_irq_parm { + unsigned int _pad0 : 8; + unsigned int pc0 : 1; /* port 0 state change */ + unsigned int pc1 : 1; /* port 1 state change */ + unsigned int _pad1 : 3; + unsigned int eai : 1; /* ETR alert indication */ + unsigned int _pad2 : 18; +} __attribute__ ((packed)); + +/* Query TOD offset result */ +struct etr_ptff_qto { + unsigned long long physical_clock; + unsigned long long tod_offset; + unsigned long long logical_tod_offset; + unsigned long long tod_epoch_difference; +} __attribute__ ((packed)); + +/* Inline assembly helper functions */ +static inline int etr_setr(struct etr_eacr *ctrl) +{ + int rc = -ENOSYS; + + asm volatile( + " .insn s,0xb2160000,%1\n" + "0: la %0,0\n" + "1:\n" + 
EX_TABLE(0b,1b) + : "+d" (rc) : "Q" (*ctrl)); + return rc; +} + +/* Stores a format 1 aib with 64 bytes */ +static inline int etr_stetr(struct etr_aib *aib) +{ + int rc = -ENOSYS; + + asm volatile( + " .insn s,0xb2170000,%1\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) : "Q" (*aib)); + return rc; +} + +/* Stores a format 2 aib with 96 bytes for specified port */ +static inline int etr_steai(struct etr_aib *aib, unsigned int func) +{ + register unsigned int reg0 asm("0") = func; + int rc = -ENOSYS; + + asm volatile( + " .insn s,0xb2b30000,%1\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) : "Q" (*aib), "d" (reg0)); + return rc; +} + +/* Function codes for the steai instruction. */ +#define ETR_STEAI_STEPPING_PORT 0x10 +#define ETR_STEAI_ALTERNATE_PORT 0x11 +#define ETR_STEAI_PORT_0 0x12 +#define ETR_STEAI_PORT_1 0x13 + +static inline int etr_ptff(void *ptff_block, unsigned int func) +{ + register unsigned int reg0 asm("0") = func; + register unsigned long reg1 asm("1") = (unsigned long) ptff_block; + int rc = -ENOSYS; + + asm volatile( + " .word 0x0104\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc), "=m" (ptff_block) + : "d" (reg0), "d" (reg1), "m" (ptff_block) : "cc"); + return rc; +} + +/* Function codes for the ptff instruction. 
*/ +#define ETR_PTFF_QAF 0x00 /* query available functions */ +#define ETR_PTFF_QTO 0x01 /* query tod offset */ +#define ETR_PTFF_QSI 0x02 /* query steering information */ +#define ETR_PTFF_ATO 0x40 /* adjust tod offset */ +#define ETR_PTFF_STO 0x41 /* set tod offset */ +#define ETR_PTFF_SFS 0x42 /* set fine steering rate */ +#define ETR_PTFF_SGS 0x43 /* set gross steering rate */ + +/* Functions needed by the machine check handler */ +void etr_switch_to_local(void); +void etr_sync_check(void); + +/* STP interruption parameter */ +struct stp_irq_parm { + unsigned int _pad0 : 14; + unsigned int tsc : 1; /* Timing status change */ + unsigned int lac : 1; /* Link availability change */ + unsigned int tcpc : 1; /* Time control parameter change */ + unsigned int _pad2 : 15; +} __attribute__ ((packed)); + +#define STP_OP_SYNC 1 +#define STP_OP_CTRL 3 + +struct stp_sstpi { + unsigned int rsvd0; + unsigned int rsvd1 : 8; + unsigned int stratum : 8; + unsigned int vbits : 16; + unsigned int leaps : 16; + unsigned int tmd : 4; + unsigned int ctn : 4; + unsigned int rsvd2 : 3; + unsigned int c : 1; + unsigned int tst : 4; + unsigned int tzo : 16; + unsigned int dsto : 16; + unsigned int ctrl : 16; + unsigned int rsvd3 : 16; + unsigned int tto; + unsigned int rsvd4; + unsigned int ctnid[3]; + unsigned int rsvd5; + unsigned int todoff[4]; + unsigned int rsvd6[48]; +} __attribute__ ((packed)); + +/* Functions needed by the machine check handler */ +void stp_sync_check(void); +void stp_island_check(void); + +#endif /* __S390_ETR_H */ diff --git a/arch/s390/include/asm/exec.h b/arch/s390/include/asm/exec.h new file mode 100644 index 00000000..c4a93d63 --- /dev/null +++ b/arch/s390/include/asm/exec.h @@ -0,0 +1,12 @@ +/* + * Copyright IBM Corp. 
1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_EXEC_H +#define __ASM_EXEC_H + +extern unsigned long arch_align_stack(unsigned long sp); + +#endif /* __ASM_EXEC_H */ diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h new file mode 100644 index 00000000..33837d75 --- /dev/null +++ b/arch/s390/include/asm/extmem.h @@ -0,0 +1,33 @@ +/* + * include/asm-s390x/extmem.h + * + * definitions for external memory segment support + * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation + */ + +#ifndef _ASM_S390X_DCSS_H +#define _ASM_S390X_DCSS_H +#ifndef __ASSEMBLY__ + +/* possible values for segment type as returned by segment_info */ +#define SEG_TYPE_SW 0 +#define SEG_TYPE_EW 1 +#define SEG_TYPE_SR 2 +#define SEG_TYPE_ER 3 +#define SEG_TYPE_SN 4 +#define SEG_TYPE_EN 5 +#define SEG_TYPE_SC 6 +#define SEG_TYPE_EWEN 7 + +#define SEGMENT_SHARED 0 +#define SEGMENT_EXCLUSIVE 1 + +int segment_load (char *name, int segtype, unsigned long *addr, unsigned long *length); +void segment_unload(char *name); +void segment_save(char *name); +int segment_type (char* name); +int segment_modify_shared (char *name, int do_nonshared); +void segment_warning(int rc, char *seg_name); + +#endif +#endif diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h new file mode 100644 index 00000000..2ee66a65 --- /dev/null +++ b/arch/s390/include/asm/facility.h @@ -0,0 +1,62 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_FACILITY_H +#define __ASM_FACILITY_H + +#include <linux/string.h> +#include <linux/preempt.h> +#include <asm/lowcore.h> + +#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ + +/* + * The test_facility function uses the bit ordering where the MSB is bit 0. + * That makes it easier to query facility bits with the bit number as + * documented in the Principles of Operation.
+ */ +static inline int test_facility(unsigned long nr) +{ + unsigned char *ptr; + + if (nr >= MAX_FACILITY_BIT) + return 0; + ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; +} + +/** + * stfle - Store facility list extended + * @stfle_fac_list: array where facility list can be stored + * @size: size of passed in array in double words + */ +static inline void stfle(u64 *stfle_fac_list, int size) +{ + unsigned long nr; + + preempt_disable(); + asm volatile( + " .insn s,0xb2b10000,0(0)\n" /* stfl */ + "0:\n" + EX_TABLE(0b, 0b) + : "+m" (S390_lowcore.stfl_fac_list)); + nr = 4; /* bytes stored by stfl */ + memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); + if (S390_lowcore.stfl_fac_list & 0x01000000) { + /* More facility bits available with stfle */ + register unsigned long reg0 asm("0") = size - 1; + + asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */ + : "+d" (reg0) + : "a" (stfle_fac_list) + : "memory", "cc"); + nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ + } + memset((char *) stfle_fac_list + nr, 0, size * 8 - nr); + preempt_enable(); +} + +#endif /* __ASM_FACILITY_H */ diff --git a/arch/s390/include/asm/fb.h b/arch/s390/include/asm/fb.h new file mode 100644 index 00000000..c7df3803 --- /dev/null +++ b/arch/s390/include/asm/fb.h @@ -0,0 +1,12 @@ +#ifndef _ASM_FB_H_ +#define _ASM_FB_H_ +#include <linux/fb.h> + +#define fb_pgprotect(...) 
do {} while (0) + +static inline int fb_is_primary_device(struct fb_info *info) +{ + return 0; +} + +#endif /* _ASM_FB_H_ */ diff --git a/arch/s390/include/asm/fcntl.h b/arch/s390/include/asm/fcntl.h new file mode 100644 index 00000000..46ab12db --- /dev/null +++ b/arch/s390/include/asm/fcntl.h @@ -0,0 +1 @@ +#include <asm-generic/fcntl.h> diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h new file mode 100644 index 00000000..ef617099 --- /dev/null +++ b/arch/s390/include/asm/fcx.h @@ -0,0 +1,311 @@ +/* + * Functions for assembling fcx enabled I/O control blocks. + * + * Copyright IBM Corp. 2008 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef _ASM_S390_FCX_H +#define _ASM_S390_FCX_H _ASM_S390_FCX_H + +#include <linux/types.h> + +#define TCW_FORMAT_DEFAULT 0 +#define TCW_TIDAW_FORMAT_DEFAULT 0 +#define TCW_FLAGS_INPUT_TIDA (1 << (23 - 5)) /* parenthesized: safe under ~, +, etc. */ +#define TCW_FLAGS_TCCB_TIDA (1 << (23 - 6)) +#define TCW_FLAGS_OUTPUT_TIDA (1 << (23 - 7)) +#define TCW_FLAGS_TIDAW_FORMAT(x) (((x) & 3) << (23 - 9)) +#define TCW_FLAGS_GET_TIDAW_FORMAT(x) (((x) >> (23 - 9)) & 3) + +/** + * struct tcw - Transport Control Word (TCW) + * @format: TCW format + * @flags: TCW flags + * @tccbl: Transport-Command-Control-Block Length + * @r: Read Operations + * @w: Write Operations + * @output: Output-Data Address + * @input: Input-Data Address + * @tsb: Transport-Status-Block Address + * @tccb: Transport-Command-Control-Block Address + * @output_count: Output Count + * @input_count: Input Count + * @intrg: Interrogate TCW Address + */ +struct tcw { + u32 format:2; + u32 :6; + u32 flags:24; + u32 :8; + u32 tccbl:6; + u32 r:1; + u32 w:1; + u32 :16; + u64 output; + u64 input; + u64 tsb; + u64 tccb; + u32 output_count; + u32 input_count; + u32 :32; + u32 :32; + u32 :32; + u32 intrg; +} __attribute__ ((packed, aligned(64))); + +#define TIDAW_FLAGS_LAST (1 << (7 - 0)) +#define TIDAW_FLAGS_SKIP (1 << (7 - 1)) +#define TIDAW_FLAGS_DATA_INT (1 << (7 - 2)) +#define
TIDAW_FLAGS_TTIC 1 << (7 - 3) +#define TIDAW_FLAGS_INSERT_CBC 1 << (7 - 4) + +/** + * struct tidaw - Transport-Indirect-Addressing Word (TIDAW) + * @flags: TIDAW flags. Can be an arithmetic OR of the following constants: + * %TIDAW_FLAGS_LAST, %TIDAW_FLAGS_SKIP, %TIDAW_FLAGS_DATA_INT, + * %TIDAW_FLAGS_TTIC, %TIDAW_FLAGS_INSERT_CBC + * @count: Count + * @addr: Address + */ +struct tidaw { + u32 flags:8; + u32 :24; + u32 count; + u64 addr; +} __attribute__ ((packed, aligned(16))); + +/** + * struct tsa_iostat - I/O-Status Transport-Status Area (IO-Stat TSA) + * @dev_time: Device Time + * @def_time: Defer Time + * @queue_time: Queue Time + * @dev_busy_time: Device-Busy Time + * @dev_act_time: Device-Active-Only Time + * @sense: Sense Data (if present) + */ +struct tsa_iostat { + u32 dev_time; + u32 def_time; + u32 queue_time; + u32 dev_busy_time; + u32 dev_act_time; + u8 sense[32]; +} __attribute__ ((packed)); + +/** + * struct tsa_ddpcs - Device-Detected-Program-Check Transport-Status Area (DDPC TSA) + * @rc: Reason Code + * @rcq: Reason Code Qualifier + * @sense: Sense Data (if present) + */ +struct tsa_ddpc { + u32 :24; + u32 rc:8; + u8 rcq[16]; + u8 sense[32]; +} __attribute__ ((packed)); + +#define TSA_INTRG_FLAGS_CU_STATE_VALID 1 << (7 - 0) +#define TSA_INTRG_FLAGS_DEV_STATE_VALID 1 << (7 - 1) +#define TSA_INTRG_FLAGS_OP_STATE_VALID 1 << (7 - 2) + +/** + * struct tsa_intrg - Interrogate Transport-Status Area (Intrg. TSA) + * @format: Format + * @flags: Flags. 
Can be an arithmetic OR of the following constants: + * %TSA_INTRG_FLAGS_CU_STATE_VALID, %TSA_INTRG_FLAGS_DEV_STATE_VALID, + * %TSA_INTRG_FLAGS_OP_STATE_VALID + * @cu_state: Control-Unit State + * @dev_state: Device State + * @op_state: Operation State + * @sd_info: State-Dependent Information + * @dl_id: Device-Level Identifier + * @dd_data: Device-Dependent Data + */ +struct tsa_intrg { + u32 format:8; + u32 flags:8; + u32 cu_state:8; + u32 dev_state:8; + u32 op_state:8; + u32 :24; + u8 sd_info[12]; + u32 dl_id; + u8 dd_data[28]; +} __attribute__ ((packed)); + +#define TSB_FORMAT_NONE 0 +#define TSB_FORMAT_IOSTAT 1 +#define TSB_FORMAT_DDPC 2 +#define TSB_FORMAT_INTRG 3 + +#define TSB_FLAGS_DCW_OFFSET_VALID (1 << (7 - 0)) +#define TSB_FLAGS_COUNT_VALID (1 << (7 - 1)) +#define TSB_FLAGS_CACHE_MISS (1 << (7 - 2)) +#define TSB_FLAGS_TIME_VALID (1 << (7 - 3)) +#define TSB_FLAGS_FORMAT(x) ((x) & 7) +#define TSB_FORMAT(t) ((t)->flags & 7) + +/** + * struct tsb - Transport-Status Block (TSB) + * @length: Length + * @flags: Flags. Can be an arithmetic OR of the following constants: + * %TSB_FLAGS_DCW_OFFSET_VALID, %TSB_FLAGS_COUNT_VALID, %TSB_FLAGS_CACHE_MISS, + * %TSB_FLAGS_TIME_VALID + * @dcw_offset: DCW Offset + * @count: Count + * @tsa: Transport-Status-Area + */ +struct tsb { + u32 length:8; + u32 flags:8; + u32 dcw_offset:16; + u32 count; + u32 :32; + union { + struct tsa_iostat iostat; + struct tsa_ddpc ddpc; + struct tsa_intrg intrg; + } __attribute__ ((packed)) tsa; +} __attribute__ ((packed, aligned(8))); + +#define DCW_INTRG_FORMAT_DEFAULT 0 + +#define DCW_INTRG_RC_UNSPECIFIED 0 +#define DCW_INTRG_RC_TIMEOUT 1 + +#define DCW_INTRG_RCQ_UNSPECIFIED 0 +#define DCW_INTRG_RCQ_PRIMARY 1 +#define DCW_INTRG_RCQ_SECONDARY 2 + +#define DCW_INTRG_FLAGS_MPM (1 << (7 - 0)) /* shift, not compare: 0x80, like the other *_FLAGS_* masks */ +#define DCW_INTRG_FLAGS_PPR (1 << (7 - 1)) +#define DCW_INTRG_FLAGS_CRIT (1 << (7 - 2)) + +/** + * struct dcw_intrg_data - Interrogate DCW data + * @format: Format.
Should be %DCW_INTRG_FORMAT_DEFAULT + * @rc: Reason Code. Can be one of %DCW_INTRG_RC_UNSPECIFIED, + * %DCW_INTRG_RC_TIMEOUT + * @rcq: Reason Code Qualifier: Can be one of %DCW_INTRG_RCQ_UNSPECIFIED, + * %DCW_INTRG_RCQ_PRIMARY, %DCW_INTRG_RCQ_SECONDARY + * @lpm: Logical-Path Mask + * @pam: Path-Available Mask + * @pim: Path-Installed Mask + * @timeout: Timeout + * @flags: Flags. Can be an arithmetic OR of %DCW_INTRG_FLAGS_MPM, + * %DCW_INTRG_FLAGS_PPR, %DCW_INTRG_FLAGS_CRIT + * @time: Time + * @prog_id: Program Identifier + * @prog_data: Program-Dependent Data + */ +struct dcw_intrg_data { + u32 format:8; + u32 rc:8; + u32 rcq:8; + u32 lpm:8; + u32 pam:8; + u32 pim:8; + u32 timeout:16; + u32 flags:8; + u32 :24; + u32 :32; + u64 time; + u64 prog_id; + u8 prog_data[0]; +} __attribute__ ((packed)); + +#define DCW_FLAGS_CC (1 << (7 - 1)) /* parenthesized: safe under ~, +, etc. */ + +#define DCW_CMD_WRITE 0x01 +#define DCW_CMD_READ 0x02 +#define DCW_CMD_CONTROL 0x03 +#define DCW_CMD_SENSE 0x04 +#define DCW_CMD_SENSE_ID 0xe4 +#define DCW_CMD_INTRG 0x40 + +/** + * struct dcw - Device-Command Word (DCW) + * @cmd: Command Code. Can be one of %DCW_CMD_WRITE, %DCW_CMD_READ, + * %DCW_CMD_CONTROL, %DCW_CMD_SENSE, %DCW_CMD_SENSE_ID, %DCW_CMD_INTRG + * @flags: Flags. Can be an arithmetic OR of %DCW_FLAGS_CC + * @cd_count: Control-Data Count + * @count: Count + * @cd: Control Data + */ +struct dcw { + u32 cmd:8; + u32 flags:8; + u32 :8; + u32 cd_count:8; + u32 count; + u8 cd[0]; +} __attribute__ ((packed)); + +#define TCCB_FORMAT_DEFAULT 0x7f +#define TCCB_MAX_DCW 30 +#define TCCB_MAX_SIZE (sizeof(struct tccb_tcah) + \ + TCCB_MAX_DCW * sizeof(struct dcw) + \ + sizeof(struct tccb_tcat)) +#define TCCB_SAC_DEFAULT 0x1ffe +#define TCCB_SAC_INTRG 0x1fff + +/** + * struct tccb_tcah - Transport-Command-Area Header (TCAH) + * @format: Format. Should be %TCCB_FORMAT_DEFAULT + * @tcal: Transport-Command-Area Length + * @sac: Service-Action Code.
Can be one of %TCCB_SAC_DEFAULT, %TCCB_SAC_INTRG + * @prio: Priority + */ +struct tccb_tcah { + u32 format:8; + u32 :24; + u32 :24; + u32 tcal:8; + u32 sac:16; + u32 :8; + u32 prio:8; + u32 :32; +} __attribute__ ((packed)); + +/** + * struct tccb_tcat - Transport-Command-Area Trailer (TCAT) + * @count: Transport Count + */ +struct tccb_tcat { + u32 :32; + u32 count; +} __attribute__ ((packed)); + +/** + * struct tccb - (partial) Transport-Command-Control Block (TCCB) + * @tcah: TCAH + * @tca: Transport-Command Area + */ +struct tccb { + struct tccb_tcah tcah; + u8 tca[0]; +} __attribute__ ((packed, aligned(8))); + +struct tcw *tcw_get_intrg(struct tcw *tcw); +void *tcw_get_data(struct tcw *tcw); +struct tccb *tcw_get_tccb(struct tcw *tcw); +struct tsb *tcw_get_tsb(struct tcw *tcw); + +void tcw_init(struct tcw *tcw, int r, int w); +void tcw_finalize(struct tcw *tcw, int num_tidaws); + +void tcw_set_intrg(struct tcw *tcw, struct tcw *intrg_tcw); +void tcw_set_data(struct tcw *tcw, void *data, int use_tidal); +void tcw_set_tccb(struct tcw *tcw, struct tccb *tccb); +void tcw_set_tsb(struct tcw *tcw, struct tsb *tsb); + +void tccb_init(struct tccb *tccb, size_t tccb_size, u32 sac); +void tsb_init(struct tsb *tsb); +struct dcw *tccb_add_dcw(struct tccb *tccb, size_t tccb_size, u8 cmd, u8 flags, + void *cd, u8 cd_count, u32 count); +struct tidaw *tcw_add_tidaw(struct tcw *tcw, int num_tidaws, u8 flags, + void *addr, u32 count); + +#endif /* _ASM_S390_FCX_H */ diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h new file mode 100644 index 00000000..b7931faa --- /dev/null +++ b/arch/s390/include/asm/ftrace.h @@ -0,0 +1,24 @@ +#ifndef _ASM_S390_FTRACE_H +#define _ASM_S390_FTRACE_H + +#ifndef __ASSEMBLY__ + +extern void _mcount(void); + +struct dyn_arch_ftrace { }; + +#define MCOUNT_ADDR ((long)_mcount) + +#ifdef CONFIG_64BIT +#define MCOUNT_INSN_SIZE 12 +#else +#define MCOUNT_INSN_SIZE 20 +#endif + +static inline unsigned long 
ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h new file mode 100644 index 00000000..81cf36b6 --- /dev/null +++ b/arch/s390/include/asm/futex.h @@ -0,0 +1,52 @@ +#ifndef _ASM_S390_FUTEX_H +#define _ASM_S390_FUTEX_H + +#ifdef __KERNEL__ + +#include <linux/futex.h> +#include <linux/uaccess.h> +#include <asm/errno.h> + +static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval, ret; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + pagefault_disable(); + ret = uaccess.futex_atomic_op(op, uaddr, oparg, &oldval); + pagefault_enable(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + return uaccess.futex_atomic_cmpxchg(uval, uaddr, oldval, newval); +} + +#endif /* __KERNEL__ */ +#endif /* _ASM_S390_FUTEX_H */ diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h new file mode 100644 index 00000000..510ba9ef --- /dev/null +++ b/arch/s390/include/asm/hardirq.h @@ -0,0 +1,25 @@ +/* + * include/asm-s390/hardirq.h + * + * S390 version + * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + * + * Derived from "include/asm-i386/hardirq.h" + */ + +#ifndef __ASM_HARDIRQ_H +#define __ASM_HARDIRQ_H + +#include <asm/lowcore.h> + +#define local_softirq_pending() (S390_lowcore.softirq_pending) + +#define __ARCH_IRQ_STAT +#define __ARCH_HAS_DO_SOFTIRQ +#define __ARCH_IRQ_EXIT_IRQS_DISABLED + +#define HARDIRQ_BITS 8 + +#endif /* __ASM_HARDIRQ_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h new file mode 100644 index 00000000..799ed0f1 --- /dev/null +++ b/arch/s390/include/asm/hugetlb.h @@ -0,0 +1,149 @@ +/* + * IBM System z Huge TLB Page Support for Kernel. + * + * Copyright IBM Corp. 2008 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#ifndef _ASM_S390_HUGETLB_H +#define _ASM_S390_HUGETLB_H + +#include <asm/page.h> +#include <asm/pgtable.h> + + +#define is_hugepage_only_range(mm, addr, len) 0 +#define hugetlb_free_pgd_range free_pgd_range + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + +/* + * If the arch doesn't supply something else, assume that hugepage + * size aligned regions are ok without further preparation. 
+ */ +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + return 0; +} + +#define hugetlb_prefault_arch_hook(mm) do { } while (0) + +int arch_prepare_hugepage(struct page *page); +void arch_release_hugepage(struct page *page); + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + pte_val(pte) |= _PAGE_RO; + return pte; +} + +static inline int huge_pte_none(pte_t pte) +{ + return (pte_val(pte) & _SEGMENT_ENTRY_INV) && + !(pte_val(pte) & _SEGMENT_ENTRY_RO); +} + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + pte_t pte = *ptep; + unsigned long mask; + + if (!MACHINE_HAS_HPAGE) { + ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN); + if (ptep) { + mask = pte_val(pte) & + (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); + pte = pte_mkhuge(*ptep); + pte_val(pte) |= mask; + } + } + return pte; +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte = huge_ptep_get(ptep); + + mm->context.flush_mm = 1; + pmd_clear((pmd_t *) ptep); + return pte; +} + +static inline void __pmd_csp(pmd_t *pmdp) +{ + register unsigned long reg2 asm("2") = pmd_val(*pmdp); + register unsigned long reg3 asm("3") = pmd_val(*pmdp) | + _SEGMENT_ENTRY_INV; + register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; + + asm volatile( + " csp %1,%3" + : "=m" (*pmdp) + : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); + pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; +} + +static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) +{ + unsigned long sto = (unsigned long) pmdp - + pmd_index(address) * sizeof(pmd_t); + + if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) { + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,0" + : "=m" (*pmdp) + : "m" (*pmdp), "a" (sto), + "a" ((address & HPAGE_MASK)) + ); + } + pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; +} + 
+static inline void huge_ptep_invalidate(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pmd_t *pmdp = (pmd_t *) ptep; + + if (MACHINE_HAS_IDTE) + __pmd_idte(address, pmdp); + else + __pmd_csp(pmdp); +} + +#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ +({ \ + int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \ + if (__changed) { \ + huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ + set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ + } \ + __changed; \ +}) + +#define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \ +({ \ + pte_t __pte = huge_ptep_get(__ptep); \ + if (pte_write(__pte)) { \ + (__mm)->context.flush_mm = 1; \ + if (atomic_read(&(__mm)->context.attach_count) > 1 || \ + (__mm) != current->active_mm) \ + huge_ptep_invalidate(__mm, __addr, __ptep); \ + set_huge_pte_at(__mm, __addr, __ptep, \ + huge_pte_wrprotect(__pte)); \ + } \ +}) + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + huge_ptep_invalidate(vma->vm_mm, address, ptep); +} + +#endif /* _ASM_S390_HUGETLB_H */ diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h new file mode 100644 index 00000000..aae276d0 --- /dev/null +++ b/arch/s390/include/asm/idals.h @@ -0,0 +1,249 @@ +/* + * File...........: linux/include/asm-s390x/idals.h + * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * Bugreports.to..: <Linux390@de.ibm.com> + * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 2000a + + * History of changes + * 07/24/00 new file + * 05/04/02 code restructuring. 
+ */ + +#ifndef _S390_IDALS_H +#define _S390_IDALS_H + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <asm/cio.h> +#include <asm/uaccess.h> + +#ifdef __s390x__ +#define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */ +#else +#define IDA_SIZE_LOG 11 /* 11 for 2k , 12 for 4k */ +#endif +#define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG) + +/* + * Test if an address/length pair needs an idal list. + */ +static inline int +idal_is_needed(void *vaddr, unsigned int length) +{ +#ifdef __s390x__ + return ((__pa(vaddr) + length - 1) >> 31) != 0; +#else + return 0; +#endif +} + + +/* + * Return the number of idal words needed for an address/length pair. + */ +static inline unsigned int idal_nr_words(void *vaddr, unsigned int length) +{ + return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length + + (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG; +} + +/* + * Create the list of idal words for an address/length pair. + */ +static inline unsigned long *idal_create_words(unsigned long *idaws, + void *vaddr, unsigned int length) +{ + unsigned long paddr; + unsigned int cidaw; + + paddr = __pa(vaddr); + cidaw = ((paddr & (IDA_BLOCK_SIZE-1)) + length + + (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG; + *idaws++ = paddr; + paddr &= -IDA_BLOCK_SIZE; + while (--cidaw > 0) { + paddr += IDA_BLOCK_SIZE; + *idaws++ = paddr; + } + return idaws; +} + +/* + * Sets the address of the data in CCW. + * If necessary it allocates an IDAL and sets the appropriate flags. 
+ */ +static inline int +set_normalized_cda(struct ccw1 * ccw, void *vaddr) +{ +#ifdef __s390x__ + unsigned int nridaws; + unsigned long *idal; + + if (ccw->flags & CCW_FLAG_IDA) + return -EINVAL; + nridaws = idal_nr_words(vaddr, ccw->count); + if (nridaws > 0) { + idal = kmalloc(nridaws * sizeof(unsigned long), + GFP_ATOMIC | GFP_DMA ); + if (idal == NULL) + return -ENOMEM; + idal_create_words(idal, vaddr, ccw->count); + ccw->flags |= CCW_FLAG_IDA; + vaddr = idal; + } +#endif + ccw->cda = (__u32)(unsigned long) vaddr; + return 0; +} + +/* + * Releases any allocated IDAL related to the CCW. + */ +static inline void +clear_normalized_cda(struct ccw1 * ccw) +{ +#ifdef __s390x__ + if (ccw->flags & CCW_FLAG_IDA) { + kfree((void *)(unsigned long) ccw->cda); + ccw->flags &= ~CCW_FLAG_IDA; + } +#endif + ccw->cda = 0; +} + +/* + * Idal buffer extension + */ +struct idal_buffer { + size_t size; + size_t page_order; + void *data[0]; +}; + +/* + * Allocate an idal buffer + */ +static inline struct idal_buffer * +idal_buffer_alloc(size_t size, int page_order) +{ + struct idal_buffer *ib; + int nr_chunks, nr_ptrs, i; + + nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG; + nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG; + ib = kmalloc(sizeof(struct idal_buffer) + nr_ptrs*sizeof(void *), + GFP_DMA | GFP_KERNEL); + if (ib == NULL) + return ERR_PTR(-ENOMEM); + ib->size = size; + ib->page_order = page_order; + for (i = 0; i < nr_ptrs; i++) { + if ((i & (nr_chunks - 1)) != 0) { + ib->data[i] = ib->data[i-1] + IDA_BLOCK_SIZE; + continue; + } + ib->data[i] = (void *) + __get_free_pages(GFP_KERNEL, page_order); + if (ib->data[i] != NULL) + continue; + // Not enough memory + while (i >= nr_chunks) { + i -= nr_chunks; + free_pages((unsigned long) ib->data[i], + ib->page_order); + } + kfree(ib); + return ERR_PTR(-ENOMEM); + } + return ib; +} + +/* + * Free an idal buffer. 
+ */ +static inline void +idal_buffer_free(struct idal_buffer *ib) +{ + int nr_chunks, nr_ptrs, i; + + nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG; + nr_chunks = (4096 << ib->page_order) >> IDA_SIZE_LOG; + for (i = 0; i < nr_ptrs; i += nr_chunks) + free_pages((unsigned long) ib->data[i], ib->page_order); + kfree(ib); +} + +/* + * Test if a idal list is really needed. + */ +static inline int +__idal_buffer_is_needed(struct idal_buffer *ib) +{ +#ifdef __s390x__ + return ib->size > (4096ul << ib->page_order) || + idal_is_needed(ib->data[0], ib->size); +#else + return ib->size > (4096ul << ib->page_order); +#endif +} + +/* + * Set channel data address to idal buffer. + */ +static inline void +idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw) +{ + if (__idal_buffer_is_needed(ib)) { + // setup idals; + ccw->cda = (u32)(addr_t) ib->data; + ccw->flags |= CCW_FLAG_IDA; + } else + // we do not need idals - use direct addressing + ccw->cda = (u32)(addr_t) ib->data[0]; + ccw->count = ib->size; +} + +/* + * Copy count bytes from an idal buffer to user memory + */ +static inline size_t +idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count) +{ + size_t left; + int i; + + BUG_ON(count > ib->size); + for (i = 0; count > IDA_BLOCK_SIZE; i++) { + left = copy_to_user(to, ib->data[i], IDA_BLOCK_SIZE); + if (left) + return left + count - IDA_BLOCK_SIZE; + to = (void __user *) to + IDA_BLOCK_SIZE; + count -= IDA_BLOCK_SIZE; + } + return copy_to_user(to, ib->data[i], count); +} + +/* + * Copy count bytes from user memory to an idal buffer + */ +static inline size_t +idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count) +{ + size_t left; + int i; + + BUG_ON(count > ib->size); + for (i = 0; count > IDA_BLOCK_SIZE; i++) { + left = copy_from_user(ib->data[i], from, IDA_BLOCK_SIZE); + if (left) + return left + count - IDA_BLOCK_SIZE; + from = (void __user *) from + IDA_BLOCK_SIZE; + count -= IDA_BLOCK_SIZE; + } + 
return copy_from_user(ib->data[i], from, count); +} + +#endif diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h new file mode 100644 index 00000000..b7ff6afc --- /dev/null +++ b/arch/s390/include/asm/io.h @@ -0,0 +1,54 @@ +/* + * include/asm-s390/io.h + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/io.h" + */ + +#ifndef _S390_IO_H +#define _S390_IO_H + +#ifdef __KERNEL__ + +#include <asm/page.h> + +#define IO_SPACE_LIMIT 0xffffffff + +/* + * Change virtual addresses to physical addresses and vv. + * These are pretty trivial + */ +static inline unsigned long virt_to_phys(volatile void * address) +{ + unsigned long real_address; + asm volatile( + " lra %0,0(%1)\n" + " jz 0f\n" + " la %0,0\n" + "0:" + : "=a" (real_address) : "a" (address) : "cc"); + return real_address; +} + +static inline void * phys_to_virt(unsigned long address) +{ + return (void *) address; +} + +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +#define xlate_dev_mem_ptr(p) __va(p) + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +#endif /* __KERNEL__ */ + +#endif diff --git a/arch/s390/include/asm/ioctl.h b/arch/s390/include/asm/ioctl.h new file mode 100644 index 00000000..b279fe06 --- /dev/null +++ b/arch/s390/include/asm/ioctl.h @@ -0,0 +1 @@ +#include <asm-generic/ioctl.h> diff --git a/arch/s390/include/asm/ioctls.h b/arch/s390/include/asm/ioctls.h new file mode 100644 index 00000000..960a4c1e --- /dev/null +++ b/arch/s390/include/asm/ioctls.h @@ -0,0 +1,8 @@ +#ifndef __ARCH_S390_IOCTLS_H__ +#define __ARCH_S390_IOCTLS_H__ + +#define FIOQSIZE 0x545E + +#include <asm-generic/ioctls.h> + +#endif diff --git a/arch/s390/include/asm/ipcbuf.h b/arch/s390/include/asm/ipcbuf.h new file mode 100644 index 00000000..37f293d1 --- /dev/null +++ 
b/arch/s390/include/asm/ipcbuf.h @@ -0,0 +1,31 @@ +#ifndef __S390_IPCBUF_H__ +#define __S390_IPCBUF_H__ + +/* + * The user_ipc_perm structure for S/390 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 32-bit mode_t and seq + * - 2 miscellaneous 32-bit values + */ + +struct ipc64_perm +{ + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; +#ifndef __s390x__ + unsigned short __pad2; +#endif /* ! __s390x__ */ + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* __S390_IPCBUF_H__ */ diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h new file mode 100644 index 00000000..2bd6cb89 --- /dev/null +++ b/arch/s390/include/asm/ipl.h @@ -0,0 +1,174 @@ +/* + * s390 (re)ipl support + * + * Copyright IBM Corp. 2007 + */ + +#ifndef _ASM_S390_IPL_H +#define _ASM_S390_IPL_H + +#include <asm/types.h> +#include <asm/cio.h> +#include <asm/setup.h> + +#define IPL_PARMBLOCK_ORIGIN 0x2000 + +#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \ + sizeof(struct ipl_block_fcp)) + +#define IPL_PARM_BLK0_FCP_LEN (sizeof(struct ipl_block_fcp) + 8) + +#define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \ + sizeof(struct ipl_block_ccw)) + +#define IPL_PARM_BLK0_CCW_LEN (sizeof(struct ipl_block_ccw) + 8) + +#define IPL_MAX_SUPPORTED_VERSION (0) + +#define IPL_PARMBLOCK_START ((struct ipl_parameter_block *) \ + IPL_PARMBLOCK_ORIGIN) +#define IPL_PARMBLOCK_SIZE (IPL_PARMBLOCK_START->hdr.len) + +struct ipl_list_hdr { + u32 len; + u8 reserved1[3]; + u8 version; + u32 blk0_len; + u8 pbt; + u8 flags; + u16 reserved2; +} __attribute__((packed)); + +struct ipl_block_fcp { + u8 reserved1[313-1]; + u8 opt; + u8 reserved2[3]; + u16 reserved3; + u16 devno; + u8 reserved4[4]; + u64 wwpn; + u64 lun; + u32 bootprog; 
+ u8 reserved5[12]; + u64 br_lba; + u32 scp_data_len; + u8 reserved6[260]; + u8 scp_data[]; +} __attribute__((packed)); + +#define DIAG308_VMPARM_SIZE 64 +#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - (sizeof(struct ipl_list_hdr) + \ + offsetof(struct ipl_block_fcp, scp_data))) + +struct ipl_block_ccw { + u8 load_parm[8]; + u8 reserved1[84]; + u8 reserved2[2]; + u16 devno; + u8 vm_flags; + u8 reserved3[3]; + u32 vm_parm_len; + u8 nss_name[8]; + u8 vm_parm[DIAG308_VMPARM_SIZE]; + u8 reserved4[8]; +} __attribute__((packed)); + +struct ipl_parameter_block { + struct ipl_list_hdr hdr; + union { + struct ipl_block_fcp fcp; + struct ipl_block_ccw ccw; + } ipl_info; +} __attribute__((packed,aligned(4096))); + +/* + * IPL validity flags + */ +extern u32 ipl_flags; +extern u32 dump_prefix_page; +extern unsigned int zfcpdump_prefix_array[]; + +extern void do_reipl(void); +extern void do_halt(void); +extern void do_poff(void); +extern void ipl_save_parameters(void); +extern void ipl_update_parameters(void); +extern size_t append_ipl_vmparm(char *, size_t); +extern size_t append_ipl_scpdata(char *, size_t); + +enum { + IPL_DEVNO_VALID = 1, + IPL_PARMBLOCK_VALID = 2, + IPL_NSS_VALID = 4, +}; + +enum ipl_type { + IPL_TYPE_UNKNOWN = 1, + IPL_TYPE_CCW = 2, + IPL_TYPE_FCP = 4, + IPL_TYPE_FCP_DUMP = 8, + IPL_TYPE_NSS = 16, +}; + +struct ipl_info +{ + enum ipl_type type; + union { + struct { + struct ccw_dev_id dev_id; + } ccw; + struct { + struct ccw_dev_id dev_id; + u64 wwpn; + u64 lun; + } fcp; + struct { + char name[NSS_NAME_SIZE + 1]; + } nss; + } data; +}; + +extern struct ipl_info ipl_info; +extern void setup_ipl(void); + +/* + * DIAG 308 support + */ +enum diag308_subcode { + DIAG308_REL_HSA = 2, + DIAG308_IPL = 3, + DIAG308_DUMP = 4, + DIAG308_SET = 5, + DIAG308_STORE = 6, +}; + +enum diag308_ipl_type { + DIAG308_IPL_TYPE_FCP = 0, + DIAG308_IPL_TYPE_CCW = 2, +}; + +enum diag308_opt { + DIAG308_IPL_OPT_IPL = 0x10, + DIAG308_IPL_OPT_DUMP = 0x20, +}; + +enum diag308_flags { + 
DIAG308_FLAGS_LP_VALID = 0x80, +}; + +enum diag308_vm_flags { + DIAG308_VM_FLAGS_NSS_VALID = 0x80, + DIAG308_VM_FLAGS_VP_VALID = 0x40, +}; + +enum diag308_rc { + DIAG308_RC_OK = 0x0001, + DIAG308_RC_NOCONFIG = 0x0102, +}; + +extern int diag308(unsigned long subcode, void *addr); +extern void diag308_reset(void); +extern void store_status(void); +extern void lgr_info_log(void); + +#endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h new file mode 100644 index 00000000..5289cacd --- /dev/null +++ b/arch/s390/include/asm/irq.h @@ -0,0 +1,51 @@ +#ifndef _ASM_IRQ_H +#define _ASM_IRQ_H + +#include <linux/hardirq.h> +#include <linux/types.h> + +enum interruption_class { + EXTERNAL_INTERRUPT, + IO_INTERRUPT, + EXTINT_CLK, + EXTINT_EXC, + EXTINT_EMS, + EXTINT_TMR, + EXTINT_TLA, + EXTINT_PFL, + EXTINT_DSD, + EXTINT_VRT, + EXTINT_SCP, + EXTINT_IUC, + EXTINT_CPM, + IOINT_CIO, + IOINT_QAI, + IOINT_DAS, + IOINT_C15, + IOINT_C70, + IOINT_TAP, + IOINT_VMR, + IOINT_LCS, + IOINT_CLW, + IOINT_CTC, + IOINT_APB, + IOINT_CSC, + NMI_NMI, + NR_IRQS, +}; + +struct ext_code { + unsigned short subcode; + unsigned short code; +}; + +typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long); + +int register_external_interrupt(u16 code, ext_int_handler_t handler); +int unregister_external_interrupt(u16 code, ext_int_handler_t handler); +void service_subclass_irq_register(void); +void service_subclass_irq_unregister(void); +void measurement_alert_subclass_register(void); +void measurement_alert_subclass_unregister(void); + +#endif /* _ASM_IRQ_H */ diff --git a/arch/s390/include/asm/irq_regs.h b/arch/s390/include/asm/irq_regs.h new file mode 100644 index 00000000..3dd9c0b7 --- /dev/null +++ b/arch/s390/include/asm/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h new file mode 100644 index 00000000..38fdf451 --- /dev/null +++ 
b/arch/s390/include/asm/irqflags.h @@ -0,0 +1,72 @@ +/* + * Copyright IBM Corp. 2006,2010 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_IRQFLAGS_H +#define __ASM_IRQFLAGS_H + +#include <linux/types.h> + +/* store then OR system mask. */ +#define __arch_local_irq_stosm(__or) \ +({ \ + unsigned long __mask; \ + asm volatile( \ + " stosm %0,%1" \ + : "=Q" (__mask) : "i" (__or) : "memory"); \ + __mask; \ +}) + +/* store then AND system mask. */ +#define __arch_local_irq_stnsm(__and) \ +({ \ + unsigned long __mask; \ + asm volatile( \ + " stnsm %0,%1" \ + : "=Q" (__mask) : "i" (__and) : "memory"); \ + __mask; \ +}) + +/* set system mask. */ +static inline notrace void __arch_local_irq_ssm(unsigned long flags) +{ + asm volatile("ssm %0" : : "Q" (flags) : "memory"); +} + +static inline notrace unsigned long arch_local_save_flags(void) +{ + return __arch_local_irq_stosm(0x00); +} + +static inline notrace unsigned long arch_local_irq_save(void) +{ + return __arch_local_irq_stnsm(0xfc); +} + +static inline notrace void arch_local_irq_disable(void) +{ + arch_local_irq_save(); +} + +static inline notrace void arch_local_irq_enable(void) +{ + __arch_local_irq_stosm(0x03); +} + +static inline notrace void arch_local_irq_restore(unsigned long flags) +{ + __arch_local_irq_ssm(flags); +} + +static inline notrace bool arch_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (3UL << (BITS_PER_LONG - 8))); +} + +static inline notrace bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +#endif /* __ASM_IRQFLAGS_H */ diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h new file mode 100644 index 00000000..1420a111 --- /dev/null +++ b/arch/s390/include/asm/isc.h @@ -0,0 +1,26 @@ +#ifndef _ASM_S390_ISC_H +#define _ASM_S390_ISC_H + +#include <linux/types.h> + +/* + * I/O interruption subclasses used by drivers. 
+ * Please add all used iscs here so that it is possible to distribute + * isc usage between drivers. + * Reminder: 0 is highest priority, 7 lowest. + */ +#define MAX_ISC 7 + +/* Regular I/O interrupts. */ +#define IO_SCH_ISC 3 /* regular I/O subchannels */ +#define CONSOLE_ISC 1 /* console I/O subchannel */ +#define CHSC_SCH_ISC 7 /* CHSC subchannels */ +/* Adapter interrupts. */ +#define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */ +#define AP_ISC 6 /* adjunct processor (crypto) devices */ + +/* Functions for registration of I/O interruption subclasses */ +void isc_register(unsigned int isc); +void isc_unregister(unsigned int isc); + +#endif /* _ASM_S390_ISC_H */ diff --git a/arch/s390/include/asm/itcw.h b/arch/s390/include/asm/itcw.h new file mode 100644 index 00000000..fb1bedd3 --- /dev/null +++ b/arch/s390/include/asm/itcw.h @@ -0,0 +1,30 @@ +/* + * Functions for incremental construction of fcx enabled I/O control blocks. + * + * Copyright IBM Corp. 2008 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef _ASM_S390_ITCW_H +#define _ASM_S390_ITCW_H + +#include <linux/types.h> +#include <asm/fcx.h> + +#define ITCW_OP_READ 0 +#define ITCW_OP_WRITE 1 + +struct itcw; + +struct tcw *itcw_get_tcw(struct itcw *itcw); +size_t itcw_calc_size(int intrg, int max_tidaws, int intrg_max_tidaws); +struct itcw *itcw_init(void *buffer, size_t size, int op, int intrg, + int max_tidaws, int intrg_max_tidaws); +struct dcw *itcw_add_dcw(struct itcw *itcw, u8 cmd, u8 flags, void *cd, + u8 cd_count, u32 count); +struct tidaw *itcw_add_tidaw(struct itcw *itcw, u8 flags, void *addr, + u32 count); +void itcw_set_data(struct itcw *itcw, void *addr, int use_tidal); +void itcw_finalize(struct itcw *itcw); + +#endif /* _ASM_S390_ITCW_H */ diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h new file mode 100644 index 00000000..6c32190d --- /dev/null +++ b/arch/s390/include/asm/jump_label.h @@ -0,0 +1,37 @@ 
+#ifndef _ASM_S390_JUMP_LABEL_H +#define _ASM_S390_JUMP_LABEL_H + +#include <linux/types.h> + +#define JUMP_LABEL_NOP_SIZE 6 + +#ifdef CONFIG_64BIT +#define ASM_PTR ".quad" +#define ASM_ALIGN ".balign 8" +#else +#define ASM_PTR ".long" +#define ASM_ALIGN ".balign 4" +#endif + +static __always_inline bool arch_static_branch(struct static_key *key) +{ + asm goto("0: brcl 0,0\n" + ".pushsection __jump_table, \"aw\"\n" + ASM_ALIGN "\n" + ASM_PTR " 0b, %l[label], %0\n" + ".popsection\n" + : : "X" (key) : : label); + return false; +label: + return true; +} + +typedef unsigned long jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif diff --git a/arch/s390/include/asm/kdebug.h b/arch/s390/include/asm/kdebug.h new file mode 100644 index 00000000..5c1abd47 --- /dev/null +++ b/arch/s390/include/asm/kdebug.h @@ -0,0 +1,27 @@ +#ifndef _S390_KDEBUG_H +#define _S390_KDEBUG_H + +/* + * Feb 2006 Ported to s390 <grundym@us.ibm.com> + */ + +struct pt_regs; + +enum die_val { + DIE_OOPS = 1, + DIE_BPT, + DIE_SSTEP, + DIE_PANIC, + DIE_NMI, + DIE_DIE, + DIE_NMIWATCHDOG, + DIE_KERNELDEBUG, + DIE_TRAP, + DIE_GPF, + DIE_CALL, + DIE_NMI_IPI, +}; + +extern void die(struct pt_regs *, const char *); + +#endif diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h new file mode 100644 index 00000000..3f30dac8 --- /dev/null +++ b/arch/s390/include/asm/kexec.h @@ -0,0 +1,67 @@ +/* + * include/asm-s390/kexec.h + * + * (C) Copyright IBM Corp. 2005 + * + * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * + */ + +#ifndef _S390_KEXEC_H +#define _S390_KEXEC_H + +#ifdef __KERNEL__ +#include <asm/page.h> +#endif +#include <asm/processor.h> +/* + * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. + * I.e. Maximum page that is mapped directly into kernel memory, + * and kmap is not required. 
+ */ + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) + +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + +/* Maximum address we can use for the control pages */ +/* Not more than 2GB */ +#define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) + +/* Maximum address we can use for the crash control pages */ +#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) + +/* Allocate one page for the pdp and the second for the code */ +#define KEXEC_CONTROL_PAGE_SIZE 4096 + +/* Alignment of crashkernel memory */ +#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE + +/* The native architecture */ +#define KEXEC_ARCH KEXEC_ARCH_S390 + +/* + * Size for s390x ELF notes per CPU + * + * Seven notes plus zero note at the end: prstatus, fpregset, timer, + * tod_cmp, tod_reg, control regs, and prefix + */ +#define KEXEC_NOTE_BYTES \ + (ALIGN(sizeof(struct elf_note), 4) * 8 + \ + ALIGN(sizeof("CORE"), 4) * 7 + \ + ALIGN(sizeof(struct elf_prstatus), 4) + \ + ALIGN(sizeof(elf_fpregset_t), 4) + \ + ALIGN(sizeof(u64), 4) + \ + ALIGN(sizeof(u64), 4) + \ + ALIGN(sizeof(u32), 4) + \ + ALIGN(sizeof(u64) * 16, 4) + \ + ALIGN(sizeof(u32), 4) \ + ) + +/* Provide a dummy definition to avoid build failures. 
*/ +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) { } + +#endif /*_S390_KEXEC_H */ diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h new file mode 100644 index 00000000..94ec3ee0 --- /dev/null +++ b/arch/s390/include/asm/kmap_types.h @@ -0,0 +1,8 @@ +#ifdef __KERNEL__ +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +#include <asm-generic/kmap_types.h> + +#endif +#endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h new file mode 100644 index 00000000..a231a943 --- /dev/null +++ b/arch/s390/include/asm/kprobes.h @@ -0,0 +1,87 @@ +#ifndef _ASM_S390_KPROBES_H +#define _ASM_S390_KPROBES_H +/* + * Kernel Probes (KProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2006 + * + * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel + * Probes initial implementation ( includes suggestions from + * Rusty Russell). 
+ * 2004-Nov Modified for PPC64 by Ananth N Mavinakayanahalli + * <ananth@in.ibm.com> + * 2005-Dec Used as a template for s390 by Mike Grundy + * <grundym@us.ibm.com> + */ +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> + +struct pt_regs; +struct kprobe; + +typedef u16 kprobe_opcode_t; +#define BREAKPOINT_INSTRUCTION 0x0002 + +/* Maximum instruction size is 3 (16bit) halfwords: */ +#define MAX_INSN_SIZE 0x0003 +#define MAX_STACK_SIZE 64 +#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \ + (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \ + ? (MAX_STACK_SIZE) \ + : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) + +#define kretprobe_blacklist_size 0 + +#define KPROBE_SWAP_INST 0x10 + +#define FIXUP_PSW_NORMAL 0x08 +#define FIXUP_BRANCH_NOT_TAKEN 0x04 +#define FIXUP_RETURN_REGISTER 0x02 +#define FIXUP_NOT_REQUIRED 0x01 + +/* Architecture specific copy of original instruction */ +struct arch_specific_insn { + /* copy of original instruction */ + kprobe_opcode_t insn[MAX_INSN_SIZE]; +}; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long kprobe_saved_imask; + unsigned long kprobe_saved_ctl[3]; + struct prev_kprobe prev_kprobe; + struct pt_regs jprobe_saved_regs; + kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; +}; + +void arch_remove_kprobe(struct kprobe *p); +void kretprobe_trampoline(void); + +int kprobe_fault_handler(struct pt_regs *regs, int trapnr); +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +#define flush_insn_slot(p) do { } while (0) + +#endif /* _ASM_S390_KPROBES_H */ diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h new file mode 100644 index 00000000..96076676 --- /dev/null +++ b/arch/s390/include/asm/kvm.h @@ -0,0 +1,55 @@ +#ifndef __LINUX_KVM_S390_H +#define __LINUX_KVM_S390_H +/* + * 
asm-s390/kvm.h - KVM s390 specific structures and definitions + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ +#include <linux/types.h> + +#define __KVM_S390 + +/* for KVM_GET_REGS and KVM_SET_REGS */ +struct kvm_regs { + /* general purpose regs for s390 */ + __u64 gprs[16]; +}; + +/* for KVM_GET_SREGS and KVM_SET_SREGS */ +struct kvm_sregs { + __u32 acrs[16]; + __u64 crs[16]; +}; + +/* for KVM_GET_FPU and KVM_SET_FPU */ +struct kvm_fpu { + __u32 fpc; + __u64 fprs[16]; +}; + +struct kvm_debug_exit_arch { +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { +}; + +#define KVM_SYNC_PREFIX (1UL << 0) +#define KVM_SYNC_GPRS (1UL << 1) +#define KVM_SYNC_ACRS (1UL << 2) +#define KVM_SYNC_CRS (1UL << 3) +/* definition of registers in kvm_run */ +struct kvm_sync_regs { + __u64 prefix; /* prefix register */ + __u64 gprs[16]; /* general purpose registers */ + __u32 acrs[16]; /* access registers */ + __u64 crs[16]; /* control registers */ +}; +#endif diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h new file mode 100644 index 00000000..73438728 --- /dev/null +++ b/arch/s390/include/asm/kvm_host.h @@ -0,0 +1,259 @@ +/* + * asm-s390/kvm_host.h - definition for kernel virtual machines on s390 + * + * Copyright IBM Corp. 2008,2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Carsten Otte <cotte@de.ibm.com> + */ + + +#ifndef ASM_KVM_HOST_H +#define ASM_KVM_HOST_H +#include <linux/hrtimer.h> +#include <linux/interrupt.h> +#include <linux/kvm_host.h> +#include <asm/debug.h> +#include <asm/cpu.h> + +#define KVM_MAX_VCPUS 64 +#define KVM_MEMORY_SLOTS 32 +/* memory slots that does not exposed to userspace */ +#define KVM_PRIVATE_MEM_SLOTS 4 + +struct sca_entry { + atomic_t scn; + __u32 reserved; + __u64 sda; + __u64 reserved2[2]; +} __attribute__((packed)); + + +struct sca_block { + __u64 ipte_control; + __u64 reserved[5]; + __u64 mcn; + __u64 reserved2; + struct sca_entry cpu[64]; +} __attribute__((packed)); + +#define KVM_NR_PAGE_SIZES 2 +#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8) +#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) +#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) +#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) +#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) + +#define CPUSTAT_STOPPED 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + +struct kvm_s390_sie_block { + atomic_t cpuflags; /* 0x0000 */ + __u32 prefix; /* 0x0004 */ + __u8 reserved8[32]; /* 0x0008 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u8 reserved40[4]; /* 0x0040 */ +#define LCTL_CR0 0x8000 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ + __u32 ictl; /* 0x0048 */ + __u32 eca; /* 
0x004c */ + __u8 icptcode; /* 0x0050 */ + __u8 reserved51; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54[2]; /* 0x0054 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ + __u8 reserved60; /* 0x0060 */ + __u8 ecb; /* 0x0061 */ + __u8 reserved62[2]; /* 0x0062 */ + __u32 scaol; /* 0x0064 */ + __u8 reserved68[4]; /* 0x0068 */ + __u32 todpr; /* 0x006c */ + __u8 reserved70[32]; /* 0x0070 */ + psw_t gpsw; /* 0x0090 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[30]; /* 0x00b0 */ + __u16 iprcc; /* 0x00ce */ + __u8 reservedd0[48]; /* 0x00d0 */ + __u64 gcr[16]; /* 0x0100 */ + __u64 gbea; /* 0x0180 */ + __u8 reserved188[24]; /* 0x0188 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[92]; /* 0x01a4 */ +} __attribute__((packed)); + +struct kvm_vcpu_stat { + u32 exit_userspace; + u32 exit_null; + u32 exit_external_request; + u32 exit_external_interrupt; + u32 exit_stop_request; + u32 exit_validity; + u32 exit_instruction; + u32 instruction_lctl; + u32 instruction_lctlg; + u32 exit_program_interruption; + u32 exit_instr_and_program; + u32 deliver_external_call; + u32 deliver_emergency_signal; + u32 deliver_service_signal; + u32 deliver_virtio_interrupt; + u32 deliver_stop_signal; + u32 deliver_prefix_signal; + u32 deliver_restart_signal; + u32 deliver_program_int; + u32 exit_wait_state; + u32 instruction_stidp; + u32 instruction_spx; + u32 instruction_stpx; + u32 instruction_stap; + u32 instruction_storage_key; + u32 instruction_stsch; + u32 instruction_chsc; + u32 instruction_stsi; + u32 instruction_stfl; + u32 instruction_tprot; + u32 instruction_sigp_sense; + u32 instruction_sigp_sense_running; + u32 instruction_sigp_external_call; + u32 instruction_sigp_emergency; + u32 instruction_sigp_stop; + u32 instruction_sigp_arch; + u32 instruction_sigp_prefix; + u32 instruction_sigp_restart; + u32 diagnose_10; + u32 diagnose_44; +}; + +struct kvm_s390_io_info { + __u16 subchannel_id; /* 0x0b8 */ + __u16 
subchannel_nr; /* 0x0ba */ + __u32 io_int_parm; /* 0x0bc */ + __u32 io_int_word; /* 0x0c0 */ +}; + +struct kvm_s390_ext_info { + __u32 ext_params; + __u64 ext_params2; +}; + +#define PGM_OPERATION 0x01 +#define PGM_PRIVILEGED_OPERATION 0x02 +#define PGM_EXECUTE 0x03 +#define PGM_PROTECTION 0x04 +#define PGM_ADDRESSING 0x05 +#define PGM_SPECIFICATION 0x06 +#define PGM_DATA 0x07 + +struct kvm_s390_pgm_info { + __u16 code; +}; + +struct kvm_s390_prefix_info { + __u32 address; +}; + +struct kvm_s390_extcall_info { + __u16 code; +}; + +struct kvm_s390_emerg_info { + __u16 code; +}; + +struct kvm_s390_interrupt_info { + struct list_head list; + u64 type; + union { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + }; +}; + +/* for local_interrupt.action_flags */ +#define ACTION_STORE_ON_STOP (1<<0) +#define ACTION_STOP_ON_STOP (1<<1) +#define ACTION_RELOADVCPU_ON_STOP (1<<2) + +struct kvm_s390_local_interrupt { + spinlock_t lock; + struct list_head list; + atomic_t active; + struct kvm_s390_float_interrupt *float_int; + int timer_due; /* event indicator for waitqueue below */ + wait_queue_head_t wq; + atomic_t *cpuflags; + unsigned int action_bits; +}; + +struct kvm_s390_float_interrupt { + spinlock_t lock; + struct list_head list; + atomic_t active; + int next_rr_cpu; + unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1) + / sizeof(long)]; + struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS]; +}; + + +struct kvm_vcpu_arch { + struct kvm_s390_sie_block *sie_block; + s390_fp_regs host_fpregs; + unsigned int host_acrs[NUM_ACRS]; + s390_fp_regs guest_fpregs; + struct kvm_s390_local_interrupt local_int; + struct hrtimer ckc_timer; + struct tasklet_struct tasklet; + union { + struct cpuid cpu_id; + u64 stidp_data; + }; + struct gmap *gmap; +}; + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct 
kvm_arch_memory_slot { +}; + +struct kvm_arch{ + struct sca_block *sca; + debug_info_t *dbf; + struct kvm_s390_float_interrupt float_int; + struct gmap *gmap; +}; + +extern int sie64a(struct kvm_s390_sie_block *, u64 *); +#endif diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h new file mode 100644 index 00000000..6964db22 --- /dev/null +++ b/arch/s390/include/asm/kvm_para.h @@ -0,0 +1,154 @@ +/* + * asm-s390/kvm_para.h - definition for paravirtual devices on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#ifndef __S390_KVM_PARA_H +#define __S390_KVM_PARA_H + +#ifdef __KERNEL__ + +/* + * Hypercalls for KVM on s390. The calling convention is similar to the + * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1 + * as hypercall number and R7 as parameter 6. The return value is + * written to R2. We use the diagnose instruction as hypercall. To avoid + * conflicts with existing diagnoses for LPAR and z/VM, we do not use + * the instruction encoded number, but specify the number in R1 and + * use 0x500 as KVM hypercall + * + * Copyright IBM Corp. 2007,2008 + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. 
+ */ + +static inline long kvm_hypercall0(unsigned long nr) +{ + register unsigned long __nr asm("1") = nr; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr): "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall1(unsigned long nr, unsigned long p1) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1) : "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall2(unsigned long nr, unsigned long p1, + unsigned long p2) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2) + : "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall3(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register unsigned long __p3 asm("4") = p3; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), + "d" (__p3) : "memory", "cc"); + return __rc; +} + + +static inline long kvm_hypercall4(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register unsigned long __p3 asm("4") = p3; + register unsigned long __p4 asm("5") = p4; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), + "d" (__p3), "d" (__p4) : "memory", "cc"); + return __rc; +} + +static inline long 
kvm_hypercall5(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register unsigned long __p3 asm("4") = p3; + register unsigned long __p4 asm("5") = p4; + register unsigned long __p5 asm("6") = p5; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), + "d" (__p3), "d" (__p4), "d" (__p5) : "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall6(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, + unsigned long p6) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register unsigned long __p3 asm("4") = p3; + register unsigned long __p4 asm("5") = p4; + register unsigned long __p5 asm("6") = p5; + register unsigned long __p6 asm("7") = p6; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), + "d" (__p3), "d" (__p4), "d" (__p5), "d" (__p6) + : "memory", "cc"); + return __rc; +} + +/* kvm on s390 is always paravirtualization enabled */ +static inline int kvm_para_available(void) +{ + return 1; +} + +/* No feature bits are currently assigned for kvm on s390 */ +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + +#endif + +#endif /* __S390_KVM_PARA_H */ diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/asm/kvm_virtio.h new file mode 100644 index 00000000..72f61418 --- /dev/null +++ b/arch/s390/include/asm/kvm_virtio.h @@ -0,0 +1,64 @@ +/* + * kvm_virtio.h - definition for virtio for kvm on s390 + * + * Copyright IBM Corp. 
2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#ifndef __KVM_S390_VIRTIO_H +#define __KVM_S390_VIRTIO_H + +#include <linux/types.h> + +struct kvm_device_desc { + /* The device type: console, network, disk etc. Type 0 terminates. */ + __u8 type; + /* The number of virtqueues (first in config array) */ + __u8 num_vq; + /* + * The number of bytes of feature bits. Multiply by 2: one for host + * features and one for guest acknowledgements. + */ + __u8 feature_len; + /* The number of bytes of the config array after virtqueues. */ + __u8 config_len; + /* A status byte, written by the Guest. */ + __u8 status; + __u8 config[0]; +}; + +/* + * This is how we expect the device configuration field for a virtqueue + * to be laid out in config space. + */ +struct kvm_vqconfig { + /* The token returned with an interrupt. Set by the guest */ + __u64 token; + /* The address of the virtio ring */ + __u64 address; + /* The number of entries in the virtio_ring */ + __u16 num; + +}; + +#define KVM_S390_VIRTIO_NOTIFY 0 +#define KVM_S390_VIRTIO_RESET 1 +#define KVM_S390_VIRTIO_SET_STATUS 2 + +/* The alignment to use between consumer and producer parts of vring. + * This is pagesize for historical reasons. 
*/ +#define KVM_S390_VIRTIO_RING_ALIGN 4096 + + +/* These values are supposed to be in ext_params on an interrupt */ +#define VIRTIO_PARAM_MASK 0xff +#define VIRTIO_PARAM_VRING_INTERRUPT 0x0 +#define VIRTIO_PARAM_CONFIG_CHANGED 0x1 +#define VIRTIO_PARAM_DEV_ADD 0x2 + +#endif diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h new file mode 100644 index 00000000..fc8a8284 --- /dev/null +++ b/arch/s390/include/asm/linkage.h @@ -0,0 +1,9 @@ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#include <linux/stringify.h> + +#define __ALIGN .align 4, 0x07 +#define __ALIGN_STR __stringify(__ALIGN) + +#endif diff --git a/arch/s390/include/asm/local.h b/arch/s390/include/asm/local.h new file mode 100644 index 00000000..c11c530f --- /dev/null +++ b/arch/s390/include/asm/local.h @@ -0,0 +1 @@ +#include <asm-generic/local.h> diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h new file mode 100644 index 00000000..36c93b5c --- /dev/null +++ b/arch/s390/include/asm/local64.h @@ -0,0 +1 @@ +#include <asm-generic/local64.h> diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h new file mode 100644 index 00000000..47853deb --- /dev/null +++ b/arch/s390/include/asm/lowcore.h @@ -0,0 +1,361 @@ +/* + * Copyright IBM Corp. 
1999,2012 + * Author(s): Hartmut Penner <hp@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Denis Joseph Barrow, + */ + +#ifndef _ASM_S390_LOWCORE_H +#define _ASM_S390_LOWCORE_H + +#include <linux/types.h> +#include <asm/ptrace.h> +#include <asm/cpu.h> + +#ifdef CONFIG_32BIT + +#define LC_ORDER 0 +#define LC_PAGES 1 + +struct save_area { + u32 ext_save; + u64 timer; + u64 clk_cmp; + u8 pad1[24]; + u8 psw[8]; + u32 pref_reg; + u8 pad2[20]; + u32 acc_regs[16]; + u64 fp_regs[4]; + u32 gp_regs[16]; + u32 ctrl_regs[16]; +} __packed; + +struct _lowcore { + psw_t restart_psw; /* 0x0000 */ + psw_t restart_old_psw; /* 0x0008 */ + __u8 pad_0x0010[0x0014-0x0010]; /* 0x0010 */ + __u32 ipl_parmblock_ptr; /* 0x0014 */ + psw_t external_old_psw; /* 0x0018 */ + psw_t svc_old_psw; /* 0x0020 */ + psw_t program_old_psw; /* 0x0028 */ + psw_t mcck_old_psw; /* 0x0030 */ + psw_t io_old_psw; /* 0x0038 */ + __u8 pad_0x0040[0x0058-0x0040]; /* 0x0040 */ + psw_t external_new_psw; /* 0x0058 */ + psw_t svc_new_psw; /* 0x0060 */ + psw_t program_new_psw; /* 0x0068 */ + psw_t mcck_new_psw; /* 0x0070 */ + psw_t io_new_psw; /* 0x0078 */ + __u32 ext_params; /* 0x0080 */ + __u16 ext_cpu_addr; /* 0x0084 */ + __u16 ext_int_code; /* 0x0086 */ + __u16 svc_ilc; /* 0x0088 */ + __u16 svc_code; /* 0x008a */ + __u16 pgm_ilc; /* 0x008c */ + __u16 pgm_code; /* 0x008e */ + __u32 trans_exc_code; /* 0x0090 */ + __u16 mon_class_num; /* 0x0094 */ + __u16 per_perc_atmid; /* 0x0096 */ + __u32 per_address; /* 0x0098 */ + __u32 monitor_code; /* 0x009c */ + __u8 exc_access_id; /* 0x00a0 */ + __u8 per_access_id; /* 0x00a1 */ + __u8 op_access_id; /* 0x00a2 */ + __u8 ar_access_id; /* 0x00a3 */ + __u8 pad_0x00a4[0x00b8-0x00a4]; /* 0x00a4 */ + __u16 subchannel_id; /* 0x00b8 */ + __u16 subchannel_nr; /* 0x00ba */ + __u32 io_int_parm; /* 0x00bc */ + __u32 io_int_word; /* 0x00c0 */ + __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */ + __u32 stfl_fac_list; /* 0x00c8 */ + __u8 pad_0x00cc[0x00d4-0x00cc]; /* 0x00cc */ + 
__u32 extended_save_area_addr; /* 0x00d4 */ + __u32 cpu_timer_save_area[2]; /* 0x00d8 */ + __u32 clock_comp_save_area[2]; /* 0x00e0 */ + __u32 mcck_interruption_code[2]; /* 0x00e8 */ + __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */ + __u32 external_damage_code; /* 0x00f4 */ + __u32 failing_storage_address; /* 0x00f8 */ + __u8 pad_0x00fc[0x0100-0x00fc]; /* 0x00fc */ + psw_t psw_save_area; /* 0x0100 */ + __u32 prefixreg_save_area; /* 0x0108 */ + __u8 pad_0x010c[0x0120-0x010c]; /* 0x010c */ + + /* CPU register save area: defined by architecture */ + __u32 access_regs_save_area[16]; /* 0x0120 */ + __u32 floating_pt_save_area[8]; /* 0x0160 */ + __u32 gpregs_save_area[16]; /* 0x0180 */ + __u32 cregs_save_area[16]; /* 0x01c0 */ + + /* Save areas. */ + __u32 save_area_sync[8]; /* 0x0200 */ + __u32 save_area_async[8]; /* 0x0220 */ + __u32 save_area_restart[1]; /* 0x0240 */ + __u8 pad_0x0244[0x0248-0x0244]; /* 0x0244 */ + + /* Return psws. */ + psw_t return_psw; /* 0x0248 */ + psw_t return_mcck_psw; /* 0x0250 */ + + /* CPU time accounting values */ + __u64 sync_enter_timer; /* 0x0258 */ + __u64 async_enter_timer; /* 0x0260 */ + __u64 mcck_enter_timer; /* 0x0268 */ + __u64 exit_timer; /* 0x0270 */ + __u64 user_timer; /* 0x0278 */ + __u64 system_timer; /* 0x0280 */ + __u64 steal_timer; /* 0x0288 */ + __u64 last_update_timer; /* 0x0290 */ + __u64 last_update_clock; /* 0x0298 */ + __u64 int_clock; /* 0x02a0 */ + __u64 mcck_clock; /* 0x02a8 */ + __u64 clock_comparator; /* 0x02b0 */ + + /* Current process. */ + __u32 current_task; /* 0x02b8 */ + __u32 thread_info; /* 0x02bc */ + __u32 kernel_stack; /* 0x02c0 */ + + /* Interrupt, panic and restart stack. */ + __u32 async_stack; /* 0x02c4 */ + __u32 panic_stack; /* 0x02c8 */ + __u32 restart_stack; /* 0x02cc */ + + /* Restart function and parameter. */ + __u32 restart_fn; /* 0x02d0 */ + __u32 restart_data; /* 0x02d4 */ + __u32 restart_source; /* 0x02d8 */ + + /* Address space pointer. 
*/ + __u32 kernel_asce; /* 0x02dc */ + __u32 user_asce; /* 0x02e0 */ + __u32 current_pid; /* 0x02e4 */ + + /* SMP info area */ + __u32 cpu_nr; /* 0x02e8 */ + __u32 softirq_pending; /* 0x02ec */ + __u32 percpu_offset; /* 0x02f0 */ + __u32 machine_flags; /* 0x02f4 */ + __u32 ftrace_func; /* 0x02f8 */ + __u8 pad_0x02fc[0x0300-0x02fc]; /* 0x02fc */ + + /* Interrupt response block */ + __u8 irb[64]; /* 0x0300 */ + + __u8 pad_0x0340[0x0e00-0x0340]; /* 0x0340 */ + + /* + * 0xe00 contains the address of the IPL Parameter Information + * block. Dump tools need IPIB for IPL after dump. + * Note: do not change the position of any fields in 0x0e00-0x0f00 + */ + __u32 ipib; /* 0x0e00 */ + __u32 ipib_checksum; /* 0x0e04 */ + __u32 vmcore_info; /* 0x0e08 */ + __u8 pad_0x0e0c[0x0e18-0x0e0c]; /* 0x0e0c */ + __u32 os_info; /* 0x0e18 */ + __u8 pad_0x0e1c[0x0f00-0x0e1c]; /* 0x0e1c */ + + /* Extended facility list */ + __u64 stfle_fac_list[32]; /* 0x0f00 */ +} __packed; + +#else /* CONFIG_32BIT */ + +#define LC_ORDER 1 +#define LC_PAGES 2 + +struct save_area { + u64 fp_regs[16]; + u64 gp_regs[16]; + u8 psw[16]; + u8 pad1[8]; + u32 pref_reg; + u32 fp_ctrl_reg; + u8 pad2[4]; + u32 tod_reg; + u64 timer; + u64 clk_cmp; + u8 pad3[8]; + u32 acc_regs[16]; + u64 ctrl_regs[16]; +} __packed; + +struct _lowcore { + __u8 pad_0x0000[0x0014-0x0000]; /* 0x0000 */ + __u32 ipl_parmblock_ptr; /* 0x0014 */ + __u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */ + __u32 ext_params; /* 0x0080 */ + __u16 ext_cpu_addr; /* 0x0084 */ + __u16 ext_int_code; /* 0x0086 */ + __u16 svc_ilc; /* 0x0088 */ + __u16 svc_code; /* 0x008a */ + __u16 pgm_ilc; /* 0x008c */ + __u16 pgm_code; /* 0x008e */ + __u32 data_exc_code; /* 0x0090 */ + __u16 mon_class_num; /* 0x0094 */ + __u16 per_perc_atmid; /* 0x0096 */ + __u64 per_address; /* 0x0098 */ + __u8 exc_access_id; /* 0x00a0 */ + __u8 per_access_id; /* 0x00a1 */ + __u8 op_access_id; /* 0x00a2 */ + __u8 ar_access_id; /* 0x00a3 */ + __u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */ + __u64 
trans_exc_code; /* 0x00a8 */ + __u64 monitor_code; /* 0x00b0 */ + __u16 subchannel_id; /* 0x00b8 */ + __u16 subchannel_nr; /* 0x00ba */ + __u32 io_int_parm; /* 0x00bc */ + __u32 io_int_word; /* 0x00c0 */ + __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */ + __u32 stfl_fac_list; /* 0x00c8 */ + __u8 pad_0x00cc[0x00e8-0x00cc]; /* 0x00cc */ + __u32 mcck_interruption_code[2]; /* 0x00e8 */ + __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */ + __u32 external_damage_code; /* 0x00f4 */ + __u64 failing_storage_address; /* 0x00f8 */ + __u8 pad_0x0100[0x0110-0x0100]; /* 0x0100 */ + __u64 breaking_event_addr; /* 0x0110 */ + __u8 pad_0x0118[0x0120-0x0118]; /* 0x0118 */ + psw_t restart_old_psw; /* 0x0120 */ + psw_t external_old_psw; /* 0x0130 */ + psw_t svc_old_psw; /* 0x0140 */ + psw_t program_old_psw; /* 0x0150 */ + psw_t mcck_old_psw; /* 0x0160 */ + psw_t io_old_psw; /* 0x0170 */ + __u8 pad_0x0180[0x01a0-0x0180]; /* 0x0180 */ + psw_t restart_psw; /* 0x01a0 */ + psw_t external_new_psw; /* 0x01b0 */ + psw_t svc_new_psw; /* 0x01c0 */ + psw_t program_new_psw; /* 0x01d0 */ + psw_t mcck_new_psw; /* 0x01e0 */ + psw_t io_new_psw; /* 0x01f0 */ + + /* Save areas. */ + __u64 save_area_sync[8]; /* 0x0200 */ + __u64 save_area_async[8]; /* 0x0240 */ + __u64 save_area_restart[1]; /* 0x0280 */ + __u8 pad_0x0288[0x0290-0x0288]; /* 0x0288 */ + + /* Return psws. */ + psw_t return_psw; /* 0x0290 */ + psw_t return_mcck_psw; /* 0x02a0 */ + + /* CPU accounting and timing values. */ + __u64 sync_enter_timer; /* 0x02b0 */ + __u64 async_enter_timer; /* 0x02b8 */ + __u64 mcck_enter_timer; /* 0x02c0 */ + __u64 exit_timer; /* 0x02c8 */ + __u64 user_timer; /* 0x02d0 */ + __u64 system_timer; /* 0x02d8 */ + __u64 steal_timer; /* 0x02e0 */ + __u64 last_update_timer; /* 0x02e8 */ + __u64 last_update_clock; /* 0x02f0 */ + __u64 int_clock; /* 0x02f8 */ + __u64 mcck_clock; /* 0x0300 */ + __u64 clock_comparator; /* 0x0308 */ + + /* Current process. 
*/ + __u64 current_task; /* 0x0310 */ + __u64 thread_info; /* 0x0318 */ + __u64 kernel_stack; /* 0x0320 */ + + /* Interrupt, panic and restart stack. */ + __u64 async_stack; /* 0x0328 */ + __u64 panic_stack; /* 0x0330 */ + __u64 restart_stack; /* 0x0338 */ + + /* Restart function and parameter. */ + __u64 restart_fn; /* 0x0340 */ + __u64 restart_data; /* 0x0348 */ + __u64 restart_source; /* 0x0350 */ + + /* Address space pointer. */ + __u64 kernel_asce; /* 0x0358 */ + __u64 user_asce; /* 0x0360 */ + __u64 current_pid; /* 0x0368 */ + + /* SMP info area */ + __u32 cpu_nr; /* 0x0370 */ + __u32 softirq_pending; /* 0x0374 */ + __u64 percpu_offset; /* 0x0378 */ + __u64 vdso_per_cpu_data; /* 0x0380 */ + __u64 machine_flags; /* 0x0388 */ + __u64 ftrace_func; /* 0x0390 */ + __u64 gmap; /* 0x0398 */ + __u8 pad_0x03a0[0x0400-0x03a0]; /* 0x03a0 */ + + /* Interrupt response block. */ + __u8 irb[64]; /* 0x0400 */ + + /* Per cpu primary space access list */ + __u32 paste[16]; /* 0x0440 */ + + __u8 pad_0x0480[0x0e00-0x0480]; /* 0x0480 */ + + /* + * 0xe00 contains the address of the IPL Parameter Information + * block. Dump tools need IPIB for IPL after dump. + * Note: do not change the position of any fields in 0x0e00-0x0f00 + */ + __u64 ipib; /* 0x0e00 */ + __u32 ipib_checksum; /* 0x0e08 */ + /* + * Because the vmcore_info pointer is not 8 byte aligned it should + * never be accessed directly. For accessing the pointer, first + * copy it to a local pointer variable. 
+ */ + __u8 vmcore_info[8]; /* 0x0e0c */ + __u8 pad_0x0e14[0x0e18-0x0e14]; /* 0x0e14 */ + __u64 os_info; /* 0x0e18 */ + __u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */ + + /* Extended facility list */ + __u64 stfle_fac_list[32]; /* 0x0f00 */ + __u8 pad_0x1000[0x11b8-0x1000]; /* 0x1000 */ + + /* 64 bit extparam used for pfault/diag 250: defined by architecture */ + __u64 ext_params2; /* 0x11B8 */ + __u8 pad_0x11c0[0x1200-0x11C0]; /* 0x11C0 */ + + /* CPU register save area: defined by architecture */ + __u64 floating_pt_save_area[16]; /* 0x1200 */ + __u64 gpregs_save_area[16]; /* 0x1280 */ + psw_t psw_save_area; /* 0x1300 */ + __u8 pad_0x1310[0x1318-0x1310]; /* 0x1310 */ + __u32 prefixreg_save_area; /* 0x1318 */ + __u32 fpt_creg_save_area; /* 0x131c */ + __u8 pad_0x1320[0x1324-0x1320]; /* 0x1320 */ + __u32 tod_progreg_save_area; /* 0x1324 */ + __u32 cpu_timer_save_area[2]; /* 0x1328 */ + __u32 clock_comp_save_area[2]; /* 0x1330 */ + __u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */ + __u32 access_regs_save_area[16]; /* 0x1340 */ + __u64 cregs_save_area[16]; /* 0x1380 */ + + /* align to the top of the prefix area */ + __u8 pad_0x1400[0x2000-0x1400]; /* 0x1400 */ +} __packed; + +#endif /* CONFIG_32BIT */ + +#define S390_lowcore (*((struct _lowcore *) 0)) + +extern struct _lowcore *lowcore_ptr[]; + +static inline void set_prefix(__u32 address) +{ + asm volatile("spx %0" : : "m" (address) : "memory"); +} + +static inline __u32 store_prefix(void) +{ + __u32 address; + + asm volatile("stpx %0" : "=m" (address)); + return address; +} + +#endif /* _ASM_S390_LOWCORE_H */ diff --git a/arch/s390/include/asm/mathemu.h b/arch/s390/include/asm/mathemu.h new file mode 100644 index 00000000..e8dd1ba8 --- /dev/null +++ b/arch/s390/include/asm/mathemu.h @@ -0,0 +1,29 @@ +/* + * arch/s390/kernel/mathemu.h + * IEEE floating point emulation. 
+ * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#ifndef __MATHEMU__ +#define __MATHEMU__ + +extern int math_emu_b3(__u8 *, struct pt_regs *); +extern int math_emu_ed(__u8 *, struct pt_regs *); +extern int math_emu_ldr(__u8 *); +extern int math_emu_ler(__u8 *); +extern int math_emu_std(__u8 *, struct pt_regs *); +extern int math_emu_ld(__u8 *, struct pt_regs *); +extern int math_emu_ste(__u8 *, struct pt_regs *); +extern int math_emu_le(__u8 *, struct pt_regs *); +extern int math_emu_lfpc(__u8 *, struct pt_regs *); +extern int math_emu_stfpc(__u8 *, struct pt_regs *); +extern int math_emu_srnm(__u8 *, struct pt_regs *); + +#endif /* __MATHEMU__ */ + + + + diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h new file mode 100644 index 00000000..d49760e6 --- /dev/null +++ b/arch/s390/include/asm/mman.h @@ -0,0 +1,21 @@ +/* + * include/asm-s390/mman.h + * + * S390 version + * + * Derived from "include/asm-i386/mman.h" + */ + +#ifndef __S390_MMAN_H__ +#define __S390_MMAN_H__ + +#include <asm-generic/mman.h> + +#if defined(__KERNEL__) +#if !defined(__ASSEMBLY__) && defined(CONFIG_64BIT) +int s390_mmap_check(unsigned long addr, unsigned long len); +#define arch_mmap_check(addr,len,flags) s390_mmap_check(addr,len) +#endif +#endif + +#endif /* __S390_MMAN_H__ */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h new file mode 100644 index 00000000..63401787 --- /dev/null +++ b/arch/s390/include/asm/mmu.h @@ -0,0 +1,40 @@ +#ifndef __MMU_H +#define __MMU_H + +#include <linux/errno.h> + +typedef struct { + atomic_t attach_count; + unsigned int flush_mm; + spinlock_t list_lock; + struct list_head pgtable_list; + struct list_head gmap_list; + unsigned long asce_bits; + unsigned long asce_limit; + unsigned long vdso_base; + /* Cloned contexts will be created with extended page tables. 
*/ + unsigned int alloc_pgste:1; + /* The mmu context has extended page tables. */ + unsigned int has_pgste:1; +} mm_context_t; + +#define INIT_MM_CONTEXT(name) \ + .context.list_lock = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \ + .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ + .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list), + +static inline int tprot(unsigned long addr) +{ + int rc = -EFAULT; + + asm volatile( + " tprot 0(%1),0\n" + "0: ipm %0\n" + " srl %0,28\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) : "a" (addr) : "cc"); + return rc; +} + +#endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h new file mode 100644 index 00000000..5d09e405 --- /dev/null +++ b/arch/s390/include/asm/mmu_context.h @@ -0,0 +1,96 @@ +/* + * include/asm-s390/mmu_context.h + * + * S390 version + * + * Derived from "include/asm-i386/mmu_context.h" + */ + +#ifndef __S390_MMU_CONTEXT_H +#define __S390_MMU_CONTEXT_H + +#include <asm/pgalloc.h> +#include <asm/uaccess.h> +#include <asm/tlbflush.h> +#include <asm/ctl_reg.h> +#include <asm-generic/mm_hooks.h> + +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ + atomic_set(&mm->context.attach_count, 0); + mm->context.flush_mm = 0; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; +#ifdef CONFIG_64BIT + mm->context.asce_bits |= _ASCE_TYPE_REGION3; +#endif + if (current->mm && current->mm->context.alloc_pgste) { + /* + * alloc_pgste indicates, that any NEW context will be created + * with extended page tables. The old context is unchanged. The + * page table allocation and the page table operations will + * look at has_pgste to distinguish normal and extended page + * tables. The only way to create extended page tables is to + * set alloc_pgste and then create a new context (e.g. dup_mm). 
+ * The page table allocation is called after init_new_context + * and if has_pgste is set, it will create extended page + * tables. + */ + mm->context.has_pgste = 1; + mm->context.alloc_pgste = 1; + } else { + mm->context.has_pgste = 0; + mm->context.alloc_pgste = 0; + } + mm->context.asce_limit = STACK_TOP_MAX; + crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); + return 0; +} + +#define destroy_context(mm) do { } while (0) + +#ifndef __s390x__ +#define LCTL_OPCODE "lctl" +#else +#define LCTL_OPCODE "lctlg" +#endif + +static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) +{ + pgd_t *pgd = mm->pgd; + + S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd); + if (user_mode != HOME_SPACE_MODE) { + /* Load primary space page table origin. */ + asm volatile(LCTL_OPCODE" 1,1,%0\n" + : : "m" (S390_lowcore.user_asce) ); + } else + /* Load home space page table origin. */ + asm volatile(LCTL_OPCODE" 13,13,%0" + : : "m" (S390_lowcore.user_asce) ); + set_fs(current->thread.mm_segment); +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + update_mm(next, tsk); + atomic_dec(&prev->context.attach_count); + WARN_ON(atomic_read(&prev->context.attach_count) < 0); + atomic_inc(&next->context.attach_count); + /* Check for TLBs not flushed yet */ + if (next->context.flush_mm) + __tlb_flush_mm(next); +} + +#define enter_lazy_tlb(mm,tsk) do { } while (0) +#define deactivate_mm(tsk,mm) do { } while (0) + +static inline void activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ + switch_mm(prev, next, current); +} + +#endif /* __S390_MMU_CONTEXT_H */ diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h new file mode 100644 index 00000000..1cc1c5af --- /dev/null +++ b/arch/s390/include/asm/module.h @@ -0,0 +1,46 @@ +#ifndef _ASM_S390_MODULE_H +#define _ASM_S390_MODULE_H +/* + * This file contains 
the s390 architecture specific module code. + */ + +struct mod_arch_syminfo +{ + unsigned long got_offset; + unsigned long plt_offset; + int got_initialized; + int plt_initialized; +}; + +struct mod_arch_specific +{ + /* Starting offset of got in the module core memory. */ + unsigned long got_offset; + /* Starting offset of plt in the module core memory. */ + unsigned long plt_offset; + /* Size of the got. */ + unsigned long got_size; + /* Size of the plt. */ + unsigned long plt_size; + /* Number of symbols in syminfo. */ + int nsyms; + /* Additional symbol information (got and plt offsets). */ + struct mod_arch_syminfo *syminfo; +}; + +#ifdef __s390x__ +#define ElfW(x) Elf64_ ## x +#define ELFW(x) ELF64_ ## x +#else +#define ElfW(x) Elf32_ ## x +#define ELFW(x) ELF32_ ## x +#endif + +#define Elf_Addr ElfW(Addr) +#define Elf_Rela ElfW(Rela) +#define Elf_Shdr ElfW(Shdr) +#define Elf_Sym ElfW(Sym) +#define Elf_Ehdr ElfW(Ehdr) +#define ELF_R_SYM ELFW(R_SYM) +#define ELF_R_TYPE ELFW(R_TYPE) +#endif /* _ASM_S390_MODULE_H */ diff --git a/arch/s390/include/asm/monwriter.h b/arch/s390/include/asm/monwriter.h new file mode 100644 index 00000000..f0cbf96c --- /dev/null +++ b/arch/s390/include/asm/monwriter.h @@ -0,0 +1,33 @@ +/* + * include/asm-s390/monwriter.h + * + * Copyright (C) IBM Corp. 
2006 + * Character device driver for writing z/VM APPLDATA monitor records + * Version 1.0 + * Author(s): Melissa Howland <melissah@us.ibm.com> + * + */ + +#ifndef _ASM_390_MONWRITER_H +#define _ASM_390_MONWRITER_H + +/* mon_function values */ +#define MONWRITE_START_INTERVAL 0x00 /* start interval recording */ +#define MONWRITE_STOP_INTERVAL 0x01 /* stop interval or config recording */ +#define MONWRITE_GEN_EVENT 0x02 /* generate event record */ +#define MONWRITE_START_CONFIG 0x03 /* start configuration recording */ + +/* the header the app uses in its write() data */ +struct monwrite_hdr { + unsigned char mon_function; + unsigned short applid; + unsigned char record_num; + unsigned short version; + unsigned short release; + unsigned short mod_level; + unsigned short datalen; + unsigned char hdrlen; + +} __attribute__((packed)); + +#endif /* _ASM_390_MONWRITER_H */ diff --git a/arch/s390/include/asm/msgbuf.h b/arch/s390/include/asm/msgbuf.h new file mode 100644 index 00000000..1bbdee92 --- /dev/null +++ b/arch/s390/include/asm/msgbuf.h @@ -0,0 +1,37 @@ +#ifndef _S390_MSGBUF_H +#define _S390_MSGBUF_H + +/* + * The msqid64_ds structure for S/390 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ +#ifndef __s390x__ + unsigned long __unused1; +#endif /* ! __s390x__ */ + __kernel_time_t msg_rtime; /* last msgrcv time */ +#ifndef __s390x__ + unsigned long __unused2; +#endif /* ! __s390x__ */ + __kernel_time_t msg_ctime; /* last change time */ +#ifndef __s390x__ + unsigned long __unused3; +#endif /* ! 
__s390x__ */ + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused4; + unsigned long __unused5; +}; + +#endif /* _S390_MSGBUF_H */ diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h new file mode 100644 index 00000000..688271f5 --- /dev/null +++ b/arch/s390/include/asm/mutex.h @@ -0,0 +1,11 @@ +/* + * Pull in the generic implementation for the mutex fastpath. + * + * TODO: implement optimized primitives instead, or leave the generic + * implementation in place, or pick the atomic_xchg() based generic + * implementation. (see asm-generic/mutex-xchg.h for details) + */ + +#include <asm-generic/mutex-dec.h> + +#define arch_mutex_cpu_relax() barrier() diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h new file mode 100644 index 00000000..f4b60441 --- /dev/null +++ b/arch/s390/include/asm/nmi.h @@ -0,0 +1,66 @@ +/* + * Machine check handler definitions + * + * Copyright IBM Corp. 
2000,2009 + * Author(s): Ingo Adlung <adlung@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Cornelia Huck <cornelia.huck@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ + +#ifndef _ASM_S390_NMI_H +#define _ASM_S390_NMI_H + +#include <linux/types.h> + +struct mci { + __u32 sd : 1; /* 00 system damage */ + __u32 pd : 1; /* 01 instruction-processing damage */ + __u32 sr : 1; /* 02 system recovery */ + __u32 : 1; /* 03 */ + __u32 cd : 1; /* 04 timing-facility damage */ + __u32 ed : 1; /* 05 external damage */ + __u32 : 1; /* 06 */ + __u32 dg : 1; /* 07 degradation */ + __u32 w : 1; /* 08 warning pending */ + __u32 cp : 1; /* 09 channel-report pending */ + __u32 sp : 1; /* 10 service-processor damage */ + __u32 ck : 1; /* 11 channel-subsystem damage */ + __u32 : 2; /* 12-13 */ + __u32 b : 1; /* 14 backed up */ + __u32 : 1; /* 15 */ + __u32 se : 1; /* 16 storage error uncorrected */ + __u32 sc : 1; /* 17 storage error corrected */ + __u32 ke : 1; /* 18 storage-key error uncorrected */ + __u32 ds : 1; /* 19 storage degradation */ + __u32 wp : 1; /* 20 psw mwp validity */ + __u32 ms : 1; /* 21 psw mask and key validity */ + __u32 pm : 1; /* 22 psw program mask and cc validity */ + __u32 ia : 1; /* 23 psw instruction address validity */ + __u32 fa : 1; /* 24 failing storage address validity */ + __u32 : 1; /* 25 */ + __u32 ec : 1; /* 26 external damage code validity */ + __u32 fp : 1; /* 27 floating point register validity */ + __u32 gr : 1; /* 28 general register validity */ + __u32 cr : 1; /* 29 control register validity */ + __u32 : 1; /* 30 */ + __u32 st : 1; /* 31 storage logical validity */ + __u32 ie : 1; /* 32 indirect storage error */ + __u32 ar : 1; /* 33 access register validity */ + __u32 da : 1; /* 34 delayed access exception */ + __u32 : 7; /* 35-41 */ + __u32 pr : 1; /* 42 tod programmable register validity */ + __u32 fc : 1; /* 43 fp control register validity */ + __u32 ap : 1; /* 44 ancillary report */ + __u32 : 1; /* 45 
*/ + __u32 ct : 1; /* 46 cpu timer validity */ + __u32 cc : 1; /* 47 clock comparator validity */ + __u32 : 16; /* 48-63 */ +}; + +struct pt_regs; + +extern void s390_handle_mcck(void); +extern void s390_do_machine_check(struct pt_regs *regs); + +#endif /* _ASM_S390_NMI_H */ diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h new file mode 100644 index 00000000..d07518af --- /dev/null +++ b/arch/s390/include/asm/os_info.h @@ -0,0 +1,50 @@ +/* + * OS info memory interface + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ +#ifndef _ASM_S390_OS_INFO_H +#define _ASM_S390_OS_INFO_H + +#define OS_INFO_VERSION_MAJOR 1 +#define OS_INFO_VERSION_MINOR 1 +#define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */ + +#define OS_INFO_VMCOREINFO 0 +#define OS_INFO_REIPL_BLOCK 1 +#define OS_INFO_INIT_FN 2 + +struct os_info_entry { + u64 addr; + u64 size; + u32 csum; +} __packed; + +struct os_info { + u64 magic; + u32 csum; + u16 version_major; + u16 version_minor; + u64 crashkernel_addr; + u64 crashkernel_size; + struct os_info_entry entry[3]; + u8 reserved[4004]; +} __packed; + +void os_info_init(void); +void os_info_entry_add(int nr, void *ptr, u64 len); +void os_info_crashkernel_add(unsigned long base, unsigned long size); +u32 os_info_csum(struct os_info *os_info); + +#ifdef CONFIG_CRASH_DUMP +void *os_info_old_entry(int nr, unsigned long *size); +int copy_from_oldmem(void *dest, void *src, size_t count); +#else +static inline void *os_info_old_entry(int nr, unsigned long *size) +{ + return NULL; +} +#endif + +#endif /* _ASM_S390_OS_INFO_H */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h new file mode 100644 index 00000000..f7ec548c --- /dev/null +++ b/arch/s390/include/asm/page.h @@ -0,0 +1,208 @@ +/* + * include/asm-s390/page.h + * + * S390 version + * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Hartmut Penner (hp@de.ibm.com) + 
*/ + +#ifndef _S390_PAGE_H +#define _S390_PAGE_H + +#include <linux/const.h> +#include <asm/types.h> + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_DEFAULT_ACC 0 +#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) + +#define HPAGE_SHIFT 20 +#define HPAGE_SIZE (1UL << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +#define ARCH_HAS_SETCLEAR_HUGE_PTE +#define ARCH_HAS_HUGE_PTE_TYPE +#define ARCH_HAS_PREPARE_HUGEPAGE +#define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH + +#include <asm/setup.h> +#ifndef __ASSEMBLY__ + +static inline void clear_page(void *page) +{ + if (MACHINE_HAS_PFMF) { + asm volatile( + " .insn rre,0xb9af0000,%0,%1" + : : "d" (0x10000), "a" (page) : "memory", "cc"); + } else { + register unsigned long reg1 asm ("1") = 0; + register void *reg2 asm ("2") = page; + register unsigned long reg3 asm ("3") = 4096; + asm volatile( + " mvcl 2,0" + : "+d" (reg2), "+d" (reg3) : "d" (reg1) + : "memory", "cc"); + } +} + +static inline void copy_page(void *to, void *from) +{ + if (MACHINE_HAS_MVPG) { + register unsigned long reg0 asm ("0") = 0; + asm volatile( + " mvpg %0,%1" + : : "a" (to), "a" (from), "d" (reg0) + : "memory", "cc"); + } else + asm volatile( + " mvc 0(256,%0),0(%1)\n" + " mvc 256(256,%0),256(%1)\n" + " mvc 512(256,%0),512(%1)\n" + " mvc 768(256,%0),768(%1)\n" + " mvc 1024(256,%0),1024(%1)\n" + " mvc 1280(256,%0),1280(%1)\n" + " mvc 1536(256,%0),1536(%1)\n" + " mvc 1792(256,%0),1792(%1)\n" + " mvc 2048(256,%0),2048(%1)\n" + " mvc 2304(256,%0),2304(%1)\n" + " mvc 2560(256,%0),2560(%1)\n" + " mvc 2816(256,%0),2816(%1)\n" + " mvc 3072(256,%0),3072(%1)\n" + " mvc 3328(256,%0),3328(%1)\n" + " mvc 3584(256,%0),3584(%1)\n" + " mvc 3840(256,%0),3840(%1)\n" + : : "a" (to), "a" (from) : "memory"); +} + +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, 
vaddr, pg) copy_page(to, from) + +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + +/* + * These are used to make use of C type-checking.. + */ + +typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct { unsigned long pgste; } pgste_t; +typedef struct { unsigned long pte; } pte_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pud; } pud_t; +typedef struct { unsigned long pgd; } pgd_t; +typedef pte_t *pgtable_t; + +#define pgprot_val(x) ((x).pgprot) +#define pgste_val(x) ((x).pgste) +#define pte_val(x) ((x).pte) +#define pmd_val(x) ((x).pmd) +#define pud_val(x) ((x).pud) +#define pgd_val(x) ((x).pgd) + +#define __pgste(x) ((pgste_t) { (x) } ) +#define __pte(x) ((pte_t) { (x) } ) +#define __pmd(x) ((pmd_t) { (x) } ) +#define __pud(x) ((pud_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +static inline void page_set_storage_key(unsigned long addr, + unsigned char skey, int mapped) +{ + if (!mapped) + asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" + : : "d" (skey), "a" (addr)); + else + asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); +} + +static inline unsigned char page_get_storage_key(unsigned long addr) +{ + unsigned char skey; + + asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr)); + return skey; +} + +static inline int page_reset_referenced(unsigned long addr) +{ + unsigned int ipm; + + asm volatile( + " rrbe 0,%1\n" + " ipm %0\n" + : "=d" (ipm) : "a" (addr) : "cc"); + return !!(ipm & 0x20000000); +} + +/* Bits in the storage key */ +#define _PAGE_CHANGED 0x02 /* HW changed bit */ +#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ +#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ +#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ + +/* + * Test and clear dirty bit in storage key. 
+ * We can't clear the changed bit atomically. This is a potential + * race against modification of the referenced bit. This function + * should therefore only be called if it is not mapped in any + * address space. + */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +static inline int page_test_and_clear_dirty(unsigned long pfn, int mapped) +{ + unsigned char skey; + + skey = page_get_storage_key(pfn << PAGE_SHIFT); + if (!(skey & _PAGE_CHANGED)) + return 0; + page_set_storage_key(pfn << PAGE_SHIFT, skey & ~_PAGE_CHANGED, mapped); + return 1; +} + +/* + * Test and clear referenced bit in storage key. + */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG +static inline int page_test_and_clear_young(unsigned long pfn) +{ + return page_reset_referenced(pfn << PAGE_SHIFT); +} + +struct page; +void arch_free_page(struct page *page, int order); +void arch_alloc_page(struct page *page, int order); +void arch_set_page_states(int make_stable); + +static inline int devmem_is_allowed(unsigned long pfn) +{ + return 0; +} + +#define HAVE_ARCH_FREE_PAGE +#define HAVE_ARCH_ALLOC_PAGE + +#endif /* !__ASSEMBLY__ */ + +#define __PAGE_OFFSET 0x0UL +#define PAGE_OFFSET 0x0UL +#define __pa(x) (unsigned long)(x) +#define __va(x) (void *)(unsigned long)(x) +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#include <asm-generic/memory_model.h> +#include <asm-generic/getorder.h> + +#define __HAVE_ARCH_GATE_AREA 1 + +#endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/param.h b/arch/s390/include/asm/param.h new file mode 100644 index 00000000..c616821b --- /dev/null +++ b/arch/s390/include/asm/param.h @@ -0,0 +1,6 @@ +#ifndef _ASMS390_PARAM_H +#define _ASMS390_PARAM_H + +#include <asm-generic/param.h> + +#endif /* _ASMS390_PARAM_H */ 
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h new file mode 100644 index 00000000..42a145c9 --- /dev/null +++ b/arch/s390/include/asm/pci.h @@ -0,0 +1,10 @@ +#ifndef __ASM_S390_PCI_H +#define __ASM_S390_PCI_H + +/* S/390 systems don't have a PCI bus. This file is just here because some stupid .c code + * includes it even if CONFIG_PCI is not set. + */ +#define PCI_DMA_BUS_IS_PHYS (0) + +#endif /* __ASM_S390_PCI_H */ + diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h new file mode 100644 index 00000000..0fbd1899 --- /dev/null +++ b/arch/s390/include/asm/percpu.h @@ -0,0 +1,89 @@ +#ifndef __ARCH_S390_PERCPU__ +#define __ARCH_S390_PERCPU__ + +#include <linux/preempt.h> +#include <asm/cmpxchg.h> + +/* + * s390 uses its own implementation for per cpu data, the offset of + * the cpu local data area is cached in the cpu's lowcore memory. + */ +#define __my_cpu_offset S390_lowcore.percpu_offset + +/* + * For 64 bit module code, the module may be more than 4G above the + * per cpu area, use weak definitions to force the compiler to + * generate external references. 
+ */ +#if defined(CONFIG_SMP) && defined(__s390x__) && defined(MODULE) +#define ARCH_NEEDS_WEAK_PER_CPU +#endif + +#define arch_this_cpu_to_op(pcp, val, op) \ +do { \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ old__, new__, prev__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + prev__ = *ptr__; \ + do { \ + old__ = prev__; \ + new__ = old__ op (val); \ + switch (sizeof(*ptr__)) { \ + case 8: \ + prev__ = cmpxchg64(ptr__, old__, new__); \ + break; \ + default: \ + prev__ = cmpxchg(ptr__, old__, new__); \ + } \ + } while (prev__ != old__); \ + preempt_enable(); \ +} while (0) + +#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op(pcp, val, +) + +#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op(pcp, val, &) +#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op(pcp, val, &) +#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, &) +#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, &) + +#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op(pcp, val, |) +#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op(pcp, val, |) +#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, |) +#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, |) + +#define this_cpu_xor_1(pcp, val) arch_this_cpu_to_op(pcp, val, ^) +#define this_cpu_xor_2(pcp, val) arch_this_cpu_to_op(pcp, val, ^) +#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op(pcp, val, ^) +#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op(pcp, val, ^) + +#define arch_this_cpu_cmpxchg(pcp, oval, nval) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ ret__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + switch (sizeof(*ptr__)) { \ + case 8: \ + ret__ = cmpxchg64(ptr__, oval, nval); \ + break; \ + 
default: \ + ret__ = cmpxchg(ptr__, oval, nval); \ + } \ + preempt_enable(); \ + ret__; \ +}) + +#define this_cpu_cmpxchg_1(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_2(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) + +#include <asm-generic/percpu.h> + +#endif /* __ARCH_S390_PERCPU__ */ diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h new file mode 100644 index 00000000..7941968e --- /dev/null +++ b/arch/s390/include/asm/perf_event.h @@ -0,0 +1,16 @@ +/* + * Performance event support - s390 specific definitions. + * + * Copyright IBM Corp. 2009, 2012 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + * Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ + +#include <asm/cpu_mf.h> + +/* CPU-measurement counter facility */ +#define PERF_CPUM_CF_MAX_CTR 160 + +/* Per-CPU flags for PMU states */ +#define PMU_F_RESERVED 0x1000 +#define PMU_F_ENABLED 0x2000 diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h new file mode 100644 index 00000000..78e30419 --- /dev/null +++ b/arch/s390/include/asm/pgalloc.h @@ -0,0 +1,142 @@ +/* + * include/asm-s390/pgalloc.h + * + * S390 version + * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Hartmut Penner (hp@de.ibm.com) + * Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/pgalloc.h" + * Copyright (C) 1994 Linus Torvalds + */ + +#ifndef _S390_PGALLOC_H +#define _S390_PGALLOC_H + +#include <linux/threads.h> +#include <linux/gfp.h> +#include <linux/mm.h> + +unsigned long *crst_table_alloc(struct mm_struct *); +void crst_table_free(struct mm_struct *, unsigned long *); + +unsigned long *page_table_alloc(struct mm_struct *, unsigned long); +void page_table_free(struct mm_struct 
*, unsigned long *); +void page_table_free_rcu(struct mmu_gather *, unsigned long *); + +static inline void clear_table(unsigned long *s, unsigned long val, size_t n) +{ + typedef struct { char _[n]; } addrtype; + + *s = val; + n = (n / 256) - 1; + asm volatile( +#ifdef CONFIG_64BIT + " mvc 8(248,%0),0(%0)\n" +#else + " mvc 4(252,%0),0(%0)\n" +#endif + "0: mvc 256(256,%0),0(%0)\n" + " la %0,256(%0)\n" + " brct %1,0b\n" + : "+a" (s), "+d" (n), "=m" (*(addrtype *) s) + : "m" (*(addrtype *) s)); +} + +static inline void crst_table_init(unsigned long *crst, unsigned long entry) +{ + clear_table(crst, entry, sizeof(unsigned long)*2048); +} + +#ifndef __s390x__ + +static inline unsigned long pgd_entry_type(struct mm_struct *mm) +{ + return _SEGMENT_ENTRY_EMPTY; +} + +#define pud_alloc_one(mm,address) ({ BUG(); ((pud_t *)2); }) +#define pud_free(mm, x) do { } while (0) + +#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) +#define pmd_free(mm, x) do { } while (0) + +#define pgd_populate(mm, pgd, pud) BUG() +#define pud_populate(mm, pud, pmd) BUG() + +#else /* __s390x__ */ + +static inline unsigned long pgd_entry_type(struct mm_struct *mm) +{ + if (mm->context.asce_limit <= (1UL << 31)) + return _SEGMENT_ENTRY_EMPTY; + if (mm->context.asce_limit <= (1UL << 42)) + return _REGION3_ENTRY_EMPTY; + return _REGION2_ENTRY_EMPTY; +} + +int crst_table_upgrade(struct mm_struct *, unsigned long limit); +void crst_table_downgrade(struct mm_struct *, unsigned long limit); + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) +{ + unsigned long *table = crst_table_alloc(mm); + if (table) + crst_table_init(table, _REGION3_ENTRY_EMPTY); + return (pud_t *) table; +} +#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud) + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) +{ + unsigned long *table = crst_table_alloc(mm); + if (table) + crst_table_init(table, _SEGMENT_ENTRY_EMPTY); + return (pmd_t *) table; 
+} +#define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd) + +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); +} + +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); +} + +#endif /* __s390x__ */ + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + spin_lock_init(&mm->context.list_lock); + INIT_LIST_HEAD(&mm->context.pgtable_list); + INIT_LIST_HEAD(&mm->context.gmap_list); + return (pgd_t *) crst_table_alloc(mm); +} +#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) + +static inline void pmd_populate(struct mm_struct *mm, + pmd_t *pmd, pgtable_t pte) +{ + pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); +} + +#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) + +#define pmd_pgtable(pmd) \ + (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE) + +/* + * page table entry allocation/free routines. 
+ */ +#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) +#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) + +#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) +#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) + +extern void rcu_table_freelist_finish(void); + +#endif /* _S390_PGALLOC_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h new file mode 100644 index 00000000..011358c1 --- /dev/null +++ b/arch/s390/include/asm/pgtable.h @@ -0,0 +1,1248 @@ +/* + * include/asm-s390/pgtable.h + * + * S390 version + * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Hartmut Penner (hp@de.ibm.com) + * Ulrich Weigand (weigand@de.ibm.com) + * Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/pgtable.h" + */ + +#ifndef _ASM_S390_PGTABLE_H +#define _ASM_S390_PGTABLE_H + +/* + * The Linux memory management assumes a three-level page table setup. For + * s390 31 bit we "fold" the mid level into the top-level page table, so + * that we physically have the same two-level page table as the s390 mmu + * expects in 31 bit mode. For s390 64 bit we use three of the five levels + * the hardware provides (region first and region second tables are not + * used). + * + * The "pgd_xxx()" functions are trivial for a folded two-level + * setup: the pgd is never bad, and a pmd always exists (as it's folded + * into the pgd entry) + * + * This file contains the functions and defines necessary to modify and use + * the S390 page table tree. 
+ */ +#ifndef __ASSEMBLY__ +#include <linux/sched.h> +#include <linux/mm_types.h> +#include <asm/bug.h> +#include <asm/page.h> + +extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); +extern void paging_init(void); +extern void vmem_map_init(void); +extern void fault_init(void); + +/* + * The S390 doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +#define update_mmu_cache(vma, address, ptep) do { } while (0) + +/* + * ZERO_PAGE is a global shared page that is always zero; used + * for zero-mapped memory areas etc.. + */ + +extern unsigned long empty_zero_page; +extern unsigned long zero_page_mask; + +#define ZERO_PAGE(vaddr) \ + (virt_to_page((void *)(empty_zero_page + \ + (((unsigned long)(vaddr)) &zero_page_mask)))) + +#define is_zero_pfn is_zero_pfn +static inline int is_zero_pfn(unsigned long pfn) +{ + extern unsigned long zero_pfn; + unsigned long offset_from_zero_pfn = pfn - zero_pfn; + return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT); +} + +#define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr)) + +#endif /* !__ASSEMBLY__ */ + +/* + * PMD_SHIFT determines the size of the area a second-level page + * table can map + * PGDIR_SHIFT determines what a third-level page table entry can map + */ +#ifndef __s390x__ +# define PMD_SHIFT 20 +# define PUD_SHIFT 20 +# define PGDIR_SHIFT 20 +#else /* __s390x__ */ +# define PMD_SHIFT 20 +# define PUD_SHIFT 31 +# define PGDIR_SHIFT 42 +#endif /* __s390x__ */ + +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * entries per page directory level: the S390 is two-level, so + * we don't really have any PMD directory physically. 
+ * for S390 segment-table entries are combined to one PGD + * that leads to 1024 pte per pgd + */ +#define PTRS_PER_PTE 256 +#ifndef __s390x__ +#define PTRS_PER_PMD 1 +#define PTRS_PER_PUD 1 +#else /* __s390x__ */ +#define PTRS_PER_PMD 2048 +#define PTRS_PER_PUD 2048 +#endif /* __s390x__ */ +#define PTRS_PER_PGD 2048 + +#define FIRST_USER_ADDRESS 0 + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e)) +#define pud_ERROR(e) \ + printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e)) + +#ifndef __ASSEMBLY__ +/* + * The vmalloc area will always be on the topmost area of the kernel + * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc, + * which should be enough for any sane case. + * By putting vmalloc at the top, we maximise the gap between physical + * memory and vmalloc to catch misplaced memory accesses. As a side + * effect, this also makes sure that 64 bit module code cannot be used + * as system call address. 
+ */ +extern unsigned long VMALLOC_START; +extern unsigned long VMALLOC_END; +extern struct page *vmemmap; + +#define VMEM_MAX_PHYS ((unsigned long) vmemmap) + +/* + * A 31 bit pagetable entry of S390 has following format: + * | PFRA | | OS | + * 0 0IP0 + * 00000000001111111111222222222233 + * 01234567890123456789012345678901 + * + * I Page-Invalid Bit: Page is not available for address-translation + * P Page-Protection Bit: Store access not possible for page + * + * A 31 bit segmenttable entry of S390 has following format: + * | P-table origin | |PTL + * 0 IC + * 00000000001111111111222222222233 + * 01234567890123456789012345678901 + * + * I Segment-Invalid Bit: Segment is not available for address-translation + * C Common-Segment Bit: Segment is not private (PoP 3-30) + * PTL Page-Table-Length: Page-table length (PTL+1*16 entries -> up to 256) + * + * The 31 bit segmenttable origin of S390 has following format: + * + * |S-table origin | | STL | + * X **GPS + * 00000000001111111111222222222233 + * 01234567890123456789012345678901 + * + * X Space-Switch event: + * G Segment-Invalid Bit: * + * P Private-Space Bit: Segment is not private (PoP 3-30) + * S Storage-Alteration: + * STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048) + * + * A 64 bit pagetable entry of S390 has following format: + * | PFRA |0IPC| OS | + * 0000000000111111111122222222223333333333444444444455555555556666 + * 0123456789012345678901234567890123456789012345678901234567890123 + * + * I Page-Invalid Bit: Page is not available for address-translation + * P Page-Protection Bit: Store access not possible for page + * C Change-bit override: HW is not required to set change bit + * + * A 64 bit segmenttable entry of S390 has following format: + * | P-table origin | TT + * 0000000000111111111122222222223333333333444444444455555555556666 + * 0123456789012345678901234567890123456789012345678901234567890123 + * + * I Segment-Invalid Bit: Segment is not available for 
address-translation + * C Common-Segment Bit: Segment is not private (PoP 3-30) + * P Page-Protection Bit: Store access not possible for page + * TT Type 00 + * + * A 64 bit region table entry of S390 has following format: + * | S-table origin | TF TTTL + * 0000000000111111111122222222223333333333444444444455555555556666 + * 0123456789012345678901234567890123456789012345678901234567890123 + * + * I Segment-Invalid Bit: Segment is not available for address-translation + * TT Type 01 + * TF + * TL Table length + * + * The 64 bit regiontable origin of S390 has following format: + * | region table origin | DTTL + * 0000000000111111111122222222223333333333444444444455555555556666 + * 0123456789012345678901234567890123456789012345678901234567890123 + * + * X Space-Switch event: + * G Segment-Invalid Bit: + * P Private-Space Bit: + * S Storage-Alteration: + * R Real space + * TL Table-Length: + * + * A storage key has the following format: + * | ACC |F|R|C|0| + * 0 3 4 5 6 7 + * ACC: access key + * F : fetch protection bit + * R : referenced bit + * C : changed bit + */ + +/* Hardware bits in the page table entry */ +#define _PAGE_CO 0x100 /* HW Change-bit override */ +#define _PAGE_RO 0x200 /* HW read-only bit */ +#define _PAGE_INVALID 0x400 /* HW invalid bit */ + +/* Software bits in the page table entry */ +#define _PAGE_SWT 0x001 /* SW pte type bit t */ +#define _PAGE_SWX 0x002 /* SW pte type bit x */ +#define _PAGE_SWC 0x004 /* SW pte changed bit (for KVM) */ +#define _PAGE_SWR 0x008 /* SW pte referenced bit (for KVM) */ +#define _PAGE_SPECIAL 0x010 /* SW associated with special page */ +#define __HAVE_ARCH_PTE_SPECIAL + +/* Set of bits not changed in pte_modify */ +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR) + +/* Six different types of pages. */ +#define _PAGE_TYPE_EMPTY 0x400 +#define _PAGE_TYPE_NONE 0x401 +#define _PAGE_TYPE_SWAP 0x403 +#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! 
*/ +#define _PAGE_TYPE_RO 0x200 +#define _PAGE_TYPE_RW 0x000 + +/* + * Only four types for huge pages, using the invalid bit and protection bit + * of a segment table entry. + */ +#define _HPAGE_TYPE_EMPTY 0x020 /* _SEGMENT_ENTRY_INV */ +#define _HPAGE_TYPE_NONE 0x220 +#define _HPAGE_TYPE_RO 0x200 /* _SEGMENT_ENTRY_RO */ +#define _HPAGE_TYPE_RW 0x000 + +/* + * PTE type bits are rather complicated. handle_pte_fault uses pte_present, + * pte_none and pte_file to find out the pte type WITHOUT holding the page + * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to + * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs + * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards. + * This change is done while holding the lock, but the intermediate step + * of a previously valid pte with the hw invalid bit set can be observed by + * handle_pte_fault. That makes it necessary that all valid pte types with + * the hw invalid bit set must be distinguishable from the four pte types + * empty, none, swap and file. + * + * irxt ipte irxt + * _PAGE_TYPE_EMPTY 1000 -> 1000 + * _PAGE_TYPE_NONE 1001 -> 1001 + * _PAGE_TYPE_SWAP 1011 -> 1011 + * _PAGE_TYPE_FILE 11?1 -> 11?1 + * _PAGE_TYPE_RO 0100 -> 1100 + * _PAGE_TYPE_RW 0000 -> 1000 + * + * pte_none is true for bits combinations 1000, 1010, 1100, 1110 + * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 + * pte_file is true for bits combinations 1101, 1111 + * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. 
+ */ + +#ifndef __s390x__ + +/* Bits in the segment table address-space-control-element */ +#define _ASCE_SPACE_SWITCH 0x80000000UL /* space switch event */ +#define _ASCE_ORIGIN_MASK 0x7ffff000UL /* segment table origin */ +#define _ASCE_PRIVATE_SPACE 0x100 /* private space control */ +#define _ASCE_ALT_EVENT 0x80 /* storage alteration event control */ +#define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */ + +/* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ +#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ +#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ +#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ + +#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) + +/* Page status table bits for virtualization */ +#define RCP_ACC_BITS 0xf0000000UL +#define RCP_FP_BIT 0x08000000UL +#define RCP_PCL_BIT 0x00800000UL +#define RCP_HR_BIT 0x00400000UL +#define RCP_HC_BIT 0x00200000UL +#define RCP_GR_BIT 0x00040000UL +#define RCP_GC_BIT 0x00020000UL + +/* User dirty / referenced bit for KVM's migration feature */ +#define KVM_UR_BIT 0x00008000UL +#define KVM_UC_BIT 0x00004000UL + +#else /* __s390x__ */ + +/* Bits in the segment/region table address-space-control-element */ +#define _ASCE_ORIGIN ~0xfffUL/* segment table origin */ +#define _ASCE_PRIVATE_SPACE 0x100 /* private space control */ +#define _ASCE_ALT_EVENT 0x80 /* storage alteration event control */ +#define _ASCE_SPACE_SWITCH 0x40 /* space switch event */ +#define _ASCE_REAL_SPACE 0x20 /* real space control */ +#define _ASCE_TYPE_MASK 0x0c /* asce table type mask */ +#define _ASCE_TYPE_REGION1 0x0c /* region first table type */ +#define _ASCE_TYPE_REGION2 0x08 /* region second table type */ +#define _ASCE_TYPE_REGION3 0x04 /* region third table type */ +#define _ASCE_TYPE_SEGMENT 0x00 /* segment table type */ +#define 
_ASCE_TABLE_LENGTH 0x03 /* region table length */ + +/* Bits in the region table entry */ +#define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */ +#define _REGION_ENTRY_INV 0x20 /* invalid region table entry */ +#define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */ +#define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */ +#define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */ +#define _REGION_ENTRY_TYPE_R3 0x04 /* region third table type */ +#define _REGION_ENTRY_LENGTH 0x03 /* region third length */ + +#define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH) +#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV) +#define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH) +#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV) +#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) +#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV) + +/* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ +#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ + +#define _SEGMENT_ENTRY (0) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) + +#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ +#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ + +/* Page status table bits for virtualization */ +#define RCP_ACC_BITS 0xf000000000000000UL +#define RCP_FP_BIT 0x0800000000000000UL +#define RCP_PCL_BIT 0x0080000000000000UL +#define RCP_HR_BIT 0x0040000000000000UL +#define RCP_HC_BIT 0x0020000000000000UL +#define RCP_GR_BIT 0x0004000000000000UL +#define RCP_GC_BIT 0x0002000000000000UL + +/* User dirty / referenced bit for KVM's migration feature */ +#define KVM_UR_BIT 0x0000800000000000UL +#define KVM_UC_BIT 0x0000400000000000UL + +#endif /* __s390x__ */ + +/* + * A user page table 
pointer has the space-switch-event bit, the + * private-space-control bit and the storage-alteration-event-control + * bit set. A kernel page table pointer doesn't need them. + */ +#define _ASCE_USER_BITS (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \ + _ASCE_ALT_EVENT) + +/* + * Page protection definitions. + */ +#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) +#define PAGE_RO __pgprot(_PAGE_TYPE_RO) +#define PAGE_RW __pgprot(_PAGE_TYPE_RW) + +#define PAGE_KERNEL PAGE_RW +#define PAGE_COPY PAGE_RO + +/* + * On s390 the page table entry has an invalid bit and a read-only bit. + * Read permission implies execute permission and write permission + * implies read permission. + */ + /*xwr*/ +#define __P000 PAGE_NONE +#define __P001 PAGE_RO +#define __P010 PAGE_RO +#define __P011 PAGE_RO +#define __P100 PAGE_RO +#define __P101 PAGE_RO +#define __P110 PAGE_RO +#define __P111 PAGE_RO + +#define __S000 PAGE_NONE +#define __S001 PAGE_RO +#define __S010 PAGE_RW +#define __S011 PAGE_RW +#define __S100 PAGE_RO +#define __S101 PAGE_RO +#define __S110 PAGE_RW +#define __S111 PAGE_RW + +static inline int mm_exclusive(struct mm_struct *mm) +{ + return likely(mm == current->active_mm && + atomic_read(&mm->context.attach_count) <= 1); +} + +static inline int mm_has_pgste(struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + if (unlikely(mm->context.has_pgste)) + return 1; +#endif + return 0; +} +/* + * pgd/pmd/pte query functions + */ +#ifndef __s390x__ + +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } + +static inline int pud_present(pud_t pud) { return 1; } +static inline int pud_none(pud_t pud) { return 0; } +static inline int pud_bad(pud_t pud) { return 0; } + +#else /* __s390x__ */ + +static inline int pgd_present(pgd_t pgd) +{ + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) + return 1; + return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL; +} + +static 
inline int pgd_none(pgd_t pgd) +{ + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) + return 0; + return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL; +} + +static inline int pgd_bad(pgd_t pgd) +{ + /* + * With dynamic page table levels the pgd can be a region table + * entry or a segment table entry. Check for the bits that are + * invalid for either table entry. + */ + unsigned long mask = + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; + return (pgd_val(pgd) & mask) != 0; +} + +static inline int pud_present(pud_t pud) +{ + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + return 1; + return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; +} + +static inline int pud_none(pud_t pud) +{ + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + return 0; + return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL; +} + +static inline int pud_bad(pud_t pud) +{ + /* + * With dynamic page table levels the pud can be a region table + * entry or a segment table entry. Check for the bits that are + * invalid for either table entry. 
+ */ + unsigned long mask = + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; + return (pud_val(pud) & mask) != 0; +} + +#endif /* __s390x__ */ + +static inline int pmd_present(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) != 0UL; +} + +static inline int pmd_none(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) != 0UL; +} + +static inline int pmd_bad(pmd_t pmd) +{ + unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV; + return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY; +} + +static inline int pte_none(pte_t pte) +{ + return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT); +} + +static inline int pte_present(pte_t pte) +{ + unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX; + return (pte_val(pte) & mask) == _PAGE_TYPE_NONE || + (!(pte_val(pte) & _PAGE_INVALID) && + !(pte_val(pte) & _PAGE_SWT)); +} + +static inline int pte_file(pte_t pte) +{ + unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT; + return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; +} + +static inline int pte_special(pte_t pte) +{ + return (pte_val(pte) & _PAGE_SPECIAL); +} + +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t a, pte_t b) +{ + return pte_val(a) == pte_val(b); +} + +static inline pgste_t pgste_get_lock(pte_t *ptep) +{ + unsigned long new = 0; +#ifdef CONFIG_PGSTE + unsigned long old; + + preempt_disable(); + asm( + " lg %0,%2\n" + "0: lgr %1,%0\n" + " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */ + " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */ + " csg %0,%1,%2\n" + " jl 0b\n" + : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) + : "Q" (ptep[PTRS_PER_PTE]) : "cc"); +#endif + return __pgste(new); +} + +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + asm( + " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */ + " stg %1,%0\n" + : "=Q" (ptep[PTRS_PER_PTE]) + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : 
"cc"); + preempt_enable(); +#endif +} + +static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + unsigned long address, bits; + unsigned char skey; + + if (!pte_present(*ptep)) + return pgste; + address = pte_val(*ptep) & PAGE_MASK; + skey = page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Clear page changed & referenced bit in the storage key */ + if (bits & _PAGE_CHANGED) + page_set_storage_key(address, skey ^ bits, 1); + else if (bits) + page_reset_referenced(address); + /* Transfer page changed & referenced bit to guest bits in pgste */ + pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ + /* Get host changed & referenced bits from pgste */ + bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; + /* Clear host bits in pgste. */ + pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); + pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); + /* Copy page access key and fetch protection bit to pgste */ + pgste_val(pgste) |= + (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + /* Transfer changed and referenced to kvm user bits */ + pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ + /* Transfer changed & referenced to pte sofware bits */ + pte_val(*ptep) |= bits << 1; /* _PAGE_SWR & _PAGE_SWC */ +#endif + return pgste; + +} + +static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + int young; + + if (!pte_present(*ptep)) + return pgste; + young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); + /* Transfer page referenced bit to pte software bit (host view) */ + if (young || (pgste_val(pgste) & RCP_HR_BIT)) + pte_val(*ptep) |= _PAGE_SWR; + /* Clear host referenced bit in pgste. 
*/ + pgste_val(pgste) &= ~RCP_HR_BIT; + /* Transfer page referenced bit to guest bit in pgste */ + pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */ +#endif + return pgste; + +} + +static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) +{ +#ifdef CONFIG_PGSTE + unsigned long address; + unsigned long okey, nkey; + + if (!pte_present(entry)) + return; + address = pte_val(entry) & PAGE_MASK; + okey = nkey = page_get_storage_key(address); + nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); + /* Set page access key and fetch protection bit from pgste */ + nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; + if (okey != nkey) + page_set_storage_key(address, nkey, 1); +#endif +} + +/** + * struct gmap - guest address space + * @mm: pointer to the parent mm_struct + * @table: pointer to the page directory + * @asce: address space control element for gmap page table + * @crst_list: list of all crst tables used in the guest address space + */ +struct gmap { + struct list_head list; + struct mm_struct *mm; + unsigned long *table; + unsigned long asce; + struct list_head crst_list; +}; + +/** + * struct gmap_rmap - reverse mapping for segment table entries + * @list: link to the next gmap_rmap structure in the list + * @entry: pointer to a segment table entry + */ +struct gmap_rmap { + struct list_head list; + unsigned long *entry; +}; + +/** + * struct gmap_pgtable - gmap information attached to a page table + * @vmaddr: address of the 1MB segment in the process virtual memory + * @mapper: list of segment table entries mapping a page table + */ +struct gmap_pgtable { + unsigned long vmaddr; + struct list_head mapper; +}; + +struct gmap *gmap_alloc(struct mm_struct *mm); +void gmap_free(struct gmap *gmap); +void gmap_enable(struct gmap *gmap); +void gmap_disable(struct gmap *gmap); +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long length); +int gmap_unmap_segment(struct gmap 
*gmap, unsigned long to, unsigned long len); +unsigned long __gmap_fault(unsigned long address, struct gmap *); +unsigned long gmap_fault(unsigned long address, struct gmap *); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *); + +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + pgste_t pgste; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_set_pte(ptep, pgste, entry); + *ptep = entry; + pgste_set_unlock(ptep, pgste); + } else + *ptep = entry; +} + +/* + * query functions pte_write/pte_dirty/pte_young only work if + * pte_present() is true. Undefined behaviour if not.. + */ +static inline int pte_write(pte_t pte) +{ + return (pte_val(pte) & _PAGE_RO) == 0; +} + +static inline int pte_dirty(pte_t pte) +{ +#ifdef CONFIG_PGSTE + if (pte_val(pte) & _PAGE_SWC) + return 1; +#endif + return 0; +} + +static inline int pte_young(pte_t pte) +{ +#ifdef CONFIG_PGSTE + if (pte_val(pte) & _PAGE_SWR) + return 1; +#endif + return 0; +} + +/* + * pgd/pmd/pte modification functions + */ + +static inline void pgd_clear(pgd_t *pgd) +{ +#ifdef __s390x__ + if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; +#endif +} + +static inline void pud_clear(pud_t *pud) +{ +#ifdef __s390x__ + if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pud_val(*pud) = _REGION3_ENTRY_EMPTY; +#endif +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_val(*ptep) = _PAGE_TYPE_EMPTY; +} + +/* + * The following pte modification functions only work if + * pte_present() is true. Undefined behaviour if not.. 
+ */ +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pte_val(pte) &= _PAGE_CHG_MASK; + pte_val(pte) |= pgprot_val(newprot); + return pte; +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + /* Do not clobber _PAGE_TYPE_NONE pages! */ + if (!(pte_val(pte) & _PAGE_INVALID)) + pte_val(pte) |= _PAGE_RO; + return pte; +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_RO; + return pte; +} + +static inline pte_t pte_mkclean(pte_t pte) +{ +#ifdef CONFIG_PGSTE + pte_val(pte) &= ~_PAGE_SWC; +#endif + return pte; +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + return pte; +} + +static inline pte_t pte_mkold(pte_t pte) +{ +#ifdef CONFIG_PGSTE + pte_val(pte) &= ~_PAGE_SWR; +#endif + return pte; +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + return pte; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte_val(pte) |= _PAGE_SPECIAL; + return pte; +} + +#ifdef CONFIG_HUGETLB_PAGE +static inline pte_t pte_mkhuge(pte_t pte) +{ + /* + * PROT_NONE needs to be remapped from the pte type to the ste type. + * The HW invalid bit is also different for pte and ste. The pte + * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE + * bit, so we don't have to clear it. + */ + if (pte_val(pte) & _PAGE_INVALID) { + if (pte_val(pte) & _PAGE_SWT) + pte_val(pte) |= _HPAGE_TYPE_NONE; + pte_val(pte) |= _SEGMENT_ENTRY_INV; + } + /* + * Clear SW pte bits SWT and SWX, there are no SW bits in a segment + * table entry. + */ + pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX); + /* + * Also set the change-override bit because we don't need dirty bit + * tracking for hugetlbfs pages. + */ + pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); + return pte; +} +#endif + +/* + * Get (and clear) the user dirty bit for a pte. 
+ */ +static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, + pte_t *ptep) +{ + pgste_t pgste; + int dirty = 0; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_all(ptep, pgste); + dirty = !!(pgste_val(pgste) & KVM_UC_BIT); + pgste_val(pgste) &= ~KVM_UC_BIT; + pgste_set_unlock(ptep, pgste); + return dirty; + } + return dirty; +} + +/* + * Get (and clear) the user referenced bit for a pte. + */ +static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, + pte_t *ptep) +{ + pgste_t pgste; + int young = 0; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_young(ptep, pgste); + young = !!(pgste_val(pgste) & KVM_UR_BIT); + pgste_val(pgste) &= ~KVM_UR_BIT; + pgste_set_unlock(ptep, pgste); + } + return young; +} + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_young(ptep, pgste); + pte = *ptep; + *ptep = pte_mkold(pte); + pgste_set_unlock(ptep, pgste); + return pte_young(pte); + } + return 0; +} + +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +static inline int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + /* No need to flush TLB + * On s390 reference bits are in storage key and never in TLB + * With virtualization we handle the reference bit, without it + * we can simply return */ + return ptep_test_and_clear_young(vma, address, ptep); +} + +static inline void __ptep_ipte(unsigned long address, pte_t *ptep) +{ + if (!(pte_val(*ptep) & _PAGE_INVALID)) { +#ifndef __s390x__ + /* pto must point to the start of the segment table */ + pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); +#else + /* ipte in zarch mode can do the math */ + pte_t *pto = ptep; +#endif + asm volatile( + " ipte %2,%3" + : 
"=m" (*ptep) : "m" (*ptep), + "a" (pto), "a" (address)); + } +} + +/* + * This is hard to understand. ptep_get_and_clear and ptep_clear_flush + * both clear the TLB for the unmapped pte. The reason is that + * ptep_get_and_clear is used in common code (e.g. change_pte_range) + * to modify an active pte. The sequence is + * 1) ptep_get_and_clear + * 2) set_pte_at + * 3) flush_tlb_range + * On s390 the tlb needs to get flushed with the modification of the pte + * if the pte is active. The only way how this can be implemented is to + * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range + * is a nop. + */ +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, + unsigned long address, + pte_t *ptep) +{ + pte_t pte; + + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste_get_lock(ptep); + + pte = *ptep; + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + return pte; +} + +static inline void ptep_modify_prot_commit(struct mm_struct *mm, + unsigned long address, + pte_t *ptep, pte_t pte) +{ + *ptep = pte; + if (mm_has_pgste(mm)) + pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE)); +} + +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH +static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(vma->vm_mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + __ptep_ipte(address, ptep); + pte_val(*ptep) = 
_PAGE_TYPE_EMPTY; + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } + return pte; +} + +/* + * The batched pte unmap code uses ptep_get_and_clear_full to clear the + * ptes. Here an optimization is possible. tlb_gather_mmu flushes all + * tlbs of an mm if it can guarantee that the ptes of the mm_struct + * cannot be accessed while the batched unmap is running. In this case + * full==1 and a simple pte_clear is enough. See tlb.h. + */ +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, + unsigned long address, + pte_t *ptep, int full) +{ + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + if (!full) + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte = *ptep; + + if (pte_write(pte)) { + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + *ptep = pte_wrprotect(pte); + + if (mm_has_pgste(mm)) + pgste_set_unlock(ptep, pgste); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +static inline int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) +{ + pgste_t pgste; + + if (pte_same(*ptep, entry)) + return 0; + if (mm_has_pgste(vma->vm_mm)) + pgste = pgste_get_lock(ptep); + + __ptep_ipte(address, ptep); + *ptep = entry; + + if (mm_has_pgste(vma->vm_mm)) + pgste_set_unlock(ptep, pgste); + return 1; +} + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the 
page they refer to. + */ +static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) +{ + pte_t __pte; + pte_val(__pte) = physpage + pgprot_val(pgprot); + return __pte; +} + +static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) +{ + unsigned long physpage = page_to_phys(page); + + return mk_pte_phys(physpage, pgprot); +} + +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) +#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) +#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) + +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +#ifndef __s390x__ + +#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) +#define pud_deref(pmd) ({ BUG(); 0UL; }) +#define pgd_deref(pmd) ({ BUG(); 0UL; }) + +#define pud_offset(pgd, address) ((pud_t *) pgd) +#define pmd_offset(pud, address) ((pmd_t *) pud + pmd_index(address)) + +#else /* __s390x__ */ + +#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) +#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) +#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) + +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +{ + pud_t *pud = (pud_t *) pgd; + if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pud = (pud_t *) pgd_deref(*pgd); + return pud + pud_index(address); +} + +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) +{ + pmd_t *pmd = (pmd_t *) pud; + if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pmd = (pmd_t *) pud_deref(*pud); + return pmd + pmd_index(address); +} + +#endif /* __s390x__ */ + +#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot)) +#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) +#define pte_page(x) pfn_to_page(pte_pfn(x)) + +#define 
pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) + +/* Find an entry in the lowest level page table.. */ +#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) +#define pte_offset_kernel(pmd, address) pte_offset(pmd,address) +#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) +#define pte_unmap(pte) do { } while (0) + +/* + * 31 bit swap entry format: + * A page-table entry has some bits we have to treat in a special way. + * Bits 0, 20 and bit 23 have to be zero, otherwise a specification + * exception will occur instead of a page translation exception. The + * specification exception has the bad habit of not storing necessary + * information in the lowcore. + * Bit 21 and bit 22 are the page invalid bit and the page protection + * bit. We set both to indicate a swapped page. + * Bit 30 and 31 are used to distinguish the different page types. For + * a swapped page these bits need to be zero. + * This leaves the bits 1-19 and bits 24-29 to store type and offset. + * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 + * plus 24 for the offset. + * 0| offset |0110|o|type |00| + * 0 0000000001111111111 2222 2 22222 33 + * 0 1234567890123456789 0123 4 56789 01 + * + * 64 bit swap entry format: + * A page-table entry has some bits we have to treat in a special way. + * Bits 52 and bit 55 have to be zero, otherwise a specification + * exception will occur instead of a page translation exception. The + * specification exception has the bad habit of not storing necessary + * information in the lowcore. + * Bit 53 and bit 54 are the page invalid bit and the page protection + * bit. We set both to indicate a swapped page. + * Bit 62 and 63 are used to distinguish the different page types. For + * a swapped page these bits need to be zero. + * This leaves the bits 0-51 and bits 56-61 to store type and offset. + * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 + * plus 56 for the offset. 
+ * | offset |0110|o|type |00| + * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66 + * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23 + */ +#ifndef __s390x__ +#define __SWP_OFFSET_MASK (~0UL >> 12) +#else +#define __SWP_OFFSET_MASK (~0UL >> 11) +#endif +static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) +{ + pte_t pte; + offset &= __SWP_OFFSET_MASK; + pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) | + ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); + return pte; +} + +#define __swp_type(entry) (((entry).val >> 2) & 0x1f) +#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1)) +#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) + +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#ifndef __s390x__ +# define PTE_FILE_MAX_BITS 26 +#else /* __s390x__ */ +# define PTE_FILE_MAX_BITS 59 +#endif /* __s390x__ */ + +#define pte_to_pgoff(__pte) \ + ((((__pte).pte >> 12) << 7) + (((__pte).pte >> 1) & 0x7f)) + +#define pgoff_to_pte(__off) \ + ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \ + | _PAGE_TYPE_FILE }) + +#endif /* !__ASSEMBLY__ */ + +#define kern_addr_valid(addr) (1) + +extern int vmem_add_mapping(unsigned long start, unsigned long size); +extern int vmem_remove_mapping(unsigned long start, unsigned long size); +extern int s390_enable_sie(void); + +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + +#include <asm-generic/pgtable.h> + +#endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/poll.h b/arch/s390/include/asm/poll.h new file mode 100644 index 00000000..c98509d3 --- /dev/null +++ b/arch/s390/include/asm/poll.h @@ -0,0 +1 @@ +#include <asm-generic/poll.h> diff --git a/arch/s390/include/asm/posix_types.h b/arch/s390/include/asm/posix_types.h new file mode 100644 index 00000000..edf8527f --- 
/dev/null +++ b/arch/s390/include/asm/posix_types.h @@ -0,0 +1,57 @@ +/* + * include/asm-s390/posix_types.h + * + * S390 version + * + */ + +#ifndef __ARCH_S390_POSIX_TYPES_H +#define __ARCH_S390_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ + +typedef unsigned long __kernel_size_t; +#define __kernel_size_t __kernel_size_t + +typedef unsigned short __kernel_old_dev_t; +#define __kernel_old_dev_t __kernel_old_dev_t + +#ifndef __s390x__ + +typedef unsigned long __kernel_ino_t; +typedef unsigned short __kernel_mode_t; +typedef unsigned short __kernel_nlink_t; +typedef unsigned short __kernel_ipc_pid_t; +typedef unsigned short __kernel_uid_t; +typedef unsigned short __kernel_gid_t; +typedef int __kernel_ssize_t; +typedef int __kernel_ptrdiff_t; + +#else /* __s390x__ */ + +typedef unsigned int __kernel_ino_t; +typedef unsigned int __kernel_mode_t; +typedef unsigned int __kernel_nlink_t; +typedef int __kernel_ipc_pid_t; +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +typedef long __kernel_ssize_t; +typedef long __kernel_ptrdiff_t; +typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ + +#endif /* __s390x__ */ + +#define __kernel_ino_t __kernel_ino_t +#define __kernel_mode_t __kernel_mode_t +#define __kernel_nlink_t __kernel_nlink_t +#define __kernel_ipc_pid_t __kernel_ipc_pid_t +#define __kernel_uid_t __kernel_uid_t +#define __kernel_gid_t __kernel_gid_t + +#include <asm-generic/posix_types.h> + +#endif diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h new file mode 100644 index 00000000..d499b30e --- /dev/null +++ b/arch/s390/include/asm/processor.h @@ -0,0 +1,363 @@ +/* + * include/asm-s390/processor.h + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Hartmut Penner (hp@de.ibm.com), + * 
Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/processor.h" + * Copyright (C) 1994, Linus Torvalds + */ + +#ifndef __ASM_S390_PROCESSOR_H +#define __ASM_S390_PROCESSOR_H + +#include <linux/linkage.h> +#include <linux/irqflags.h> +#include <asm/cpu.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/setup.h> + +#ifdef __KERNEL__ +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +#define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; }) + +static inline void get_cpu_id(struct cpuid *ptr) +{ + asm volatile("stidp %0" : "=Q" (*ptr)); +} + +extern void s390_adjust_jiffies(void); +extern int get_cpu_capability(unsigned int *); +extern const struct seq_operations cpuinfo_op; +extern int sysctl_ieee_emulation_warnings; + +/* + * User space process size: 2GB for 31 bit, 4TB or 8PB for 64 bit. + */ +#ifndef __s390x__ + +#define TASK_SIZE (1UL << 31) +#define TASK_UNMAPPED_BASE (1UL << 30) + +#else /* __s390x__ */ + +#define TASK_SIZE_OF(tsk) ((tsk)->mm->context.asce_limit) +#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ + (1UL << 30) : (1UL << 41)) +#define TASK_SIZE TASK_SIZE_OF(current) + +#endif /* __s390x__ */ + +#ifdef __KERNEL__ + +#ifndef __s390x__ +#define STACK_TOP (1UL << 31) +#define STACK_TOP_MAX (1UL << 31) +#else /* __s390x__ */ +#define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:42)) +#define STACK_TOP_MAX (1UL << 42) +#endif /* __s390x__ */ + + +#endif + +#define HAVE_ARCH_PICK_MMAP_LAYOUT + +typedef struct { + __u32 ar4; +} mm_segment_t; + +/* + * Thread structure + */ +struct thread_struct { + s390_fp_regs fp_regs; + unsigned int acrs[NUM_ACRS]; + unsigned long ksp; /* kernel stack pointer */ + mm_segment_t mm_segment; + unsigned long gmap_addr; /* address of last gmap fault. 
*/ + struct per_regs per_user; /* User specified PER registers */ + struct per_event per_event; /* Cause of the last PER trap */ + /* pfault_wait is used to block the process on a pfault event */ + unsigned long pfault_wait; + struct list_head list; +}; + +typedef struct thread_struct thread_struct; + +/* + * Stack layout of a C stack frame. + */ +#ifndef __PACK_STACK +struct stack_frame { + unsigned long back_chain; + unsigned long empty1[5]; + unsigned long gprs[10]; + unsigned int empty2[8]; +}; +#else +struct stack_frame { + unsigned long empty1[5]; + unsigned int empty2[8]; + unsigned long gprs[10]; + unsigned long back_chain; +}; +#endif + +#define ARCH_MIN_TASKALIGN 8 + +#define INIT_THREAD { \ + .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ +} + +/* + * Do necessary setup to start up a new thread. + */ +#define start_thread(regs, new_psw, new_stackp) do { \ + regs->psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ +} while (0) + +#define start_thread31(regs, new_psw, new_stackp) do { \ + regs->psw.mask = psw_user_bits | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ + crst_table_downgrade(current->mm, 1UL << 31); \ +} while (0) + +/* Forward declaration, a strange C thing */ +struct task_struct; +struct mm_struct; +struct seq_file; + +/* Free all resources held by a thread. */ +extern void release_thread(struct task_struct *); +extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); + +/* Prepare to copy thread state - unlazy all lazy status */ +#define prepare_to_copy(tsk) do { } while (0) + +/* + * Return saved PC of a blocked thread. 
+ */ +extern unsigned long thread_saved_pc(struct task_struct *t); + +extern void show_code(struct pt_regs *regs); + +unsigned long get_wchan(struct task_struct *p); +#define task_pt_regs(tsk) ((struct pt_regs *) \ + (task_stack_page(tsk) + THREAD_SIZE) - 1) +#define KSTK_EIP(tsk) (task_pt_regs(tsk)->psw.addr) +#define KSTK_ESP(tsk) (task_pt_regs(tsk)->gprs[15]) + +static inline unsigned short stap(void) +{ + unsigned short cpu_address; + + asm volatile("stap %0" : "=m" (cpu_address)); + return cpu_address; +} + +/* + * Give up the time slice of the virtual PU. + */ +static inline void cpu_relax(void) +{ + if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,68"); + barrier(); +} + +static inline void psw_set_key(unsigned int key) +{ + asm volatile("spka 0(%0)" : : "d" (key)); +} + +/* + * Set PSW to specified value. + */ +static inline void __load_psw(psw_t psw) +{ +#ifndef __s390x__ + asm volatile("lpsw %0" : : "Q" (psw) : "cc"); +#else + asm volatile("lpswe %0" : : "Q" (psw) : "cc"); +#endif +} + +/* + * Set PSW mask to specified value, while leaving the + * PSW addr pointing to the next instruction. + */ +static inline void __load_psw_mask (unsigned long mask) +{ + unsigned long addr; + psw_t psw; + + psw.mask = mask; + +#ifndef __s390x__ + asm volatile( + " basr %0,0\n" + "0: ahi %0,1f-0b\n" + " st %0,%O1+4(%R1)\n" + " lpsw %1\n" + "1:" + : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); +#else /* __s390x__ */ + asm volatile( + " larl %0,1f\n" + " stg %0,%O1+8(%R1)\n" + " lpswe %1\n" + "1:" + : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); +#endif /* __s390x__ */ +} + +/* + * Rewind PSW instruction address by specified number of bytes. 
+ */ +static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) +{ +#ifndef __s390x__ + if (psw.addr & PSW_ADDR_AMODE) + /* 31 bit mode */ + return (psw.addr - ilc) | PSW_ADDR_AMODE; + /* 24 bit mode */ + return (psw.addr - ilc) & ((1UL << 24) - 1); +#else + unsigned long mask; + + mask = (psw.mask & PSW_MASK_EA) ? -1UL : + (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 : + (1UL << 24) - 1; + return (psw.addr - ilc) & mask; +#endif +} + +/* + * Function to drop a processor into disabled wait state + */ +static inline void __noreturn disabled_wait(unsigned long code) +{ + unsigned long ctl_buf; + psw_t dw_psw; + + dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA; + dw_psw.addr = code; + /* + * Store status and then load disabled wait psw, + * the processor is dead afterwards + */ +#ifndef __s390x__ + asm volatile( + " stctl 0,0,0(%2)\n" + " ni 0(%2),0xef\n" /* switch off protection */ + " lctl 0,0,0(%2)\n" + " stpt 0xd8\n" /* store timer */ + " stckc 0xe0\n" /* store clock comparator */ + " stpx 0x108\n" /* store prefix register */ + " stam 0,15,0x120\n" /* store access registers */ + " std 0,0x160\n" /* store f0 */ + " std 2,0x168\n" /* store f2 */ + " std 4,0x170\n" /* store f4 */ + " std 6,0x178\n" /* store f6 */ + " stm 0,15,0x180\n" /* store general registers */ + " stctl 0,15,0x1c0\n" /* store control registers */ + " oi 0x1c0,0x10\n" /* fake protection bit */ + " lpsw 0(%1)" + : "=m" (ctl_buf) + : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc"); +#else /* __s390x__ */ + asm volatile( + " stctg 0,0,0(%2)\n" + " ni 4(%2),0xef\n" /* switch off protection */ + " lctlg 0,0,0(%2)\n" + " lghi 1,0x1000\n" + " stpt 0x328(1)\n" /* store timer */ + " stckc 0x330(1)\n" /* store clock comparator */ + " stpx 0x318(1)\n" /* store prefix register */ + " stam 0,15,0x340(1)\n"/* store access registers */ + " stfpc 0x31c(1)\n" /* store fpu control */ + " std 0,0x200(1)\n" /* store f0 */ + " std 1,0x208(1)\n" /* store f1 */ + " std 
2,0x210(1)\n" /* store f2 */ + " std 3,0x218(1)\n" /* store f3 */ + " std 4,0x220(1)\n" /* store f4 */ + " std 5,0x228(1)\n" /* store f5 */ + " std 6,0x230(1)\n" /* store f6 */ + " std 7,0x238(1)\n" /* store f7 */ + " std 8,0x240(1)\n" /* store f8 */ + " std 9,0x248(1)\n" /* store f9 */ + " std 10,0x250(1)\n" /* store f10 */ + " std 11,0x258(1)\n" /* store f11 */ + " std 12,0x260(1)\n" /* store f12 */ + " std 13,0x268(1)\n" /* store f13 */ + " std 14,0x270(1)\n" /* store f14 */ + " std 15,0x278(1)\n" /* store f15 */ + " stmg 0,15,0x280(1)\n"/* store general registers */ + " stctg 0,15,0x380(1)\n"/* store control registers */ + " oi 0x384(1),0x10\n"/* fake protection bit */ + " lpswe 0(%1)" + : "=m" (ctl_buf) + : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0", "1"); +#endif /* __s390x__ */ + while (1); +} + +/* + * Use to set psw mask except for the first byte which + * won't be changed by this function. + */ +static inline void +__set_psw_mask(unsigned long mask) +{ + __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); +} + +#define local_mcck_enable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK) +#define local_mcck_disable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT) + +/* + * Basic Machine Check/Program Check Handler. 
+ */ + +extern void s390_base_mcck_handler(void); +extern void s390_base_pgm_handler(void); +extern void s390_base_ext_handler(void); + +extern void (*s390_base_mcck_handler_fn)(void); +extern void (*s390_base_pgm_handler_fn)(void); +extern void (*s390_base_ext_handler_fn)(void); + +#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL + +#endif + +/* + * Helper macro for exception table entries + */ +#ifndef __s390x__ +#define EX_TABLE(_fault,_target) \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long " #_fault "," #_target "\n" \ + ".previous\n" +#else +#define EX_TABLE(_fault,_target) \ + ".section __ex_table,\"a\"\n" \ + " .align 8\n" \ + " .quad " #_fault "," #_target "\n" \ + ".previous\n" +#endif + +#endif /* __ASM_S390_PROCESSOR_H */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h new file mode 100644 index 00000000..aeb77f01 --- /dev/null +++ b/arch/s390/include/asm/ptrace.h @@ -0,0 +1,564 @@ +/* + * include/asm-s390/ptrace.h + * + * S390 version + * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + */ + +#ifndef _S390_PTRACE_H +#define _S390_PTRACE_H + +/* + * Offsets in the user_regs_struct. 
They are used for the ptrace + * system call and in entry.S + */ +#ifndef __s390x__ + +#define PT_PSWMASK 0x00 +#define PT_PSWADDR 0x04 +#define PT_GPR0 0x08 +#define PT_GPR1 0x0C +#define PT_GPR2 0x10 +#define PT_GPR3 0x14 +#define PT_GPR4 0x18 +#define PT_GPR5 0x1C +#define PT_GPR6 0x20 +#define PT_GPR7 0x24 +#define PT_GPR8 0x28 +#define PT_GPR9 0x2C +#define PT_GPR10 0x30 +#define PT_GPR11 0x34 +#define PT_GPR12 0x38 +#define PT_GPR13 0x3C +#define PT_GPR14 0x40 +#define PT_GPR15 0x44 +#define PT_ACR0 0x48 +#define PT_ACR1 0x4C +#define PT_ACR2 0x50 +#define PT_ACR3 0x54 +#define PT_ACR4 0x58 +#define PT_ACR5 0x5C +#define PT_ACR6 0x60 +#define PT_ACR7 0x64 +#define PT_ACR8 0x68 +#define PT_ACR9 0x6C +#define PT_ACR10 0x70 +#define PT_ACR11 0x74 +#define PT_ACR12 0x78 +#define PT_ACR13 0x7C +#define PT_ACR14 0x80 +#define PT_ACR15 0x84 +#define PT_ORIGGPR2 0x88 +#define PT_FPC 0x90 +/* + * A nasty fact of life that the ptrace api + * only supports passing of longs. + */ +#define PT_FPR0_HI 0x98 +#define PT_FPR0_LO 0x9C +#define PT_FPR1_HI 0xA0 +#define PT_FPR1_LO 0xA4 +#define PT_FPR2_HI 0xA8 +#define PT_FPR2_LO 0xAC +#define PT_FPR3_HI 0xB0 +#define PT_FPR3_LO 0xB4 +#define PT_FPR4_HI 0xB8 +#define PT_FPR4_LO 0xBC +#define PT_FPR5_HI 0xC0 +#define PT_FPR5_LO 0xC4 +#define PT_FPR6_HI 0xC8 +#define PT_FPR6_LO 0xCC +#define PT_FPR7_HI 0xD0 +#define PT_FPR7_LO 0xD4 +#define PT_FPR8_HI 0xD8 +#define PT_FPR8_LO 0XDC +#define PT_FPR9_HI 0xE0 +#define PT_FPR9_LO 0xE4 +#define PT_FPR10_HI 0xE8 +#define PT_FPR10_LO 0xEC +#define PT_FPR11_HI 0xF0 +#define PT_FPR11_LO 0xF4 +#define PT_FPR12_HI 0xF8 +#define PT_FPR12_LO 0xFC +#define PT_FPR13_HI 0x100 +#define PT_FPR13_LO 0x104 +#define PT_FPR14_HI 0x108 +#define PT_FPR14_LO 0x10C +#define PT_FPR15_HI 0x110 +#define PT_FPR15_LO 0x114 +#define PT_CR_9 0x118 +#define PT_CR_10 0x11C +#define PT_CR_11 0x120 +#define PT_IEEE_IP 0x13C +#define PT_LASTOFF PT_IEEE_IP +#define PT_ENDREGS 0x140-1 + +#define GPR_SIZE 4 +#define 
CR_SIZE 4 + +#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */ + +#else /* __s390x__ */ + +#define PT_PSWMASK 0x00 +#define PT_PSWADDR 0x08 +#define PT_GPR0 0x10 +#define PT_GPR1 0x18 +#define PT_GPR2 0x20 +#define PT_GPR3 0x28 +#define PT_GPR4 0x30 +#define PT_GPR5 0x38 +#define PT_GPR6 0x40 +#define PT_GPR7 0x48 +#define PT_GPR8 0x50 +#define PT_GPR9 0x58 +#define PT_GPR10 0x60 +#define PT_GPR11 0x68 +#define PT_GPR12 0x70 +#define PT_GPR13 0x78 +#define PT_GPR14 0x80 +#define PT_GPR15 0x88 +#define PT_ACR0 0x90 +#define PT_ACR1 0x94 +#define PT_ACR2 0x98 +#define PT_ACR3 0x9C +#define PT_ACR4 0xA0 +#define PT_ACR5 0xA4 +#define PT_ACR6 0xA8 +#define PT_ACR7 0xAC +#define PT_ACR8 0xB0 +#define PT_ACR9 0xB4 +#define PT_ACR10 0xB8 +#define PT_ACR11 0xBC +#define PT_ACR12 0xC0 +#define PT_ACR13 0xC4 +#define PT_ACR14 0xC8 +#define PT_ACR15 0xCC +#define PT_ORIGGPR2 0xD0 +#define PT_FPC 0xD8 +#define PT_FPR0 0xE0 +#define PT_FPR1 0xE8 +#define PT_FPR2 0xF0 +#define PT_FPR3 0xF8 +#define PT_FPR4 0x100 +#define PT_FPR5 0x108 +#define PT_FPR6 0x110 +#define PT_FPR7 0x118 +#define PT_FPR8 0x120 +#define PT_FPR9 0x128 +#define PT_FPR10 0x130 +#define PT_FPR11 0x138 +#define PT_FPR12 0x140 +#define PT_FPR13 0x148 +#define PT_FPR14 0x150 +#define PT_FPR15 0x158 +#define PT_CR_9 0x160 +#define PT_CR_10 0x168 +#define PT_CR_11 0x170 +#define PT_IEEE_IP 0x1A8 +#define PT_LASTOFF PT_IEEE_IP +#define PT_ENDREGS 0x1B0-1 + +#define GPR_SIZE 8 +#define CR_SIZE 8 + +#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ + +#endif /* __s390x__ */ + +#define NUM_GPRS 16 +#define NUM_FPRS 16 +#define NUM_CRS 16 +#define NUM_ACRS 16 + +#define NUM_CR_WORDS 3 + +#define FPR_SIZE 8 +#define FPC_SIZE 4 +#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */ +#define ACR_SIZE 4 + + +#define PTRACE_OLDSETOPTIONS 21 + +#ifndef __ASSEMBLY__ +#include <linux/stddef.h> +#include <linux/types.h> + +typedef union +{ + float f; + double d; + __u64 ui; + struct + 
{ + __u32 hi; + __u32 lo; + } fp; +} freg_t; + +typedef struct +{ + __u32 fpc; + freg_t fprs[NUM_FPRS]; +} s390_fp_regs; + +#define FPC_EXCEPTION_MASK 0xF8000000 +#define FPC_FLAGS_MASK 0x00F80000 +#define FPC_DXC_MASK 0x0000FF00 +#define FPC_RM_MASK 0x00000003 +#define FPC_VALID_MASK 0xF8F8FF03 + +/* this typedef defines how a Program Status Word looks like */ +typedef struct +{ + unsigned long mask; + unsigned long addr; +} __attribute__ ((aligned(8))) psw_t; + +typedef struct +{ + __u32 mask; + __u32 addr; +} __attribute__ ((aligned(8))) psw_compat_t; + +#ifndef __s390x__ + +#define PSW_MASK_PER 0x40000000UL +#define PSW_MASK_DAT 0x04000000UL +#define PSW_MASK_IO 0x02000000UL +#define PSW_MASK_EXT 0x01000000UL +#define PSW_MASK_KEY 0x00F00000UL +#define PSW_MASK_BASE 0x00080000UL /* always one */ +#define PSW_MASK_MCHECK 0x00040000UL +#define PSW_MASK_WAIT 0x00020000UL +#define PSW_MASK_PSTATE 0x00010000UL +#define PSW_MASK_ASC 0x0000C000UL +#define PSW_MASK_CC 0x00003000UL +#define PSW_MASK_PM 0x00000F00UL +#define PSW_MASK_EA 0x00000000UL +#define PSW_MASK_BA 0x00000000UL + +#define PSW_MASK_USER 0x00003F00UL + +#define PSW_ADDR_AMODE 0x80000000UL +#define PSW_ADDR_INSN 0x7FFFFFFFUL + +#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) + +#define PSW_ASC_PRIMARY 0x00000000UL +#define PSW_ASC_ACCREG 0x00004000UL +#define PSW_ASC_SECONDARY 0x00008000UL +#define PSW_ASC_HOME 0x0000C000UL + +#else /* __s390x__ */ + +#define PSW_MASK_PER 0x4000000000000000UL +#define PSW_MASK_DAT 0x0400000000000000UL +#define PSW_MASK_IO 0x0200000000000000UL +#define PSW_MASK_EXT 0x0100000000000000UL +#define PSW_MASK_BASE 0x0000000000000000UL +#define PSW_MASK_KEY 0x00F0000000000000UL +#define PSW_MASK_MCHECK 0x0004000000000000UL +#define PSW_MASK_WAIT 0x0002000000000000UL +#define PSW_MASK_PSTATE 0x0001000000000000UL +#define PSW_MASK_ASC 0x0000C00000000000UL +#define PSW_MASK_CC 0x0000300000000000UL +#define PSW_MASK_PM 0x00000F0000000000UL +#define PSW_MASK_EA 
0x0000000100000000UL +#define PSW_MASK_BA 0x0000000080000000UL + +#define PSW_MASK_USER 0x00003F0180000000UL + +#define PSW_ADDR_AMODE 0x0000000000000000UL +#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL + +#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) + +#define PSW_ASC_PRIMARY 0x0000000000000000UL +#define PSW_ASC_ACCREG 0x0000400000000000UL +#define PSW_ASC_SECONDARY 0x0000800000000000UL +#define PSW_ASC_HOME 0x0000C00000000000UL + +#endif /* __s390x__ */ + +#ifdef __KERNEL__ +extern long psw_kernel_bits; +extern long psw_user_bits; +#endif + +/* + * The s390_regs structure is used to define the elf_gregset_t. + */ +typedef struct +{ + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned int acrs[NUM_ACRS]; + unsigned long orig_gpr2; +} s390_regs; + +typedef struct +{ + psw_compat_t psw; + __u32 gprs[NUM_GPRS]; + __u32 acrs[NUM_ACRS]; + __u32 orig_gpr2; +} s390_compat_regs; + +typedef struct +{ + __u32 gprs_high[NUM_GPRS]; +} s390_compat_regs_high; + +#ifdef __KERNEL__ + +/* + * The pt_regs struct defines the way the registers are stored on + * the stack during a system call. + */ +struct pt_regs +{ + unsigned long args[1]; + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned long orig_gpr2; + unsigned int int_code; + unsigned long int_parm_long; +}; + +/* + * Program event recording (PER) register set. + */ +struct per_regs { + unsigned long control; /* PER control bits */ + unsigned long start; /* PER starting address */ + unsigned long end; /* PER ending address */ +}; + +/* + * PER event contains information about the cause of the last PER exception. + */ +struct per_event { + unsigned short cause; /* PER code, ATMID and AI */ + unsigned long address; /* PER address */ + unsigned char paid; /* PER access identification */ +}; + +/* + * Simplified per_info structure used to decode the ptrace user space ABI. 
+ */ +struct per_struct_kernel { + unsigned long cr9; /* PER control bits */ + unsigned long cr10; /* PER starting address */ + unsigned long cr11; /* PER ending address */ + unsigned long bits; /* Obsolete software bits */ + unsigned long starting_addr; /* User specified start address */ + unsigned long ending_addr; /* User specified end address */ + unsigned short perc_atmid; /* PER trap ATMID */ + unsigned long address; /* PER trap instruction address */ + unsigned char access_id; /* PER trap access identification */ +}; + +#define PER_EVENT_MASK 0xE9000000UL + +#define PER_EVENT_BRANCH 0x80000000UL +#define PER_EVENT_IFETCH 0x40000000UL +#define PER_EVENT_STORE 0x20000000UL +#define PER_EVENT_STORE_REAL 0x08000000UL +#define PER_EVENT_NULLIFICATION 0x01000000UL + +#define PER_CONTROL_MASK 0x00a00000UL + +#define PER_CONTROL_BRANCH_ADDRESS 0x00800000UL +#define PER_CONTROL_ALTERATION 0x00200000UL + +#endif + +/* + * Now for the user space program event recording (trace) definitions. + * The following structures are used only for the ptrace interface, don't + * touch or even look at it if you don't want to modify the user-space + * ptrace interface. In particular stay away from it for in-kernel PER. + */ +typedef struct +{ + unsigned long cr[NUM_CR_WORDS]; +} per_cr_words; + +#define PER_EM_MASK 0xE8000000UL + +typedef struct +{ +#ifdef __s390x__ + unsigned : 32; +#endif /* __s390x__ */ + unsigned em_branching : 1; + unsigned em_instruction_fetch : 1; + /* + * Switching on storage alteration automatically fixes + * the storage alteration event bit in the users std. 
+ */ + unsigned em_storage_alteration : 1; + unsigned em_gpr_alt_unused : 1; + unsigned em_store_real_address : 1; + unsigned : 3; + unsigned branch_addr_ctl : 1; + unsigned : 1; + unsigned storage_alt_space_ctl : 1; + unsigned : 21; + unsigned long starting_addr; + unsigned long ending_addr; +} per_cr_bits; + +typedef struct +{ + unsigned short perc_atmid; + unsigned long address; + unsigned char access_id; +} per_lowcore_words; + +typedef struct +{ + unsigned perc_branching : 1; + unsigned perc_instruction_fetch : 1; + unsigned perc_storage_alteration : 1; + unsigned perc_gpr_alt_unused : 1; + unsigned perc_store_real_address : 1; + unsigned : 3; + unsigned atmid_psw_bit_31 : 1; + unsigned atmid_validity_bit : 1; + unsigned atmid_psw_bit_32 : 1; + unsigned atmid_psw_bit_5 : 1; + unsigned atmid_psw_bit_16 : 1; + unsigned atmid_psw_bit_17 : 1; + unsigned si : 2; + unsigned long address; + unsigned : 4; + unsigned access_id : 4; +} per_lowcore_bits; + +typedef struct +{ + union { + per_cr_words words; + per_cr_bits bits; + } control_regs; + /* + * Use these flags instead of setting em_instruction_fetch + * directly they are used so that single stepping can be + * switched on & off while not affecting other tracing + */ + unsigned single_step : 1; + unsigned instruction_fetch : 1; + unsigned : 30; + /* + * These addresses are copied into cr10 & cr11 if single + * stepping is switched off + */ + unsigned long starting_addr; + unsigned long ending_addr; + union { + per_lowcore_words words; + per_lowcore_bits bits; + } lowcore; +} per_struct; + +typedef struct +{ + unsigned int len; + unsigned long kernel_addr; + unsigned long process_addr; +} ptrace_area; + +/* + * S/390 specific non posix ptrace requests. I chose unusual values so + * they are unlikely to clash with future ptrace definitions. 
+ */ +#define PTRACE_PEEKUSR_AREA 0x5000 +#define PTRACE_POKEUSR_AREA 0x5001 +#define PTRACE_PEEKTEXT_AREA 0x5002 +#define PTRACE_PEEKDATA_AREA 0x5003 +#define PTRACE_POKETEXT_AREA 0x5004 +#define PTRACE_POKEDATA_AREA 0x5005 +#define PTRACE_GET_LAST_BREAK 0x5006 +#define PTRACE_PEEK_SYSTEM_CALL 0x5007 +#define PTRACE_POKE_SYSTEM_CALL 0x5008 + +/* + * PT_PROT definition is loosely based on hppa bsd definition in + * gdb/hppab-nat.c + */ +#define PTRACE_PROT 21 + +typedef enum +{ + ptprot_set_access_watchpoint, + ptprot_set_write_watchpoint, + ptprot_disable_watchpoint +} ptprot_flags; + +typedef struct +{ + unsigned long lowaddr; + unsigned long hiaddr; + ptprot_flags prot; +} ptprot_area; + +/* Sequence of bytes for breakpoint illegal instruction. */ +#define S390_BREAKPOINT {0x0,0x1} +#define S390_BREAKPOINT_U16 ((__u16)0x0001) +#define S390_SYSCALL_OPCODE ((__u16)0x0a00) +#define S390_SYSCALL_SIZE 2 + +/* + * The user_regs_struct defines the way the user registers are + * stored on the stack for signal handling. + */ +struct user_regs_struct +{ + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned int acrs[NUM_ACRS]; + unsigned long orig_gpr2; + s390_fp_regs fp_regs; + /* + * These per registers are in here so that gdb can modify them + * itself as there is no "official" ptrace interface for hardware + * watchpoints. This is the way intel does it. + */ + per_struct per_info; + unsigned long ieee_instruction_pointer; /* obsolete, always 0 */ +}; + +#ifdef __KERNEL__ +/* + * These are defined as per linux/ptrace.h, which see. 
+ */ +#define arch_has_single_step() (1) + +#define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0) +#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN) +#define user_stack_pointer(regs)((regs)->gprs[15]) +#define profile_pc(regs) instruction_pointer(regs) + +static inline long regs_return_value(struct pt_regs *regs) +{ + return regs->gprs[2]; +} + +int regs_query_register_offset(const char *name); +const char *regs_query_register_name(unsigned int offset); +unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset); +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); + +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->gprs[15] & PSW_ADDR_INSN; +} + +#endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ + +#endif /* _S390_PTRACE_H */ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h new file mode 100644 index 00000000..d75c8e78 --- /dev/null +++ b/arch/s390/include/asm/qdio.h @@ -0,0 +1,406 @@ +/* + * linux/include/asm-s390/qdio.h + * + * Copyright 2000,2008 IBM Corp. 
+ * Author(s): Utz Bacher <utz.bacher@de.ibm.com> + * Jan Glauber <jang@linux.vnet.ibm.com> + * + */ +#ifndef __QDIO_H__ +#define __QDIO_H__ + +#include <linux/interrupt.h> +#include <asm/cio.h> +#include <asm/ccwdev.h> + +/* only use 4 queues to save some cachelines */ +#define QDIO_MAX_QUEUES_PER_IRQ 4 +#define QDIO_MAX_BUFFERS_PER_Q 128 +#define QDIO_MAX_BUFFERS_MASK (QDIO_MAX_BUFFERS_PER_Q - 1) +#define QDIO_MAX_ELEMENTS_PER_BUFFER 16 +#define QDIO_SBAL_SIZE 256 + +#define QDIO_QETH_QFMT 0 +#define QDIO_ZFCP_QFMT 1 +#define QDIO_IQDIO_QFMT 2 + +/** + * struct qdesfmt0 - queue descriptor, format 0 + * @sliba: storage list information block address + * @sla: storage list address + * @slsba: storage list state block address + * @akey: access key for DLIB + * @bkey: access key for SL + * @ckey: access key for SBALs + * @dkey: access key for SLSB + */ +struct qdesfmt0 { + u64 sliba; + u64 sla; + u64 slsba; + u32 : 32; + u32 akey : 4; + u32 bkey : 4; + u32 ckey : 4; + u32 dkey : 4; + u32 : 16; +} __attribute__ ((packed)); + +#define QDR_AC_MULTI_BUFFER_ENABLE 0x01 + +/** + * struct qdr - queue description record (QDR) + * @qfmt: queue format + * @pfmt: implementation dependent parameter format + * @ac: adapter characteristics + * @iqdcnt: input queue descriptor count + * @oqdcnt: output queue descriptor count + * @iqdsz: input queue descriptor size + * @oqdsz: output queue descriptor size + * @qiba: queue information block address + * @qkey: queue information block key + * @qdf0: queue descriptions + */ +struct qdr { + u32 qfmt : 8; + u32 pfmt : 8; + u32 : 8; + u32 ac : 8; + u32 : 8; + u32 iqdcnt : 8; + u32 : 8; + u32 oqdcnt : 8; + u32 : 8; + u32 iqdsz : 8; + u32 : 8; + u32 oqdsz : 8; + /* private: */ + u32 res[9]; + /* public: */ + u64 qiba; + u32 : 32; + u32 qkey : 4; + u32 : 28; + struct qdesfmt0 qdf0[126]; +} __attribute__ ((packed, aligned(4096))); + +#define QIB_AC_OUTBOUND_PCI_SUPPORTED 0x40 +#define QIB_RFLAGS_ENABLE_QEBSM 0x80 +#define 
QIB_RFLAGS_ENABLE_DATA_DIV 0x02 + +/** + * struct qib - queue information block (QIB) + * @qfmt: queue format + * @pfmt: implementation dependent parameter format + * @rflags: QEBSM + * @ac: adapter characteristics + * @isliba: absolute address of first input SLIB + * @osliba: absolute address of first output SLIB + * @ebcnam: adapter identifier in EBCDIC + * @parm: implementation dependent parameters + */ +struct qib { + u32 qfmt : 8; + u32 pfmt : 8; + u32 rflags : 8; + u32 ac : 8; + u32 : 32; + u64 isliba; + u64 osliba; + u32 : 32; + u32 : 32; + u8 ebcnam[8]; + /* private: */ + u8 res[88]; + /* public: */ + u8 parm[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(256))); + +/** + * struct slibe - storage list information block element (SLIBE) + * @parms: implementation dependent parameters + */ +struct slibe { + u64 parms; +}; + +/** + * struct qaob - queue asynchronous operation block + * @res0: reserved parameters + * @res1: reserved parameter + * @res2: reserved parameter + * @res3: reserved parameter + * @aorc: asynchronous operation return code + * @flags: internal flags + * @cbtbs: control block type + * @sb_count: number of storage blocks + * @sba: storage block element addresses + * @dcount: size of storage block elements + * @user0: user definable value + * @res4: reserved parameter + * @user1: user definable value + * @user2: user definable value + */ +struct qaob { + u64 res0[6]; + u8 res1; + u8 res2; + u8 res3; + u8 aorc; + u8 flags; + u16 cbtbs; + u8 sb_count; + u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER]; + u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER]; + u64 user0; + u64 res4[2]; + u64 user1; + u64 user2; +} __attribute__ ((packed, aligned(256))); + +/** + * struct slib - storage list information block (SLIB) + * @nsliba: next SLIB address (if any) + * @sla: SL address + * @slsba: SLSB address + * @slibe: SLIB elements + */ +struct slib { + u64 nsliba; + u64 sla; + u64 slsba; + /* private: */ + u8 res[1000]; + /* public: */ + struct slibe 
slibe[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(2048))); + +#define SBAL_EFLAGS_LAST_ENTRY 0x40 +#define SBAL_EFLAGS_CONTIGUOUS 0x20 +#define SBAL_EFLAGS_FIRST_FRAG 0x04 +#define SBAL_EFLAGS_MIDDLE_FRAG 0x08 +#define SBAL_EFLAGS_LAST_FRAG 0x0c +#define SBAL_EFLAGS_MASK 0x6f + +#define SBAL_SFLAGS0_PCI_REQ 0x40 +#define SBAL_SFLAGS0_DATA_CONTINUATION 0x20 + +/* Awesome OpenFCP extensions */ +#define SBAL_SFLAGS0_TYPE_STATUS 0x00 +#define SBAL_SFLAGS0_TYPE_WRITE 0x08 +#define SBAL_SFLAGS0_TYPE_READ 0x10 +#define SBAL_SFLAGS0_TYPE_WRITE_READ 0x18 +#define SBAL_SFLAGS0_MORE_SBALS 0x04 +#define SBAL_SFLAGS0_COMMAND 0x02 +#define SBAL_SFLAGS0_LAST_SBAL 0x00 +#define SBAL_SFLAGS0_ONLY_SBAL SBAL_SFLAGS0_COMMAND +#define SBAL_SFLAGS0_MIDDLE_SBAL SBAL_SFLAGS0_MORE_SBALS +#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND) + +/** + * struct qdio_buffer_element - SBAL entry + * @eflags: SBAL entry flags + * @scount: SBAL count + * @sflags: whole SBAL flags + * @length: length + * @addr: address +*/ +struct qdio_buffer_element { + u8 eflags; + /* private: */ + u8 res1; + /* public: */ + u8 scount; + u8 sflags; + u32 length; +#ifdef CONFIG_32BIT + /* private: */ + void *res2; + /* public: */ +#endif + void *addr; +} __attribute__ ((packed, aligned(16))); + +/** + * struct qdio_buffer - storage block address list (SBAL) + * @element: SBAL entries + */ +struct qdio_buffer { + struct qdio_buffer_element element[QDIO_MAX_ELEMENTS_PER_BUFFER]; +} __attribute__ ((packed, aligned(256))); + +/** + * struct sl_element - storage list entry + * @sbal: absolute SBAL address + */ +struct sl_element { +#ifdef CONFIG_32BIT + /* private: */ + unsigned long reserved; + /* public: */ +#endif + unsigned long sbal; +} __attribute__ ((packed)); + +/** + * struct sl - storage list (SL) + * @element: SL entries + */ +struct sl { + struct sl_element element[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(1024))); + +/** + * struct slsb - 
storage list state block (SLSB) + * @val: state per buffer + */ +struct slsb { + u8 val[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(256))); + +#define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080 +#define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040 +#define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 +#define CHSC_AC2_DATA_DIV_ENABLED 0x0002 + +/** + * struct qdio_outbuf_state - SBAL related asynchronous operation information + * (for communication with upper layer programs) + * (only required for use with completion queues) + * @flags: flags indicating state of buffer + * @aob: pointer to QAOB used for the particular SBAL + * @user: pointer to upper layer program's state information related to SBAL + * (stored in user1 data of QAOB) + */ +struct qdio_outbuf_state { + u8 flags; + struct qaob *aob; + void *user; +}; + +#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00 +#define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 + +#define CHSC_AC1_INITIATE_INPUTQ 0x80 + + +/* qdio adapter-characteristics-1 flag */ +#define AC1_SIGA_INPUT_NEEDED 0x40 /* process input queues */ +#define AC1_SIGA_OUTPUT_NEEDED 0x20 /* process output queues */ +#define AC1_SIGA_SYNC_NEEDED 0x10 /* ask hypervisor to sync */ +#define AC1_AUTOMATIC_SYNC_ON_THININT 0x08 /* set by hypervisor */ +#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI 0x04 /* set by hypervisor */ +#define AC1_SC_QEBSM_AVAILABLE 0x02 /* available for subchannel */ +#define AC1_SC_QEBSM_ENABLED 0x01 /* enabled for subchannel */ + +#define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 +#define CHSC_AC2_DATA_DIV_ENABLED 0x0002 + +#define CHSC_AC3_FORMAT2_CQ_AVAILABLE 0x8000 + +struct qdio_ssqd_desc { + u8 flags; + u8:8; + u16 sch; + u8 qfmt; + u8 parm; + u8 qdioac1; + u8 sch_class; + u8 pcnt; + u8 icnt; + u8:8; + u8 ocnt; + u8:8; + u8 mbccnt; + u16 qdioac2; + u64 sch_token; + u8 mro; + u8 mri; + u16 qdioac3; + u16:16; + u8:8; + u8 mmwc; +} __attribute__ ((packed)); + +/* params are: ccw_device, qdio_error, queue_number, + first element processed, number of elements 
processed, int_parm */ +typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, + int, int, unsigned long); + +/* qdio errors reported to the upper-layer program */ +#define QDIO_ERROR_SIGA_TARGET 0x02 +#define QDIO_ERROR_SIGA_ACCESS_EXCEPTION 0x10 +#define QDIO_ERROR_SIGA_BUSY 0x20 +#define QDIO_ERROR_ACTIVATE_CHECK_CONDITION 0x40 +#define QDIO_ERROR_SLSB_STATE 0x80 + +/* for qdio_cleanup */ +#define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01 +#define QDIO_FLAG_CLEANUP_USING_HALT 0x02 + +/** + * struct qdio_initialize - qdio initialization data + * @cdev: associated ccw device + * @q_format: queue format + * @adapter_name: name for the adapter + * @qib_param_field_format: format for qib_parm_field + * @qib_param_field: pointer to 128 bytes or NULL, if no param field + * @qib_rflags: rflags to set + * @input_slib_elements: pointer to no_input_qs * 128 words of data or NULL + * @output_slib_elements: pointer to no_output_qs * 128 words of data or NULL + * @no_input_qs: number of input queues + * @no_output_qs: number of output queues + * @input_handler: handler to be called for input queues + * @output_handler: handler to be called for output queues + * @queue_start_poll_array: polling handlers (one per input queue or NULL) + * @int_parm: interruption parameter + * @input_sbal_addr_array: address of no_input_qs * 128 pointers + * @output_sbal_addr_array: address of no_output_qs * 128 pointers + * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL) + */ +struct qdio_initialize { + struct ccw_device *cdev; + unsigned char q_format; + unsigned char qdr_ac; + unsigned char adapter_name[8]; + unsigned int qib_param_field_format; + unsigned char *qib_param_field; + unsigned char qib_rflags; + unsigned long *input_slib_elements; + unsigned long *output_slib_elements; + unsigned int no_input_qs; + unsigned int no_output_qs; + qdio_handler_t *input_handler; + qdio_handler_t *output_handler; + void (**queue_start_poll_array) (struct ccw_device *, int, 
+ unsigned long); + int scan_threshold; + unsigned long int_parm; + void **input_sbal_addr_array; + void **output_sbal_addr_array; + struct qdio_outbuf_state *output_sbal_state_array; +}; + +#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ +#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */ +#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ +#define QDIO_STATE_STOPPED 0x00000010 /* after queues went down */ + +#define QDIO_FLAG_SYNC_INPUT 0x01 +#define QDIO_FLAG_SYNC_OUTPUT 0x02 +#define QDIO_FLAG_PCI_OUT 0x10 + +extern int qdio_allocate(struct qdio_initialize *); +extern int qdio_establish(struct qdio_initialize *); +extern int qdio_activate(struct ccw_device *); +extern void qdio_release_aob(struct qaob *); +extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int, + unsigned int); +extern int qdio_start_irq(struct ccw_device *, int); +extern int qdio_stop_irq(struct ccw_device *, int); +extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *); +extern int qdio_shutdown(struct ccw_device *, int); +extern int qdio_free(struct ccw_device *); +extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *); + +#endif /* __QDIO_H__ */ diff --git a/arch/s390/include/asm/qeth.h b/arch/s390/include/asm/qeth.h new file mode 100644 index 00000000..2c7c898c --- /dev/null +++ b/arch/s390/include/asm/qeth.h @@ -0,0 +1,117 @@ +/* + * include/asm-s390/qeth.h + * + * ioctl definitions for qeth driver + * + * Copyright (C) 2004 IBM Corporation + * + * Author(s): Thomas Spatzier <tspat@de.ibm.com> + * + */ +#ifndef __ASM_S390_QETH_IOCTL_H__ +#define __ASM_S390_QETH_IOCTL_H__ +#include <linux/types.h> +#include <linux/ioctl.h> + +#define SIOC_QETH_ARP_SET_NO_ENTRIES (SIOCDEVPRIVATE) +#define SIOC_QETH_ARP_QUERY_INFO (SIOCDEVPRIVATE + 1) +#define SIOC_QETH_ARP_ADD_ENTRY (SIOCDEVPRIVATE + 2) +#define SIOC_QETH_ARP_REMOVE_ENTRY (SIOCDEVPRIVATE + 3) +#define SIOC_QETH_ARP_FLUSH_CACHE 
(SIOCDEVPRIVATE + 4) +#define SIOC_QETH_ADP_SET_SNMP_CONTROL (SIOCDEVPRIVATE + 5) +#define SIOC_QETH_GET_CARD_TYPE (SIOCDEVPRIVATE + 6) +#define SIOC_QETH_QUERY_OAT (SIOCDEVPRIVATE + 7) + +struct qeth_arp_cache_entry { + __u8 macaddr[6]; + __u8 reserved1[2]; + __u8 ipaddr[16]; /* for both IPv4 and IPv6 */ + __u8 reserved2[32]; +} __attribute__ ((packed)); + +enum qeth_arp_ipaddrtype { + QETHARP_IP_ADDR_V4 = 1, + QETHARP_IP_ADDR_V6 = 2, +}; +struct qeth_arp_entrytype { + __u8 mac; + __u8 ip; +} __attribute__((packed)); + +#define QETH_QARP_MEDIASPECIFIC_BYTES 32 +#define QETH_QARP_MACADDRTYPE_BYTES 1 +struct qeth_arp_qi_entry7 { + __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES]; + struct qeth_arp_entrytype type; + __u8 macaddr[6]; + __u8 ipaddr[4]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry7_ipv6 { + __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES]; + struct qeth_arp_entrytype type; + __u8 macaddr[6]; + __u8 ipaddr[16]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry7_short { + struct qeth_arp_entrytype type; + __u8 macaddr[6]; + __u8 ipaddr[4]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry7_short_ipv6 { + struct qeth_arp_entrytype type; + __u8 macaddr[6]; + __u8 ipaddr[16]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry5 { + __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES]; + struct qeth_arp_entrytype type; + __u8 ipaddr[4]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry5_ipv6 { + __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES]; + struct qeth_arp_entrytype type; + __u8 ipaddr[16]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry5_short { + struct qeth_arp_entrytype type; + __u8 ipaddr[4]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry5_short_ipv6 { + struct qeth_arp_entrytype type; + __u8 ipaddr[16]; +} __attribute__((packed)); +/* + * can be set by user if no "media specific information" is wanted + * -> saves a lot of space in user space buffer + */ +#define QETH_QARP_STRIP_ENTRIES 0x8000 
+#define QETH_QARP_WITH_IPV6 0x4000 +#define QETH_QARP_REQUEST_MASK 0x00ff + +/* data sent to user space as result of query arp ioctl */ +#define QETH_QARP_USER_DATA_SIZE 20000 +#define QETH_QARP_MASK_OFFSET 4 +#define QETH_QARP_ENTRIES_OFFSET 6 +struct qeth_arp_query_user_data { + union { + __u32 data_len; /* set by user space program */ + __u32 no_entries; /* set by kernel */ + } u; + __u16 mask_bits; + char *entries; +} __attribute__((packed)); + +struct qeth_query_oat_data { + __u32 command; + __u32 buffer_len; + __u32 response_len; + __u64 ptr; +}; +#endif /* __ASM_S390_QETH_IOCTL_H__ */ diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h new file mode 100644 index 00000000..3d6ad4ad --- /dev/null +++ b/arch/s390/include/asm/reset.h @@ -0,0 +1,21 @@ +/* + * include/asm-s390/reset.h + * + * Copyright IBM Corp. 2006 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#ifndef _ASM_S390_RESET_H +#define _ASM_S390_RESET_H + +#include <linux/list.h> + +struct reset_call { + struct list_head list; + void (*fn)(void); +}; + +extern void register_reset_call(struct reset_call *reset); +extern void unregister_reset_call(struct reset_call *reset); +extern void s390_reset_system(void (*func)(void *), void *data); +#endif /* _ASM_S390_RESET_H */ diff --git a/arch/s390/include/asm/resource.h b/arch/s390/include/asm/resource.h new file mode 100644 index 00000000..366c01de --- /dev/null +++ b/arch/s390/include/asm/resource.h @@ -0,0 +1,15 @@ +/* + * include/asm-s390/resource.h + * + * S390 version + * + * Derived from "include/asm-i386/resources.h" + */ + +#ifndef _S390_RESOURCE_H +#define _S390_RESOURCE_H + +#include <asm-generic/resource.h> + +#endif + diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h new file mode 100644 index 00000000..d0eb4653 --- /dev/null +++ b/arch/s390/include/asm/rwsem.h @@ -0,0 +1,323 @@ +#ifndef _S390_RWSEM_H +#define _S390_RWSEM_H + +/* + * include/asm-s390/rwsem.h + * + * S390 version 
+ * Copyright (C) 2002 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h + */ + +/* + * + * The MSW of the count is the negated number of active writers and waiting + * lockers, and the LSW is the total number of active locks + * + * The lock count is initialized to 0 (no active and no waiting lockers). + * + * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an + * uncontended lock. This can be determined because XADD returns the old value. + * Readers increment by 1 and see a positive value when uncontended, negative + * if there are writers (and maybe) readers waiting (in which case it goes to + * sleep). + * + * The value of WAITING_BIAS supports up to 32766 waiting processes. This can + * be extended to 65534 by manually checking the whole MSW rather than relying + * on the S flag. + * + * The value of ACTIVE_BIAS supports up to 65535 active processes. + * + * This should be totally fair - if anything is waiting, a process that wants a + * lock will go to the back of the queue. When the currently active lock is + * released, if there's a writer at the front of the queue, then that and only + * that will be woken up; if there's a bunch of consecutive readers at the + * front, then they'll all be woken up, but no other readers will be. 
+ */ + +#ifndef _LINUX_RWSEM_H +#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" +#endif + +#ifdef __KERNEL__ + +#ifndef __s390x__ +#define RWSEM_UNLOCKED_VALUE 0x00000000 +#define RWSEM_ACTIVE_BIAS 0x00000001 +#define RWSEM_ACTIVE_MASK 0x0000ffff +#define RWSEM_WAITING_BIAS (-0x00010000) +#else /* __s390x__ */ +#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L +#define RWSEM_ACTIVE_BIAS 0x0000000000000001L +#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL +#define RWSEM_WAITING_BIAS (-0x0000000100000000L) +#endif /* __s390x__ */ +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + +/* + * lock for reading + */ +static inline void __down_read(struct rw_semaphore *sem) +{ + signed long old, new; + + asm volatile( +#ifndef __s390x__ + " l %0,%2\n" + "0: lr %1,%0\n" + " ahi %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* __s390x__ */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " aghi %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* __s390x__ */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS) + : "cc", "memory"); + if (old < 0) + rwsem_down_read_failed(sem); +} + +/* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +static inline int __down_read_trylock(struct rw_semaphore *sem) +{ + signed long old, new; + + asm volatile( +#ifndef __s390x__ + " l %0,%2\n" + "0: ltr %1,%0\n" + " jm 1f\n" + " ahi %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b\n" + "1:" +#else /* __s390x__ */ + " lg %0,%2\n" + "0: ltgr %1,%0\n" + " jm 1f\n" + " aghi %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b\n" + "1:" +#endif /* __s390x__ */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS) + : "cc", "memory"); + return old >= 0 ? 
1 : 0; +} + +/* + * lock for writing + */ +static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +{ + signed long old, new, tmp; + + tmp = RWSEM_ACTIVE_WRITE_BIAS; + asm volatile( +#ifndef __s390x__ + " l %0,%2\n" + "0: lr %1,%0\n" + " a %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* __s390x__ */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " ag %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* __s390x__ */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "m" (tmp) + : "cc", "memory"); + if (old != 0) + rwsem_down_write_failed(sem); +} + +static inline void __down_write(struct rw_semaphore *sem) +{ + __down_write_nested(sem, 0); +} + +/* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +static inline int __down_write_trylock(struct rw_semaphore *sem) +{ + signed long old; + + asm volatile( +#ifndef __s390x__ + " l %0,%1\n" + "0: ltr %0,%0\n" + " jnz 1f\n" + " cs %0,%3,%1\n" + " jl 0b\n" +#else /* __s390x__ */ + " lg %0,%1\n" + "0: ltgr %0,%0\n" + " jnz 1f\n" + " csg %0,%3,%1\n" + " jl 0b\n" +#endif /* __s390x__ */ + "1:" + : "=&d" (old), "=Q" (sem->count) + : "Q" (sem->count), "d" (RWSEM_ACTIVE_WRITE_BIAS) + : "cc", "memory"); + return (old == RWSEM_UNLOCKED_VALUE) ? 
1 : 0; +} + +/* + * unlock after reading + */ +static inline void __up_read(struct rw_semaphore *sem) +{ + signed long old, new; + + asm volatile( +#ifndef __s390x__ + " l %0,%2\n" + "0: lr %1,%0\n" + " ahi %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* __s390x__ */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " aghi %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* __s390x__ */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "i" (-RWSEM_ACTIVE_READ_BIAS) + : "cc", "memory"); + if (new < 0) + if ((new & RWSEM_ACTIVE_MASK) == 0) + rwsem_wake(sem); +} + +/* + * unlock after writing + */ +static inline void __up_write(struct rw_semaphore *sem) +{ + signed long old, new, tmp; + + tmp = -RWSEM_ACTIVE_WRITE_BIAS; + asm volatile( +#ifndef __s390x__ + " l %0,%2\n" + "0: lr %1,%0\n" + " a %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* __s390x__ */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " ag %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* __s390x__ */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "m" (tmp) + : "cc", "memory"); + if (new < 0) + if ((new & RWSEM_ACTIVE_MASK) == 0) + rwsem_wake(sem); +} + +/* + * downgrade write lock to read lock + */ +static inline void __downgrade_write(struct rw_semaphore *sem) +{ + signed long old, new, tmp; + + tmp = -RWSEM_WAITING_BIAS; + asm volatile( +#ifndef __s390x__ + " l %0,%2\n" + "0: lr %1,%0\n" + " a %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* __s390x__ */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " ag %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* __s390x__ */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "m" (tmp) + : "cc", "memory"); + if (new > 1) + rwsem_downgrade_wake(sem); +} + +/* + * implement atomic add functionality + */ +static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) +{ + signed long old, new; + + asm volatile( +#ifndef __s390x__ + " l %0,%2\n" + "0: lr %1,%0\n" + " ar %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* 
__s390x__ */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " agr %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* __s390x__ */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "d" (delta) + : "cc", "memory"); +} + +/* + * implement exchange and add functionality + */ +static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) +{ + signed long old, new; + + asm volatile( +#ifndef __s390x__ + " l %0,%2\n" + "0: lr %1,%0\n" + " ar %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* __s390x__ */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " agr %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* __s390x__ */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "d" (delta) + : "cc", "memory"); + return new; +} + +#endif /* __KERNEL__ */ +#endif /* _S390_RWSEM_H */ diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h new file mode 100644 index 00000000..6d45ef6c --- /dev/null +++ b/arch/s390/include/asm/scatterlist.h @@ -0,0 +1,3 @@ +#include <asm-generic/scatterlist.h> + +#define ARCH_HAS_SG_CHAIN diff --git a/arch/s390/include/asm/schid.h b/arch/s390/include/asm/schid.h new file mode 100644 index 00000000..3e4d401b --- /dev/null +++ b/arch/s390/include/asm/schid.h @@ -0,0 +1,34 @@ +#ifndef ASM_SCHID_H +#define ASM_SCHID_H + +#include <linux/types.h> + +struct subchannel_id { + __u32 cssid : 8; + __u32 : 4; + __u32 m : 1; + __u32 ssid : 2; + __u32 one : 1; + __u32 sch_no : 16; +} __attribute__ ((packed, aligned(4))); + +#ifdef __KERNEL__ +#include <linux/string.h> + +/* Helper function for sane state of pre-allocated subchannel_id. 
*/ +static inline void +init_subchannel_id(struct subchannel_id *schid) +{ + memset(schid, 0, sizeof(struct subchannel_id)); + schid->one = 1; +} + +static inline int +schid_equal(struct subchannel_id *schid1, struct subchannel_id *schid2) +{ + return !memcmp(schid1, schid2, sizeof(struct subchannel_id)); +} + +#endif /* __KERNEL__ */ + +#endif /* ASM_SCHID_H */ diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h new file mode 100644 index 00000000..fed7bee6 --- /dev/null +++ b/arch/s390/include/asm/sclp.h @@ -0,0 +1,58 @@ +/* + * include/asm-s390/sclp.h + * + * Copyright IBM Corp. 2007 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#ifndef _ASM_S390_SCLP_H +#define _ASM_S390_SCLP_H + +#include <linux/types.h> +#include <asm/chpid.h> + +#define SCLP_CHP_INFO_MASK_SIZE 32 + +struct sclp_chp_info { + u8 recognized[SCLP_CHP_INFO_MASK_SIZE]; + u8 standby[SCLP_CHP_INFO_MASK_SIZE]; + u8 configured[SCLP_CHP_INFO_MASK_SIZE]; +}; + +#define LOADPARM_LEN 8 + +struct sclp_ipl_info { + int is_valid; + int has_dump; + char loadparm[LOADPARM_LEN]; +}; + +struct sclp_cpu_entry { + u8 address; + u8 reserved0[13]; + u8 type; + u8 reserved1; +} __attribute__((packed)); + +struct sclp_cpu_info { + unsigned int configured; + unsigned int standby; + unsigned int combined; + int has_cpu_type; + struct sclp_cpu_entry cpu[255]; +}; + +int sclp_get_cpu_info(struct sclp_cpu_info *info); +int sclp_cpu_configure(u8 cpu); +int sclp_cpu_deconfigure(u8 cpu); +void sclp_facilities_detect(void); +unsigned long long sclp_get_rnmax(void); +unsigned long long sclp_get_rzm(void); +int sclp_sdias_blk_count(void); +int sclp_sdias_copy(void *dest, int blk_num, int nr_blks); +int sclp_chp_configure(struct chp_id chpid); +int sclp_chp_deconfigure(struct chp_id chpid); +int sclp_chp_read_info(struct sclp_chp_info *info); +void sclp_get_ipl_info(struct sclp_ipl_info *info); + +#endif /* _ASM_S390_SCLP_H */ diff --git a/arch/s390/include/asm/scsw.h 
b/arch/s390/include/asm/scsw.h new file mode 100644 index 00000000..de389cb5 --- /dev/null +++ b/arch/s390/include/asm/scsw.h @@ -0,0 +1,956 @@ +/* + * Helper functions for scsw access. + * + * Copyright IBM Corp. 2008,2009 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef _ASM_S390_SCSW_H_ +#define _ASM_S390_SCSW_H_ + +#include <linux/types.h> +#include <asm/chsc.h> +#include <asm/cio.h> + +/** + * struct cmd_scsw - command-mode subchannel status word + * @key: subchannel key + * @sctl: suspend control + * @eswf: esw format + * @cc: deferred condition code + * @fmt: format + * @pfch: prefetch + * @isic: initial-status interruption control + * @alcc: address-limit checking control + * @ssi: suppress-suspended interruption + * @zcc: zero condition code + * @ectl: extended control + * @pno: path not operational + * @res: reserved + * @fctl: function control + * @actl: activity control + * @stctl: status control + * @cpa: channel program address + * @dstat: device status + * @cstat: subchannel status + * @count: residual count + */ +struct cmd_scsw { + __u32 key : 4; + __u32 sctl : 1; + __u32 eswf : 1; + __u32 cc : 2; + __u32 fmt : 1; + __u32 pfch : 1; + __u32 isic : 1; + __u32 alcc : 1; + __u32 ssi : 1; + __u32 zcc : 1; + __u32 ectl : 1; + __u32 pno : 1; + __u32 res : 1; + __u32 fctl : 3; + __u32 actl : 7; + __u32 stctl : 5; + __u32 cpa; + __u32 dstat : 8; + __u32 cstat : 8; + __u32 count : 16; +} __attribute__ ((packed)); + +/** + * struct tm_scsw - transport-mode subchannel status word + * @key: subchannel key + * @eswf: esw format + * @cc: deferred condition code + * @fmt: format + * @x: IRB-format control + * @q: interrogate-complete + * @ectl: extended control + * @pno: path not operational + * @fctl: function control + * @actl: activity control + * @stctl: status control + * @tcw: TCW address + * @dstat: device status + * @cstat: subchannel status + * @fcxs: FCX status + * @schxs: subchannel-extended status + */ +struct tm_scsw 
{ + u32 key:4; + u32 :1; + u32 eswf:1; + u32 cc:2; + u32 fmt:3; + u32 x:1; + u32 q:1; + u32 :1; + u32 ectl:1; + u32 pno:1; + u32 :1; + u32 fctl:3; + u32 actl:7; + u32 stctl:5; + u32 tcw; + u32 dstat:8; + u32 cstat:8; + u32 fcxs:8; + u32 schxs:8; +} __attribute__ ((packed)); + +/** + * union scsw - subchannel status word + * @cmd: command-mode SCSW + * @tm: transport-mode SCSW + */ +union scsw { + struct cmd_scsw cmd; + struct tm_scsw tm; +} __attribute__ ((packed)); + +#define SCSW_FCTL_CLEAR_FUNC 0x1 +#define SCSW_FCTL_HALT_FUNC 0x2 +#define SCSW_FCTL_START_FUNC 0x4 + +#define SCSW_ACTL_SUSPENDED 0x1 +#define SCSW_ACTL_DEVACT 0x2 +#define SCSW_ACTL_SCHACT 0x4 +#define SCSW_ACTL_CLEAR_PEND 0x8 +#define SCSW_ACTL_HALT_PEND 0x10 +#define SCSW_ACTL_START_PEND 0x20 +#define SCSW_ACTL_RESUME_PEND 0x40 + +#define SCSW_STCTL_STATUS_PEND 0x1 +#define SCSW_STCTL_SEC_STATUS 0x2 +#define SCSW_STCTL_PRIM_STATUS 0x4 +#define SCSW_STCTL_INTER_STATUS 0x8 +#define SCSW_STCTL_ALERT_STATUS 0x10 + +#define DEV_STAT_ATTENTION 0x80 +#define DEV_STAT_STAT_MOD 0x40 +#define DEV_STAT_CU_END 0x20 +#define DEV_STAT_BUSY 0x10 +#define DEV_STAT_CHN_END 0x08 +#define DEV_STAT_DEV_END 0x04 +#define DEV_STAT_UNIT_CHECK 0x02 +#define DEV_STAT_UNIT_EXCEP 0x01 + +#define SCHN_STAT_PCI 0x80 +#define SCHN_STAT_INCORR_LEN 0x40 +#define SCHN_STAT_PROG_CHECK 0x20 +#define SCHN_STAT_PROT_CHECK 0x10 +#define SCHN_STAT_CHN_DATA_CHK 0x08 +#define SCHN_STAT_CHN_CTRL_CHK 0x04 +#define SCHN_STAT_INTF_CTRL_CHK 0x02 +#define SCHN_STAT_CHAIN_CHECK 0x01 + +/* + * architectured values for first sense byte + */ +#define SNS0_CMD_REJECT 0x80 +#define SNS_CMD_REJECT SNS0_CMD_REJECT /* alias for SNS0_CMD_REJECT */ +#define SNS0_INTERVENTION_REQ 0x40 +#define SNS0_BUS_OUT_CHECK 0x20 +#define SNS0_EQUIPMENT_CHECK 0x10 +#define SNS0_DATA_CHECK 0x08 +#define SNS0_OVERRUN 0x04 +#define SNS0_INCOMPL_DOMAIN 0x01 + +/* + * architectured values for second sense byte + */ +#define SNS1_PERM_ERR 0x80 +#define SNS1_INV_TRACK_FORMAT 0x40 +#define SNS1_EOC 0x20 
+#define SNS1_MESSAGE_TO_OPER 0x10 +#define SNS1_NO_REC_FOUND 0x08 +#define SNS1_FILE_PROTECTED 0x04 +#define SNS1_WRITE_INHIBITED 0x02 +#define SNS1_INPRECISE_END 0x01 + +/* + * architectured values for third sense byte + */ +#define SNS2_REQ_INH_WRITE 0x80 +#define SNS2_CORRECTABLE 0x40 +#define SNS2_FIRST_LOG_ERR 0x20 +#define SNS2_ENV_DATA_PRESENT 0x10 +#define SNS2_INPRECISE_END 0x04 + +/** + * scsw_is_tm - check for transport mode scsw + * @scsw: pointer to scsw + * + * Return non-zero if the specified scsw is a transport mode scsw, zero + * otherwise. + */ +static inline int scsw_is_tm(union scsw *scsw) +{ + return css_general_characteristics.fcx && (scsw->tm.x == 1); +} + +/** + * scsw_key - return scsw key field + * @scsw: pointer to scsw + * + * Return the value of the key field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_key(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.key; + else + return scsw->cmd.key; +} + +/** + * scsw_eswf - return scsw eswf field + * @scsw: pointer to scsw + * + * Return the value of the eswf field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_eswf(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.eswf; + else + return scsw->cmd.eswf; +} + +/** + * scsw_cc - return scsw cc field + * @scsw: pointer to scsw + * + * Return the value of the cc field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_cc(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.cc; + else + return scsw->cmd.cc; +} + +/** + * scsw_ectl - return scsw ectl field + * @scsw: pointer to scsw + * + * Return the value of the ectl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. 
+ */ +static inline u32 scsw_ectl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.ectl; + else + return scsw->cmd.ectl; +} + +/** + * scsw_pno - return scsw pno field + * @scsw: pointer to scsw + * + * Return the value of the pno field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_pno(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.pno; + else + return scsw->cmd.pno; +} + +/** + * scsw_fctl - return scsw fctl field + * @scsw: pointer to scsw + * + * Return the value of the fctl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_fctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.fctl; + else + return scsw->cmd.fctl; +} + +/** + * scsw_actl - return scsw actl field + * @scsw: pointer to scsw + * + * Return the value of the actl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_actl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.actl; + else + return scsw->cmd.actl; +} + +/** + * scsw_stctl - return scsw stctl field + * @scsw: pointer to scsw + * + * Return the value of the stctl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_stctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.stctl; + else + return scsw->cmd.stctl; +} + +/** + * scsw_dstat - return scsw dstat field + * @scsw: pointer to scsw + * + * Return the value of the dstat field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. 
+ */ +static inline u32 scsw_dstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.dstat; + else + return scsw->cmd.dstat; +} + +/** + * scsw_cstat - return scsw cstat field + * @scsw: pointer to scsw + * + * Return the value of the cstat field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_cstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.cstat; + else + return scsw->cmd.cstat; +} + +/** + * scsw_cmd_is_valid_key - check key field validity + * @scsw: pointer to scsw + * + * Return non-zero if the key field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_key(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_sctl - check sctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the sctl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_sctl(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_eswf - check eswf field validity + * @scsw: pointer to scsw + * + * Return non-zero if the eswf field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_eswf(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_cmd_is_valid_cc - check cc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cc field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_cc(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) && + (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_cmd_is_valid_fmt - check fmt field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fmt field of the specified command mode scsw is + * valid, zero otherwise. 
+ */ +static inline int scsw_cmd_is_valid_fmt(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_pfch - check pfch field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pfch field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_pfch(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_isic - check isic field validity + * @scsw: pointer to scsw + * + * Return non-zero if the isic field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_isic(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_alcc - check alcc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the alcc field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_alcc(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_ssi - check ssi field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ssi field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_ssi(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_zcc - check zcc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the zcc field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_zcc(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) && + (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS); +} + +/** + * scsw_cmd_is_valid_ectl - check ectl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ectl field of the specified command mode scsw is + * valid, zero otherwise. 
+ */ +static inline int scsw_cmd_is_valid_ectl(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS); +} + +/** + * scsw_cmd_is_valid_pno - check pno field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pno field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_pno(union scsw *scsw) +{ + return (scsw->cmd.fctl != 0) && + (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) || + ((scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->cmd.actl & SCSW_ACTL_SUSPENDED))); +} + +/** + * scsw_cmd_is_valid_fctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_fctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_cmd_is_valid_actl - check actl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the actl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_actl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_cmd_is_valid_stctl - check stctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the stctl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_stctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_cmd_is_valid_dstat - check dstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the dstat field of the specified command mode scsw is + * valid, zero otherwise. 
+ */ +static inline int scsw_cmd_is_valid_dstat(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->cmd.cc != 3); +} + +/** + * scsw_cmd_is_valid_cstat - check cstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cstat field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_cstat(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->cmd.cc != 3); +} + +/** + * scsw_tm_is_valid_key - check key field validity + * @scsw: pointer to scsw + * + * Return non-zero if the key field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_key(union scsw *scsw) +{ + return (scsw->tm.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_tm_is_valid_eswf - check eswf field validity + * @scsw: pointer to scsw + * + * Return non-zero if the eswf field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_eswf(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_tm_is_valid_cc - check cc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cc field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_cc(union scsw *scsw) +{ + return (scsw->tm.fctl & SCSW_FCTL_START_FUNC) && + (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_tm_is_valid_fmt - check fmt field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fmt field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_fmt(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_x - check x field validity + * @scsw: pointer to scsw + * + * Return non-zero if the x field of the specified transport mode scsw is + * valid, zero otherwise. 
+ */ +static inline int scsw_tm_is_valid_x(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_q - check q field validity + * @scsw: pointer to scsw + * + * Return non-zero if the q field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_q(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_ectl - check ectl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ectl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_ectl(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS); +} + +/** + * scsw_tm_is_valid_pno - check pno field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pno field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_pno(union scsw *scsw) +{ + return (scsw->tm.fctl != 0) && + (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) || + ((scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->tm.actl & SCSW_ACTL_SUSPENDED))); +} + +/** + * scsw_tm_is_valid_fctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_fctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_tm_is_valid_actl - check actl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the actl field of the specified transport mode scsw is + * valid, zero otherwise. 
+ */ +static inline int scsw_tm_is_valid_actl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_tm_is_valid_stctl - check stctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the stctl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_stctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_tm_is_valid_dstat - check dstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the dstat field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_dstat(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->tm.cc != 3); +} + +/** + * scsw_tm_is_valid_cstat - check cstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cstat field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_cstat(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->tm.cc != 3); +} + +/** + * scsw_tm_is_valid_fcxs - check fcxs field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fcxs field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_fcxs(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_schxs - check schxs field validity + * @scsw: pointer to scsw + * + * Return non-zero if the schxs field of the specified transport mode scsw is + * valid, zero otherwise. 
+ */ +static inline int scsw_tm_is_valid_schxs(union scsw *scsw) +{ + return (scsw->tm.cstat & (SCHN_STAT_PROG_CHECK | + SCHN_STAT_INTF_CTRL_CHK | + SCHN_STAT_PROT_CHECK | + SCHN_STAT_CHN_DATA_CHK)); +} + +/** + * scsw_is_valid_actl - check actl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the actl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_actl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_actl(scsw); + else + return scsw_cmd_is_valid_actl(scsw); +} + +/** + * scsw_is_valid_cc - check cc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cc field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_cc(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_cc(scsw); + else + return scsw_cmd_is_valid_cc(scsw); +} + +/** + * scsw_is_valid_cstat - check cstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cstat field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_cstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_cstat(scsw); + else + return scsw_cmd_is_valid_cstat(scsw); +} + +/** + * scsw_is_valid_dstat - check dstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the dstat field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. 
+ */ +static inline int scsw_is_valid_dstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_dstat(scsw); + else + return scsw_cmd_is_valid_dstat(scsw); +} + +/** + * scsw_is_valid_ectl - check ectl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ectl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_ectl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_ectl(scsw); + else + return scsw_cmd_is_valid_ectl(scsw); +} + +/** + * scsw_is_valid_eswf - check eswf field validity + * @scsw: pointer to scsw + * + * Return non-zero if the eswf field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_eswf(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_eswf(scsw); + else + return scsw_cmd_is_valid_eswf(scsw); +} + +/** + * scsw_is_valid_fctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_fctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_fctl(scsw); + else + return scsw_cmd_is_valid_fctl(scsw); +} + +/** + * scsw_is_valid_key - check key field validity + * @scsw: pointer to scsw + * + * Return non-zero if the key field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. 
+ */ +static inline int scsw_is_valid_key(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_key(scsw); + else + return scsw_cmd_is_valid_key(scsw); +} + +/** + * scsw_is_valid_pno - check pno field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pno field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_pno(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_pno(scsw); + else + return scsw_cmd_is_valid_pno(scsw); +} + +/** + * scsw_is_valid_stctl - check stctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the stctl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_stctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_stctl(scsw); + else + return scsw_cmd_is_valid_stctl(scsw); +} + +/** + * scsw_cmd_is_solicited - check for solicited scsw + * @scsw: pointer to scsw + * + * Return non-zero if the command mode scsw indicates that the associated + * status condition is solicited, zero if it is unsolicited. + */ +static inline int scsw_cmd_is_solicited(union scsw *scsw) +{ + return (scsw->cmd.cc != 0) || (scsw->cmd.stctl != + (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS)); +} + +/** + * scsw_tm_is_solicited - check for solicited scsw + * @scsw: pointer to scsw + * + * Return non-zero if the transport mode scsw indicates that the associated + * status condition is solicited, zero if it is unsolicited. 
+ */ +static inline int scsw_tm_is_solicited(union scsw *scsw) +{ + return (scsw->tm.cc != 0) || (scsw->tm.stctl != + (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS)); +} + +/** + * scsw_is_solicited - check for solicited scsw + * @scsw: pointer to scsw + * + * Return non-zero if the transport or command mode scsw indicates that the + * associated status condition is solicited, zero if it is unsolicited. + */ +static inline int scsw_is_solicited(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_solicited(scsw); + else + return scsw_cmd_is_solicited(scsw); +} + +#endif /* _ASM_S390_SCSW_H_ */ diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h new file mode 100644 index 00000000..781a9cf9 --- /dev/null +++ b/arch/s390/include/asm/seccomp.h @@ -0,0 +1,16 @@ +#ifndef _ASM_S390_SECCOMP_H +#define _ASM_S390_SECCOMP_H + +#include <linux/unistd.h> + +#define __NR_seccomp_read __NR_read +#define __NR_seccomp_write __NR_write +#define __NR_seccomp_exit __NR_exit +#define __NR_seccomp_sigreturn __NR_sigreturn + +#define __NR_seccomp_read_32 __NR_read +#define __NR_seccomp_write_32 __NR_write +#define __NR_seccomp_exit_32 __NR_exit +#define __NR_seccomp_sigreturn_32 __NR_sigreturn + +#endif /* _ASM_S390_SECCOMP_H */ diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h new file mode 100644 index 00000000..fbd9116e --- /dev/null +++ b/arch/s390/include/asm/sections.h @@ -0,0 +1,8 @@ +#ifndef _S390_SECTIONS_H +#define _S390_SECTIONS_H + +#include <asm-generic/sections.h> + +extern char _eshared[], _ehead[]; + +#endif diff --git a/arch/s390/include/asm/segment.h b/arch/s390/include/asm/segment.h new file mode 100644 index 00000000..8bfce347 --- /dev/null +++ b/arch/s390/include/asm/segment.h @@ -0,0 +1,4 @@ +#ifndef _ASM_SEGMENT_H +#define _ASM_SEGMENT_H + +#endif diff --git a/arch/s390/include/asm/sembuf.h b/arch/s390/include/asm/sembuf.h new file mode 100644 index 00000000..32626b0c --- /dev/null +++ 
b/arch/s390/include/asm/sembuf.h @@ -0,0 +1,29 @@ +#ifndef _S390_SEMBUF_H +#define _S390_SEMBUF_H + +/* + * The semid64_ds structure for S/390 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem (for !__s390x__) + * - 2 miscellaneous 32-bit values + */ + +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t sem_otime; /* last semop time */ +#ifndef __s390x__ + unsigned long __unused1; +#endif /* ! __s390x__ */ + __kernel_time_t sem_ctime; /* last change time */ +#ifndef __s390x__ + unsigned long __unused2; +#endif /* ! __s390x__ */ + unsigned long sem_nsems; /* no. of semaphores in array */ + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* _S390_SEMBUF_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h new file mode 100644 index 00000000..b21e46e5 --- /dev/null +++ b/arch/s390/include/asm/setup.h @@ -0,0 +1,176 @@ +/* + * include/asm-s390/setup.h + * + * S390 version + * Copyright IBM Corp. 
1999,2010 + */ + +#ifndef _ASM_S390_SETUP_H +#define _ASM_S390_SETUP_H + +#define COMMAND_LINE_SIZE 4096 + +#define ARCH_COMMAND_LINE_SIZE 896 + +#ifdef __KERNEL__ + +#define PARMAREA 0x10400 +#define MEMORY_CHUNKS 256 + +#ifndef __ASSEMBLY__ + +#include <asm/lowcore.h> +#include <asm/types.h> + +#ifndef __s390x__ +#define IPL_DEVICE (*(unsigned long *) (0x10404)) +#define INITRD_START (*(unsigned long *) (0x1040C)) +#define INITRD_SIZE (*(unsigned long *) (0x10414)) +#define OLDMEM_BASE (*(unsigned long *) (0x1041C)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10424)) +#else /* __s390x__ */ +#define IPL_DEVICE (*(unsigned long *) (0x10400)) +#define INITRD_START (*(unsigned long *) (0x10408)) +#define INITRD_SIZE (*(unsigned long *) (0x10410)) +#define OLDMEM_BASE (*(unsigned long *) (0x10418)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10420)) +#endif /* __s390x__ */ +#define COMMAND_LINE ((char *) (0x10480)) + +#define CHUNK_READ_WRITE 0 +#define CHUNK_READ_ONLY 1 +#define CHUNK_OLDMEM 4 +#define CHUNK_CRASHK 5 + +struct mem_chunk { + unsigned long addr; + unsigned long size; + int type; +}; + +extern struct mem_chunk memory_chunk[]; +extern unsigned long real_memory_size; +extern int memory_end_set; +extern unsigned long memory_end; + +void detect_memory_layout(struct mem_chunk chunk[]); +void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, + unsigned long size, int type); + +#define PRIMARY_SPACE_MODE 0 +#define ACCESS_REGISTER_MODE 1 +#define SECONDARY_SPACE_MODE 2 +#define HOME_SPACE_MODE 3 + +extern unsigned int user_mode; + +/* + * Machine features detected in head.S + */ + +#define MACHINE_FLAG_VM (1UL << 0) +#define MACHINE_FLAG_IEEE (1UL << 1) +#define MACHINE_FLAG_CSP (1UL << 3) +#define MACHINE_FLAG_MVPG (1UL << 4) +#define MACHINE_FLAG_DIAG44 (1UL << 5) +#define MACHINE_FLAG_IDTE (1UL << 6) +#define MACHINE_FLAG_DIAG9C (1UL << 7) +#define MACHINE_FLAG_MVCOS (1UL << 8) +#define MACHINE_FLAG_KVM (1UL << 9) +#define 
MACHINE_FLAG_HPAGE (1UL << 10) +#define MACHINE_FLAG_PFMF (1UL << 11) +#define MACHINE_FLAG_LPAR (1UL << 12) +#define MACHINE_FLAG_SPP (1UL << 13) +#define MACHINE_FLAG_TOPOLOGY (1UL << 14) +#define MACHINE_FLAG_STCKF (1UL << 15) + +#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) +#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) +#define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR) + +#define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C) + +#ifndef __s390x__ +#define MACHINE_HAS_IEEE (S390_lowcore.machine_flags & MACHINE_FLAG_IEEE) +#define MACHINE_HAS_CSP (S390_lowcore.machine_flags & MACHINE_FLAG_CSP) +#define MACHINE_HAS_IDTE (0) +#define MACHINE_HAS_DIAG44 (1) +#define MACHINE_HAS_MVPG (S390_lowcore.machine_flags & MACHINE_FLAG_MVPG) +#define MACHINE_HAS_MVCOS (0) +#define MACHINE_HAS_HPAGE (0) +#define MACHINE_HAS_PFMF (0) +#define MACHINE_HAS_SPP (0) +#define MACHINE_HAS_TOPOLOGY (0) +#define MACHINE_HAS_STCKF (0) +#else /* __s390x__ */ +#define MACHINE_HAS_IEEE (1) +#define MACHINE_HAS_CSP (1) +#define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE) +#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44) +#define MACHINE_HAS_MVPG (1) +#define MACHINE_HAS_MVCOS (S390_lowcore.machine_flags & MACHINE_FLAG_MVCOS) +#define MACHINE_HAS_HPAGE (S390_lowcore.machine_flags & MACHINE_FLAG_HPAGE) +#define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) +#define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) +#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) +#define MACHINE_HAS_STCKF (S390_lowcore.machine_flags & MACHINE_FLAG_STCKF) +#endif /* __s390x__ */ + +#define ZFCPDUMP_HSA_SIZE (32UL<<20) +#define ZFCPDUMP_HSA_SIZE_MAX (64UL<<20) + +/* + * Console mode. 
Override with conmode= + */ +extern unsigned int console_mode; +extern unsigned int console_devno; +extern unsigned int console_irq; + +extern char vmhalt_cmd[]; +extern char vmpoff_cmd[]; + +#define CONSOLE_IS_UNDEFINED (console_mode == 0) +#define CONSOLE_IS_SCLP (console_mode == 1) +#define CONSOLE_IS_3215 (console_mode == 2) +#define CONSOLE_IS_3270 (console_mode == 3) +#define SET_CONSOLE_SCLP do { console_mode = 1; } while (0) +#define SET_CONSOLE_3215 do { console_mode = 2; } while (0) +#define SET_CONSOLE_3270 do { console_mode = 3; } while (0) + +#define NSS_NAME_SIZE 8 +extern char kernel_nss_name[]; + +#ifdef CONFIG_PFAULT +extern int pfault_init(void); +extern void pfault_fini(void); +#else /* CONFIG_PFAULT */ +#define pfault_init() ({-1;}) +#define pfault_fini() do { } while (0) +#endif /* CONFIG_PFAULT */ + +extern void cmma_init(void); + +extern void (*_machine_restart)(char *command); +extern void (*_machine_halt)(void); +extern void (*_machine_power_off)(void); + +#else /* __ASSEMBLY__ */ + +#ifndef __s390x__ +#define IPL_DEVICE 0x10404 +#define INITRD_START 0x1040C +#define INITRD_SIZE 0x10414 +#define OLDMEM_BASE 0x1041C +#define OLDMEM_SIZE 0x10424 +#else /* __s390x__ */ +#define IPL_DEVICE 0x10400 +#define INITRD_START 0x10408 +#define INITRD_SIZE 0x10410 +#define OLDMEM_BASE 0x10418 +#define OLDMEM_SIZE 0x10420 +#endif /* __s390x__ */ +#define COMMAND_LINE 0x10480 + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* _ASM_S390_SETUP_H */ diff --git a/arch/s390/include/asm/sfp-machine.h b/arch/s390/include/asm/sfp-machine.h new file mode 100644 index 00000000..4e16aede --- /dev/null +++ b/arch/s390/include/asm/sfp-machine.h @@ -0,0 +1,142 @@ +/* Machine-dependent software floating-point definitions. + S/390 kernel version. + Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com), + Jakub Jelinek (jj@ultra.linux.cz), + David S. 
Miller (davem@redhat.com) and + Peter Maydell (pmaydell@chiark.greenend.org.uk). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifndef _SFP_MACHINE_H +#define _SFP_MACHINE_H + + +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned int +#define _FP_WS_TYPE signed int +#define _FP_I_TYPE int + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 + +/* + * If one NaN is signaling and the other is not, + * we choose that one, otherwise we choose X. 
+ */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) /* OP is not referenced by this implementation */ \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ + { /* X has the quiet bit set and Y does not: take Y */ \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + else \ + { /* every other combination: take X */ \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; /* both branches classify the result as NaN */ \ + } while (0) + +/* Some assembly to speed things up. __FP_FRAC_ADD_3 stores (x2,x1,x0) + (y2,y1,y0) into (r2,r1,r0); word 0 is the least significant one and carries are propagated by hand. */ +#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \ + unsigned int __r2 = (x2) + (y2); /* the high words are added in C */ \ + unsigned int __r1 = (x1); \ + unsigned int __r0 = (x0); \ + asm volatile( \ + " alr %2,%3\n" /* __r0 += y0 */ \ + " brc 12,0f\n" /* no carry: nothing to propagate */ \ + " lhi 0,1\n" /* register 0 = 1, hence the "0" clobber */ \ + " alr %1,0\n" /* carry into __r1 */ \ + " brc 12,0f\n" \ + " alr %0,0\n" /* carry into __r2 */ \ + "0:" \ + : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \ + : "d" (y0), "i" (1) /* NOTE(review): operand %4 is never referenced by the template */ : "cc", "0" ); \ + asm volatile( \ + " alr %1,%2\n" /* __r1 += y1 */ \ + " brc 12,0f\n" \ + " ahi %0,1\n" /* carry into __r2 */ \ + "0:" \ + : "+&d" (__r2), "+&d" (__r1) \ + : "d" (y1) : "cc"); \ + (r2) = __r2; \ + (r1) = __r1; \ + (r0) = __r0; \ +}) + +#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ /* (r2,r1,r0) = (x2,x1,x0) - (y2,y1,y0), borrows propagated by hand */ \ + unsigned int __r2 = (x2) - (y2); /* the high words are subtracted in C */ \ + unsigned int __r1 = (x1); \ + unsigned int __r0 = (x0); \ + asm volatile( \ + " slr %2,%3\n" /* __r0 -= y0 */ \ + " brc 3,0f\n" /* no borrow: nothing to propagate */ \ + " lhi 0,1\n" /* register 0 = 1, hence the "0" clobber */ \ + " slr %1,0\n" /* borrow from __r1 */ \ + " brc 3,0f\n" \ + " slr %0,0\n" /* borrow from __r2 */ \ + "0:" \ + : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \ + : "d" (y0) : "cc", "0"); \ + asm volatile( \ + " slr %1,%2\n" /* __r1 -= y1 */ \ + " brc 3,0f\n" \ + " ahi %0,-1\n" /* borrow from __r2 */ \ + "0:" \ + : "+&d" (__r2), "+&d" (__r1) \ + : "d" (y1) : "cc"); \ + (r2) = __r2; \ + (r1) = __r1; \ + (r0) = __r0; \ +}) + +#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0) /* in-place x -= y */ + +/* Obtain the current rounding mode. */ +#define FP_ROUNDMODE mode + +/* Exception flags.
*/ +#define FP_EX_INVALID 0x800000 +#define FP_EX_DIVZERO 0x400000 +#define FP_EX_OVERFLOW 0x200000 +#define FP_EX_UNDERFLOW 0x100000 +#define FP_EX_INEXACT 0x080000 + +/* We write the results always */ +#define FP_INHIBIT_RESULTS 0 + +#endif diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h new file mode 100644 index 00000000..ca3f8814 --- /dev/null +++ b/arch/s390/include/asm/sfp-util.h @@ -0,0 +1,77 @@ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <asm/byteorder.h> + +#define add_ssaaaa(sh, sl, ah, al, bh, bl) ({ /* (sh,sl) = (ah,al) + (bh,bl), each value held as two 32-bit words */ \ + unsigned int __sh = (ah); \ + unsigned int __sl = (al); \ + asm volatile( \ + " alr %1,%3\n" /* low word += bl */ \ + " brc 12,0f\n" /* no carry: skip the increment */ \ + " ahi %0,1\n" /* carry into the high word */ \ + "0: alr %0,%2" /* high word += bh */ \ + : "+&d" (__sh), "+&d" (__sl) /* early-clobber: both are written before bh is read */ \ + : "d" (bh), "d" (bl) : "cc"); \ + (sh) = __sh; \ + (sl) = __sl; \ +}) + +#define sub_ddmmss(sh, sl, ah, al, bh, bl) ({ /* (sh,sl) = (ah,al) - (bh,bl), each value held as two 32-bit words */ \ + unsigned int __sh = (ah); \ + unsigned int __sl = (al); \ + asm volatile( \ + " slr %1,%3\n" /* low word -= bl */ \ + " brc 3,0f\n" /* no borrow: skip the decrement */ \ + " ahi %0,-1\n" /* borrow from the high word */ \ + "0: slr %0,%2" /* high word -= bh */ \ + : "+&d" (__sh), "+&d" (__sl) /* early-clobber: both are written before bh is read */ \ + : "d" (bh), "d" (bl) : "cc"); \ + (sh) = __sh; \ + (sl) = __sl; \ +}) +
+/* a umul b = a mul b + ((a >= 1<<31) ? b<<32 : 0) + ((b >= 1<<31) ? a<<32 : 0) */ +#define umul_ppmm(wh, wl, u, v) ({ /* (wh,wl) = 64-bit unsigned product of the 32-bit values u and v */ \ + unsigned int __wh = (u); \ + unsigned int __wl = (v); \ + asm volatile( \ + " ltr 1,%0\n" /* register 1 = u, condition code reflects its sign */ \ + " mr 0,%1\n" /* signed multiply into the register pair 0/1 */ \ + " jnm 0f\n" \ + " alr 0,%1\n" /* u had its top bit set: add v to the high word */ \ + "0: ltr %1,%1\n" \ + " jnm 1f\n" \ + " alr 0,%0\n" /* v had its top bit set: add u to the high word */ \ + "1: lr %0,0\n" \ + " lr %1,1\n" \ + : "+d" (__wh), "+d" (__wl) \ + : : "0", "1", "cc"); \ + (wh) = __wh; \ + (wl) = __wl; \ +}) + +#ifdef __s390x__ +#define udiv_qrnnd(q, r, n1, n0, d) /* q = (n1:n0) / d, r = (n1:n0) % d, done with native 64-bit arithmetic */ \ + do { unsigned long __n; \ + unsigned int __d; \ + __n = ((unsigned long)(n1) << 32) + (n0); \ + __d = (d); \ + (q) = __n / __d; \ + (r) = __n % __d; \ + } while (0) +#else +#define udiv_qrnnd(q, r, n1, n0, d) /* q and r are obtained from the out-of-line __udiv_qrnnd() helper */ \ + do { unsigned int __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern unsigned long __udiv_qrnnd (unsigned int *, unsigned int, + unsigned int , unsigned int); +#endif + +#define UDIV_NEEDS_NORMALIZATION 0 + +#define abort() BUG() + +#define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/include/asm/shmbuf.h b/arch/s390/include/asm/shmbuf.h new file mode 100644 index 00000000..eed2e280 --- /dev/null +++ b/arch/s390/include/asm/shmbuf.h @@ -0,0 +1,48 @@ +#ifndef _S390_SHMBUF_H +#define _S390_SHMBUF_H + +/* + * The shmid64_ds structure for S/390 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem (for !__s390x__) + * - 2 miscellaneous 32-bit values + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ +#ifndef __s390x__ + unsigned long __unused1; +#endif /* ! __s390x__ */ + __kernel_time_t shm_dtime; /* last detach time */ +#ifndef __s390x__ + unsigned long __unused2; +#endif /* ! __s390x__ */ + __kernel_time_t shm_ctime; /* last change time */ +#ifndef __s390x__ + unsigned long __unused3; +#endif /* ! 
__s390x__ */ + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. of current attaches */ + unsigned long __unused4; + unsigned long __unused5; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* _S390_SHMBUF_H */ diff --git a/arch/s390/include/asm/shmparam.h b/arch/s390/include/asm/shmparam.h new file mode 100644 index 00000000..c2e0c050 --- /dev/null +++ b/arch/s390/include/asm/shmparam.h @@ -0,0 +1,13 @@ +/* + * include/asm-s390/shmparam.h + * + * S390 version + * + * Derived from "include/asm-i386/shmparam.h" + */ +#ifndef _ASM_S390_SHMPARAM_H +#define _ASM_S390_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* _ASM_S390_SHMPARAM_H */ diff --git a/arch/s390/include/asm/sigcontext.h b/arch/s390/include/asm/sigcontext.h new file mode 100644 index 00000000..aeb6e0b1 --- /dev/null +++ b/arch/s390/include/asm/sigcontext.h @@ -0,0 +1,71 @@ +/* + * include/asm-s390/sigcontext.h + * + * S390 version + * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + */ + +#ifndef _ASM_S390_SIGCONTEXT_H +#define _ASM_S390_SIGCONTEXT_H + +#include <linux/compiler.h> + +#define __NUM_GPRS 16 +#define __NUM_FPRS 16 +#define __NUM_ACRS 16 + +#ifndef __s390x__ + +/* Has to be at least _NSIG_WORDS from asm/signal.h */ +#define _SIGCONTEXT_NSIG 64 +#define _SIGCONTEXT_NSIG_BPW 32 +/* Size of stack frame allocated when calling signal handler. */ +#define __SIGNAL_FRAMESIZE 96 + +#else /* __s390x__ */ + +/* Has to be at least _NSIG_WORDS from asm/signal.h */ +#define _SIGCONTEXT_NSIG 64 +#define _SIGCONTEXT_NSIG_BPW 64 +/* Size of stack frame allocated when calling signal handler. 
*/ +#define __SIGNAL_FRAMESIZE 160 + +#endif /* __s390x__ */ + +#define _SIGCONTEXT_NSIG_WORDS (_SIGCONTEXT_NSIG / _SIGCONTEXT_NSIG_BPW) +#define _SIGMASK_COPY_SIZE (sizeof(unsigned long)*_SIGCONTEXT_NSIG_WORDS) + +typedef struct +{ + unsigned long mask; + unsigned long addr; +} __attribute__ ((aligned(8))) _psw_t; + +typedef struct +{ + _psw_t psw; + unsigned long gprs[__NUM_GPRS]; + unsigned int acrs[__NUM_ACRS]; +} _s390_regs_common; + +typedef struct +{ + unsigned int fpc; + double fprs[__NUM_FPRS]; +} _s390_fp_regs; + +typedef struct +{ + _s390_regs_common regs; + _s390_fp_regs fpregs; +} _sigregs; + +struct sigcontext +{ + unsigned long oldmask[_SIGCONTEXT_NSIG_WORDS]; + _sigregs __user *sregs; +}; + + +#endif + diff --git a/arch/s390/include/asm/siginfo.h b/arch/s390/include/asm/siginfo.h new file mode 100644 index 00000000..e0ff1ab0 --- /dev/null +++ b/arch/s390/include/asm/siginfo.h @@ -0,0 +1,18 @@ +/* + * include/asm-s390/siginfo.h + * + * S390 version + * + * Derived from "include/asm-i386/siginfo.h" + */ + +#ifndef _S390_SIGINFO_H +#define _S390_SIGINFO_H + +#ifdef __s390x__ +#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +#endif + +#include <asm-generic/siginfo.h> + +#endif diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h new file mode 100644 index 00000000..cdf5cb2f --- /dev/null +++ b/arch/s390/include/asm/signal.h @@ -0,0 +1,172 @@ +/* + * include/asm-s390/signal.h + * + * S390 version + * + * Derived from "include/asm-i386/signal.h" + */ + +#ifndef _ASMS390_SIGNAL_H +#define _ASMS390_SIGNAL_H + +#include <linux/types.h> +#include <linux/time.h> + +/* Avoid too many header ordering problems. */ +struct siginfo; +struct pt_regs; + +#ifdef __KERNEL__ +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. 
*/ +#include <asm/sigcontext.h> +#define _NSIG _SIGCONTEXT_NSIG +#define _NSIG_BPW _SIGCONTEXT_NSIG_BPW +#define _NSIG_WORDS _SIGCONTEXT_NSIG_WORDS + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +#else +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. 
+ */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 +#define SA_SIGINFO 0x00000004 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +#define SA_RESTORER 0x04000000 + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#include <asm-generic/signal-defs.h> + +#ifdef __KERNEL__ +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + +struct sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; + void (*sa_restorer)(void); + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +}; + +#define ptrace_signal_deliver(regs, cookie) do { } while (0) + +#else +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; +#ifndef __s390x__ /* lovely */ + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +#else /* __s390x__ */ + unsigned long sa_flags; + void (*sa_restorer)(void); + sigset_t sa_mask; +#endif /* __s390x__ */ +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + + +#endif diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h new file mode 100644 index 00000000..c77c6de6 --- /dev/null +++ b/arch/s390/include/asm/smp.h @@ -0,0 +1,64 @@ +/* + * Copyright IBM Corp. 
1999,2012 + * Author(s): Denis Joseph Barrow, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +#ifdef CONFIG_SMP + +#include <asm/lowcore.h> + +#define raw_smp_processor_id() (S390_lowcore.cpu_nr) + +extern struct mutex smp_cpu_state_mutex; +extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; + +extern int __cpu_up(unsigned int cpu); + +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +extern void smp_call_online_cpu(void (*func)(void *), void *); +extern void smp_call_ipl_cpu(void (*func)(void *), void *); + +extern int smp_find_processor_id(u16 address); +extern int smp_store_status(int cpu); +extern int smp_vcpu_scheduled(int cpu); +extern void smp_yield_cpu(int cpu); +extern void smp_yield(void); +extern void smp_stop_cpu(void); + +#else /* !CONFIG_SMP: inline stubs that call the callback directly or return a constant */ + +static inline void smp_call_ipl_cpu(void (*func)(void *), void *data) +{ + func(data); +} + +static inline void smp_call_online_cpu(void (*func)(void *), void *data) +{ + func(data); +} + +static inline int smp_find_processor_id(u16 address) { return 0; } /* same parameter type as the SMP prototype above */ +static inline int smp_vcpu_scheduled(int cpu) { return 1; } +static inline void smp_yield_cpu(int cpu) { } +static inline void smp_yield(void) { } +static inline void smp_stop_cpu(void) { } + +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_HOTPLUG_CPU +extern int smp_rescan_cpus(void); +extern void __noreturn cpu_die(void); +extern void __cpu_die(unsigned int cpu); +extern int __cpu_disable(void); +#else +static inline int smp_rescan_cpus(void) { return 0; } +static inline void cpu_die(void) { } +#endif + +#endif /* __ASM_SMP_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h new file mode 100644 index 00000000..c91b7209 --- /dev/null +++ b/arch/s390/include/asm/socket.h @@ -0,0 +1,80 @@ +/* + * include/asm-s390/socket.h + * + * S390 version + * 
+ * Derived from "include/asm-i386/socket.h" + */ + +#ifndef _ASM_SOCKET_H +#define _ASM_SOCKET_H + +#include <asm/sockios.h> + +/* For setsockopt(2) */ +#define SOL_SOCKET 1 + +#define SO_DEBUG 1 +#define SO_REUSEADDR 2 +#define SO_TYPE 3 +#define SO_ERROR 4 +#define SO_DONTROUTE 5 +#define SO_BROADCAST 6 +#define SO_SNDBUF 7 +#define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 +#define SO_KEEPALIVE 9 +#define SO_OOBINLINE 10 +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_LINGER 13 +#define SO_BSDCOMPAT 14 +/* To add :#define SO_REUSEPORT 15 */ +#define SO_PASSCRED 16 +#define SO_PEERCRED 17 +#define SO_RCVLOWAT 18 +#define SO_SNDLOWAT 19 +#define SO_RCVTIMEO 20 +#define SO_SNDTIMEO 21 + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 22 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define SO_SECURITY_ENCRYPTION_NETWORK 24 + +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_ACCEPTCONN 30 + +#define SO_PEERSEC 31 +#define SO_PASSSEC 34 +#define SO_TIMESTAMPNS 35 +#define SCM_TIMESTAMPNS SO_TIMESTAMPNS + +#define SO_MARK 36 + +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + +#define SO_PROTOCOL 38 +#define SO_DOMAIN 39 + +#define SO_RXQ_OVFL 40 + +#define SO_WIFI_STATUS 41 +#define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 + +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + +#endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/asm/sockios.h b/arch/s390/include/asm/sockios.h new file mode 100644 index 00000000..6f60eee7 --- /dev/null +++ b/arch/s390/include/asm/sockios.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_SOCKIOS_H +#define _ASM_S390_SOCKIOS_H + +#include <asm-generic/sockios.h> + +#endif diff --git a/arch/s390/include/asm/sparsemem.h 
b/arch/s390/include/asm/sparsemem.h new file mode 100644 index 00000000..0fb34027 --- /dev/null +++ b/arch/s390/include/asm/sparsemem.h @@ -0,0 +1,18 @@ +#ifndef _ASM_S390_SPARSEMEM_H +#define _ASM_S390_SPARSEMEM_H + +#ifdef CONFIG_64BIT + +#define SECTION_SIZE_BITS 28 +#define MAX_PHYSADDR_BITS 46 +#define MAX_PHYSMEM_BITS 46 + +#else + +#define SECTION_SIZE_BITS 25 +#define MAX_PHYSADDR_BITS 31 +#define MAX_PHYSMEM_BITS 31 + +#endif /* CONFIG_64BIT */ + +#endif /* _ASM_S390_SPARSEMEM_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h new file mode 100644 index 00000000..fd94dfec --- /dev/null +++ b/arch/s390/include/asm/spinlock.h @@ -0,0 +1,178 @@ +/* + * include/asm-s390/spinlock.h + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/spinlock.h" + */ + +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#include <linux/smp.h> + +extern int spin_retry; + +static inline int +_raw_compare_and_swap(volatile unsigned int *lock, + unsigned int old, unsigned int new) +{ + asm volatile( + " cs %0,%3,%1" + : "=d" (old), "=Q" (*lock) + : "0" (old), "d" (new), "Q" (*lock) + : "cc", "memory" ); + return old; +} + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. 
+ * + * (the type definitions are in asm/spinlock_types.h) + */ + +#define arch_spin_is_locked(x) ((x)->owner_cpu != 0) +#define arch_spin_unlock_wait(lock) \ + do { while (arch_spin_is_locked(lock)) \ + arch_spin_relax(lock); } while (0) + +extern void arch_spin_lock_wait(arch_spinlock_t *); +extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); +extern int arch_spin_trylock_retry(arch_spinlock_t *); +extern void arch_spin_relax(arch_spinlock_t *lock); + +static inline void arch_spin_lock(arch_spinlock_t *lp) +{ + int old; + + old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); + if (likely(old == 0)) + return; + arch_spin_lock_wait(lp); +} + +static inline void arch_spin_lock_flags(arch_spinlock_t *lp, + unsigned long flags) +{ + int old; + + old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); + if (likely(old == 0)) + return; + arch_spin_lock_wait_flags(lp, flags); +} + +static inline int arch_spin_trylock(arch_spinlock_t *lp) +{ + int old; + + old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); + if (likely(old == 0)) + return 1; + return arch_spin_trylock_retry(lp); +} + +static inline void arch_spin_unlock(arch_spinlock_t *lp) +{ + _raw_compare_and_swap(&lp->owner_cpu, lp->owner_cpu, 0); +} + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + */ + +/** + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +#define arch_read_can_lock(x) ((int)(x)->lock >= 0) + +/** + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. 
+ */ +#define arch_write_can_lock(x) ((x)->lock == 0) + +extern void _raw_read_lock_wait(arch_rwlock_t *lp); +extern void _raw_read_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); +extern int _raw_read_trylock_retry(arch_rwlock_t *lp); +extern void _raw_write_lock_wait(arch_rwlock_t *lp); +extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); +extern int _raw_write_trylock_retry(arch_rwlock_t *lp); + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + unsigned int old; + old = rw->lock & 0x7fffffffU; + if (_raw_compare_and_swap(&rw->lock, old, old + 1) != old) + _raw_read_lock_wait(rw); +} + +static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags) +{ + unsigned int old; + old = rw->lock & 0x7fffffffU; + if (_raw_compare_and_swap(&rw->lock, old, old + 1) != old) + _raw_read_lock_wait_flags(rw, flags); +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned int old, cmp; + + old = rw->lock; + do { + cmp = old; + old = _raw_compare_and_swap(&rw->lock, old, old - 1); + } while (cmp != old); +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0)) + _raw_write_lock_wait(rw); +} + +static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) +{ + if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0)) + _raw_write_lock_wait_flags(rw, flags); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + _raw_compare_and_swap(&rw->lock, 0x80000000, 0); +} + +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + unsigned int old; + old = rw->lock & 0x7fffffffU; + if (likely(_raw_compare_and_swap(&rw->lock, old, old + 1) == old)) + return 1; + return _raw_read_trylock_retry(rw); +} + +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + if (likely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)) + return 1; + return _raw_write_trylock_retry(rw); +} + +#define 
arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() + +#endif /* __ASM_SPINLOCK_H */ diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h new file mode 100644 index 00000000..9c76656a --- /dev/null +++ b/arch/s390/include/asm/spinlock_types.h @@ -0,0 +1,20 @@ +#ifndef __ASM_SPINLOCK_TYPES_H +#define __ASM_SPINLOCK_TYPES_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +# error "please don't include this file directly" +#endif + +typedef struct { + volatile unsigned int owner_cpu; +} __attribute__ ((aligned (4))) arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } + +typedef struct { + volatile unsigned int lock; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0 } + +#endif diff --git a/arch/s390/include/asm/stat.h b/arch/s390/include/asm/stat.h new file mode 100644 index 00000000..d92959ee --- /dev/null +++ b/arch/s390/include/asm/stat.h @@ -0,0 +1,105 @@ +/* + * include/asm-s390/stat.h + * + * S390 version + * + * Derived from "include/asm-i386/stat.h" + */ + +#ifndef _S390_STAT_H +#define _S390_STAT_H + +#ifndef __s390x__ +struct __old_kernel_stat { + unsigned short st_dev; + unsigned short st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; +}; + +struct stat { + unsigned short st_dev; + unsigned short __pad1; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + unsigned short __pad2; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused4; + unsigned long __unused5; +}; + +/* This matches 
struct stat64 in glibc2.1, hence the absolutely + * insane amounts of padding around dev_t's. + */ +struct stat64 { + unsigned long long st_dev; + unsigned int __pad1; +#define STAT64_HAS_BROKEN_ST_INO 1 + unsigned long __st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned long st_uid; + unsigned long st_gid; + unsigned long long st_rdev; + unsigned int __pad3; + long long st_size; + unsigned long st_blksize; + unsigned char __pad4[4]; + unsigned long __pad5; /* future possible st_blocks high bits */ + unsigned long st_blocks; /* Number 512-byte blocks allocated. */ + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; /* will be high 32 bits of ctime someday */ + unsigned long long st_ino; +}; + +#else /* __s390x__ */ + +struct stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned int st_mode; + unsigned int st_uid; + unsigned int st_gid; + unsigned int __pad1; + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long st_blksize; + long st_blocks; + unsigned long __unused[3]; +}; + +#endif /* __s390x__ */ + +#define STAT_HAVE_NSEC 1 + +#endif diff --git a/arch/s390/include/asm/statfs.h b/arch/s390/include/asm/statfs.h new file mode 100644 index 00000000..3be7fbd4 --- /dev/null +++ b/arch/s390/include/asm/statfs.h @@ -0,0 +1,71 @@ +/* + * include/asm-s390/statfs.h + * + * S390 version + * + * Derived from "include/asm-i386/statfs.h" + */ + +#ifndef _S390_STATFS_H +#define _S390_STATFS_H + +#ifndef __s390x__ +#include <asm-generic/statfs.h> +#else +/* + * We can't use <asm-generic/statfs.h> because in 64-bit mode + * we mix ints of different sizes in our struct statfs. 
+ */ + +#ifndef __KERNEL_STRICT_NAMES +#include <linux/types.h> +typedef __kernel_fsid_t fsid_t; +#endif + +struct statfs { + int f_type; + int f_bsize; + long f_blocks; + long f_bfree; + long f_bavail; + long f_files; + long f_ffree; + __kernel_fsid_t f_fsid; + int f_namelen; + int f_frsize; + int f_flags; + int f_spare[4]; +}; + +struct statfs64 { + int f_type; + int f_bsize; + long f_blocks; + long f_bfree; + long f_bavail; + long f_files; + long f_ffree; + __kernel_fsid_t f_fsid; + int f_namelen; + int f_frsize; + int f_flags; + int f_spare[4]; +}; + +struct compat_statfs64 { + __u32 f_type; + __u32 f_bsize; + __u64 f_blocks; + __u64 f_bfree; + __u64 f_bavail; + __u64 f_files; + __u64 f_ffree; + __kernel_fsid_t f_fsid; + __u32 f_namelen; + __u32 f_frsize; + __u32 f_flags; + __u32 f_spare[4]; +}; + +#endif /* __s390x__ */ +#endif diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h new file mode 100644 index 00000000..cd0241db --- /dev/null +++ b/arch/s390/include/asm/string.h @@ -0,0 +1,157 @@ +/* + * include/asm-s390/string.h + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + */ + +#ifndef _S390_STRING_H_ +#define _S390_STRING_H_ + +#ifdef __KERNEL__ + +#ifndef _LINUX_TYPES_H +#include <linux/types.h> +#endif + +#define __HAVE_ARCH_MEMCHR /* inline & arch function */ +#define __HAVE_ARCH_MEMCMP /* arch function */ +#define __HAVE_ARCH_MEMCPY /* gcc builtin & arch function */ +#define __HAVE_ARCH_MEMSCAN /* inline & arch function */ +#define __HAVE_ARCH_MEMSET /* gcc builtin & arch function */ +#define __HAVE_ARCH_STRCAT /* inline & arch function */ +#define __HAVE_ARCH_STRCMP /* arch function */ +#define __HAVE_ARCH_STRCPY /* inline & arch function */ +#define __HAVE_ARCH_STRLCAT /* arch function */ +#define __HAVE_ARCH_STRLCPY /* arch function */ +#define __HAVE_ARCH_STRLEN /* inline & arch function */ +#define __HAVE_ARCH_STRNCAT 
/* arch function */ +#define __HAVE_ARCH_STRNCPY /* arch function */ +#define __HAVE_ARCH_STRNLEN /* inline & arch function */ +#define __HAVE_ARCH_STRRCHR /* arch function */ +#define __HAVE_ARCH_STRSTR /* arch function */ + +/* Prototypes for non-inlined arch strings functions. */ +extern int memcmp(const void *, const void *, size_t); +extern void *memcpy(void *, const void *, size_t); +extern void *memset(void *, int, size_t); +extern int strcmp(const char *,const char *); +extern size_t strlcat(char *, const char *, size_t); +extern size_t strlcpy(char *, const char *, size_t); +extern char *strncat(char *, const char *, size_t); +extern char *strncpy(char *, const char *, size_t); +extern char *strrchr(const char *, int); +extern char *strstr(const char *, const char *); + +#undef __HAVE_ARCH_MEMMOVE +#undef __HAVE_ARCH_STRCHR +#undef __HAVE_ARCH_STRNCHR +#undef __HAVE_ARCH_STRNCMP +#undef __HAVE_ARCH_STRNICMP +#undef __HAVE_ARCH_STRPBRK +#undef __HAVE_ARCH_STRSEP +#undef __HAVE_ARCH_STRSPN + +#if !defined(IN_ARCH_STRING_C) + +static inline void *memchr(const void * s, int c, size_t n) +{ + register int r0 asm("0") = (char) c; + const void *ret = s + n; + + asm volatile( + "0: srst %0,%1\n" + " jo 0b\n" + " jl 1f\n" + " la %0,0\n" + "1:" + : "+a" (ret), "+&a" (s) : "d" (r0) : "cc"); + return (void *) ret; +} + +static inline void *memscan(void *s, int c, size_t n) +{ + register int r0 asm("0") = (char) c; + const void *ret = s + n; + + asm volatile( + "0: srst %0,%1\n" + " jo 0b\n" + : "+a" (ret), "+&a" (s) : "d" (r0) : "cc"); + return (void *) ret; +} + +static inline char *strcat(char *dst, const char *src) +{ + register int r0 asm("0") = 0; + unsigned long dummy; + char *ret = dst; + + asm volatile( + "0: srst %0,%1\n" + " jo 0b\n" + "1: mvst %0,%2\n" + " jo 1b" + : "=&a" (dummy), "+a" (dst), "+a" (src) + : "d" (r0), "0" (0) : "cc", "memory" ); + return ret; +} + +static inline char *strcpy(char *dst, const char *src) +{ +#if __GNUC__ < 4 + register int r0 
asm("0") = 0; + char *ret = dst; + + asm volatile( + "0: mvst %0,%1\n" + " jo 0b" + : "+&a" (dst), "+&a" (src) : "d" (r0) + : "cc", "memory"); + return ret; +#else + return __builtin_strcpy(dst, src); +#endif +} + +static inline size_t strlen(const char *s) /* pre-GCC-4 builds use the srst loop, newer ones the compiler builtin */ +{ +#if __GNUC__ < 4 + register unsigned long r0 asm("0") = 0; /* pinned to register 0 and also passed as the first srst operand */ + const char *tmp = s; + + asm volatile( + "0: srst %0,%1\n" + " jo 0b" /* repeat srst while condition code 3 is set */ + : "+d" (r0), "+a" (tmp) : : "cc"); + return r0 - (unsigned long) s; +#else + return __builtin_strlen(s); +#endif +} + +static inline size_t strnlen(const char * s, size_t n) /* the search is bounded by end = s + n */ +{ + register int r0 asm("0") = 0; /* pinned to register 0, value 0 */ + const char *tmp = s; + const char *end = s + n; + + asm volatile( + "0: srst %0,%1\n" + " jo 0b" + : "+a" (end), "+a" (tmp) : "d" (r0) : "cc"); + return end - s; /* NOTE(review): relies on srst moving end to the terminator when one is found - confirm */ +} +#else /* IN_ARCH_STRING_C */ +void *memchr(const void * s, int c, size_t n); +void *memscan(void *s, int c, size_t n); +char *strcat(char *dst, const char *src); +char *strcpy(char *dst, const char *src); +size_t strlen(const char *s); +size_t strnlen(const char * s, size_t n); +#endif /* !IN_ARCH_STRING_C */ + +#endif /* __KERNEL__ */ + +#endif /* _S390_STRING_H_ */ diff --git a/arch/s390/include/asm/swab.h b/arch/s390/include/asm/swab.h new file mode 100644 index 00000000..a3e4ebb3 --- /dev/null +++ b/arch/s390/include/asm/swab.h @@ -0,0 +1,91 @@ +#ifndef _S390_SWAB_H +#define _S390_SWAB_H + +/* + * include/asm-s390/swab.h + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#include <linux/types.h> + +#ifndef __s390x__ +# define __SWAB_64_THRU_32__ +#endif + +#ifdef __s390x__ +static inline __u64 __arch_swab64p(const __u64 *x) +{ + __u64 result; + + asm volatile("lrvg %0,%1" : "=d" (result) : "m" (*x)); + return result; +} +#define __arch_swab64p __arch_swab64p + +static inline __u64 __arch_swab64(__u64 x) +{ + __u64 result; + + asm volatile("lrvgr %0,%1" : "=d" (result) : "d" (x)); + return 
result; +} +#define __arch_swab64 __arch_swab64 + +static inline void __arch_swab64s(__u64 *x) +{ + *x = __arch_swab64p(x); +} +#define __arch_swab64s __arch_swab64s +#endif /* __s390x__ */ + +static inline __u32 __arch_swab32p(const __u32 *x) +{ + __u32 result; + + asm volatile( +#ifndef __s390x__ + " icm %0,8,%O1+3(%R1)\n" + " icm %0,4,%O1+2(%R1)\n" + " icm %0,2,%O1+1(%R1)\n" + " ic %0,%1" + : "=&d" (result) : "Q" (*x) : "cc"); +#else /* __s390x__ */ + " lrv %0,%1" + : "=d" (result) : "m" (*x)); +#endif /* __s390x__ */ + return result; +} +#define __arch_swab32p __arch_swab32p + +#ifdef __s390x__ +static inline __u32 __arch_swab32(__u32 x) +{ + __u32 result; + + asm volatile("lrvr %0,%1" : "=d" (result) : "d" (x)); + return result; +} +#define __arch_swab32 __arch_swab32 +#endif /* __s390x__ */ + +static inline __u16 __arch_swab16p(const __u16 *x) +{ + __u16 result; + + asm volatile( +#ifndef __s390x__ + " icm %0,2,%O1+1(%R1)\n" + " ic %0,%1\n" + : "=&d" (result) : "Q" (*x) : "cc"); +#else /* __s390x__ */ + " lrvh %0,%1" + : "=d" (result) : "m" (*x)); +#endif /* __s390x__ */ + return result; +} +#define __arch_swab16p __arch_swab16p + +#endif /* _S390_SWAB_H */ diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h new file mode 100644 index 00000000..f223068b --- /dev/null +++ b/arch/s390/include/asm/switch_to.h @@ -0,0 +1,100 @@ +/* + * Copyright IBM Corp. 
1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_SWITCH_TO_H +#define __ASM_SWITCH_TO_H + +#include <linux/thread_info.h> + +extern struct task_struct *__switch_to(void *, void *); +extern void update_per_regs(struct task_struct *task); + +static inline void save_fp_regs(s390_fp_regs *fpregs) +{ + asm volatile( + " std 0,%O0+8(%R0)\n" + " std 2,%O0+24(%R0)\n" + " std 4,%O0+40(%R0)\n" + " std 6,%O0+56(%R0)" + : "=Q" (*fpregs) : "Q" (*fpregs)); + if (!MACHINE_HAS_IEEE) + return; + asm volatile( + " stfpc %0\n" + " std 1,%O0+16(%R0)\n" + " std 3,%O0+32(%R0)\n" + " std 5,%O0+48(%R0)\n" + " std 7,%O0+64(%R0)\n" + " std 8,%O0+72(%R0)\n" + " std 9,%O0+80(%R0)\n" + " std 10,%O0+88(%R0)\n" + " std 11,%O0+96(%R0)\n" + " std 12,%O0+104(%R0)\n" + " std 13,%O0+112(%R0)\n" + " std 14,%O0+120(%R0)\n" + " std 15,%O0+128(%R0)\n" + : "=Q" (*fpregs) : "Q" (*fpregs)); +} + +static inline void restore_fp_regs(s390_fp_regs *fpregs) +{ + asm volatile( + " ld 0,%O0+8(%R0)\n" + " ld 2,%O0+24(%R0)\n" + " ld 4,%O0+40(%R0)\n" + " ld 6,%O0+56(%R0)" + : : "Q" (*fpregs)); + if (!MACHINE_HAS_IEEE) + return; + asm volatile( + " lfpc %0\n" + " ld 1,%O0+16(%R0)\n" + " ld 3,%O0+32(%R0)\n" + " ld 5,%O0+48(%R0)\n" + " ld 7,%O0+64(%R0)\n" + " ld 8,%O0+72(%R0)\n" + " ld 9,%O0+80(%R0)\n" + " ld 10,%O0+88(%R0)\n" + " ld 11,%O0+96(%R0)\n" + " ld 12,%O0+104(%R0)\n" + " ld 13,%O0+112(%R0)\n" + " ld 14,%O0+120(%R0)\n" + " ld 15,%O0+128(%R0)\n" + : : "Q" (*fpregs)); +} + +static inline void save_access_regs(unsigned int *acrs) +{ + asm volatile("stam 0,15,%0" : "=Q" (*acrs)); +} + +static inline void restore_access_regs(unsigned int *acrs) +{ + asm volatile("lam 0,15,%0" : : "Q" (*acrs)); +} + +#define switch_to(prev,next,last) do { \ + if (prev->mm) { \ + save_fp_regs(&prev->thread.fp_regs); \ + save_access_regs(&prev->thread.acrs[0]); \ + } \ + if (next->mm) { \ + restore_fp_regs(&next->thread.fp_regs); \ + restore_access_regs(&next->thread.acrs[0]); \ + 
update_per_regs(next); \ + } \ + prev = __switch_to(prev,next); \ +} while (0) + +extern void account_vtime(struct task_struct *, struct task_struct *); +extern void account_tick_vtime(struct task_struct *); + +#define finish_arch_switch(prev) do { \ + set_fs(current->thread.mm_segment); \ + account_vtime(prev, current); \ +} while (0) + +#endif /* __ASM_SWITCH_TO_H */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h new file mode 100644 index 00000000..fb214dd9 --- /dev/null +++ b/arch/s390/include/asm/syscall.h @@ -0,0 +1,90 @@ +/* + * Access to user system call parameters and results + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ + +#ifndef _ASM_SYSCALL_H +#define _ASM_SYSCALL_H 1 + +#include <linux/sched.h> +#include <linux/err.h> +#include <asm/ptrace.h> + +/* + * The syscall table always contains 32 bit pointers since we know that the + * address of the function to be called is (way) below 4GB. So the "int" + * type here is what we want [need] for both 32 bit and 64 bit systems. + */ +extern const unsigned int sys_call_table[]; + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + return test_tsk_thread_flag(task, TIF_SYSCALL) ? + (regs->int_code & 0xffff) : -1; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->gprs[2] = regs->orig_gpr2; +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return IS_ERR_VALUE(regs->gprs[2]) ? 
regs->gprs[2] : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->gprs[2]; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->gprs[2] = error ? -error : val; +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + unsigned long mask = -1UL; + + BUG_ON(i + n > 6); +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) + mask = 0xffffffff; +#endif + while (n-- > 0) + if (i + n > 0) + args[n] = regs->gprs[2 + i + n] & mask; + if (i == 0) + args[0] = regs->orig_gpr2 & mask; +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + BUG_ON(i + n > 6); + while (n-- > 0) + if (i + n > 0) + regs->gprs[2 + i + n] = args[n]; + if (i == 0) + regs->orig_gpr2 = args[0]; +} + +#endif /* _ASM_SYSCALL_H */ diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h new file mode 100644 index 00000000..79d3d6e2 --- /dev/null +++ b/arch/s390/include/asm/sysinfo.h @@ -0,0 +1,172 @@ +/* + * definition for store system information stsi + * + * Copyright IBM Corp. 2001,2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Ulrich Weigand <weigand@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#ifndef __ASM_S390_SYSINFO_H +#define __ASM_S390_SYSINFO_H + +#include <asm/bitsperlong.h> + +struct sysinfo_1_1_1 { + unsigned short :16; + unsigned char ccr; + unsigned char cai; + char reserved_0[28]; + char manufacturer[16]; + char type[4]; + char reserved_1[12]; + char model_capacity[16]; + char sequence[16]; + char plant[4]; + char model[16]; + char model_perm_cap[16]; + char model_temp_cap[16]; + char model_cap_rating[4]; + char model_perm_cap_rating[4]; + char model_temp_cap_rating[4]; +}; + +struct sysinfo_1_2_1 { + char reserved_0[80]; + char sequence[16]; + char plant[4]; + char reserved_1[2]; + unsigned short cpu_address; +}; + +struct sysinfo_1_2_2 { + char format; + char reserved_0[1]; + unsigned short acc_offset; + char reserved_1[24]; + unsigned int secondary_capability; + unsigned int capability; + unsigned short cpus_total; + unsigned short cpus_configured; + unsigned short cpus_standby; + unsigned short cpus_reserved; + unsigned short adjustment[0]; +}; + +struct sysinfo_1_2_2_extension { + unsigned int alt_capability; + unsigned short alt_adjustment[0]; +}; + +struct sysinfo_2_2_1 { + char reserved_0[80]; + char sequence[16]; + char plant[4]; + unsigned short cpu_id; + unsigned short cpu_address; +}; + +struct sysinfo_2_2_2 { + char reserved_0[32]; + unsigned short lpar_number; + char reserved_1; + unsigned char characteristics; + unsigned short cpus_total; + unsigned short cpus_configured; + unsigned short cpus_standby; + unsigned short cpus_reserved; + char name[8]; + unsigned int caf; + char reserved_2[16]; + unsigned short cpus_dedicated; + unsigned short cpus_shared; +}; + +#define LPAR_CHAR_DEDICATED (1 << 7) +#define LPAR_CHAR_SHARED (1 << 6) +#define LPAR_CHAR_LIMITED (1 << 5) + +struct sysinfo_3_2_2 { + char reserved_0[31]; + unsigned char :4; + unsigned char count:4; + struct { + char reserved_0[4]; + unsigned short 
cpus_total; + unsigned short cpus_configured; + unsigned short cpus_standby; + unsigned short cpus_reserved; + char name[8]; + unsigned int caf; + char cpi[16]; + char reserved_1[24]; + + } vm[8]; + char reserved_544[3552]; +}; + +#define TOPOLOGY_CPU_BITS 64 +#define TOPOLOGY_NR_MAG 6 + +struct topology_cpu { + unsigned char reserved0[4]; + unsigned char :6; + unsigned char pp:2; + unsigned char reserved1; + unsigned short origin; + unsigned long mask[TOPOLOGY_CPU_BITS / BITS_PER_LONG]; +}; + +struct topology_container { + unsigned char reserved[7]; + unsigned char id; +}; + +union topology_entry { + unsigned char nl; + struct topology_cpu cpu; + struct topology_container container; +}; + +struct sysinfo_15_1_x { + unsigned char reserved0[2]; + unsigned short length; + unsigned char mag[TOPOLOGY_NR_MAG]; + unsigned char reserved1; + unsigned char mnest; + unsigned char reserved2[4]; + union topology_entry tle[0]; +}; + +static inline int stsi(void *sysinfo, int fc, int sel1, int sel2) +{ + register int r0 asm("0") = (fc << 28) | sel1; + register int r1 asm("1") = sel2; + + asm volatile( + " stsi 0(%2)\n" + "0: jz 2f\n" + "1: lhi %0,%3\n" + "2:\n" + EX_TABLE(0b, 1b) + : "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS) + : "cc", "memory"); + return r0; +} + +/* + * Service level reporting interface. 
+ */ +struct service_level { + struct list_head list; + void (*seq_print)(struct seq_file *, struct service_level *); +}; + +int register_service_level(struct service_level *); +int unregister_service_level(struct service_level *); + +#endif /* __ASM_S390_SYSINFO_H */ diff --git a/arch/s390/include/asm/tape390.h b/arch/s390/include/asm/tape390.h new file mode 100644 index 00000000..884fba48 --- /dev/null +++ b/arch/s390/include/asm/tape390.h @@ -0,0 +1,103 @@ +/************************************************************************* + * + * tape390.h + * enables user programs to display messages and control encryption + * on s390 tape devices + * + * Copyright IBM Corp. 2001,2006 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + * + *************************************************************************/ + +#ifndef _TAPE390_H +#define _TAPE390_H + +#define TAPE390_DISPLAY _IOW('d', 1, struct display_struct) + +/* + * The TAPE390_DISPLAY ioctl calls the Load Display command + * which transfers 17 bytes of data from the channel to the subsystem: + * - 1 format control byte, and + * - two 8-byte messages + * + * Format control byte: + * 0-2: New Message Overlay + * 3: Alternate Messages + * 4: Blink Message + * 5: Display Low/High Message + * 6: Reserved + * 7: Automatic Load Request + * + */ + +typedef struct display_struct { + char cntrl; + char message1[8]; + char message2[8]; +} display_struct; + +/* + * Tape encryption support + */ + +struct tape390_crypt_info { + char capability; + char status; + char medium_status; +} __attribute__ ((packed)); + + +/* + * Macros for "capability" field. + * The argument is a struct tape390_crypt_info expression; it is parenthesised + * so that arguments such as *ptr expand correctly. + */ +#define TAPE390_CRYPT_SUPPORTED_MASK 0x01 +#define TAPE390_CRYPT_SUPPORTED(x) \ + (((x).capability) & TAPE390_CRYPT_SUPPORTED_MASK) + +/* Macros for "status" field */ +#define TAPE390_CRYPT_ON_MASK 0x01 +#define TAPE390_CRYPT_ON(x) (((x).status) & TAPE390_CRYPT_ON_MASK) + +/* Macros for "medium status" field */ +#define TAPE390_MEDIUM_LOADED_MASK 0x01 +#define 
TAPE390_MEDIUM_ENCRYPTED_MASK 0x02 +/* + * Both macros take a struct tape390_crypt_info expression and test one bit of + * its medium_status member. The argument is parenthesised so that arguments + * such as *ptr expand correctly. + */ +#define TAPE390_MEDIUM_ENCRYPTED(x) \ + (((x).medium_status) & TAPE390_MEDIUM_ENCRYPTED_MASK) +#define TAPE390_MEDIUM_LOADED(x) \ + (((x).medium_status) & TAPE390_MEDIUM_LOADED_MASK) + +/* + * The TAPE390_CRYPT_SET ioctl is used to switch on/off encryption. + * The "encryption_capable" and "tape_status" fields are ignored for this ioctl! + * NOTE(review): struct tape390_crypt_info has no members with these names; + * presumably "capability" and "medium_status" are meant - confirm. + */ +#define TAPE390_CRYPT_SET _IOW('d', 2, struct tape390_crypt_info) + +/* + * The TAPE390_CRYPT_QUERY ioctl is used to query the encryption state. + */ +#define TAPE390_CRYPT_QUERY _IOR('d', 3, struct tape390_crypt_info) + +/* Values for "kekl1/2_type" and "kekl1/2_type_on_tape" fields */ +#define TAPE390_KEKL_TYPE_NONE 0 +#define TAPE390_KEKL_TYPE_LABEL 1 +#define TAPE390_KEKL_TYPE_HASH 2 + +struct tape390_kekl { + unsigned char type; + unsigned char type_on_tape; + char label[65]; +} __attribute__ ((packed)); + +struct tape390_kekl_pair { + struct tape390_kekl kekl[2]; +} __attribute__ ((packed)); + +/* + * The TAPE390_KEKL_SET ioctl is used to set Key Encrypting Key labels. + */ +#define TAPE390_KEKL_SET _IOW('d', 4, struct tape390_kekl_pair) + +/* + * The TAPE390_KEKL_QUERY ioctl is used to query Key Encrypting Key labels. 
+ */ +#define TAPE390_KEKL_QUERY _IOR('d', 5, struct tape390_kekl_pair) + +#endif diff --git a/arch/s390/include/asm/termbits.h b/arch/s390/include/asm/termbits.h new file mode 100644 index 00000000..71bf6ac6 --- /dev/null +++ b/arch/s390/include/asm/termbits.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_TERMBITS_H +#define _ASM_S390_TERMBITS_H + +#include <asm-generic/termbits.h> + +#endif diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h new file mode 100644 index 00000000..bc3a35ce --- /dev/null +++ b/arch/s390/include/asm/termios.h @@ -0,0 +1,67 @@ +/* + * include/asm-s390/termios.h + * + * S390 version + * + * Derived from "include/asm-i386/termios.h" + */ + +#ifndef _S390_TERMIOS_H +#define _S390_TERMIOS_H + +#include <asm/termbits.h> +#include <asm/ioctls.h> + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + +#ifdef __KERNEL__ + +/* Default control characters, listed in the same order as the bytes of + INIT_C_CC: + intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 sxtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^O werase=^W lnext=^V + eol2=\0 +*/ +#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" + +#define 
user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2)) +#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2)) + +#include <asm-generic/termios-base.h> + +#endif /* __KERNEL__ */ + +#endif /* _S390_TERMIOS_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h new file mode 100644 index 00000000..a7303815 --- /dev/null +++ b/arch/s390/include/asm/thread_info.h @@ -0,0 +1,132 @@ +/* + * include/asm-s390/thread_info.h + * + * S390 version + * Copyright (C) IBM Corp. 2002,2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#ifndef _ASM_THREAD_INFO_H +#define _ASM_THREAD_INFO_H + +#ifdef __KERNEL__ + +/* + * Size of kernel stack for each process + */ +#ifndef __s390x__ +#define THREAD_ORDER 1 +#define ASYNC_ORDER 1 +#else /* __s390x__ */ +#ifndef __SMALL_STACK +#define THREAD_ORDER 2 +#define ASYNC_ORDER 2 +#else +#define THREAD_ORDER 1 +#define ASYNC_ORDER 1 +#endif +#endif /* __s390x__ */ + +#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) +#define ASYNC_SIZE (PAGE_SIZE << ASYNC_ORDER) + +#ifndef __ASSEMBLY__ +#include <asm/lowcore.h> +#include <asm/page.h> +#include <asm/processor.h> + +/* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line + * - this struct shares the supervisor stack pages + * - if the contents of this structure are changed, the assembly constants must also be changed + */ +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + unsigned long flags; /* low level flags */ + unsigned int cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ + struct restart_block restart_block; + unsigned int system_call; + __u64 user_timer; + __u64 system_timer; + unsigned long last_break; /* last breaking-event-address. 
*/ +}; + +/* + * macros/functions for gaining access to the thread information structure + */ +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +/* how to get the thread information struct from C */ +static inline struct thread_info *current_thread_info(void) +{ + return (struct thread_info *) S390_lowcore.thread_info; +} + +#define THREAD_SIZE_ORDER THREAD_ORDER + +#endif + +/* + * thread information flags bit numbers + */ +#define TIF_SYSCALL 0 /* inside a system call */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */ +#define TIF_MCCK_PENDING 7 /* machine check handling is pending */ +#define TIF_SYSCALL_TRACE 8 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ +#define TIF_SECCOMP 10 /* secure computing */ +#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ +#define TIF_SIE 12 /* guest execution active */ +#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling + TIF_NEED_RESCHED */ +#define TIF_31BIT 17 /* 32bit process */ +#define TIF_MEMDIE 18 /* is terminating due to OOM killer */ +#define TIF_RESTORE_SIGMASK 19 /* restore signal mask in do_signal() */ +#define TIF_SINGLE_STEP 20 /* This task is single stepped */ + +#define _TIF_SYSCALL (1<<TIF_SYSCALL) +#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) +#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) +#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) +#define _TIF_PER_TRAP (1<<TIF_PER_TRAP) +#define _TIF_MCCK_PENDING 
(1<<TIF_MCCK_PENDING) +#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) +#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1<<TIF_SECCOMP) +#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) +#define _TIF_SIE (1<<TIF_SIE) +#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) +#define _TIF_31BIT (1<<TIF_31BIT) +#define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) + +#ifdef CONFIG_64BIT +#define is_32bit_task() (test_thread_flag(TIF_31BIT)) +#else +#define is_32bit_task() (1) +#endif + +#endif /* __KERNEL__ */ + +#define PREEMPT_ACTIVE 0x4000000 + +#endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h new file mode 100644 index 00000000..e63069ba --- /dev/null +++ b/arch/s390/include/asm/timer.h @@ -0,0 +1,55 @@ +/* + * include/asm-s390/timer.h + * + * (C) Copyright IBM Corp. 2003,2006 + * Virtual CPU timer + * + * Author: Jan Glauber (jang@de.ibm.com) + */ + +#ifndef _ASM_S390_TIMER_H +#define _ASM_S390_TIMER_H + +#ifdef __KERNEL__ + +#include <linux/timer.h> + +#define VTIMER_MAX_SLICE (0x7ffffffffffff000LL) + +struct vtimer_list { + struct list_head entry; + + int cpu; + __u64 expires; + __u64 interval; + + void (*function)(unsigned long); + unsigned long data; +}; + +/* the vtimer value will wrap after ca. 
71 years */ +struct vtimer_queue { + struct list_head list; + spinlock_t lock; + __u64 timer; /* last programmed timer */ + __u64 elapsed; /* elapsed time of timer expire values */ + __u64 idle_enter; /* cpu timer on idle enter */ + __u64 idle_exit; /* cpu timer on idle exit */ +}; + +extern void init_virt_timer(struct vtimer_list *timer); +extern void add_virt_timer(void *new); +extern void add_virt_timer_periodic(void *new); +extern int mod_virt_timer(struct vtimer_list *timer, __u64 expires); +extern int mod_virt_timer_periodic(struct vtimer_list *timer, __u64 expires); +extern int del_virt_timer(struct vtimer_list *timer); + +extern void init_cpu_vtimer(void); +extern void vtime_init(void); + +extern void vtime_stop_cpu(void); +extern void vtime_start_leave(void); + +#endif /* __KERNEL__ */ + +#endif /* _ASM_S390_TIMER_H */ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h new file mode 100644 index 00000000..c447a27a --- /dev/null +++ b/arch/s390/include/asm/timex.h @@ -0,0 +1,140 @@ +/* + * include/asm-s390/timex.h + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * + * Derived from "include/asm-i386/timex.h" + * Copyright (C) 1992, Linus Torvalds + */ + +#ifndef _ASM_S390_TIMEX_H +#define _ASM_S390_TIMEX_H + +#include <asm/lowcore.h> + +/* The value of the TOD clock for 1.1.1970. */ +#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL + +/* Inline functions for clock register access. 
*/ +/* + * Set the TOD clock from @time. Returns the condition code of the + * instruction, extracted with ipm/srl. + */ +static inline int set_clock(__u64 time) +{ + int cc; + + asm volatile( + " sck %1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) : "Q" (time) : "cc"); + return cc; +} + +/* + * Store the TOD clock to @time. Returns the condition code of the + * instruction, extracted with ipm/srl. + */ +static inline int store_clock(__u64 *time) +{ + int cc; + + asm volatile( + " stck %1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc), "=Q" (*time) : : "cc"); + return cc; +} + +static inline void set_clock_comparator(__u64 time) +{ + asm volatile("sckc %0" : : "Q" (time)); +} + +static inline void store_clock_comparator(__u64 *time) +{ + asm volatile("stckc %0" : "=Q" (*time)); +} + +void clock_comparator_work(void); + +/* + * Program the clock comparator with -1ULL and return the previous lowcore + * value, to be handed back to local_tick_enable(). + */ +static inline unsigned long long local_tick_disable(void) +{ + unsigned long long old; + + old = S390_lowcore.clock_comparator; + S390_lowcore.clock_comparator = -1ULL; + set_clock_comparator(S390_lowcore.clock_comparator); + return old; +} + +/* Reprogram the clock comparator with @comp and record it in the lowcore. */ +static inline void local_tick_enable(unsigned long long comp) +{ + S390_lowcore.clock_comparator = comp; + set_clock_comparator(S390_lowcore.clock_comparator); +} + +#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ + +typedef unsigned long long cycles_t; + +static inline unsigned long long get_clock (void) +{ + unsigned long long clk; + + asm volatile("stck %0" : "=Q" (clk) : : "cc"); + return clk; +} + +/* + * Store the extended TOD clock at @clk, which must point to a buffer of at + * least 16 bytes. + * + * The output operand is typed as a 16 byte object so that the compiler knows + * the whole buffer is written. Describing it as *clk would declare only the + * first byte as modified and allow stale values of the remaining bytes to be + * used. + */ +static inline void get_clock_ext(char *clk) +{ + typedef struct { char _[16]; } addrtype; + + asm volatile("stcke %0" : "=Q" (*(addrtype *) clk) : : "cc"); +} + +/* + * Read the TOD clock with the instruction encoded as 0xb27c if + * MACHINE_HAS_STCKF is set, otherwise fall back to get_clock(). + */ +static inline unsigned long long get_clock_fast(void) +{ + unsigned long long clk; + + if (MACHINE_HAS_STCKF) + asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc"); + else + clk = get_clock(); + return clk; +} + +/* + * Read the extended TOD clock and return the eight bytes that follow its + * first byte. + */ +static inline unsigned long long get_clock_xt(void) +{ + unsigned char clk[16]; + get_clock_ext((char *) clk); + return *((unsigned long long *)&clk[1]); +} + +static inline cycles_t get_cycles(void) +{ + return (cycles_t) get_clock() >> 2; +} + +int get_sync_clock(unsigned long long *clock); +void init_cpu_timer(void); +unsigned long long monotonic_clock(void); + +void tod_to_timeval(__u64, struct timespec *); + +static 
inline +void stck_to_timespec(unsigned long long stck, struct timespec *ts) +{ + tod_to_timeval(stck - TOD_UNIX_EPOCH, ts); +} + +extern u64 sched_clock_base_cc; + +/** + * get_clock_monotonic - returns current time in clock rate units + * + * The caller must ensure that preemption is disabled. + * The clock and sched_clock_base get changed via stop_machine. + * Therefore preemption must be disabled when calling this + * function, otherwise the returned value is not guaranteed to + * be monotonic. + */ +static inline unsigned long long get_clock_monotonic(void) +{ + return get_clock_xt() - sched_clock_base_cc; +} + +#endif diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h new file mode 100644 index 00000000..775a5eea --- /dev/null +++ b/arch/s390/include/asm/tlb.h @@ -0,0 +1,142 @@ +#ifndef _S390_TLB_H +#define _S390_TLB_H + +/* + * TLB flushing on s390 is complicated. The following requirement + * from the principles of operation is the most arduous: + * + * "A valid table entry must not be changed while it is attached + * to any CPU and may be used for translation by that CPU except to + * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY, + * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page + * table entry, or (3) make a change by means of a COMPARE AND SWAP + * AND PURGE instruction that purges the TLB." + * + * The modification of a pte of an active mm struct therefore is + * a two step process: i) invalidate the pte, ii) store the new pte. + * This is true for the page protection bit as well. + * The only possible optimization is to flush at the beginning of + * a tlb_gather_mmu cycle if the mm_struct is currently not in use. + * + * Pages used for the page tables is a different story. 
FIXME: more + */ + +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <asm/processor.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> + +struct mmu_gather { + struct mm_struct *mm; + struct mmu_table_batch *batch; + unsigned int fullmm; +}; + +struct mmu_table_batch { + struct rcu_head rcu; + unsigned int nr; + void *tables[0]; +}; + +#define MAX_TABLE_BATCH \ + ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) + +extern void tlb_table_flush(struct mmu_gather *tlb); +extern void tlb_remove_table(struct mmu_gather *tlb, void *table); + +static inline void tlb_gather_mmu(struct mmu_gather *tlb, + struct mm_struct *mm, + unsigned int full_mm_flush) +{ + tlb->mm = mm; + tlb->fullmm = full_mm_flush; + tlb->batch = NULL; + if (tlb->fullmm) + __tlb_flush_mm(mm); +} + +static inline void tlb_flush_mmu(struct mmu_gather *tlb) +{ + tlb_table_flush(tlb); +} + +static inline void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + tlb_table_flush(tlb); +} + +/* + * Release the page cache reference for a pte removed by + * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page + * has already been freed, so just do free_page_and_swap_cache. + */ +static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + free_page_and_swap_cache(page); + return 1; /* avoid calling tlb_flush_mmu */ +} + +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + free_page_and_swap_cache(page); +} + +/* + * pte_free_tlb frees a pte table and clears the CRSTE for the + * page table from the tlb. + */ +static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, + unsigned long address) +{ + if (!tlb->fullmm) + return page_table_free_rcu(tlb, (unsigned long *) pte); + page_table_free(tlb->mm, (unsigned long *) pte); +} + +/* + * pmd_free_tlb frees a pmd table and clears the CRSTE for the + * segment table entry from the tlb. 
+ * If the mm uses a two level page table the single pmd is freed + * as the pgd. pmd_free_tlb checks the asce_limit against 2GB + * to avoid the double free of the pmd in this case. + */ +static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long address) +{ +#ifdef __s390x__ + if (tlb->mm->context.asce_limit <= (1UL << 31)) + return; + if (!tlb->fullmm) + return tlb_remove_table(tlb, pmd); + crst_table_free(tlb->mm, (unsigned long *) pmd); +#endif +} + +/* + * pud_free_tlb frees a pud table and clears the CRSTE for the + * region third table entry from the tlb. + * If the mm uses a three level page table the single pud is freed + * as the pgd. pud_free_tlb checks the asce_limit against 4TB + * to avoid the double free of the pud in this case. + */ +static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, + unsigned long address) +{ +#ifdef __s390x__ + if (tlb->mm->context.asce_limit <= (1UL << 42)) + return; + if (!tlb->fullmm) + return tlb_remove_table(tlb, pud); + crst_table_free(tlb->mm, (unsigned long *) pud); +#endif +} + +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0) +#define tlb_migrate_finish(mm) do { } while (0) + +#endif /* _S390_TLB_H */ diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h new file mode 100644 index 00000000..1d8648cf --- /dev/null +++ b/arch/s390/include/asm/tlbflush.h @@ -0,0 +1,140 @@ +#ifndef _S390_TLBFLUSH_H +#define _S390_TLBFLUSH_H + +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/processor.h> +#include <asm/pgalloc.h> + +/* + * Flush all tlb entries on the local cpu. + */ +static inline void __tlb_flush_local(void) +{ + asm volatile("ptlb" : : : "memory"); +} + +#ifdef CONFIG_SMP +/* + * Flush all tlb entries on all cpus. 
+ */ +void smp_ptlb_all(void); + +static inline void __tlb_flush_global(void) +{ + register unsigned long reg2 asm("2"); + register unsigned long reg3 asm("3"); + register unsigned long reg4 asm("4"); + long dummy; + +#ifndef __s390x__ + if (!MACHINE_HAS_CSP) { + smp_ptlb_all(); + return; + } +#endif /* __s390x__ */ + + dummy = 0; + reg2 = reg3 = 0; + reg4 = ((unsigned long) &dummy) + 1; + asm volatile( + " csp %0,%2" + : : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" ); +} + +static inline void __tlb_flush_full(struct mm_struct *mm) +{ + cpumask_t local_cpumask; + + preempt_disable(); + /* + * If the process only ran on the local cpu, do a local flush. + */ + cpumask_copy(&local_cpumask, cpumask_of(smp_processor_id())); + if (cpumask_equal(mm_cpumask(mm), &local_cpumask)) + __tlb_flush_local(); + else + __tlb_flush_global(); + preempt_enable(); +} +#else +#define __tlb_flush_full(mm) __tlb_flush_local() +#define __tlb_flush_global() __tlb_flush_local() +#endif + +/* + * Flush all tlb entries of a page table on all cpus. + */ +static inline void __tlb_flush_idte(unsigned long asce) +{ + asm volatile( + " .insn rrf,0xb98e0000,0,%0,%1,0" + : : "a" (2048), "a" (asce) : "cc" ); +} + +static inline void __tlb_flush_mm(struct mm_struct * mm) +{ + if (unlikely(cpumask_empty(mm_cpumask(mm)))) + return; + /* + * If the machine has IDTE we prefer to do a per mm flush + * on all cpus instead of doing a local flush if the mm + * only ran on the local cpu. 
+ */ + if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list)) + __tlb_flush_idte((unsigned long) mm->pgd | + mm->context.asce_bits); + else + __tlb_flush_full(mm); +} + +static inline void __tlb_flush_mm_cond(struct mm_struct * mm) +{ + spin_lock(&mm->page_table_lock); + if (mm->context.flush_mm) { + __tlb_flush_mm(mm); + mm->context.flush_mm = 0; + } + spin_unlock(&mm->page_table_lock); +} + +/* + * TLB flushing: + * flush_tlb() - flushes the current mm struct TLBs + * flush_tlb_all() - flushes all processes TLBs + * flush_tlb_mm(mm) - flushes the specified mm context TLB's + * flush_tlb_page(vma, vmaddr) - flushes one page + * flush_tlb_range(vma, start, end) - flushes a range of pages + * flush_tlb_kernel_range(start, end) - flushes a range of kernel pages + */ + +/* + * flush_tlb_mm goes together with ptep_set_wrprotect for the + * copy_page_range operation and flush_tlb_range is related to + * ptep_get_and_clear for change_protection. ptep_set_wrprotect and + * ptep_get_and_clear do not flush the TLBs directly if the mm has + * only one user. At the end of the update the flush_tlb_mm and + * flush_tlb_range functions need to do the flush. 
+ */ +#define flush_tlb() do { } while (0) +#define flush_tlb_all() do { } while (0) +#define flush_tlb_page(vma, addr) do { } while (0) + /* Flush @mm through __tlb_flush_mm_cond(), i.e. only if a flush is pending. */ +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + __tlb_flush_mm_cond(mm); +} + /* + * @start and @end are not used: the whole mm of @vma is handed to + * __tlb_flush_mm_cond(), so the flush only happens if one is pending. + */ +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + __tlb_flush_mm_cond(vma->vm_mm); +} + /* + * @start and @end are not used: init_mm is passed to __tlb_flush_mm() + * directly, without consulting a pending-flush flag. + */ +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + __tlb_flush_mm(&init_mm); +} + +#endif /* _S390_TLBFLUSH_H */ diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h new file mode 100644 index 00000000..0837de80 --- /dev/null +++ b/arch/s390/include/asm/topology.h @@ -0,0 +1,84 @@ +#ifndef _ASM_S390_TOPOLOGY_H +#define _ASM_S390_TOPOLOGY_H + +#include <linux/cpumask.h> +#include <asm/sysinfo.h> + +struct cpu; + +#ifdef CONFIG_SCHED_BOOK + +extern unsigned char cpu_core_id[NR_CPUS]; +extern cpumask_t cpu_core_map[NR_CPUS]; + /* Return the cpu_core_map entry of @cpu; @cpu indexes the array unchecked. */ +static inline const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_core_map[cpu]; +} + +#define topology_core_id(cpu) (cpu_core_id[cpu]) +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define mc_capable() (1) + +extern unsigned char cpu_book_id[NR_CPUS]; +extern cpumask_t cpu_book_map[NR_CPUS]; + /* Return the cpu_book_map entry of @cpu; @cpu indexes the array unchecked. */ +static inline const struct cpumask *cpu_book_mask(int cpu) +{ + return &cpu_book_map[cpu]; +} + +#define topology_book_id(cpu) (cpu_book_id[cpu]) +#define topology_book_cpumask(cpu) (&cpu_book_map[cpu]) + +int topology_cpu_init(struct cpu *); +int topology_set_cpu_management(int fc); +void topology_schedule_update(void); +void store_topology(struct sysinfo_15_1_x *info); +void topology_expect_change(void); + +#else /* CONFIG_SCHED_BOOK */ + /* Stubs: these do nothing here; topology_cpu_init() always returns 0. */ +static inline void topology_schedule_update(void) { } +static inline int topology_cpu_init(struct cpu *cpu) { return 0; } +static inline void topology_expect_change(void) { } + +#endif /* CONFIG_SCHED_BOOK */ + +#define POLARIZATION_UNKNOWN (-1) 
+#define POLARIZATION_HRZ (0) +#define POLARIZATION_VL (1) +#define POLARIZATION_VM (2) +#define POLARIZATION_VH (3) + +extern int cpu_polarization[]; + /* + * Store @val in cpu_polarization[@cpu]. Without CONFIG_SCHED_BOOK the + * body is empty and the call has no effect. + */ +static inline void cpu_set_polarization(int cpu, int val) +{ +#ifdef CONFIG_SCHED_BOOK + cpu_polarization[cpu] = val; +#endif +} + /* + * Return cpu_polarization[@cpu], or the constant POLARIZATION_HRZ when + * CONFIG_SCHED_BOOK is not set. + */ +static inline int cpu_read_polarization(int cpu) +{ +#ifdef CONFIG_SCHED_BOOK + return cpu_polarization[cpu]; +#else + return POLARIZATION_HRZ; +#endif +} + +#ifdef CONFIG_SCHED_BOOK +void s390_init_cpu_topology(void); +#else /* empty stub when CONFIG_SCHED_BOOK is not set */ +static inline void s390_init_cpu_topology(void) +{ +}; +#endif + +#define SD_BOOK_INIT SD_CPU_INIT + +#include <asm-generic/topology.h> + +#endif /* _ASM_S390_TOPOLOGY_H */ diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h new file mode 100644 index 00000000..05ebbcdb --- /dev/null +++ b/arch/s390/include/asm/types.h @@ -0,0 +1,43 @@ +/* + * include/asm-s390/types.h + * + * S390 version + * + * Derived from "include/asm-i386/types.h" + */ + +#ifndef _S390_TYPES_H +#define _S390_TYPES_H + +#include <asm-generic/int-ll64.h> + +#ifndef __ASSEMBLY__ + +/* An address type so that arithmetic can be done on it & it can be upgraded to + 64 bit when necessary +*/ +typedef unsigned long addr_t; +typedef __signed__ long saddr_t; + +#endif /* __ASSEMBLY__ */ + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +#ifndef __s390x__ /* register_pair: two unsigned longs (even, odd) overlaid on one unsigned long long */ +typedef union { + unsigned long long pair; + struct { + unsigned long even; + unsigned long odd; + } subreg; +} register_pair; + +#endif /* ! 
__s390x__ */ +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* _S390_TYPES_H */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h new file mode 100644 index 00000000..8f2cada4 --- /dev/null +++ b/arch/s390/include/asm/uaccess.h @@ -0,0 +1,384 @@ +/* + * include/asm-s390/uaccess.h + * + * S390 version + * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Hartmut Penner (hp@de.ibm.com), + * Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/uaccess.h" + */ +#ifndef __S390_UACCESS_H +#define __S390_UACCESS_H + +/* + * User space memory access functions + */ +#include <linux/sched.h> +#include <linux/errno.h> +#include <asm/ctl_reg.h> + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons, these macros are grossly misnamed. + */ + +#define MAKE_MM_SEG(a) ((mm_segment_t) { (a) }) + + +#define KERNEL_DS MAKE_MM_SEG(0) +#define USER_DS MAKE_MM_SEG(1) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current->thread.mm_segment) + +#define set_fs(x) \ +({ \ + unsigned long __pto; \ + current->thread.mm_segment = (x); \ + __pto = current->thread.mm_segment.ar4 ? \ + S390_lowcore.user_asce : S390_lowcore.kernel_asce; \ + __ctl_load(__pto, 7, 7); \ +}) + +#define segment_eq(a,b) ((a).ar4 == (b).ar4) + +#define __access_ok(addr, size) \ +({ \ + __chk_user_ptr(addr); \ + 1; \ +}) + +#define access_ok(type, addr, size) __access_ok(addr, size) + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. 
No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + +struct uaccess_ops { + size_t (*copy_from_user)(size_t, const void __user *, void *); + size_t (*copy_from_user_small)(size_t, const void __user *, void *); + size_t (*copy_to_user)(size_t, void __user *, const void *); + size_t (*copy_to_user_small)(size_t, void __user *, const void *); + size_t (*copy_in_user)(size_t, void __user *, const void __user *); + size_t (*clear_user)(size_t, void __user *); + size_t (*strnlen_user)(size_t, const char __user *); + size_t (*strncpy_from_user)(size_t, const char __user *, char *); + int (*futex_atomic_op)(int op, u32 __user *, int oparg, int *old); + int (*futex_atomic_cmpxchg)(u32 *, u32 __user *, u32 old, u32 new); +}; + +extern struct uaccess_ops uaccess; +extern struct uaccess_ops uaccess_std; +extern struct uaccess_ops uaccess_mvcos; +extern struct uaccess_ops uaccess_mvcos_switch; +extern struct uaccess_ops uaccess_pt; + +extern int __handle_fault(unsigned long, unsigned long, int); + +static inline int __put_user_fn(size_t size, void __user *ptr, void *x) +{ + size = uaccess.copy_to_user_small(size, ptr, x); + return size ? -EFAULT : size; +} + +static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) +{ + size = uaccess.copy_from_user_small(size, ptr, x); + return size ? -EFAULT : size; +} + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. 
+ */ +#define __put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __x = (x); \ + int __pu_err = -EFAULT; \ + __chk_user_ptr(ptr); \ + switch (sizeof (*(ptr))) { \ + case 1: \ + case 2: \ + case 4: \ + case 8: \ + __pu_err = __put_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + break; \ + default: \ + __put_user_bad(); \ + break; \ + } \ + __pu_err; \ +}) + +#define put_user(x, ptr) \ +({ \ + might_fault(); \ + __put_user(x, ptr); \ +}) + + +extern int __put_user_bad(void) __attribute__((noreturn)); + +#define __get_user(x, ptr) \ +({ \ + int __gu_err = -EFAULT; \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: { \ + unsigned char __x; \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + case 2: { \ + unsigned short __x; \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + case 4: { \ + unsigned int __x; \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + case 8: { \ + unsigned long long __x; \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + default: \ + __get_user_bad(); \ + break; \ + } \ + __gu_err; \ +}) + +#define get_user(x, ptr) \ +({ \ + might_fault(); \ + __get_user(x, ptr); \ +}) + +extern int __get_user_bad(void) __attribute__((noreturn)); + +#define __put_user_unaligned __put_user +#define __get_user_unaligned __get_user + +/** + * __copy_to_user: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. 
+ * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +static inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (__builtin_constant_p(n) && (n <= 256)) + return uaccess.copy_to_user_small(n, to, from); + else + return uaccess.copy_to_user(n, to, from); +} + +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user + +/** + * copy_to_user: - Copy a block of data into user space. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +static inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + might_fault(); + if (access_ok(VERIFY_WRITE, to, n)) + n = __copy_to_user(to, from, n); + return n; +} + +/** + * __copy_from_user: - Copy a block of data from user space, with less checking. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. 
+ */ +static inline unsigned long __must_check +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + /* the _small handler is only picked for a compile-time constant n <= 256 */ + if (__builtin_constant_p(n) && (n <= 256)) + return uaccess.copy_from_user_small(n, from, to); + else + return uaccess.copy_from_user(n, from, to); +} + +extern void copy_from_user_overflow(void) +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS +__compiletime_warning("copy_from_user() buffer size is not provably correct") +#endif +; + +/** + * copy_from_user: - Copy a block of data from user space. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + * + * If __compiletime_object_size(to) yields a value other than -1 that is + * smaller than @n, copy_from_user_overflow() is called and @n is + * returned; nothing is copied or zeroed in that case. + */ +static inline unsigned long __must_check +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + unsigned int sz = __compiletime_object_size(to); + + might_fault(); + if (unlikely(sz != -1 && sz < n)) { + copy_from_user_overflow(); + return n; + } + if (access_ok(VERIFY_READ, from, n)) + n = __copy_from_user(to, from, n); + else + memset(to, 0, n); + return n; +} + /* + * Unchecked user-to-user copy: forwards @n, @to and @from to + * uaccess.copy_in_user() and returns its result (presumably the number + * of bytes not copied, as for the other copy routines - confirm). + */ +static inline unsigned long __must_check +__copy_in_user(void __user *to, const void __user *from, unsigned long n) +{ + return uaccess.copy_in_user(n, to, from); +} + /* + * Checked user-to-user copy: calls __copy_in_user() when __access_ok() + * passes for both @from and @to, otherwise returns @n unchanged. + */ +static inline unsigned long __must_check +copy_in_user(void __user *to, const void __user *from, unsigned long n) +{ + might_fault(); + if (__access_ok(from,n) && __access_ok(to,n)) + n = __copy_in_user(to, from, n); + return n; +} + +/* + * Copy a null terminated string from userspace. 
+ */ +static inline long __must_check +strncpy_from_user(char *dst, const char __user *src, long count) +{ + long res = -EFAULT; /* returned as-is when the access_ok() check fails */ + might_fault(); + if (access_ok(VERIFY_READ, src, 1)) + res = uaccess.strncpy_from_user(count, src, dst); + return res; +} + /* + * Forwards @n and @src to uaccess.strnlen_user() and returns its + * result; no access_ok() check is made here. + */ +static inline unsigned long +strnlen_user(const char __user * src, unsigned long n) +{ + might_fault(); + return uaccess.strnlen_user(n, src); +} + +/** + * strlen_user: - Get the size of a string in user space. + * @str: The string to measure. + * + * Context: User context only. This function may sleep. + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * On exception, returns 0. + * + * If there is a limit on the length of a valid string, you may wish to + * consider using strnlen_user() instead. + */ +#define strlen_user(str) strnlen_user(str, ~0UL) + +/* + * Zero Userspace + */ + /* Unchecked variant: hands @n and @to straight to uaccess.clear_user(). */ +static inline unsigned long __must_check +__clear_user(void __user *to, unsigned long n) +{ + return uaccess.clear_user(n, to); +} + /* + * Checked variant: calls uaccess.clear_user() when + * access_ok(VERIFY_WRITE, to, n) passes, otherwise returns @n unchanged. + */ +static inline unsigned long __must_check +clear_user(void __user *to, unsigned long n) +{ + might_fault(); + if (access_ok(VERIFY_WRITE, to, n)) + n = uaccess.clear_user(n, to); + return n; +} + +extern int memcpy_real(void *, void *, size_t); +extern void copy_to_absolute_zero(void *dest, void *src, size_t count); +extern int copy_to_user_real(void __user *dest, void *src, size_t count); +extern int copy_from_user_real(void *dest, void __user *src, size_t count); + +#endif /* __S390_UACCESS_H */ diff --git a/arch/s390/include/asm/ucontext.h b/arch/s390/include/asm/ucontext.h new file mode 100644 index 00000000..cfb874e6 --- /dev/null +++ b/arch/s390/include/asm/ucontext.h @@ -0,0 +1,35 @@ +/* + * include/asm-s390/ucontext.h + * + * S390 version + * + * Derived from "include/asm-i386/ucontext.h" + */ + +#ifndef _ASM_S390_UCONTEXT_H +#define _ASM_S390_UCONTEXT_H + +#define UC_EXTENDED 0x00000001 + +#ifndef __s390x__ + 
+struct ucontext_extended { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + _sigregs uc_mcontext; + unsigned long uc_sigmask[2]; + unsigned long uc_gprs_high[16]; +}; + +#endif + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + _sigregs uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* !_ASM_S390_UCONTEXT_H */ diff --git a/arch/s390/include/asm/unaligned.h b/arch/s390/include/asm/unaligned.h new file mode 100644 index 00000000..da9627af --- /dev/null +++ b/arch/s390/include/asm/unaligned.h @@ -0,0 +1,13 @@ +#ifndef _ASM_S390_UNALIGNED_H +#define _ASM_S390_UNALIGNED_H + +/* + * The S390 can do unaligned accesses itself. + */ +#include <linux/unaligned/access_ok.h> +#include <linux/unaligned/generic.h> + +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be + +#endif /* _ASM_S390_UNALIGNED_H */ diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h new file mode 100644 index 00000000..8a8008fe --- /dev/null +++ b/arch/s390/include/asm/unistd.h @@ -0,0 +1,431 @@ +/* + * include/asm-s390/unistd.h + * + * S390 version + * + * Derived from "include/asm-i386/unistd.h" + */ + +#ifndef _ASM_S390_UNISTD_H_ +#define _ASM_S390_UNISTD_H_ + +/* + * This file contains the system call numbers. 
+ */ + +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_restart_syscall 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_brk 45 +#define __NR_signal 48 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_setpgid 57 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_symlink 83 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_lookup_dcookie 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define 
__NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_getdents 141 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_query_module 167 +#define __NR_poll 168 +#define __NR_nfsservctl 169 +#define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 +#define __NR_pread64 180 +#define __NR_pwrite64 181 +#define __NR_getcwd 183 +#define __NR_capget 184 +#define __NR_capset 185 +#define __NR_sigaltstack 186 +#define __NR_sendfile 187 +#define __NR_getpmsg 188 +#define __NR_putpmsg 189 +#define __NR_vfork 190 +#define __NR_pivot_root 217 +#define __NR_mincore 218 +#define __NR_madvise 219 +#define __NR_getdents64 220 +#define __NR_readahead 222 
+#define __NR_setxattr 224 +#define __NR_lsetxattr 225 +#define __NR_fsetxattr 226 +#define __NR_getxattr 227 +#define __NR_lgetxattr 228 +#define __NR_fgetxattr 229 +#define __NR_listxattr 230 +#define __NR_llistxattr 231 +#define __NR_flistxattr 232 +#define __NR_removexattr 233 +#define __NR_lremovexattr 234 +#define __NR_fremovexattr 235 +#define __NR_gettid 236 +#define __NR_tkill 237 +#define __NR_futex 238 +#define __NR_sched_setaffinity 239 +#define __NR_sched_getaffinity 240 +#define __NR_tgkill 241 +/* Number 242 is reserved for tux */ +#define __NR_io_setup 243 +#define __NR_io_destroy 244 +#define __NR_io_getevents 245 +#define __NR_io_submit 246 +#define __NR_io_cancel 247 +#define __NR_exit_group 248 +#define __NR_epoll_create 249 +#define __NR_epoll_ctl 250 +#define __NR_epoll_wait 251 +#define __NR_set_tid_address 252 +#define __NR_fadvise64 253 +#define __NR_timer_create 254 +#define __NR_timer_settime (__NR_timer_create+1) +#define __NR_timer_gettime (__NR_timer_create+2) +#define __NR_timer_getoverrun (__NR_timer_create+3) +#define __NR_timer_delete (__NR_timer_create+4) +#define __NR_clock_settime (__NR_timer_create+5) +#define __NR_clock_gettime (__NR_timer_create+6) +#define __NR_clock_getres (__NR_timer_create+7) +#define __NR_clock_nanosleep (__NR_timer_create+8) +/* Number 263 is reserved for vserver */ +#define __NR_statfs64 265 +#define __NR_fstatfs64 266 +#define __NR_remap_file_pages 267 +/* Number 268 is reserved for new sys_mbind */ +/* Number 269 is reserved for new sys_get_mempolicy */ +/* Number 270 is reserved for new sys_set_mempolicy */ +#define __NR_mq_open 271 +#define __NR_mq_unlink 272 +#define __NR_mq_timedsend 273 +#define __NR_mq_timedreceive 274 +#define __NR_mq_notify 275 +#define __NR_mq_getsetattr 276 +#define __NR_kexec_load 277 +#define __NR_add_key 278 +#define __NR_request_key 279 +#define __NR_keyctl 280 +#define __NR_waitid 281 +#define __NR_ioprio_set 282 +#define __NR_ioprio_get 283 +#define __NR_inotify_init 
284 +#define __NR_inotify_add_watch 285 +#define __NR_inotify_rm_watch 286 +/* Number 287 is reserved for new sys_migrate_pages */ +#define __NR_openat 288 +#define __NR_mkdirat 289 +#define __NR_mknodat 290 +#define __NR_fchownat 291 +#define __NR_futimesat 292 +#define __NR_unlinkat 294 +#define __NR_renameat 295 +#define __NR_linkat 296 +#define __NR_symlinkat 297 +#define __NR_readlinkat 298 +#define __NR_fchmodat 299 +#define __NR_faccessat 300 +#define __NR_pselect6 301 +#define __NR_ppoll 302 +#define __NR_unshare 303 +#define __NR_set_robust_list 304 +#define __NR_get_robust_list 305 +#define __NR_splice 306 +#define __NR_sync_file_range 307 +#define __NR_tee 308 +#define __NR_vmsplice 309 +/* Number 310 is reserved for new sys_move_pages */ +#define __NR_getcpu 311 +#define __NR_epoll_pwait 312 +#define __NR_utimes 313 +#define __NR_fallocate 314 +#define __NR_utimensat 315 +#define __NR_signalfd 316 +#define __NR_timerfd 317 +#define __NR_eventfd 318 +#define __NR_timerfd_create 319 +#define __NR_timerfd_settime 320 +#define __NR_timerfd_gettime 321 +#define __NR_signalfd4 322 +#define __NR_eventfd2 323 +#define __NR_inotify_init1 324 +#define __NR_pipe2 325 +#define __NR_dup3 326 +#define __NR_epoll_create1 327 +#define __NR_preadv 328 +#define __NR_pwritev 329 +#define __NR_rt_tgsigqueueinfo 330 +#define __NR_perf_event_open 331 +#define __NR_fanotify_init 332 +#define __NR_fanotify_mark 333 +#define __NR_prlimit64 334 +#define __NR_name_to_handle_at 335 +#define __NR_open_by_handle_at 336 +#define __NR_clock_adjtime 337 +#define __NR_syncfs 338 +#define __NR_setns 339 +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 +#define NR_syscalls 342 + +/* + * There are some system calls that are not present on 64 bit, some + * have a different name although they do the same (e.g. __NR_chown32 + * is __NR_chown on 64 bit). 
+ */ +#ifndef __s390x__ + +#define __NR_time 13 +#define __NR_lchown 16 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_getrlimit 76 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_fchown 95 +#define __NR_ioperm 101 +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR__newselect 142 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#define __NR_chown 182 +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 +#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_fcntl64 221 +#define __NR_sendfile64 223 +#define __NR_fadvise64_64 264 +#define __NR_fstatat64 293 + +#else + +#define __NR_select 142 +#define __NR_getrlimit 191 /* SuS compliant getrlimit */ +#define __NR_lchown 198 +#define __NR_getuid 199 +#define __NR_getgid 200 +#define __NR_geteuid 201 +#define __NR_getegid 202 +#define __NR_setreuid 203 +#define __NR_setregid 204 +#define __NR_getgroups 205 +#define __NR_setgroups 206 +#define __NR_fchown 207 +#define __NR_setresuid 208 +#define __NR_getresuid 209 +#define __NR_setresgid 210 +#define 
__NR_getresgid 211 +#define __NR_chown 212 +#define __NR_setuid 213 +#define __NR_setgid 214 +#define __NR_setfsuid 215 +#define __NR_setfsgid 216 +#define __NR_newfstatat 293 + +#endif + +#ifdef __KERNEL__ + +#ifndef CONFIG_64BIT +#define __IGNORE_select +#else +#define __IGNORE_time +#endif + +/* Ignore NUMA system calls. Not wired up on s390. */ +#define __IGNORE_mbind +#define __IGNORE_get_mempolicy +#define __IGNORE_set_mempolicy +#define __IGNORE_migrate_pages +#define __IGNORE_move_pages + +/* Ignore system calls that are also reachable via sys_socket */ +#define __IGNORE_recvmmsg +#define __IGNORE_sendmmsg + +#define __ARCH_WANT_IPC_PARSE_VERSION +#define __ARCH_WANT_OLD_READDIR +#define __ARCH_WANT_SYS_ALARM +#define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_PAUSE +#define __ARCH_WANT_SYS_SIGNAL +#define __ARCH_WANT_SYS_UTIME +#define __ARCH_WANT_SYS_SOCKETCALL +#define __ARCH_WANT_SYS_IPC +#define __ARCH_WANT_SYS_FADVISE64 +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_LLSEEK +#define __ARCH_WANT_SYS_NICE +#define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_MMAP +#define __ARCH_WANT_SYS_OLDUMOUNT +#define __ARCH_WANT_SYS_SIGPENDING +#define __ARCH_WANT_SYS_SIGPROCMASK +#define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND +# ifndef CONFIG_64BIT +# define __ARCH_WANT_STAT64 +# define __ARCH_WANT_SYS_TIME +# endif +# ifdef CONFIG_COMPAT +# define __ARCH_WANT_COMPAT_SYS_TIME +# define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND +# endif + +/* + * "Conditional" syscalls + * + * What we want is __attribute__((weak,alias("sys_ni_syscall"))), + * but it doesn't work on all toolchains, so we just do it by hand + */ +#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") + +#endif /* __KERNEL__ */ +#endif /* _ASM_S390_UNISTD_H_ */ diff --git a/arch/s390/include/asm/user.h b/arch/s390/include/asm/user.h new file mode 100644 index 00000000..1b050e35 --- /dev/null +++ 
b/arch/s390/include/asm/user.h @@ -0,0 +1,76 @@ +/* + * include/asm-s390/user.h + * + * S390 version + * + * Derived from "include/asm-i386/user.h" + */ + +#ifndef _S390_USER_H +#define _S390_USER_H + +#include <asm/page.h> +#include <asm/ptrace.h> +/* Core file format: The core file is written in such a way that gdb + can understand it and provide useful information to the user (under + linux we use the 'trad-core' bfd). There are quite a number of + obstacles to being able to view the contents of the floating point + registers, and until these are solved you will not be able to view the + contents of them. Actually, you can read in the core file and look at + the contents of the user struct to find out what the floating point + registers contain. + The actual file contents are as follows: + UPAGE: 1 page consisting of a user struct that tells gdb what is present + in the file. Directly after this is a copy of the task_struct, which + is currently not used by gdb, but it may come in useful at some point. + All of the registers are stored as part of the upage. The upage should + always be only one page. + DATA: The data area is stored. We use current->end_text to + current->brk to pick up all of the user variables, plus any memory + that may have been malloced. No attempt is made to determine if a page + is demand-zero or if a page is totally unused, we just cover the entire + range. All of the addresses are rounded in such a way that an integral + number of pages is written. + STACK: We need the stack information in order to get a meaningful + backtrace. We need to write the data from (esp) to + current->start_stack, so we round each of these off in order to be able + to write an integer number of pages. + The minimum core file size is 3 pages, or 12288 bytes. 
+*/ + + +/* + * This is the old layout of "struct pt_regs", and + * is still the layout used by user mode (the new + * pt_regs doesn't have all registers as the kernel + * doesn't use the extra segment registers) + */ + +/* When the kernel dumps core, it starts by dumping the user struct - + this will be used by gdb to figure out where the data and stack segments + are within the file, and what virtual addresses to use. */ +struct user { +/* We start with the registers, to mimic the way that "memory" is returned + from the ptrace(3,...) function. */ + struct user_regs_struct regs; /* Where the registers are actually stored */ +/* The rest of this junk is to help gdb figure out what goes where */ + unsigned long int u_tsize; /* Text segment size (pages). */ + unsigned long int u_dsize; /* Data segment size (pages). */ + unsigned long int u_ssize; /* Stack segment size (pages). */ + unsigned long start_code; /* Starting virtual address of text. */ + unsigned long start_stack; /* Starting virtual address of stack area. + This is actually the bottom of the stack, + the top of the stack is always found in the + esp register. */ + long int signal; /* Signal that caused the core dump. */ + unsigned long u_ar0; /* Used by gdb to help find the values for */ + /* the registers. 
*/ + unsigned long magic; /* To uniquely identify a core file */ + char u_comm[32]; /* User command that was responsible */ +}; +#define NBPG PAGE_SIZE +#define UPAGES 1 +#define HOST_TEXT_START_ADDR (u.start_code) +#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) + +#endif /* _S390_USER_H */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h new file mode 100644 index 00000000..c4a11cfa --- /dev/null +++ b/arch/s390/include/asm/vdso.h @@ -0,0 +1,51 @@ +#ifndef __S390_VDSO_H__ +#define __S390_VDSO_H__ + +#ifdef __KERNEL__ + +/* Default link addresses for the vDSOs */ +#define VDSO32_LBASE 0 +#define VDSO64_LBASE 0 + +#define VDSO_VERSION_STRING LINUX_2.6.29 + +#ifndef __ASSEMBLY__ + +/* + * Note about the vdso_data and vdso_per_cpu_data structures: + * + * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the + * structure is supposed to be known only to the function in the vdso + * itself and may change without notice. + */ + +struct vdso_data { + __u64 tb_update_count; /* Timebase atomicity ctr 0x00 */ + __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */ + __u64 xtime_clock_sec; /* Kernel time 0x10 */ + __u64 xtime_clock_nsec; /* 0x18 */ + __u64 wtom_clock_sec; /* Wall to monotonic clock 0x20 */ + __u64 wtom_clock_nsec; /* 0x28 */ + __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ + __u32 tz_dsttime; /* Type of dst correction 0x34 */ + __u32 ectg_available; /* 0x38 */ + __u32 ntp_mult; /* NTP adjusted multiplier 0x3C */ +}; + +struct vdso_per_cpu_data { + __u64 ectg_timer_base; /* 0x00 */ + __u64 ectg_user_time; /* 0x08 */ +}; + +extern struct vdso_data *vdso_data; + +#ifdef CONFIG_64BIT +int vdso_alloc_per_cpu(struct _lowcore *lowcore); +void vdso_free_per_cpu(struct _lowcore *lowcore); +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/include/asm/vtoc.h b/arch/s390/include/asm/vtoc.h new file mode 100644 index 00000000..8406a2b3 --- /dev/null +++ 
b/arch/s390/include/asm/vtoc.h @@ -0,0 +1,215 @@ +/* + * include/asm-s390/vtoc.h + * + * This file contains volume label definitions for DASD devices. + * + * (C) Copyright IBM Corp. 2005 + * + * Author(s): Volker Sameske <sameske@de.ibm.com> + * + */ + +#ifndef _ASM_S390_VTOC_H +#define _ASM_S390_VTOC_H + +#include <linux/types.h> + +struct vtoc_ttr +{ + __u16 tt; + __u8 r; +} __attribute__ ((packed)); + +struct vtoc_cchhb +{ + __u16 cc; + __u16 hh; + __u8 b; +} __attribute__ ((packed)); + +struct vtoc_cchh +{ + __u16 cc; + __u16 hh; +} __attribute__ ((packed)); + +struct vtoc_labeldate +{ + __u8 year; + __u16 day; +} __attribute__ ((packed)); + +struct vtoc_volume_label_cdl +{ + char volkey[4]; /* volume key = volume label */ + char vollbl[4]; /* volume label */ + char volid[6]; /* volume identifier */ + __u8 security; /* security byte */ + struct vtoc_cchhb vtoc; /* VTOC address */ + char res1[5]; /* reserved */ + char cisize[4]; /* CI-size for FBA,... */ + /* ...blanks for CKD */ + char blkperci[4]; /* no of blocks per CI (FBA), blanks for CKD */ + char labperci[4]; /* no of labels per CI (FBA), blanks for CKD */ + char res2[4]; /* reserved */ + char lvtoc[14]; /* owner code for LVTOC */ + char res3[29]; /* reserved */ +} __attribute__ ((packed)); + +struct vtoc_volume_label_ldl { + char vollbl[4]; /* volume label */ + char volid[6]; /* volume identifier */ + char res3[69]; /* reserved */ + char ldl_version; /* version number, valid for ldl format */ + __u64 formatted_blocks; /* valid when ldl_version >= f2 */ +} __attribute__ ((packed)); + +struct vtoc_extent +{ + __u8 typeind; /* extent type indicator */ + __u8 seqno; /* extent sequence number */ + struct vtoc_cchh llimit; /* starting point of this extent */ + struct vtoc_cchh ulimit; /* ending point of this extent */ +} __attribute__ ((packed)); + +struct vtoc_dev_const +{ + __u16 DS4DSCYL; /* number of logical cyls */ + __u16 DS4DSTRK; /* number of tracks in a logical cylinder */ + __u16 DS4DEVTK; /* device 
track length */ + __u8 DS4DEVI; /* non-last keyed record overhead */ + __u8 DS4DEVL; /* last keyed record overhead */ + __u8 DS4DEVK; /* non-keyed record overhead differential */ + __u8 DS4DEVFG; /* flag byte */ + __u16 DS4DEVTL; /* device tolerance */ + __u8 DS4DEVDT; /* number of DSCB's per track */ + __u8 DS4DEVDB; /* number of directory blocks per track */ +} __attribute__ ((packed)); + +struct vtoc_format1_label +{ + char DS1DSNAM[44]; /* data set name */ + __u8 DS1FMTID; /* format identifier */ + char DS1DSSN[6]; /* data set serial number */ + __u16 DS1VOLSQ; /* volume sequence number */ + struct vtoc_labeldate DS1CREDT; /* creation date: ydd */ + struct vtoc_labeldate DS1EXPDT; /* expiration date */ + __u8 DS1NOEPV; /* number of extents on volume */ + __u8 DS1NOBDB; /* no. of bytes used in last directory blk */ + __u8 DS1FLAG1; /* flag 1 */ + char DS1SYSCD[13]; /* system code */ + struct vtoc_labeldate DS1REFD; /* date last referenced */ + __u8 DS1SMSFG; /* system managed storage indicators */ + __u8 DS1SCXTF; /* sec. 
space extension flag byte */ + __u16 DS1SCXTV; /* secondary space extension value */ + __u8 DS1DSRG1; /* data set organisation byte 1 */ + __u8 DS1DSRG2; /* data set organisation byte 2 */ + __u8 DS1RECFM; /* record format */ + __u8 DS1OPTCD; /* option code */ + __u16 DS1BLKL; /* block length */ + __u16 DS1LRECL; /* record length */ + __u8 DS1KEYL; /* key length */ + __u16 DS1RKP; /* relative key position */ + __u8 DS1DSIND; /* data set indicators */ + __u8 DS1SCAL1; /* secondary allocation flag byte */ + char DS1SCAL3[3]; /* secondary allocation quantity */ + struct vtoc_ttr DS1LSTAR; /* last used track and block on track */ + __u16 DS1TRBAL; /* space remaining on last used track */ + __u16 res1; /* reserved */ + struct vtoc_extent DS1EXT1; /* first extent description */ + struct vtoc_extent DS1EXT2; /* second extent description */ + struct vtoc_extent DS1EXT3; /* third extent description */ + struct vtoc_cchhb DS1PTRDS; /* possible pointer to f2 or f3 DSCB */ +} __attribute__ ((packed)); + +struct vtoc_format4_label +{ + char DS4KEYCD[44]; /* key code for VTOC labels: 44 times 0x04 */ + __u8 DS4IDFMT; /* format identifier */ + struct vtoc_cchhb DS4HPCHR; /* highest address of a format 1 DSCB */ + __u16 DS4DSREC; /* number of available DSCB's */ + struct vtoc_cchh DS4HCCHH; /* CCHH of next available alternate track */ + __u16 DS4NOATK; /* number of remaining alternate tracks */ + __u8 DS4VTOCI; /* VTOC indicators */ + __u8 DS4NOEXT; /* number of extents in VTOC */ + __u8 DS4SMSFG; /* system managed storage indicators */ + __u8 DS4DEVAC; /* number of alternate cylinders. + * Subtract from first two bytes of + * DS4DEVSZ to get number of usable + * cylinders. can be zero. valid + * only if DS4DEVAV on. 
*/ + struct vtoc_dev_const DS4DEVCT; /* device constants */ + char DS4AMTIM[8]; /* VSAM time stamp */ + char DS4AMCAT[3]; /* VSAM catalog indicator */ + char DS4R2TIM[8]; /* VSAM volume/catalog match time stamp */ + char res1[5]; /* reserved */ + char DS4F6PTR[5]; /* pointer to first format 6 DSCB */ + struct vtoc_extent DS4VTOCE; /* VTOC extent description */ + char res2[10]; /* reserved */ + __u8 DS4EFLVL; /* extended free-space management level */ + struct vtoc_cchhb DS4EFPTR; /* pointer to extended free-space info */ + char res3; /* reserved */ + __u32 DS4DCYL; /* number of logical cyls */ + char res4[2]; /* reserved */ + __u8 DS4DEVF2; /* device flags */ + char res5; /* reserved */ +} __attribute__ ((packed)); + +struct vtoc_ds5ext +{ + __u16 t; /* RTA of the first track of free extent */ + __u16 fc; /* number of whole cylinders in free ext. */ + __u8 ft; /* number of remaining free tracks */ +} __attribute__ ((packed)); + +struct vtoc_format5_label +{ + char DS5KEYID[4]; /* key identifier */ + struct vtoc_ds5ext DS5AVEXT; /* first available (free-space) extent. 
*/ + struct vtoc_ds5ext DS5EXTAV[7]; /* seven available extents */ + __u8 DS5FMTID; /* format identifier */ + struct vtoc_ds5ext DS5MAVET[18]; /* eighteen available extents */ + struct vtoc_cchhb DS5PTRDS; /* pointer to next format5 DSCB */ +} __attribute__ ((packed)); + +struct vtoc_ds7ext +{ + __u32 a; /* starting RTA value */ + __u32 b; /* ending RTA value + 1 */ +} __attribute__ ((packed)); + +struct vtoc_format7_label +{ + char DS7KEYID[4]; /* key identifier */ + struct vtoc_ds7ext DS7EXTNT[5]; /* space for 5 extent descriptions */ + __u8 DS7FMTID; /* format identifier */ + struct vtoc_ds7ext DS7ADEXT[11]; /* space for 11 extent descriptions */ + char res1[2]; /* reserved */ + struct vtoc_cchhb DS7PTRDS; /* pointer to next FMT7 DSCB */ +} __attribute__ ((packed)); + +struct vtoc_cms_label { + __u8 label_id[4]; /* Label identifier */ + __u8 vol_id[6]; /* Volid */ + __u16 version_id; /* Version identifier */ + __u32 block_size; /* Disk block size */ + __u32 origin_ptr; /* Disk origin pointer */ + __u32 usable_count; /* Number of usable cylinders/blocks */ + __u32 formatted_count; /* Maximum number of formatted cylinders/ + * blocks */ + __u32 block_count; /* Disk size in CMS blocks */ + __u32 used_count; /* Number of CMS blocks in use */ + __u32 fst_size; /* File Status Table (FST) size */ + __u32 fst_count; /* Number of FSTs per CMS block */ + __u8 format_date[6]; /* Disk FORMAT date */ + __u8 reserved1[2]; + __u32 disk_offset; /* Disk offset when reserved*/ + __u32 map_block; /* Allocation Map Block with next hole */ + __u32 hblk_disp; /* Displacement into HBLK data of next hole */ + __u32 user_disp; /* Displacement into user part of Allocation + * map */ + __u8 reserved2[4]; + __u8 segment_name[8]; /* Name of shared segment */ +} __attribute__ ((packed)); + +#endif /* _ASM_S390_VTOC_H */ diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h new file mode 100644 index 00000000..c82eb12a --- /dev/null +++ b/arch/s390/include/asm/xor.h @@ -0,0 
+1 @@ +#include <asm-generic/xor.h> diff --git a/arch/s390/include/asm/zcrypt.h b/arch/s390/include/asm/zcrypt.h new file mode 100644 index 00000000..00d3bbd4 --- /dev/null +++ b/arch/s390/include/asm/zcrypt.h @@ -0,0 +1,276 @@ +/* + * include/asm-s390/zcrypt.h + * + * zcrypt 2.1.0 (user-visible header) + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __ASM_S390_ZCRYPT_H +#define __ASM_S390_ZCRYPT_H + +#define ZCRYPT_VERSION 2 +#define ZCRYPT_RELEASE 1 +#define ZCRYPT_VARIANT 1 + +#include <linux/ioctl.h> +#include <linux/compiler.h> + +/** + * struct ica_rsa_modexpo + * + * Requirements: + * - outputdatalength is at least as large as inputdatalength. + * - All key parts are right justified in their fields, padded on + * the left with zeroes. + * - length(b_key) = inputdatalength + * - length(n_modulus) = inputdatalength + */ +struct ica_rsa_modexpo { + char __user * inputdata; + unsigned int inputdatalength; + char __user * outputdata; + unsigned int outputdatalength; + char __user * b_key; + char __user * n_modulus; +}; + +/** + * struct ica_rsa_modexpo_crt + * + * Requirements: + * - inputdatalength is even. 
+ * - outputdatalength is at least as large as inputdatalength. + * - All key parts are right justified in their fields, padded on + * the left with zeroes. + * - length(bp_key) = inputdatalength/2 + 8 + * - length(bq_key) = inputdatalength/2 + * - length(np_key) = inputdatalength/2 + 8 + * - length(nq_key) = inputdatalength/2 + * - length(u_mult_inv) = inputdatalength/2 + 8 + */ +struct ica_rsa_modexpo_crt { + char __user * inputdata; + unsigned int inputdatalength; + char __user * outputdata; + unsigned int outputdatalength; + char __user * bp_key; + char __user * bq_key; + char __user * np_prime; + char __user * nq_prime; + char __user * u_mult_inv; +}; + +/** + * CPRBX + * Note that all shorts and ints are big-endian. + * All pointer fields are 16 bytes long, and mean nothing. + * + * A request CPRB is followed by a request_parameter_block. + * + * The request (or reply) parameter block is organized thus: + * function code + * VUD block + * key block + */ +struct CPRBX { + unsigned short cprb_len; /* CPRB length 220 */ + unsigned char cprb_ver_id; /* CPRB version id. 
0x02 */ + unsigned char pad_000[3]; /* Alignment pad bytes */ + unsigned char func_id[2]; /* function id 0x5432 */ + unsigned char cprb_flags[4]; /* Flags */ + unsigned int req_parml; /* request parameter buffer len */ + unsigned int req_datal; /* request data buffer */ + unsigned int rpl_msgbl; /* reply message block length */ + unsigned int rpld_parml; /* replied parameter block len */ + unsigned int rpl_datal; /* reply data block len */ + unsigned int rpld_datal; /* replied data block len */ + unsigned int req_extbl; /* request extension block len */ + unsigned char pad_001[4]; /* reserved */ + unsigned int rpld_extbl; /* replied extension block len */ + unsigned char padx000[16 - sizeof (char *)]; + unsigned char * req_parmb; /* request parm block 'address' */ + unsigned char padx001[16 - sizeof (char *)]; + unsigned char * req_datab; /* request data block 'address' */ + unsigned char padx002[16 - sizeof (char *)]; + unsigned char * rpl_parmb; /* reply parm block 'address' */ + unsigned char padx003[16 - sizeof (char *)]; + unsigned char * rpl_datab; /* reply data block 'address' */ + unsigned char padx004[16 - sizeof (char *)]; + unsigned char * req_extb; /* request extension block 'addr'*/ + unsigned char padx005[16 - sizeof (char *)]; + unsigned char * rpl_extb; /* reply extension block 'address'*/ + unsigned short ccp_rtcode; /* server return code */ + unsigned short ccp_rscode; /* server reason code */ + unsigned int mac_data_len; /* Mac Data Length */ + unsigned char logon_id[8]; /* Logon Identifier */ + unsigned char mac_value[8]; /* Mac Value */ + unsigned char mac_content_flgs;/* Mac content flag byte */ + unsigned char pad_002; /* Alignment */ + unsigned short domain; /* Domain */ + unsigned char usage_domain[4];/* Usage domain */ + unsigned char cntrl_domain[4];/* Control domain */ + unsigned char S390enf_mask[4];/* S/390 enforcement mask */ + unsigned char pad_004[36]; /* reserved */ +} __attribute__((packed)); + +/** + * xcRB + */ +struct ica_xcRB 
{ + unsigned short agent_ID; + unsigned int user_defined; + unsigned short request_ID; + unsigned int request_control_blk_length; + unsigned char padding1[16 - sizeof (char *)]; + char __user * request_control_blk_addr; + unsigned int request_data_length; + char padding2[16 - sizeof (char *)]; + char __user * request_data_address; + unsigned int reply_control_blk_length; + char padding3[16 - sizeof (char *)]; + char __user * reply_control_blk_addr; + unsigned int reply_data_length; + char padding4[16 - sizeof (char *)]; + char __user * reply_data_addr; + unsigned short priority_window; + unsigned int status; +} __attribute__((packed)); +#define AUTOSELECT ((unsigned int)0xFFFFFFFF) + +#define ZCRYPT_IOCTL_MAGIC 'z' + +/** + * Interface notes: + * + * The ioctl()s which are implemented (along with relevant details) + * are: + * + * ICARSAMODEXPO + * Perform an RSA operation using a Modulus-Exponent pair + * This takes an ica_rsa_modexpo struct as its arg. + * + * NOTE: please refer to the comments preceding this structure + * for the implementation details for the contents of the + * block + * + * ICARSACRT + * Perform an RSA operation using a Chinese-Remainder Theorem key + * This takes an ica_rsa_modexpo_crt struct as its arg. + * + * NOTE: please refer to the comments preceding this structure + * for the implementation details for the contents of the + * block + * + * ZSECSENDCPRB + * Send an arbitrary CPRB to a crypto card. + * + * Z90STAT_STATUS_MASK + * Return a 64-element array of unsigned chars for the status of + * all devices. + * 0x01: PCICA + * 0x02: PCICC + * 0x03: PCIXCC_MCL2 + * 0x04: PCIXCC_MCL3 + * 0x05: CEX2C + * 0x06: CEX2A + * 0x0d: device is disabled via the proc filesystem + * + * Z90STAT_QDEPTH_MASK + * Return a 64-element array of unsigned chars for the queue + * depth of all devices. 
+ * + * Z90STAT_PERDEV_REQCNT + * Return a 64-element array of unsigned integers for the number + * of successfully completed requests per device since the device + * was detected and made available. + * + * Z90STAT_REQUESTQ_COUNT + * Return an integer count of the number of entries waiting to be + * sent to a device. + * + * Z90STAT_PENDINGQ_COUNT + * Return an integer count of the number of entries sent to all + * devices awaiting the reply. + * + * Z90STAT_TOTALOPEN_COUNT + * Return an integer count of the number of open file handles. + * + * Z90STAT_DOMAIN_INDEX + * Return the integer value of the Cryptographic Domain. + * + * The following ioctls are deprecated and should no longer be used: + * + * Z90STAT_TOTALCOUNT + * Return an integer count of all device types together. + * + * Z90STAT_PCICACOUNT + * Return an integer count of all PCICAs. + * + * Z90STAT_PCICCCOUNT + * Return an integer count of all PCICCs. + * + * Z90STAT_PCIXCCMCL2COUNT + * Return an integer count of all MCL2 PCIXCCs. + * + * Z90STAT_PCIXCCMCL3COUNT + * Return an integer count of all MCL3 PCIXCCs. + * + * Z90STAT_CEX2CCOUNT + * Return an integer count of all CEX2Cs. + * + * Z90STAT_CEX2ACOUNT + * Return an integer count of all CEX2As. + * + * ICAZ90STATUS + * Return some device driver status in an ica_z90_status struct + * This takes an ica_z90_status struct as its arg. + * + * Z90STAT_PCIXCCCOUNT + * Return an integer count of all PCIXCCs (MCL2 + MCL3). + * This is DEPRECATED now that MCL3 PCIXCCs are treated differently from + * MCL2 PCIXCCs. 
+ */ + +/** + * Supported ioctl calls + */ +#define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0) +#define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0) +#define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0) + +/* New status calls */ +#define Z90STAT_TOTALCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int) +#define Z90STAT_PCICACOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x41, int) +#define Z90STAT_PCICCCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x42, int) +#define Z90STAT_PCIXCCMCL2COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4b, int) +#define Z90STAT_PCIXCCMCL3COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4c, int) +#define Z90STAT_CEX2CCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4d, int) +#define Z90STAT_CEX2ACOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4e, int) +#define Z90STAT_REQUESTQ_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x44, int) +#define Z90STAT_PENDINGQ_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x45, int) +#define Z90STAT_TOTALOPEN_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x46, int) +#define Z90STAT_DOMAIN_INDEX _IOR(ZCRYPT_IOCTL_MAGIC, 0x47, int) +#define Z90STAT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64]) +#define Z90STAT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64]) +#define Z90STAT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64]) + +#endif /* __ASM_S390_ZCRYPT_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile new file mode 100644 index 00000000..884b18af --- /dev/null +++ b/arch/s390/kernel/Makefile @@ -0,0 +1,61 @@ +# +# Makefile for the linux kernel. +# + +ifdef CONFIG_FUNCTION_TRACER +# Don't trace early setup code and tracing code +CFLAGS_REMOVE_early.o = -pg +CFLAGS_REMOVE_ftrace.o = -pg +endif + +# +# Passing null pointers is ok for smp code, since we access the lowcore here. 
+# +CFLAGS_smp.o := -Wno-nonnull + +# +# Pass UTS_MACHINE for user_regset definition +# +CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' + +CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w + +obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \ + processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \ + debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \ + sysinfo.o jump_label.o lgr.o os_info.o + +obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) +obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) + +extra-y += head.o init_task.o vmlinux.lds +extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o) + +obj-$(CONFIG_MODULES) += s390_ksyms.o module.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SCHED_BOOK) += topology.o +obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o +obj-$(CONFIG_AUDIT) += audit.o +compat-obj-$(CONFIG_AUDIT) += compat_audit.o +obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ + compat_wrapper.o compat_exec_domain.o \ + $(compat-obj-y) + +obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o + +# Kexec part +S390_KEXEC_OBJS := machine_kexec.o crash.o +S390_KEXEC_OBJS += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) +obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS) + +# vdso +obj-$(CONFIG_64BIT) += vdso64/ +obj-$(CONFIG_32BIT) += vdso32/ +obj-$(CONFIG_COMPAT) += vdso32/ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c new file mode 100644 index 00000000..83e6edf5 --- /dev/null +++ b/arch/s390/kernel/asm-offsets.c @@ -0,0 +1,163 @@ +/* + * Generate definitions needed by assembly language modules. 
+ * This code generates raw asm output which is post-processed to extract + * and format the required data. + */ + +#define ASM_OFFSETS_C + +#include <linux/kbuild.h> +#include <linux/sched.h> +#include <asm/cputime.h> +#include <asm/timer.h> +#include <asm/vdso.h> +#include <asm/pgtable.h> + +/* + * Make sure that the compiler is new enough. We want a compiler that + * is known to work with the "Q" assembler constraint. + */ +#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +#error Your compiler is too old; please use version 3.3.3 or newer +#endif + +int main(void) +{ + DEFINE(__THREAD_info, offsetof(struct task_struct, stack)); + DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp)); + DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment)); + BLANK(); + DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); + BLANK(); + DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause)); + DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address)); + DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid)); + BLANK(); + DEFINE(__TI_task, offsetof(struct thread_info, task)); + DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); + DEFINE(__TI_flags, offsetof(struct thread_info, flags)); + DEFINE(__TI_cpu, offsetof(struct thread_info, cpu)); + DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count)); + DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer)); + DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer)); + DEFINE(__TI_last_break, offsetof(struct thread_info, last_break)); + BLANK(); + DEFINE(__PT_ARGS, offsetof(struct pt_regs, args)); + DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); + DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); + DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); + DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); + DEFINE(__PT_INT_PARM_LONG, offsetof(struct 
pt_regs, int_parm_long)); + DEFINE(__PT_SIZE, sizeof(struct pt_regs)); + BLANK(); + DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); + DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs)); + DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1)); + BLANK(); + /* timeval/timezone offsets for use by vdso */ + DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count)); + DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp)); + DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec)); + DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); + DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); + DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); + DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); + DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); + DEFINE(__VDSO_NTP_MULT, offsetof(struct vdso_data, ntp_mult)); + DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base)); + DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time)); + /* constants used by the vdso */ + DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); + DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + BLANK(); + /* idle data offsets */ + DEFINE(__IDLE_ENTER, offsetof(struct s390_idle_data, idle_enter)); + DEFINE(__IDLE_EXIT, offsetof(struct s390_idle_data, idle_exit)); + /* vtimer queue offsets */ + DEFINE(__VQ_IDLE_ENTER, offsetof(struct vtimer_queue, idle_enter)); + DEFINE(__VQ_IDLE_EXIT, offsetof(struct vtimer_queue, idle_exit)); + /* lowcore offsets */ + DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params)); + DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr)); + DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code)); + DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc)); + DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, 
svc_code)); + DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc)); + DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code)); + DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code)); + DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid)); + DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address)); + DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id)); + DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id)); + DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id)); + DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr)); + DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm)); + DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word)); + DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list)); + DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code)); + DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw)); + DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw)); + DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw)); + DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw)); + DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw)); + DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw)); + DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw)); + DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw)); + DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw)); + DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw)); + DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw)); + DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw)); + BLANK(); + DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync)); + DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async)); + DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart)); + 
DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw)); + DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw)); + DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer)); + DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer)); + DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer)); + DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer)); + DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer)); + DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer)); + DEFINE(__LC_STEAL_TIMER, offsetof(struct _lowcore, steal_timer)); + DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer)); + DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock)); + DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task)); + DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid)); + DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info)); + DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); + DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); + DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); + DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack)); + DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn)); + DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); + DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); + DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); + DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); + DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); + DEFINE(__LC_IRB, offsetof(struct _lowcore, irb)); + DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); + BLANK(); + DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area)); + DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area)); + 
DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area)); + DEFINE(__LC_PREFIX_SAVE_AREA, offsetof(struct _lowcore, prefixreg_save_area)); + DEFINE(__LC_AREGS_SAVE_AREA, offsetof(struct _lowcore, access_regs_save_area)); + DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); + DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); + DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); +#ifdef CONFIG_32BIT + DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); +#else /* CONFIG_32BIT */ + DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2)); + DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area)); + DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste)); + DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area)); + DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr)); + DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); + DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); + DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); +#endif /* CONFIG_32BIT */ + return 0; +} diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c new file mode 100644 index 00000000..f4932c22 --- /dev/null +++ b/arch/s390/kernel/audit.c @@ -0,0 +1,78 @@ +#include <linux/init.h> +#include <linux/types.h> +#include <linux/audit.h> +#include <asm/unistd.h> +#include "audit.h" + +static unsigned dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +static unsigned read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +static unsigned write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +static unsigned chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +static unsigned signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int audit_classify_arch(int arch) +{ +#ifdef CONFIG_COMPAT + if (arch == AUDIT_ARCH_S390) + return 
1; +#endif + return 0; +} + +int audit_classify_syscall(int abi, unsigned syscall) +{ +#ifdef CONFIG_COMPAT + if (abi == AUDIT_ARCH_S390) + return s390_classify_syscall(syscall); +#endif + switch(syscall) { + case __NR_open: + return 2; + case __NR_openat: + return 3; + case __NR_socketcall: + return 4; + case __NR_execve: + return 5; + default: + return 0; + } +} + +static int __init audit_classes_init(void) +{ +#ifdef CONFIG_COMPAT + audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class); + audit_register_class(AUDIT_CLASS_READ_32, s390_read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class); + audit_register_class(AUDIT_CLASS_CHATTR_32, s390_chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL_32, s390_signal_class); +#endif + audit_register_class(AUDIT_CLASS_WRITE, write_class); + audit_register_class(AUDIT_CLASS_READ, read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); + audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); + return 0; +} + +__initcall(audit_classes_init); diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h new file mode 100644 index 00000000..12b56f4b --- /dev/null +++ b/arch/s390/kernel/audit.h @@ -0,0 +1,15 @@ +#ifndef __ARCH_S390_KERNEL_AUDIT_H +#define __ARCH_S390_KERNEL_AUDIT_H + +#include <linux/types.h> + +#ifdef CONFIG_COMPAT +extern int s390_classify_syscall(unsigned); +extern __u32 s390_dir_class[]; +extern __u32 s390_write_class[]; +extern __u32 s390_read_class[]; +extern __u32 s390_chattr_class[]; +extern __u32 s390_signal_class[]; +#endif /* CONFIG_COMPAT */ + +#endif /* __ARCH_S390_KERNEL_AUDIT_H */ diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S new file mode 100644 index 00000000..3aa4d00a --- /dev/null +++ b/arch/s390/kernel/base.S @@ -0,0 +1,193 @@ +/* + * arch/s390/kernel/base.S + * + * Copyright IBM Corp. 
2006,2007 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + * Michael Holzheu <holzheu@de.ibm.com> + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_64BIT + +ENTRY(s390_base_mcck_handler) + basr %r13,0 +0: lg %r15,__LC_PANIC_STACK # load panic stack + aghi %r15,-STACK_FRAME_OVERHEAD + larl %r1,s390_base_mcck_handler_fn + lg %r1,0(%r1) + ltgr %r1,%r1 + jz 1f + basr %r14,%r1 +1: la %r1,4095 + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) + lpswe __LC_MCK_OLD_PSW + + .section .bss + .align 8 + .globl s390_base_mcck_handler_fn +s390_base_mcck_handler_fn: + .quad 0 + .previous + +ENTRY(s390_base_ext_handler) + stmg %r0,%r15,__LC_SAVE_AREA_ASYNC + basr %r13,0 +0: aghi %r15,-STACK_FRAME_OVERHEAD + larl %r1,s390_base_ext_handler_fn + lg %r1,0(%r1) + ltgr %r1,%r1 + jz 1f + basr %r14,%r1 +1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC + ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit + lpswe __LC_EXT_OLD_PSW + + .section .bss + .align 8 + .globl s390_base_ext_handler_fn +s390_base_ext_handler_fn: + .quad 0 + .previous + +ENTRY(s390_base_pgm_handler) + stmg %r0,%r15,__LC_SAVE_AREA_SYNC + basr %r13,0 +0: aghi %r15,-STACK_FRAME_OVERHEAD + larl %r1,s390_base_pgm_handler_fn + lg %r1,0(%r1) + ltgr %r1,%r1 + jz 1f + basr %r14,%r1 + lmg %r0,%r15,__LC_SAVE_AREA_SYNC + lpswe __LC_PGM_OLD_PSW +1: lpswe disabled_wait_psw-0b(%r13) + + .align 8 +disabled_wait_psw: + .quad 0x0002000180000000,0x0000000000000000 + s390_base_pgm_handler + + .section .bss + .align 8 + .globl s390_base_pgm_handler_fn +s390_base_pgm_handler_fn: + .quad 0 + .previous + +# +# Calls diag 308 subcode 1 and continues execution +# +# The following conditions must be ensured before calling this function: +# * Prefix register = 0 +# * Lowcore protection is disabled +# +ENTRY(diag308_reset) + larl %r4,.Lctlregs # Save control registers + stctg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Floating point control register + stfpc 0(%r4) + larl %r4,.Lrestart_psw # Setup 
restart PSW at absolute 0 + lghi %r3,0 + lg %r4,0(%r4) # Save PSW + sturg %r4,%r3 # Use sturg, because of large pages + lghi %r1,1 + diag %r1,%r1,0x308 +.Lrestart_part2: + lhi %r0,0 # Load r0 with zero + lhi %r1,2 # Use mode 2 = ESAME (dump) + sigp %r1,%r0,0x12 # Switch to ESAME mode + sam64 # Switch to 64 bit addressing mode + larl %r4,.Lctlregs # Restore control registers + lctlg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Restore floating point ctl register + lfpc 0(%r4) + br %r14 +.align 16 +.Lrestart_psw: + .long 0x00080000,0x80000000 + .Lrestart_part2 + + .section .bss +.align 8 +.Lctlregs: + .rept 16 + .quad 0 + .endr +.Lfpctl: + .long 0 + .previous + +#else /* CONFIG_64BIT */ + +ENTRY(s390_base_mcck_handler) + basr %r13,0 +0: l %r15,__LC_PANIC_STACK # load panic stack + ahi %r15,-STACK_FRAME_OVERHEAD + l %r1,2f-0b(%r13) + l %r1,0(%r1) + ltr %r1,%r1 + jz 1f + basr %r14,%r1 +1: lm %r0,%r15,__LC_GPREGS_SAVE_AREA + lpsw __LC_MCK_OLD_PSW + +2: .long s390_base_mcck_handler_fn + + .section .bss + .align 4 + .globl s390_base_mcck_handler_fn +s390_base_mcck_handler_fn: + .long 0 + .previous + +ENTRY(s390_base_ext_handler) + stm %r0,%r15,__LC_SAVE_AREA_ASYNC + basr %r13,0 +0: ahi %r15,-STACK_FRAME_OVERHEAD + l %r1,2f-0b(%r13) + l %r1,0(%r1) + ltr %r1,%r1 + jz 1f + basr %r14,%r1 +1: lm %r0,%r15,__LC_SAVE_AREA_ASYNC + ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit + lpsw __LC_EXT_OLD_PSW + +2: .long s390_base_ext_handler_fn + + .section .bss + .align 4 + .globl s390_base_ext_handler_fn +s390_base_ext_handler_fn: + .long 0 + .previous + +ENTRY(s390_base_pgm_handler) + stm %r0,%r15,__LC_SAVE_AREA_SYNC + basr %r13,0 +0: ahi %r15,-STACK_FRAME_OVERHEAD + l %r1,2f-0b(%r13) + l %r1,0(%r1) + ltr %r1,%r1 + jz 1f + basr %r14,%r1 + lm %r0,%r15,__LC_SAVE_AREA_SYNC + lpsw __LC_PGM_OLD_PSW + +1: lpsw disabled_wait_psw-0b(%r13) + +2: .long s390_base_pgm_handler_fn + +disabled_wait_psw: + .align 8 + .long 0x000a0000,0x00000000 + s390_base_pgm_handler + + .section .bss + .align 4 + .globl 
s390_base_pgm_handler_fn +s390_base_pgm_handler_fn: + .long 0 + .previous + +#endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/bitmap.c b/arch/s390/kernel/bitmap.c new file mode 100644 index 00000000..3ae4757b --- /dev/null +++ b/arch/s390/kernel/bitmap.c @@ -0,0 +1,54 @@ +/* + * Bitmaps for set_bit, clear_bit, test_and_set_bit, ... + * See include/asm/{bitops.h|posix_types.h} for details + * + * Copyright IBM Corp. 1999,2009 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + */ + +#include <linux/bitops.h> +#include <linux/module.h> + +const char _oi_bitmap[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 }; +EXPORT_SYMBOL(_oi_bitmap); + +const char _ni_bitmap[] = { 0xfe, 0xfd, 0xfb, 0xf7, 0xef, 0xdf, 0xbf, 0x7f }; +EXPORT_SYMBOL(_ni_bitmap); + +const char _zb_findmap[] = { + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 }; +EXPORT_SYMBOL(_zb_findmap); + +const char _sb_findmap[] = { + 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 }; 
+EXPORT_SYMBOL(_sb_findmap); diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c new file mode 100644 index 00000000..d6487bf8 --- /dev/null +++ b/arch/s390/kernel/compat_audit.c @@ -0,0 +1,44 @@ +#undef __s390x__ +#include <asm/unistd.h> +#include "audit.h" + +unsigned s390_dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +unsigned s390_chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +unsigned s390_write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +unsigned s390_read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +unsigned s390_signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int s390_classify_syscall(unsigned syscall) +{ + switch(syscall) { + case __NR_open: + return 2; + case __NR_openat: + return 3; + case __NR_socketcall: + return 4; + case __NR_execve: + return 5; + default: + return 1; + } +} diff --git a/arch/s390/kernel/compat_exec_domain.c b/arch/s390/kernel/compat_exec_domain.c new file mode 100644 index 00000000..914d4944 --- /dev/null +++ b/arch/s390/kernel/compat_exec_domain.c @@ -0,0 +1,29 @@ +/* + * Support for 32-bit Linux for S390 personality. 
+ * + * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Gerhard Tonn (ton@de.ibm.com) + * + * + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/personality.h> +#include <linux/sched.h> + +static struct exec_domain s390_exec_domain; + +static int __init s390_init (void) +{ + s390_exec_domain.name = "Linux/s390"; + s390_exec_domain.handler = NULL; + s390_exec_domain.pers_low = PER_LINUX32; + s390_exec_domain.pers_high = PER_LINUX32; + s390_exec_domain.signal_map = default_exec_domain.signal_map; + s390_exec_domain.signal_invmap = default_exec_domain.signal_invmap; + register_exec_domain(&s390_exec_domain); + return 0; +} + +__initcall(s390_init); diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c new file mode 100644 index 00000000..ab64bdba --- /dev/null +++ b/arch/s390/kernel/compat_linux.c @@ -0,0 +1,681 @@ +/* + * arch/s390x/kernel/linux32.c + * + * S390 version + * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Gerhard Tonn (ton@de.ibm.com) + * Thomas Spatzier (tspat@de.ibm.com) + * + * Conversion between 31bit and 64bit native syscalls. + * + * Heavily inspired by the 32-bit Sparc compat code which is + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. 
Miller (davem@caip.rutgers.edu) + * + */ + + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/file.h> +#include <linux/signal.h> +#include <linux/resource.h> +#include <linux/times.h> +#include <linux/smp.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/shm.h> +#include <linux/uio.h> +#include <linux/quota.h> +#include <linux/module.h> +#include <linux/poll.h> +#include <linux/personality.h> +#include <linux/stat.h> +#include <linux/filter.h> +#include <linux/highmem.h> +#include <linux/highuid.h> +#include <linux/mman.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/icmpv6.h> +#include <linux/syscalls.h> +#include <linux/sysctl.h> +#include <linux/binfmts.h> +#include <linux/capability.h> +#include <linux/compat.h> +#include <linux/vfs.h> +#include <linux/ptrace.h> +#include <linux/fadvise.h> +#include <linux/ipc.h> +#include <linux/slab.h> + +#include <asm/types.h> +#include <asm/uaccess.h> + +#include <net/scm.h> +#include <net/sock.h> + +#include "compat_linux.h" + +u32 psw32_user_bits = PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | + PSW32_DEFAULT_KEY | PSW32_MASK_BASE | PSW32_MASK_MCHECK | + PSW32_MASK_PSTATE | PSW32_ASC_HOME; + +/* For this source file, we want overflow handling. */ + +#undef high2lowuid +#undef high2lowgid +#undef low2highuid +#undef low2highgid +#undef SET_UID16 +#undef SET_GID16 +#undef NEW_TO_OLD_UID +#undef NEW_TO_OLD_GID +#undef SET_OLDSTAT_UID +#undef SET_OLDSTAT_GID +#undef SET_STAT_UID +#undef SET_STAT_GID + +#define high2lowuid(uid) ((uid) > 65535) ? (u16)overflowuid : (u16)(uid) +#define high2lowgid(gid) ((gid) > 65535) ? (u16)overflowgid : (u16)(gid) +#define low2highuid(uid) ((uid) == (u16)-1) ? (uid_t)-1 : (uid_t)(uid) +#define low2highgid(gid) ((gid) == (u16)-1) ? 
(gid_t)-1 : (gid_t)(gid) +#define SET_UID16(var, uid) var = high2lowuid(uid) +#define SET_GID16(var, gid) var = high2lowgid(gid) +#define NEW_TO_OLD_UID(uid) high2lowuid(uid) +#define NEW_TO_OLD_GID(gid) high2lowgid(gid) +#define SET_OLDSTAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid) +#define SET_OLDSTAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid) +#define SET_STAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid) +#define SET_STAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid) + +asmlinkage long sys32_chown16(const char __user * filename, u16 user, u16 group) +{ + return sys_chown(filename, low2highuid(user), low2highgid(group)); +} + +asmlinkage long sys32_lchown16(const char __user * filename, u16 user, u16 group) +{ + return sys_lchown(filename, low2highuid(user), low2highgid(group)); +} + +asmlinkage long sys32_fchown16(unsigned int fd, u16 user, u16 group) +{ + return sys_fchown(fd, low2highuid(user), low2highgid(group)); +} + +asmlinkage long sys32_setregid16(u16 rgid, u16 egid) +{ + return sys_setregid(low2highgid(rgid), low2highgid(egid)); +} + +asmlinkage long sys32_setgid16(u16 gid) +{ + return sys_setgid((gid_t)gid); +} + +asmlinkage long sys32_setreuid16(u16 ruid, u16 euid) +{ + return sys_setreuid(low2highuid(ruid), low2highuid(euid)); +} + +asmlinkage long sys32_setuid16(u16 uid) +{ + return sys_setuid((uid_t)uid); +} + +asmlinkage long sys32_setresuid16(u16 ruid, u16 euid, u16 suid) +{ + return sys_setresuid(low2highuid(ruid), low2highuid(euid), + low2highuid(suid)); +} + +asmlinkage long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid) +{ + int retval; + + if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && + !(retval = put_user(high2lowuid(current->cred->euid), euid))) + retval = put_user(high2lowuid(current->cred->suid), suid); + + return retval; +} + +asmlinkage long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid) +{ + return sys_setresgid(low2highgid(rgid), low2highgid(egid), + low2highgid(sgid)); 
+} + +asmlinkage long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid) +{ + int retval; + + if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && + !(retval = put_user(high2lowgid(current->cred->egid), egid))) + retval = put_user(high2lowgid(current->cred->sgid), sgid); + + return retval; +} + +asmlinkage long sys32_setfsuid16(u16 uid) +{ + return sys_setfsuid((uid_t)uid); +} + +asmlinkage long sys32_setfsgid16(u16 gid) +{ + return sys_setfsgid((gid_t)gid); +} + +static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info) +{ + int i; + u16 group; + + for (i = 0; i < group_info->ngroups; i++) { + group = (u16)GROUP_AT(group_info, i); + if (put_user(group, grouplist+i)) + return -EFAULT; + } + + return 0; +} + +static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist) +{ + int i; + u16 group; + + for (i = 0; i < group_info->ngroups; i++) { + if (get_user(group, grouplist+i)) + return -EFAULT; + GROUP_AT(group_info, i) = (gid_t)group; + } + + return 0; +} + +asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist) +{ + int i; + + if (gidsetsize < 0) + return -EINVAL; + + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; + if (gidsetsize) { + if (i > gidsetsize) { + i = -EINVAL; + goto out; + } + if (groups16_to_user(grouplist, current->cred->group_info)) { + i = -EFAULT; + goto out; + } + } +out: + put_group_info(current->cred->group_info); + return i; +} + +asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) +{ + struct group_info *group_info; + int retval; + + if (!capable(CAP_SETGID)) + return -EPERM; + if ((unsigned)gidsetsize > NGROUPS_MAX) + return -EINVAL; + + group_info = groups_alloc(gidsetsize); + if (!group_info) + return -ENOMEM; + retval = groups16_from_user(group_info, grouplist); + if (retval) { + put_group_info(group_info); + return retval; + } + + retval = set_current_groups(group_info); + 
put_group_info(group_info); + + return retval; +} + +asmlinkage long sys32_getuid16(void) +{ + return high2lowuid(current->cred->uid); +} + +asmlinkage long sys32_geteuid16(void) +{ + return high2lowuid(current->cred->euid); +} + +asmlinkage long sys32_getgid16(void) +{ + return high2lowgid(current->cred->gid); +} + +asmlinkage long sys32_getegid16(void) +{ + return high2lowgid(current->cred->egid); +} + +/* + * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit emulation. + * + * This is really horribly ugly. + */ +#ifdef CONFIG_SYSVIPC +asmlinkage long sys32_ipc(u32 call, int first, int second, int third, u32 ptr) +{ + if (call >> 16) /* hack for backward compatibility */ + return -EINVAL; + switch (call) { + case SEMTIMEDOP: + return compat_sys_semtimedop(first, compat_ptr(ptr), + second, compat_ptr(third)); + case SEMOP: + /* struct sembuf is the same on 32 and 64bit :)) */ + return sys_semtimedop(first, compat_ptr(ptr), + second, NULL); + case SEMGET: + return sys_semget(first, second, third); + case SEMCTL: + return compat_sys_semctl(first, second, third, + compat_ptr(ptr)); + case MSGSND: + return compat_sys_msgsnd(first, second, third, + compat_ptr(ptr)); + case MSGRCV: + return compat_sys_msgrcv(first, second, 0, third, + 0, compat_ptr(ptr)); + case MSGGET: + return sys_msgget((key_t) first, second); + case MSGCTL: + return compat_sys_msgctl(first, second, compat_ptr(ptr)); + case SHMAT: + return compat_sys_shmat(first, second, third, + 0, compat_ptr(ptr)); + case SHMDT: + return sys_shmdt(compat_ptr(ptr)); + case SHMGET: + return sys_shmget(first, (unsigned)second, third); + case SHMCTL: + return compat_sys_shmctl(first, second, compat_ptr(ptr)); + } + + return -ENOSYS; +} +#endif + +asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low) +{ + if ((int)high < 0) + return -EINVAL; + else + return sys_truncate(path, (high << 32) | low); +} + +asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned 
long high, unsigned long low) +{ + if ((int)high < 0) + return -EINVAL; + else + return sys_ftruncate(fd, (high << 32) | low); +} + +asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval) +{ + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs (); + + set_fs (KERNEL_DS); + ret = sys_sched_rr_get_interval(pid, + (struct timespec __force __user *) &t); + set_fs (old_fs); + if (put_compat_timespec(&t, interval)) + return -EFAULT; + return ret; +} + +asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, size_t sigsetsize) +{ + sigset_t s; + compat_sigset_t s32; + int ret; + mm_segment_t old_fs = get_fs(); + + if (set) { + if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) + return -EFAULT; + s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); + } + set_fs (KERNEL_DS); + ret = sys_rt_sigprocmask(how, + set ? (sigset_t __force __user *) &s : NULL, + oset ? (sigset_t __force __user *) &s : NULL, + sigsetsize); + set_fs (old_fs); + if (ret) return ret; + if (oset) { + s32.sig[1] = (s.sig[0] >> 32); + s32.sig[0] = s.sig[0]; + if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) + return -EFAULT; + } + return 0; +} + +asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, + size_t sigsetsize) +{ + sigset_t s; + compat_sigset_t s32; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_rt_sigpending((sigset_t __force __user *) &s, sigsetsize); + set_fs (old_fs); + if (!ret) { + s32.sig[1] = (s.sig[0] >> 32); + s32.sig[0] = s.sig[0]; + if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) + return -EFAULT; + } + return ret; +} + +asmlinkage long +sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo) +{ + siginfo_t info; + int ret; + mm_segment_t old_fs = get_fs(); + + if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + set_fs (KERNEL_DS); + ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t 
__force __user *) &info); + set_fs (old_fs); + return ret; +} + +/* + * sys32_execve() executes a new program after the asm stub has set + * things up for us. This should basically do what I want it to. + */ +asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv, + compat_uptr_t __user *envp) +{ + struct pt_regs *regs = task_pt_regs(current); + char *filename; + long rc; + + filename = getname(name); + rc = PTR_ERR(filename); + if (IS_ERR(filename)) + return rc; + rc = compat_do_execve(filename, argv, envp, regs); + if (rc) + goto out; + current->thread.fp_regs.fpc=0; + asm volatile("sfpc %0,0" : : "d" (0)); + rc = regs->gprs[2]; +out: + putname(filename); + return rc; +} + +asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf, + size_t count, u32 poshi, u32 poslo) +{ + if ((compat_ssize_t) count < 0) + return -EINVAL; + return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); +} + +asmlinkage long sys32_pwrite64(unsigned int fd, const char __user *ubuf, + size_t count, u32 poshi, u32 poslo) +{ + if ((compat_ssize_t) count < 0) + return -EINVAL; + return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); +} + +asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count) +{ + return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count); +} + +asmlinkage long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, size_t count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + off_t of; + + if (offset && get_user(of, offset)) + return -EFAULT; + + set_fs(KERNEL_DS); + ret = sys_sendfile(out_fd, in_fd, + offset ? 
(off_t __force __user *) &of : NULL, count); + set_fs(old_fs); + + if (offset && put_user(of, offset)) + return -EFAULT; + + return ret; +} + +asmlinkage long sys32_sendfile64(int out_fd, int in_fd, + compat_loff_t __user *offset, s32 count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + loff_t lof; + + if (offset && get_user(lof, offset)) + return -EFAULT; + + set_fs(KERNEL_DS); + ret = sys_sendfile64(out_fd, in_fd, + offset ? (loff_t __force __user *) &lof : NULL, + count); + set_fs(old_fs); + + if (offset && put_user(lof, offset)) + return -EFAULT; + + return ret; +} + +struct stat64_emu31 { + unsigned long long st_dev; + unsigned int __pad1; +#define STAT64_HAS_BROKEN_ST_INO 1 + u32 __st_ino; + unsigned int st_mode; + unsigned int st_nlink; + u32 st_uid; + u32 st_gid; + unsigned long long st_rdev; + unsigned int __pad3; + long st_size; + u32 st_blksize; + unsigned char __pad4[4]; + u32 __pad5; /* future possible st_blocks high bits */ + u32 st_blocks; /* Number 512-byte blocks allocated. */ + u32 st_atime; + u32 __pad6; + u32 st_mtime; + u32 __pad7; + u32 st_ctime; + u32 __pad8; /* will be high 32 bits of ctime someday */ + unsigned long st_ino; +}; + +static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) +{ + struct stat64_emu31 tmp; + + memset(&tmp, 0, sizeof(tmp)); + + tmp.st_dev = huge_encode_dev(stat->dev); + tmp.st_ino = stat->ino; + tmp.__st_ino = (u32)stat->ino; + tmp.st_mode = stat->mode; + tmp.st_nlink = (unsigned int)stat->nlink; + tmp.st_uid = stat->uid; + tmp.st_gid = stat->gid; + tmp.st_rdev = huge_encode_dev(stat->rdev); + tmp.st_size = stat->size; + tmp.st_blksize = (u32)stat->blksize; + tmp.st_blocks = (u32)stat->blocks; + tmp.st_atime = (u32)stat->atime.tv_sec; + tmp.st_mtime = (u32)stat->mtime.tv_sec; + tmp.st_ctime = (u32)stat->ctime.tv_sec; + + return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? 
-EFAULT : 0; +} + +asmlinkage long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf) +{ + struct kstat stat; + int ret = vfs_stat(filename, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +asmlinkage long sys32_lstat64(const char __user * filename, struct stat64_emu31 __user * statbuf) +{ + struct kstat stat; + int ret = vfs_lstat(filename, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf) +{ + struct kstat stat; + int ret = vfs_fstat(fd, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +asmlinkage long sys32_fstatat64(unsigned int dfd, const char __user *filename, + struct stat64_emu31 __user* statbuf, int flag) +{ + struct kstat stat; + int error; + + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); +} + +/* + * Linux/i386 didn't use to be able to handle more than + * 4 system call parameters, so these system calls used a memory + * block for parameter passing.. 
+ */ + +struct mmap_arg_struct_emu31 { + compat_ulong_t addr; + compat_ulong_t len; + compat_ulong_t prot; + compat_ulong_t flags; + compat_ulong_t fd; + compat_ulong_t offset; +}; + +asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg) +{ + struct mmap_arg_struct_emu31 a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + if (a.offset & ~PAGE_MASK) + return -EINVAL; + a.addr = (unsigned long) compat_ptr(a.addr); + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); +} + +asmlinkage long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg) +{ + struct mmap_arg_struct_emu31 a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + a.addr = (unsigned long) compat_ptr(a.addr); + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); +} + +asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count) +{ + if ((compat_ssize_t) count < 0) + return -EINVAL; + + return sys_read(fd, buf, count); +} + +asmlinkage long sys32_write(unsigned int fd, const char __user * buf, size_t count) +{ + if ((compat_ssize_t) count < 0) + return -EINVAL; + + return sys_write(fd, buf, count); +} + +/* + * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64. + * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE} + * because the 31 bit values differ from the 64 bit values. 
+ */ + +asmlinkage long +sys32_fadvise64(int fd, loff_t offset, size_t len, int advise) +{ + if (advise == 4) + advise = POSIX_FADV_DONTNEED; + else if (advise == 5) + advise = POSIX_FADV_NOREUSE; + return sys_fadvise64(fd, offset, len, advise); +} + +struct fadvise64_64_args { + int fd; + long long offset; + long long len; + int advice; +}; + +asmlinkage long +sys32_fadvise64_64(struct fadvise64_64_args __user *args) +{ + struct fadvise64_64_args a; + + if ( copy_from_user(&a, args, sizeof(a)) ) + return -EFAULT; + if (a.advice == 4) + a.advice = POSIX_FADV_DONTNEED; + else if (a.advice == 5) + a.advice = POSIX_FADV_NOREUSE; + return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice); +} diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h new file mode 100644 index 00000000..9635d759 --- /dev/null +++ b/arch/s390/kernel/compat_linux.h @@ -0,0 +1,227 @@ +#ifndef _ASM_S390X_S390_H +#define _ASM_S390X_S390_H + +#include <linux/compat.h> +#include <linux/socket.h> +#include <linux/syscalls.h> + +/* Macro that masks the high order bit of an 32 bit pointer and converts it*/ +/* to a 64 bit pointer */ +#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL)) +#define AA(__x) \ + ((unsigned long)(__x)) + +/* Now 32bit compatibility types */ +struct ipc_kludge_32 { + __u32 msgp; /* pointer */ + __s32 msgtyp; +}; + +struct old_sigaction32 { + __u32 sa_handler; /* Really a pointer, but need to deal with 32 bits */ + compat_old_sigset_t sa_mask; /* A 32 bit mask */ + __u32 sa_flags; + __u32 sa_restorer; /* Another 32 bit pointer */ +}; + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[((128/sizeof(int)) - 3)]; + + /* kill() */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to 
be passed to user */ + } _timer; + + /* POSIX.1b signals */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + pid_t _pid; /* which child */ + uid_t _uid; /* sender's uid */ + int _status;/* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + __u32 _addr; /* faulting insn/memory ref. - pointer */ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} compat_siginfo_t; + +/* + * How these fields are to be accessed. + */ +#define si_pid _sifields._kill._pid +#define si_uid _sifields._kill._uid +#define si_status _sifields._sigchld._status +#define si_utime _sifields._sigchld._utime +#define si_stime _sifields._sigchld._stime +#define si_value _sifields._rt._sigval +#define si_int _sifields._rt._sigval.sival_int +#define si_ptr _sifields._rt._sigval.sival_ptr +#define si_addr _sifields._sigfault._addr +#define si_band _sifields._sigpoll._band +#define si_fd _sifields._sigpoll._fd +#define si_tid _sifields._timer._tid +#define si_overrun _sifields._timer._overrun + +/* asm/sigcontext.h */ +typedef union +{ + __u64 d; + __u32 f; +} freg_t32; + +typedef struct +{ + unsigned int fpc; + freg_t32 fprs[__NUM_FPRS]; +} _s390_fp_regs32; + +typedef struct +{ + __u32 mask; + __u32 addr; +} _psw_t32 __attribute__ ((aligned(8))); + +typedef struct +{ + _psw_t32 psw; + __u32 gprs[__NUM_GPRS]; + __u32 acrs[__NUM_ACRS]; +} _s390_regs_common32; + +typedef struct +{ + _s390_regs_common32 regs; + _s390_fp_regs32 fpregs; +} _sigregs32; + +#define _SIGCONTEXT_NSIG32 64 +#define _SIGCONTEXT_NSIG_BPW32 32 +#define __SIGNAL_FRAMESIZE32 96 +#define _SIGMASK_COPY_SIZE32 (sizeof(u32)*2) + +struct sigcontext32 +{ + __u32 oldmask[_COMPAT_NSIG_WORDS]; + __u32 sregs; /* pointer */ +}; + +/* asm/signal.h */ +struct sigaction32 { + __u32 sa_handler; /* 
pointer */ + __u32 sa_flags; + __u32 sa_restorer; /* pointer */ + compat_sigset_t sa_mask; /* mask last for extensibility */ +}; + +typedef struct { + __u32 ss_sp; /* pointer */ + int ss_flags; + compat_size_t ss_size; +} stack_t32; + +/* asm/ucontext.h */ +struct ucontext32 { + __u32 uc_flags; + __u32 uc_link; /* pointer */ + stack_t32 uc_stack; + _sigregs32 uc_mcontext; + compat_sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +struct stat64_emu31; +struct mmap_arg_struct_emu31; +struct fadvise64_64_args; +struct old_sigaction32; +struct old_sigaction32; + +long sys32_chown16(const char __user * filename, u16 user, u16 group); +long sys32_lchown16(const char __user * filename, u16 user, u16 group); +long sys32_fchown16(unsigned int fd, u16 user, u16 group); +long sys32_setregid16(u16 rgid, u16 egid); +long sys32_setgid16(u16 gid); +long sys32_setreuid16(u16 ruid, u16 euid); +long sys32_setuid16(u16 uid); +long sys32_setresuid16(u16 ruid, u16 euid, u16 suid); +long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid); +long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid); +long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid); +long sys32_setfsuid16(u16 uid); +long sys32_setfsgid16(u16 gid); +long sys32_getgroups16(int gidsetsize, u16 __user *grouplist); +long sys32_setgroups16(int gidsetsize, u16 __user *grouplist); +long sys32_getuid16(void); +long sys32_geteuid16(void); +long sys32_getgid16(void); +long sys32_getegid16(void); +long sys32_ipc(u32 call, int first, int second, int third, u32 ptr); +long sys32_truncate64(const char __user * path, unsigned long high, + unsigned long low); +long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low); +long sys32_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval); +long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, size_t sigsetsize); +long 
sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize); +long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo); +long sys32_execve(const char __user *name, compat_uptr_t __user *argv, + compat_uptr_t __user *envp); +long sys32_init_module(void __user *umod, unsigned long len, + const char __user *uargs); +long sys32_delete_module(const char __user *name_user, unsigned int flags); +long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, + u32 poshi, u32 poslo); +long sys32_pwrite64(unsigned int fd, const char __user *ubuf, + size_t count, u32 poshi, u32 poslo); +compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count); +long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, + size_t count); +long sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, + s32 count); +long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf); +long sys32_lstat64(const char __user * filename, + struct stat64_emu31 __user * statbuf); +long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf); +long sys32_fstatat64(unsigned int dfd, const char __user *filename, + struct stat64_emu31 __user* statbuf, int flag); +unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg); +long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg); +long sys32_read(unsigned int fd, char __user * buf, size_t count); +long sys32_write(unsigned int fd, const char __user * buf, size_t count); +long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise); +long sys32_fadvise64_64(struct fadvise64_64_args __user *args); +long sys32_sigaction(int sig, const struct old_sigaction32 __user *act, + struct old_sigaction32 __user *oact); +long sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, + struct sigaction32 __user *oact, size_t sigsetsize); +long sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss); +#endif /* _ASM_S390X_S390_H 
*/ diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h new file mode 100644 index 00000000..12b82383 --- /dev/null +++ b/arch/s390/kernel/compat_ptrace.h @@ -0,0 +1,63 @@ +#ifndef _PTRACE32_H +#define _PTRACE32_H + +#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */ +#include "compat_linux.h" /* needed for psw_compat_t */ + +struct compat_per_struct_kernel { + __u32 cr9; /* PER control bits */ + __u32 cr10; /* PER starting address */ + __u32 cr11; /* PER ending address */ + __u32 bits; /* Obsolete software bits */ + __u32 starting_addr; /* User specified start address */ + __u32 ending_addr; /* User specified end address */ + __u16 perc_atmid; /* PER trap ATMID */ + __u32 address; /* PER trap instruction address */ + __u8 access_id; /* PER trap access identification */ +}; + +struct compat_user_regs_struct +{ + psw_compat_t psw; + u32 gprs[NUM_GPRS]; + u32 acrs[NUM_ACRS]; + u32 orig_gpr2; + /* nb: there's a 4-byte hole here */ + s390_fp_regs fp_regs; + /* + * These per registers are in here so that gdb can modify them + * itself as there is no "official" ptrace interface for hardware + * watchpoints. This is the way intel does it. + */ + struct compat_per_struct_kernel per_info; + u32 ieee_instruction_pointer; /* obsolete, always 0 */ +}; + +struct compat_user { + /* We start with the registers, to mimic the way that "memory" + is returned from the ptrace(3,...) function. */ + struct compat_user_regs_struct regs; + /* The rest of this junk is to help gdb figure out what goes where */ + u32 u_tsize; /* Text segment size (pages). */ + u32 u_dsize; /* Data segment size (pages). */ + u32 u_ssize; /* Stack segment size (pages). */ + u32 start_code; /* Starting virtual address of text. */ + u32 start_stack; /* Starting virtual address of stack area. + This is actually the bottom of the stack, + the top of the stack is always found in the + esp register. */ + s32 signal; /* Signal that caused the core dump. 
*/ + u32 u_ar0; /* Used by gdb to help find the values for */ + /* the registers. */ + u32 magic; /* To uniquely identify a core file */ + char u_comm[32]; /* User command that was responsible */ +}; + +typedef struct +{ + __u32 len; + __u32 kernel_addr; + __u32 process_addr; +} compat_ptrace_area; + +#endif /* _PTRACE32_H */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c new file mode 100644 index 00000000..28040fd5 --- /dev/null +++ b/arch/s390/kernel/compat_signal.c @@ -0,0 +1,597 @@ +/* + * arch/s390/kernel/compat_signal.c + * + * Copyright (C) IBM Corp. 2000,2006 + * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + * Gerhard Tonn (ton@de.ibm.com) + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + */ + +#include <linux/compat.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/tty.h> +#include <linux/personality.h> +#include <linux/binfmts.h> +#include <asm/ucontext.h> +#include <asm/uaccess.h> +#include <asm/lowcore.h> +#include <asm/switch_to.h> +#include "compat_linux.h" +#include "compat_ptrace.h" +#include "entry.h" + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +typedef struct +{ + __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; + struct sigcontext32 sc; + _sigregs32 sregs; + int signo; + __u32 gprs_high[NUM_GPRS]; + __u8 retcode[S390_SYSCALL_SIZE]; +} sigframe32; + +typedef struct +{ + __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; + __u8 retcode[S390_SYSCALL_SIZE]; + compat_siginfo_t info; + struct ucontext32 uc; + __u32 gprs_high[NUM_GPRS]; +} rt_sigframe32; + +int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) +{ + int err; + + if (!access_ok (VERIFY_WRITE, to, 
sizeof(compat_siginfo_t))) + return -EFAULT; + + /* If you change siginfo_t structure, please be sure + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. + This routine must convert siginfo from 64bit to 32bit as well + at the same time. */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + if (from->si_code < 0) + err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); + else { + switch (from->si_code >> 16) { + case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ + case __SI_MESGQ >> 16: + err |= __put_user(from->si_int, &to->si_int); + /* fallthrough */ + case __SI_KILL >> 16: + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); + break; + case __SI_FAULT >> 16: + err |= __put_user((unsigned long) from->si_addr, + &to->si_addr); + break; + case __SI_POLL >> 16: + err |= __put_user(from->si_band, &to->si_band); + err |= __put_user(from->si_fd, &to->si_fd); + break; + case __SI_TIMER >> 16: + err |= __put_user(from->si_tid, &to->si_tid); + err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user(from->si_int, &to->si_int); + break; + default: + break; + } + } + return err; +} + +int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) +{ + int err; + u32 tmp; + + if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t))) + return -EFAULT; + + err = __get_user(to->si_signo, &from->si_signo); + err |= 
__get_user(to->si_errno, &from->si_errno); + err |= __get_user(to->si_code, &from->si_code); + + if (to->si_code < 0) + err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); + else { + switch (to->si_code >> 16) { + case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ + case __SI_MESGQ >> 16: + err |= __get_user(to->si_int, &from->si_int); + /* fallthrough */ + case __SI_KILL >> 16: + err |= __get_user(to->si_pid, &from->si_pid); + err |= __get_user(to->si_uid, &from->si_uid); + break; + case __SI_CHLD >> 16: + err |= __get_user(to->si_pid, &from->si_pid); + err |= __get_user(to->si_uid, &from->si_uid); + err |= __get_user(to->si_utime, &from->si_utime); + err |= __get_user(to->si_stime, &from->si_stime); + err |= __get_user(to->si_status, &from->si_status); + break; + case __SI_FAULT >> 16: + err |= __get_user(tmp, &from->si_addr); + to->si_addr = (void __force __user *) + (u64) (tmp & PSW32_ADDR_INSN); + break; + case __SI_POLL >> 16: + err |= __get_user(to->si_band, &from->si_band); + err |= __get_user(to->si_fd, &from->si_fd); + break; + case __SI_TIMER >> 16: + err |= __get_user(to->si_tid, &from->si_tid); + err |= __get_user(to->si_overrun, &from->si_overrun); + err |= __get_user(to->si_int, &from->si_int); + break; + default: + break; + } + } + return err; +} + +asmlinkage long +sys32_sigaction(int sig, const struct old_sigaction32 __user *act, + struct old_sigaction32 __user *oact) +{ + struct k_sigaction new_ka, old_ka; + unsigned long sa_handler, sa_restorer; + int ret; + + if (act) { + compat_old_sigset_t mask; + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(sa_handler, &act->sa_handler) || + __get_user(sa_restorer, &act->sa_restorer) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) + return -EFAULT; + new_ka.sa.sa_handler = (__sighandler_t) sa_handler; + new_ka.sa.sa_restorer = (void (*)(void)) sa_restorer; + siginitset(&new_ka.sa.sa_mask, mask); + 
} + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + sa_handler = (unsigned long) old_ka.sa.sa_handler; + sa_restorer = (unsigned long) old_ka.sa.sa_restorer; + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(sa_handler, &oact->sa_handler) || + __put_user(sa_restorer, &oact->sa_restorer) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) + return -EFAULT; + } + + return ret; +} + +asmlinkage long +sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, + struct sigaction32 __user *oact, size_t sigsetsize) +{ + struct k_sigaction new_ka, old_ka; + unsigned long sa_handler; + int ret; + compat_sigset_t set32; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + + if (act) { + ret = get_user(sa_handler, &act->sa_handler); + ret |= __copy_from_user(&set32, &act->sa_mask, + sizeof(compat_sigset_t)); + new_ka.sa.sa_mask.sig[0] = + set32.sig[0] | (((long)set32.sig[1]) << 32); + ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + + if (ret) + return -EFAULT; + new_ka.sa.sa_handler = (__sighandler_t) sa_handler; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); + + if (!ret && oact) { + set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); + set32.sig[0] = old_ka.sa.sa_mask.sig[0]; + ret = put_user((unsigned long)old_ka.sa.sa_handler, &oact->sa_handler); + ret |= __copy_to_user(&oact->sa_mask, &set32, + sizeof(compat_sigset_t)); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + } + + return ret; +} + +asmlinkage long +sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss) +{ + struct pt_regs *regs = task_pt_regs(current); + stack_t kss, koss; + unsigned long ss_sp; + int ret, err = 0; + mm_segment_t old_fs = get_fs(); + + if (uss) { + if (!access_ok(VERIFY_READ, uss, sizeof(*uss))) + return -EFAULT; + err |= __get_user(ss_sp, &uss->ss_sp); + err |= __get_user(kss.ss_size, &uss->ss_size); + err |= __get_user(kss.ss_flags, &uss->ss_flags); + if (err) + return -EFAULT; + kss.ss_sp = (void __user *) ss_sp; + } + + set_fs (KERNEL_DS); + ret = do_sigaltstack((stack_t __force __user *) (uss ? &kss : NULL), + (stack_t __force __user *) (uoss ? 
&koss : NULL), + regs->gprs[15]); + set_fs (old_fs); + + if (!ret && uoss) { + if (!access_ok(VERIFY_WRITE, uoss, sizeof(*uoss))) + return -EFAULT; + ss_sp = (unsigned long) koss.ss_sp; + err |= __put_user(ss_sp, &uoss->ss_sp); + err |= __put_user(koss.ss_size, &uoss->ss_size); + err |= __put_user(koss.ss_flags, &uoss->ss_flags); + if (err) + return -EFAULT; + } + return ret; +} + +static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) +{ + _s390_regs_common32 regs32; + int err, i; + + regs32.psw.mask = psw32_user_bits | + ((__u32)(regs->psw.mask >> 32) & PSW32_MASK_USER); + regs32.psw.addr = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); + for (i = 0; i < NUM_GPRS; i++) + regs32.gprs[i] = (__u32) regs->gprs[i]; + save_access_regs(current->thread.acrs); + memcpy(regs32.acrs, current->thread.acrs, sizeof(regs32.acrs)); + err = __copy_to_user(&sregs->regs, &regs32, sizeof(regs32)); + if (err) + return err; + save_fp_regs(&current->thread.fp_regs); + /* s390_fp_regs and _s390_fp_regs32 are the same ! 
*/ + return __copy_to_user(&sregs->fpregs, &current->thread.fp_regs, + sizeof(_s390_fp_regs32)); +} + +static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) +{ + _s390_regs_common32 regs32; + int err, i; + + /* Always make any pending restarted system call return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + err = __copy_from_user(&regs32, &sregs->regs, sizeof(regs32)); + if (err) + return err; + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 | + (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE); + regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); + for (i = 0; i < NUM_GPRS; i++) + regs->gprs[i] = (__u64) regs32.gprs[i]; + memcpy(current->thread.acrs, regs32.acrs, sizeof(current->thread.acrs)); + restore_access_regs(current->thread.acrs); + + err = __copy_from_user(&current->thread.fp_regs, &sregs->fpregs, + sizeof(_s390_fp_regs32)); + current->thread.fp_regs.fpc &= FPC_VALID_MASK; + if (err) + return err; + + restore_fp_regs(&current->thread.fp_regs); + clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ + return 0; +} + +static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) +{ + __u32 gprs_high[NUM_GPRS]; + int i; + + for (i = 0; i < NUM_GPRS; i++) + gprs_high[i] = regs->gprs[i] >> 32; + + return __copy_to_user(uregs, &gprs_high, sizeof(gprs_high)); +} + +static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) +{ + __u32 gprs_high[NUM_GPRS]; + int err, i; + + err = __copy_from_user(&gprs_high, uregs, sizeof(gprs_high)); + if (err) + return err; + for (i = 0; i < NUM_GPRS; i++) + *(__u32 *)&regs->gprs[i] = gprs_high[i]; + return 0; +} + +asmlinkage long sys32_sigreturn(void) +{ + struct pt_regs *regs = task_pt_regs(current); + sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; + sigset_t set; + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set.sig, 
&frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) + goto badframe; + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + if (restore_sigregs32(regs, &frame->sregs)) + goto badframe; + if (restore_sigregs_gprs_high(regs, frame->gprs_high)) + goto badframe; + return regs->gprs[2]; +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +asmlinkage long sys32_rt_sigreturn(void) +{ + struct pt_regs *regs = task_pt_regs(current); + rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; + sigset_t set; + stack_t st; + __u32 ss_sp; + int err; + mm_segment_t old_fs = get_fs(); + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) + goto badframe; + if (restore_sigregs_gprs_high(regs, frame->gprs_high)) + goto badframe; + err = __get_user(ss_sp, &frame->uc.uc_stack.ss_sp); + st.ss_sp = compat_ptr(ss_sp); + err |= __get_user(st.ss_size, &frame->uc.uc_stack.ss_size); + err |= __get_user(st.ss_flags, &frame->uc.uc_stack.ss_flags); + if (err) + goto badframe; + set_fs (KERNEL_DS); + do_sigaltstack((stack_t __force __user *)&st, NULL, regs->gprs[15]); + set_fs (old_fs); + return regs->gprs[2]; +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +/* + * Set up a signal frame. + */ + + +/* + * Determine which stack to use.. + */ +static inline void __user * +get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +{ + unsigned long sp; + + /* Default to using normal stack */ + sp = (unsigned long) A(regs->gprs[15]); + + /* Overflow on alternate signal stack gives SIGSEGV. */ + if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL)) + return (void __user *) -1UL; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (! 
sas_ss_flags(sp)) + sp = current->sas_ss_sp + current->sas_ss_size; + } + + /* This is the legacy signal stack switching. */ + else if (!user_mode(regs) && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) { + sp = (unsigned long) ka->sa.sa_restorer; + } + + return (void __user *)((sp - frame_size) & -8ul); +} + +static inline int map_signal(int sig) +{ + if (current_thread_info()->exec_domain + && current_thread_info()->exec_domain->signal_invmap + && sig < 32) + return current_thread_info()->exec_domain->signal_invmap[sig]; + else + return sig; +} + +static int setup_frame32(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) +{ + sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(sigframe32)); + if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe32))) + goto give_sigsegv; + + if (frame == (void __user *) -1UL) + goto give_sigsegv; + + if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32)) + goto give_sigsegv; + + if (save_sigregs32(regs, &frame->sregs)) + goto give_sigsegv; + if (save_sigregs_gprs_high(regs, frame->gprs_high)) + goto give_sigsegv; + if (__put_user((unsigned long) &frame->sregs, &frame->sc.sregs)) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->sa.sa_flags & SA_RESTORER) { + regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; + } else { + regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; + if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, + (u16 __force __user *)(frame->retcode))) + goto give_sigsegv; + } + + /* Set up backchain. 
*/ + if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->gprs[15] = (__force __u64) frame; + regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ + regs->psw.addr = (__force __u64) ka->sa.sa_handler; + + regs->gprs[2] = map_signal(sig); + regs->gprs[3] = (__force __u64) &frame->sc; + + /* We forgot to include these in the sigcontext. + To avoid breaking binary compatibility, they are passed as args. */ + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + } + + /* Place signal number on stack to allow backtrace from handler. */ + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) + goto give_sigsegv; + return 0; + +give_sigsegv: + force_sigsegv(sig, current); + return -EFAULT; +} + +static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs * regs) +{ + int err = 0; + rt_sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(rt_sigframe32)); + if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe32))) + goto give_sigsegv; + + if (frame == (void __user *) -1UL) + goto give_sigsegv; + + if (copy_siginfo_to_user32(&frame->info, info)) + goto give_sigsegv; + + /* Create the ucontext. 
*/ + err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->gprs[15]), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= save_sigregs32(regs, &frame->uc.uc_mcontext); + err |= save_sigregs_gprs_high(regs, frame->gprs_high); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->sa.sa_flags & SA_RESTORER) { + regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; + } else { + regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; + err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, + (u16 __force __user *)(frame->retcode)); + } + + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->gprs[15] = (__force __u64) frame; + regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ + regs->psw.addr = (__u64) ka->sa.sa_handler; + + regs->gprs[2] = map_signal(sig); + regs->gprs[3] = (__force __u64) &frame->info; + regs->gprs[4] = (__force __u64) &frame->uc; + return 0; + +give_sigsegv: + force_sigsegv(sig, current); + return -EFAULT; +} + +/* + * OK, we're invoking a handler + */ + +int handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) +{ + int ret; + + /* Set up the stack frame */ + if (ka->sa.sa_flags & SA_SIGINFO) + ret = setup_rt_frame32(sig, ka, info, oldset, regs); + else + ret = setup_frame32(sig, ka, oldset, regs); + if (ret) + return ret; + block_sigmask(ka, sig); + return 0; +} + diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S new file mode 100644 index 00000000..ff605a39 --- 
/dev/null +++ b/arch/s390/kernel/compat_wrapper.S @@ -0,0 +1,1649 @@ +/* +* arch/s390/kernel/compat_wrapper.S +* wrapper for 31 bit compatible system calls. +* +* Copyright (C) IBM Corp. 2000,2006 +* Author(s): Gerhard Tonn (ton@de.ibm.com), +* Thomas Spatzier (tspat@de.ibm.com) +*/ + +#include <linux/linkage.h> + +ENTRY(sys32_exit_wrapper) + lgfr %r2,%r2 # int + jg sys_exit # branch to sys_exit + +ENTRY(sys32_read_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # char * + llgfr %r4,%r4 # size_t + jg sys32_read # branch to sys_read + +ENTRY(sys32_write_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # const char * + llgfr %r4,%r4 # size_t + jg sys32_write # branch to system call + +ENTRY(sys32_open_wrapper) + llgtr %r2,%r2 # const char * + lgfr %r3,%r3 # int + lgfr %r4,%r4 # int + jg sys_open # branch to system call + +ENTRY(sys32_close_wrapper) + llgfr %r2,%r2 # unsigned int + jg sys_close # branch to system call + +ENTRY(sys32_creat_wrapper) + llgtr %r2,%r2 # const char * + lgfr %r3,%r3 # int + jg sys_creat # branch to system call + +ENTRY(sys32_link_wrapper) + llgtr %r2,%r2 # const char * + llgtr %r3,%r3 # const char * + jg sys_link # branch to system call + +ENTRY(sys32_unlink_wrapper) + llgtr %r2,%r2 # const char * + jg sys_unlink # branch to system call + +ENTRY(sys32_chdir_wrapper) + llgtr %r2,%r2 # const char * + jg sys_chdir # branch to system call + +ENTRY(sys32_time_wrapper) + llgtr %r2,%r2 # int * + jg compat_sys_time # branch to system call + +ENTRY(sys32_mknod_wrapper) + llgtr %r2,%r2 # const char * + lgfr %r3,%r3 # int + llgfr %r4,%r4 # dev + jg sys_mknod # branch to system call + +ENTRY(sys32_chmod_wrapper) + llgtr %r2,%r2 # const char * + llgfr %r3,%r3 # mode_t + jg sys_chmod # branch to system call + +ENTRY(sys32_lchown16_wrapper) + llgtr %r2,%r2 # const char * + llgfr %r3,%r3 # __kernel_old_uid_emu31_t + llgfr %r4,%r4 # __kernel_old_uid_emu31_t + jg sys32_lchown16 # branch to system call + +ENTRY(sys32_lseek_wrapper) + llgfr 
%r2,%r2 # unsigned int + lgfr %r3,%r3 # off_t + llgfr %r4,%r4 # unsigned int + jg sys_lseek # branch to system call + +#sys32_getpid_wrapper # void + +ENTRY(sys32_mount_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # char * + llgtr %r4,%r4 # char * + llgfr %r5,%r5 # unsigned long + llgtr %r6,%r6 # void * + jg compat_sys_mount # branch to system call + +ENTRY(sys32_oldumount_wrapper) + llgtr %r2,%r2 # char * + jg sys_oldumount # branch to system call + +ENTRY(sys32_setuid16_wrapper) + llgfr %r2,%r2 # __kernel_old_uid_emu31_t + jg sys32_setuid16 # branch to system call + +#sys32_getuid16_wrapper # void + +ENTRY(sys32_ptrace_wrapper) + lgfr %r2,%r2 # long + lgfr %r3,%r3 # long + llgtr %r4,%r4 # long + llgfr %r5,%r5 # long + jg compat_sys_ptrace # branch to system call + +ENTRY(sys32_alarm_wrapper) + llgfr %r2,%r2 # unsigned int + jg sys_alarm # branch to system call + +ENTRY(compat_sys_utime_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # struct compat_utimbuf * + jg compat_sys_utime # branch to system call + +ENTRY(sys32_access_wrapper) + llgtr %r2,%r2 # const char * + lgfr %r3,%r3 # int + jg sys_access # branch to system call + +ENTRY(sys32_nice_wrapper) + lgfr %r2,%r2 # int + jg sys_nice # branch to system call + +#sys32_sync_wrapper # void + +ENTRY(sys32_kill_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + jg sys_kill # branch to system call + +ENTRY(sys32_rename_wrapper) + llgtr %r2,%r2 # const char * + llgtr %r3,%r3 # const char * + jg sys_rename # branch to system call + +ENTRY(sys32_mkdir_wrapper) + llgtr %r2,%r2 # const char * + lgfr %r3,%r3 # int + jg sys_mkdir # branch to system call + +ENTRY(sys32_rmdir_wrapper) + llgtr %r2,%r2 # const char * + jg sys_rmdir # branch to system call + +ENTRY(sys32_dup_wrapper) + llgfr %r2,%r2 # unsigned int + jg sys_dup # branch to system call + +ENTRY(sys32_pipe_wrapper) + llgtr %r2,%r2 # u32 * + jg sys_pipe # branch to system call + +ENTRY(compat_sys_times_wrapper) + llgtr %r2,%r2 # struct compat_tms * + jg 
compat_sys_times # branch to system call + +ENTRY(sys32_brk_wrapper) + llgtr %r2,%r2 # unsigned long + jg sys_brk # branch to system call + +ENTRY(sys32_setgid16_wrapper) + llgfr %r2,%r2 # __kernel_old_gid_emu31_t + jg sys32_setgid16 # branch to system call + +#sys32_getgid16_wrapper # void + +ENTRY(sys32_signal_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # __sighandler_t + jg sys_signal + +#sys32_geteuid16_wrapper # void + +#sys32_getegid16_wrapper # void + +ENTRY(sys32_acct_wrapper) + llgtr %r2,%r2 # char * + jg sys_acct # branch to system call + +ENTRY(sys32_umount_wrapper) + llgtr %r2,%r2 # char * + lgfr %r3,%r3 # int + jg sys_umount # branch to system call + +ENTRY(compat_sys_ioctl_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # unsigned int + llgfr %r4,%r4 # unsigned int + jg compat_sys_ioctl # branch to system call + +ENTRY(compat_sys_fcntl_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # unsigned int + llgfr %r4,%r4 # unsigned long + jg compat_sys_fcntl # branch to system call + +ENTRY(sys32_setpgid_wrapper) + lgfr %r2,%r2 # pid_t + lgfr %r3,%r3 # pid_t + jg sys_setpgid # branch to system call + +ENTRY(sys32_umask_wrapper) + lgfr %r2,%r2 # int + jg sys_umask # branch to system call + +ENTRY(sys32_chroot_wrapper) + llgtr %r2,%r2 # char * + jg sys_chroot # branch to system call + +ENTRY(sys32_ustat_wrapper) + llgfr %r2,%r2 # dev_t + llgtr %r3,%r3 # struct ustat * + jg compat_sys_ustat + +ENTRY(sys32_dup2_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # unsigned int + jg sys_dup2 # branch to system call + +#sys32_getppid_wrapper # void + +#sys32_getpgrp_wrapper # void + +#sys32_setsid_wrapper # void + +ENTRY(sys32_sigaction_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const struct old_sigaction * + llgtr %r4,%r4 # struct old_sigaction32 * + jg sys32_sigaction # branch to system call + +ENTRY(sys32_setreuid16_wrapper) + llgfr %r2,%r2 # __kernel_old_uid_emu31_t + llgfr %r3,%r3 # __kernel_old_uid_emu31_t + jg sys32_setreuid16 # 
branch to system call + +ENTRY(sys32_setregid16_wrapper) + llgfr %r2,%r2 # __kernel_old_gid_emu31_t + llgfr %r3,%r3 # __kernel_old_gid_emu31_t + jg sys32_setregid16 # branch to system call + +ENTRY(sys_sigsuspend_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + llgfr %r4,%r4 # old_sigset_t + jg sys_sigsuspend + +ENTRY(compat_sys_sigpending_wrapper) + llgtr %r2,%r2 # compat_old_sigset_t * + jg compat_sys_sigpending # branch to system call + +ENTRY(sys32_sethostname_wrapper) + llgtr %r2,%r2 # char * + lgfr %r3,%r3 # int + jg sys_sethostname # branch to system call + +ENTRY(compat_sys_setrlimit_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # struct rlimit_emu31 * + jg compat_sys_setrlimit # branch to system call + +ENTRY(compat_sys_old_getrlimit_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # struct rlimit_emu31 * + jg compat_sys_old_getrlimit # branch to system call + +ENTRY(compat_sys_getrlimit_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # struct rlimit_emu31 * + jg compat_sys_getrlimit # branch to system call + +ENTRY(sys32_mmap2_wrapper) + llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * + jg sys32_mmap2 # branch to system call + +ENTRY(compat_sys_getrusage_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # struct rusage_emu31 * + jg compat_sys_getrusage # branch to system call + +ENTRY(compat_sys_gettimeofday_wrapper) + llgtr %r2,%r2 # struct timeval_emu31 * + llgtr %r3,%r3 # struct timezone * + jg compat_sys_gettimeofday # branch to system call + +ENTRY(compat_sys_settimeofday_wrapper) + llgtr %r2,%r2 # struct timeval_emu31 * + llgtr %r3,%r3 # struct timezone * + jg compat_sys_settimeofday # branch to system call + +ENTRY(sys32_getgroups16_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # __kernel_old_gid_emu31_t * + jg sys32_getgroups16 # branch to system call + +ENTRY(sys32_setgroups16_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # __kernel_old_gid_emu31_t * + jg sys32_setgroups16 # branch to system call + 
+ENTRY(sys32_symlink_wrapper) + llgtr %r2,%r2 # const char * + llgtr %r3,%r3 # const char * + jg sys_symlink # branch to system call + +ENTRY(sys32_readlink_wrapper) + llgtr %r2,%r2 # const char * + llgtr %r3,%r3 # char * + lgfr %r4,%r4 # int + jg sys_readlink # branch to system call + +ENTRY(sys32_uselib_wrapper) + llgtr %r2,%r2 # const char * + jg sys_uselib # branch to system call + +ENTRY(sys32_swapon_wrapper) + llgtr %r2,%r2 # const char * + lgfr %r3,%r3 # int + jg sys_swapon # branch to system call + +ENTRY(sys32_reboot_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + llgfr %r4,%r4 # unsigned int + llgtr %r5,%r5 # void * + jg sys_reboot # branch to system call + +ENTRY(old32_readdir_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # void * + llgfr %r4,%r4 # unsigned int + jg compat_sys_old_readdir # branch to system call + +ENTRY(old32_mmap_wrapper) + llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * + jg old32_mmap # branch to system call + +ENTRY(sys32_munmap_wrapper) + llgfr %r2,%r2 # unsigned long + llgfr %r3,%r3 # size_t + jg sys_munmap # branch to system call + +ENTRY(sys32_truncate_wrapper) + llgtr %r2,%r2 # const char * + lgfr %r3,%r3 # long + jg sys_truncate # branch to system call + +ENTRY(sys32_ftruncate_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # unsigned long + jg sys_ftruncate # branch to system call + +ENTRY(sys32_fchmod_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # mode_t + jg sys_fchmod # branch to system call + +ENTRY(sys32_fchown16_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # compat_uid_t + llgfr %r4,%r4 # compat_uid_t + jg sys32_fchown16 # branch to system call + +ENTRY(sys32_getpriority_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + jg sys_getpriority # branch to system call + +ENTRY(sys32_setpriority_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + lgfr %r4,%r4 # int + jg sys_setpriority # branch to system call + +ENTRY(compat_sys_statfs_wrapper) + llgtr %r2,%r2 # char * + llgtr 
%r3,%r3 # struct compat_statfs * + jg compat_sys_statfs # branch to system call + +ENTRY(compat_sys_fstatfs_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # struct compat_statfs * + jg compat_sys_fstatfs # branch to system call + +ENTRY(compat_sys_socketcall_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # u32 * + jg compat_sys_socketcall # branch to system call + +ENTRY(sys32_syslog_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # char * + lgfr %r4,%r4 # int + jg sys_syslog # branch to system call + +ENTRY(compat_sys_setitimer_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # struct itimerval_emu31 * + llgtr %r4,%r4 # struct itimerval_emu31 * + jg compat_sys_setitimer # branch to system call + +ENTRY(compat_sys_getitimer_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # struct itimerval_emu31 * + jg compat_sys_getitimer # branch to system call + +ENTRY(compat_sys_newstat_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # struct stat_emu31 * + jg compat_sys_newstat # branch to system call + +ENTRY(compat_sys_newlstat_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # struct stat_emu31 * + jg compat_sys_newlstat # branch to system call + +ENTRY(compat_sys_newfstat_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # struct stat_emu31 * + jg compat_sys_newfstat # branch to system call + +#sys32_vhangup_wrapper # void + +ENTRY(compat_sys_wait4_wrapper) + lgfr %r2,%r2 # pid_t + llgtr %r3,%r3 # unsigned int * + lgfr %r4,%r4 # int + llgtr %r5,%r5 # struct rusage * + jg compat_sys_wait4 # branch to system call + +ENTRY(sys32_swapoff_wrapper) + llgtr %r2,%r2 # const char * + jg sys_swapoff # branch to system call + +ENTRY(compat_sys_sysinfo_wrapper) + llgtr %r2,%r2 # struct sysinfo_emu31 * + jg compat_sys_sysinfo # branch to system call + +ENTRY(sys32_ipc_wrapper) + llgfr %r2,%r2 # uint + lgfr %r3,%r3 # int + lgfr %r4,%r4 # int + lgfr %r5,%r5 # int + llgfr %r6,%r6 # u32 + jg sys32_ipc # branch to system call + +ENTRY(sys32_fsync_wrapper) + llgfr %r2,%r2 # 
unsigned int + jg sys_fsync # branch to system call + +#sys32_sigreturn_wrapper # done in sigreturn_glue + +#sys32_clone_wrapper # done in clone_glue + +ENTRY(sys32_setdomainname_wrapper) + llgtr %r2,%r2 # char * + lgfr %r3,%r3 # int + jg sys_setdomainname # branch to system call + +ENTRY(sys32_newuname_wrapper) + llgtr %r2,%r2 # struct new_utsname * + jg sys_newuname # branch to system call + +ENTRY(compat_sys_adjtimex_wrapper) + llgtr %r2,%r2 # struct compat_timex * + jg compat_sys_adjtimex # branch to system call + +ENTRY(sys32_mprotect_wrapper) + llgtr %r2,%r2 # unsigned long (actually pointer + llgfr %r3,%r3 # size_t + llgfr %r4,%r4 # unsigned long + jg sys_mprotect # branch to system call + +ENTRY(compat_sys_sigprocmask_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # compat_old_sigset_t * + llgtr %r4,%r4 # compat_old_sigset_t * + jg compat_sys_sigprocmask # branch to system call + +ENTRY(sys_init_module_wrapper) + llgtr %r2,%r2 # void * + llgfr %r3,%r3 # unsigned long + llgtr %r4,%r4 # char * + jg sys_init_module # branch to system call + +ENTRY(sys_delete_module_wrapper) + llgtr %r2,%r2 # const char * + llgfr %r3,%r3 # unsigned int + jg sys_delete_module # branch to system call + +ENTRY(sys32_quotactl_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # const char * + llgfr %r4,%r4 # qid_t + llgtr %r5,%r5 # caddr_t + jg sys_quotactl # branch to system call + +ENTRY(sys32_getpgid_wrapper) + lgfr %r2,%r2 # pid_t + jg sys_getpgid # branch to system call + +ENTRY(sys32_fchdir_wrapper) + llgfr %r2,%r2 # unsigned int + jg sys_fchdir # branch to system call + +ENTRY(sys32_bdflush_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # long + jg sys_bdflush # branch to system call + +ENTRY(sys32_sysfs_wrapper) + lgfr %r2,%r2 # int + llgfr %r3,%r3 # unsigned long + llgfr %r4,%r4 # unsigned long + jg sys_sysfs # branch to system call + +ENTRY(sys32_personality_wrapper) + llgfr %r2,%r2 # unsigned int + jg sys_s390_personality # branch to system call + 
+ENTRY(sys32_setfsuid16_wrapper) + llgfr %r2,%r2 # __kernel_old_uid_emu31_t + jg sys32_setfsuid16 # branch to system call + +ENTRY(sys32_setfsgid16_wrapper) + llgfr %r2,%r2 # __kernel_old_gid_emu31_t + jg sys32_setfsgid16 # branch to system call + +ENTRY(sys32_llseek_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # unsigned long + llgfr %r4,%r4 # unsigned long + llgtr %r5,%r5 # loff_t * + llgfr %r6,%r6 # unsigned int + jg sys_llseek # branch to system call + +ENTRY(sys32_getdents_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # void * + llgfr %r4,%r4 # unsigned int + jg compat_sys_getdents # branch to system call + +ENTRY(compat_sys_select_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # compat_fd_set * + llgtr %r4,%r4 # compat_fd_set * + llgtr %r5,%r5 # compat_fd_set * + llgtr %r6,%r6 # struct compat_timeval * + jg compat_sys_select # branch to system call + +ENTRY(sys32_flock_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # unsigned int + jg sys_flock # branch to system call + +ENTRY(sys32_msync_wrapper) + llgfr %r2,%r2 # unsigned long + llgfr %r3,%r3 # size_t + lgfr %r4,%r4 # int + jg sys_msync # branch to system call + +ENTRY(compat_sys_readv_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const struct compat_iovec * + llgfr %r4,%r4 # unsigned long + jg compat_sys_readv # branch to system call + +ENTRY(compat_sys_writev_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const struct compat_iovec * + llgfr %r4,%r4 # unsigned long + jg compat_sys_writev # branch to system call + +ENTRY(sys32_getsid_wrapper) + lgfr %r2,%r2 # pid_t + jg sys_getsid # branch to system call + +ENTRY(sys32_fdatasync_wrapper) + llgfr %r2,%r2 # unsigned int + jg sys_fdatasync # branch to system call + +ENTRY(sys32_mlock_wrapper) + llgfr %r2,%r2 # unsigned long + llgfr %r3,%r3 # size_t + jg sys_mlock # branch to system call + +ENTRY(sys32_munlock_wrapper) + llgfr %r2,%r2 # unsigned long + llgfr %r3,%r3 # size_t + jg sys_munlock # branch to system call + 
+ENTRY(sys32_mlockall_wrapper) + lgfr %r2,%r2 # int + jg sys_mlockall # branch to system call + +#sys32_munlockall_wrapper # void + +ENTRY(sys32_sched_setparam_wrapper) + lgfr %r2,%r2 # pid_t + llgtr %r3,%r3 # struct sched_param * + jg sys_sched_setparam # branch to system call + +ENTRY(sys32_sched_getparam_wrapper) + lgfr %r2,%r2 # pid_t + llgtr %r3,%r3 # struct sched_param * + jg sys_sched_getparam # branch to system call + +ENTRY(sys32_sched_setscheduler_wrapper) + lgfr %r2,%r2 # pid_t + lgfr %r3,%r3 # int + llgtr %r4,%r4 # struct sched_param * + jg sys_sched_setscheduler # branch to system call + +ENTRY(sys32_sched_getscheduler_wrapper) + lgfr %r2,%r2 # pid_t + jg sys_sched_getscheduler # branch to system call + +#sys32_sched_yield_wrapper # void + +ENTRY(sys32_sched_get_priority_max_wrapper) + lgfr %r2,%r2 # int + jg sys_sched_get_priority_max # branch to system call + +ENTRY(sys32_sched_get_priority_min_wrapper) + lgfr %r2,%r2 # int + jg sys_sched_get_priority_min # branch to system call + +ENTRY(sys32_sched_rr_get_interval_wrapper) + lgfr %r2,%r2 # pid_t + llgtr %r3,%r3 # struct compat_timespec * + jg sys32_sched_rr_get_interval # branch to system call + +ENTRY(compat_sys_nanosleep_wrapper) + llgtr %r2,%r2 # struct compat_timespec * + llgtr %r3,%r3 # struct compat_timespec * + jg compat_sys_nanosleep # branch to system call + +ENTRY(sys32_mremap_wrapper) + llgfr %r2,%r2 # unsigned long + llgfr %r3,%r3 # unsigned long + llgfr %r4,%r4 # unsigned long + llgfr %r5,%r5 # unsigned long + llgfr %r6,%r6 # unsigned long + jg sys_mremap # branch to system call + +ENTRY(sys32_setresuid16_wrapper) + llgfr %r2,%r2 # __kernel_old_uid_emu31_t + llgfr %r3,%r3 # __kernel_old_uid_emu31_t + llgfr %r4,%r4 # __kernel_old_uid_emu31_t + jg sys32_setresuid16 # branch to system call + +ENTRY(sys32_getresuid16_wrapper) + llgtr %r2,%r2 # __kernel_old_uid_emu31_t * + llgtr %r3,%r3 # __kernel_old_uid_emu31_t * + llgtr %r4,%r4 # __kernel_old_uid_emu31_t * + jg sys32_getresuid16 # branch 
to system call + +ENTRY(sys32_poll_wrapper) + llgtr %r2,%r2 # struct pollfd * + llgfr %r3,%r3 # unsigned int + lgfr %r4,%r4 # int + jg sys_poll # branch to system call + +ENTRY(sys32_setresgid16_wrapper) + llgfr %r2,%r2 # __kernel_old_gid_emu31_t + llgfr %r3,%r3 # __kernel_old_gid_emu31_t + llgfr %r4,%r4 # __kernel_old_gid_emu31_t + jg sys32_setresgid16 # branch to system call + +ENTRY(sys32_getresgid16_wrapper) + llgtr %r2,%r2 # __kernel_old_gid_emu31_t * + llgtr %r3,%r3 # __kernel_old_gid_emu31_t * + llgtr %r4,%r4 # __kernel_old_gid_emu31_t * + jg sys32_getresgid16 # branch to system call + +ENTRY(sys32_prctl_wrapper) + lgfr %r2,%r2 # int + llgfr %r3,%r3 # unsigned long + llgfr %r4,%r4 # unsigned long + llgfr %r5,%r5 # unsigned long + llgfr %r6,%r6 # unsigned long + jg sys_prctl # branch to system call + +#sys32_rt_sigreturn_wrapper # done in rt_sigreturn_glue + +ENTRY(sys32_rt_sigaction_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const struct sigaction_emu31 * + llgtr %r4,%r4 # const struct sigaction_emu31 * + llgfr %r5,%r5 # size_t + jg sys32_rt_sigaction # branch to system call + +ENTRY(sys32_rt_sigprocmask_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # old_sigset_emu31 * + llgtr %r4,%r4 # old_sigset_emu31 * + llgfr %r5,%r5 # size_t + jg sys32_rt_sigprocmask # branch to system call + +ENTRY(sys32_rt_sigpending_wrapper) + llgtr %r2,%r2 # sigset_emu31 * + llgfr %r3,%r3 # size_t + jg sys32_rt_sigpending # branch to system call + +ENTRY(compat_sys_rt_sigtimedwait_wrapper) + llgtr %r2,%r2 # const sigset_emu31_t * + llgtr %r3,%r3 # siginfo_emu31_t * + llgtr %r4,%r4 # const struct compat_timespec * + llgfr %r5,%r5 # size_t + jg compat_sys_rt_sigtimedwait # branch to system call + +ENTRY(sys32_rt_sigqueueinfo_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + llgtr %r4,%r4 # siginfo_emu31_t * + jg sys32_rt_sigqueueinfo # branch to system call + +ENTRY(compat_sys_rt_sigsuspend_wrapper) + llgtr %r2,%r2 # compat_sigset_t * + llgfr %r3,%r3 # compat_size_t + jg 
compat_sys_rt_sigsuspend + +ENTRY(sys32_pread64_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # char * + llgfr %r4,%r4 # size_t + llgfr %r5,%r5 # u32 + llgfr %r6,%r6 # u32 + jg sys32_pread64 # branch to system call + +ENTRY(sys32_pwrite64_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # const char * + llgfr %r4,%r4 # size_t + llgfr %r5,%r5 # u32 + llgfr %r6,%r6 # u32 + jg sys32_pwrite64 # branch to system call + +ENTRY(sys32_chown16_wrapper) + llgtr %r2,%r2 # const char * + llgfr %r3,%r3 # __kernel_old_uid_emu31_t + llgfr %r4,%r4 # __kernel_old_gid_emu31_t + jg sys32_chown16 # branch to system call + +ENTRY(sys32_getcwd_wrapper) + llgtr %r2,%r2 # char * + llgfr %r3,%r3 # unsigned long + jg sys_getcwd # branch to system call + +ENTRY(sys32_capget_wrapper) + llgtr %r2,%r2 # cap_user_header_t + llgtr %r3,%r3 # cap_user_data_t + jg sys_capget # branch to system call + +ENTRY(sys32_capset_wrapper) + llgtr %r2,%r2 # cap_user_header_t + llgtr %r3,%r3 # const cap_user_data_t + jg sys_capset # branch to system call + +ENTRY(sys32_sigaltstack_wrapper) + llgtr %r2,%r2 # const stack_emu31_t * + llgtr %r3,%r3 # stack_emu31_t * + jg sys32_sigaltstack + +ENTRY(sys32_sendfile_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + llgtr %r4,%r4 # __kernel_off_emu31_t * + llgfr %r5,%r5 # size_t + jg sys32_sendfile # branch to system call + +#sys32_vfork_wrapper # done in vfork_glue + +ENTRY(sys32_truncate64_wrapper) + llgtr %r2,%r2 # const char * + llgfr %r3,%r3 # unsigned long + llgfr %r4,%r4 # unsigned long + jg sys32_truncate64 # branch to system call + +ENTRY(sys32_ftruncate64_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # unsigned long + llgfr %r4,%r4 # unsigned long + jg sys32_ftruncate64 # branch to system call + +ENTRY(sys32_lchown_wrapper) + llgtr %r2,%r2 # const char * + llgfr %r3,%r3 # uid_t + llgfr %r4,%r4 # gid_t + jg sys_lchown # branch to system call + +#sys32_getuid_wrapper # void +#sys32_getgid_wrapper # void +#sys32_geteuid_wrapper # 
void +#sys32_getegid_wrapper # void + +ENTRY(sys32_setreuid_wrapper) + llgfr %r2,%r2 # uid_t + llgfr %r3,%r3 # uid_t + jg sys_setreuid # branch to system call + +ENTRY(sys32_setregid_wrapper) + llgfr %r2,%r2 # gid_t + llgfr %r3,%r3 # gid_t + jg sys_setregid # branch to system call + +ENTRY(sys32_getgroups_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # gid_t * + jg sys_getgroups # branch to system call + +ENTRY(sys32_setgroups_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # gid_t * + jg sys_setgroups # branch to system call + +ENTRY(sys32_fchown_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # uid_t + llgfr %r4,%r4 # gid_t + jg sys_fchown # branch to system call + +ENTRY(sys32_setresuid_wrapper) + llgfr %r2,%r2 # uid_t + llgfr %r3,%r3 # uid_t + llgfr %r4,%r4 # uid_t + jg sys_setresuid # branch to system call + +ENTRY(sys32_getresuid_wrapper) + llgtr %r2,%r2 # uid_t * + llgtr %r3,%r3 # uid_t * + llgtr %r4,%r4 # uid_t * + jg sys_getresuid # branch to system call + +ENTRY(sys32_setresgid_wrapper) + llgfr %r2,%r2 # gid_t + llgfr %r3,%r3 # gid_t + llgfr %r4,%r4 # gid_t + jg sys_setresgid # branch to system call + +ENTRY(sys32_getresgid_wrapper) + llgtr %r2,%r2 # gid_t * + llgtr %r3,%r3 # gid_t * + llgtr %r4,%r4 # gid_t * + jg sys_getresgid # branch to system call + +ENTRY(sys32_chown_wrapper) + llgtr %r2,%r2 # const char * + llgfr %r3,%r3 # uid_t + llgfr %r4,%r4 # gid_t + jg sys_chown # branch to system call + +ENTRY(sys32_setuid_wrapper) + llgfr %r2,%r2 # uid_t + jg sys_setuid # branch to system call + +ENTRY(sys32_setgid_wrapper) + llgfr %r2,%r2 # gid_t + jg sys_setgid # branch to system call + +ENTRY(sys32_setfsuid_wrapper) + llgfr %r2,%r2 # uid_t + jg sys_setfsuid # branch to system call + +ENTRY(sys32_setfsgid_wrapper) + llgfr %r2,%r2 # gid_t + jg sys_setfsgid # branch to system call + +ENTRY(sys32_pivot_root_wrapper) + llgtr %r2,%r2 # const char * + llgtr %r3,%r3 # const char * + jg sys_pivot_root # branch to system call + +ENTRY(sys32_mincore_wrapper) + 
llgfr %r2,%r2 # unsigned long + llgfr %r3,%r3 # size_t + llgtr %r4,%r4 # unsigned char * + jg sys_mincore # branch to system call + +ENTRY(sys32_madvise_wrapper) + llgfr %r2,%r2 # unsigned long + llgfr %r3,%r3 # size_t + lgfr %r4,%r4 # int + jg sys_madvise # branch to system call + +ENTRY(sys32_getdents64_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # void * + llgfr %r4,%r4 # unsigned int + jg sys_getdents64 # branch to system call + +ENTRY(compat_sys_fcntl64_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # unsigned int + llgfr %r4,%r4 # unsigned long + jg compat_sys_fcntl64 # branch to system call + +ENTRY(sys32_stat64_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # struct stat64 * + jg sys32_stat64 # branch to system call + +ENTRY(sys32_lstat64_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # struct stat64 * + jg sys32_lstat64 # branch to system call + +ENTRY(sys32_stime_wrapper) + llgtr %r2,%r2 # long * + jg compat_sys_stime # branch to system call + +ENTRY(sys32_sysctl_wrapper) + llgtr %r2,%r2 # struct compat_sysctl_args * + jg compat_sys_sysctl + +ENTRY(sys32_fstat64_wrapper) + llgfr %r2,%r2 # unsigned long + llgtr %r3,%r3 # struct stat64 * + jg sys32_fstat64 # branch to system call + +ENTRY(compat_sys_futex_wrapper) + llgtr %r2,%r2 # u32 * + lgfr %r3,%r3 # int + lgfr %r4,%r4 # int + llgtr %r5,%r5 # struct compat_timespec * + llgtr %r6,%r6 # u32 * + lgf %r0,164(%r15) # int + stg %r0,160(%r15) + jg compat_sys_futex # branch to system call + +ENTRY(sys32_setxattr_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # char * + llgtr %r4,%r4 # void * + llgfr %r5,%r5 # size_t + lgfr %r6,%r6 # int + jg sys_setxattr + +ENTRY(sys32_lsetxattr_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # char * + llgtr %r4,%r4 # void * + llgfr %r5,%r5 # size_t + lgfr %r6,%r6 # int + jg sys_lsetxattr + +ENTRY(sys32_fsetxattr_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # char * + llgtr %r4,%r4 # void * + llgfr %r5,%r5 # size_t + lgfr %r6,%r6 # int + jg 
sys_fsetxattr + +ENTRY(sys32_getxattr_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # char * + llgtr %r4,%r4 # void * + llgfr %r5,%r5 # size_t + jg sys_getxattr + +ENTRY(sys32_lgetxattr_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # char * + llgtr %r4,%r4 # void * + llgfr %r5,%r5 # size_t + jg sys_lgetxattr + +ENTRY(sys32_fgetxattr_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # char * + llgtr %r4,%r4 # void * + llgfr %r5,%r5 # size_t + jg sys_fgetxattr + +ENTRY(sys32_listxattr_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # char * + llgfr %r4,%r4 # size_t + jg sys_listxattr + +ENTRY(sys32_llistxattr_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # char * + llgfr %r4,%r4 # size_t + jg sys_llistxattr + +ENTRY(sys32_flistxattr_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # char * + llgfr %r4,%r4 # size_t + jg sys_flistxattr + +ENTRY(sys32_removexattr_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # char * + jg sys_removexattr + +ENTRY(sys32_lremovexattr_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # char * + jg sys_lremovexattr + +ENTRY(sys32_fremovexattr_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # char * + jg sys_fremovexattr + +ENTRY(sys32_sched_setaffinity_wrapper) + lgfr %r2,%r2 # int + llgfr %r3,%r3 # unsigned int + llgtr %r4,%r4 # unsigned long * + jg compat_sys_sched_setaffinity + +ENTRY(sys32_sched_getaffinity_wrapper) + lgfr %r2,%r2 # int + llgfr %r3,%r3 # unsigned int + llgtr %r4,%r4 # unsigned long * + jg compat_sys_sched_getaffinity + +ENTRY(sys32_exit_group_wrapper) + lgfr %r2,%r2 # int + jg sys_exit_group # branch to system call + +ENTRY(sys32_set_tid_address_wrapper) + llgtr %r2,%r2 # int * + jg sys_set_tid_address # branch to system call + +ENTRY(sys_epoll_create_wrapper) + lgfr %r2,%r2 # int + jg sys_epoll_create # branch to system call + +ENTRY(sys_epoll_ctl_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + lgfr %r4,%r4 # int + llgtr %r5,%r5 # struct epoll_event * + jg sys_epoll_ctl # branch to system call + 
+ENTRY(sys_epoll_wait_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # struct epoll_event * + lgfr %r4,%r4 # int + lgfr %r5,%r5 # int + jg sys_epoll_wait # branch to system call + +ENTRY(sys32_lookup_dcookie_wrapper) + sllg %r2,%r2,32 # get high word of 64bit dcookie + or %r2,%r3 # get low word of 64bit dcookie + llgtr %r3,%r4 # char * + llgfr %r4,%r5 # size_t + jg sys_lookup_dcookie + +ENTRY(sys32_fadvise64_wrapper) + lgfr %r2,%r2 # int + sllg %r3,%r3,32 # get high word of 64bit loff_t + or %r3,%r4 # get low word of 64bit loff_t + llgfr %r4,%r5 # size_t (unsigned long) + lgfr %r5,%r6 # int + jg sys32_fadvise64 + +ENTRY(sys32_fadvise64_64_wrapper) + llgtr %r2,%r2 # struct fadvise64_64_args * + jg sys32_fadvise64_64 + +ENTRY(sys32_clock_settime_wrapper) + lgfr %r2,%r2 # clockid_t (int) + llgtr %r3,%r3 # struct compat_timespec * + jg compat_sys_clock_settime + +ENTRY(sys32_clock_gettime_wrapper) + lgfr %r2,%r2 # clockid_t (int) + llgtr %r3,%r3 # struct compat_timespec * + jg compat_sys_clock_gettime + +ENTRY(sys32_clock_getres_wrapper) + lgfr %r2,%r2 # clockid_t (int) + llgtr %r3,%r3 # struct compat_timespec * + jg compat_sys_clock_getres + +ENTRY(sys32_clock_nanosleep_wrapper) + lgfr %r2,%r2 # clockid_t (int) + lgfr %r3,%r3 # int + llgtr %r4,%r4 # struct compat_timespec * + llgtr %r5,%r5 # struct compat_timespec * + jg compat_sys_clock_nanosleep + +ENTRY(sys32_timer_create_wrapper) + lgfr %r2,%r2 # timer_t (int) + llgtr %r3,%r3 # struct compat_sigevent * + llgtr %r4,%r4 # timer_t * + jg compat_sys_timer_create + +ENTRY(sys32_timer_settime_wrapper) + lgfr %r2,%r2 # timer_t (int) + lgfr %r3,%r3 # int + llgtr %r4,%r4 # struct compat_itimerspec * + llgtr %r5,%r5 # struct compat_itimerspec * + jg compat_sys_timer_settime + +ENTRY(sys32_timer_gettime_wrapper) + lgfr %r2,%r2 # timer_t (int) + llgtr %r3,%r3 # struct compat_itimerspec * + jg compat_sys_timer_gettime + +ENTRY(sys32_timer_getoverrun_wrapper) + lgfr %r2,%r2 # timer_t (int) + jg sys_timer_getoverrun + 
+ENTRY(sys32_timer_delete_wrapper) + lgfr %r2,%r2 # timer_t (int) + jg sys_timer_delete + +ENTRY(sys32_io_setup_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # u32 * + jg compat_sys_io_setup + +ENTRY(sys32_io_destroy_wrapper) + llgfr %r2,%r2 # (aio_context_t) u32 + jg sys_io_destroy + +ENTRY(sys32_io_getevents_wrapper) + llgfr %r2,%r2 # (aio_context_t) u32 + lgfr %r3,%r3 # long + lgfr %r4,%r4 # long + llgtr %r5,%r5 # struct io_event * + llgtr %r6,%r6 # struct compat_timespec * + jg compat_sys_io_getevents + +ENTRY(sys32_io_submit_wrapper) + llgfr %r2,%r2 # (aio_context_t) u32 + lgfr %r3,%r3 # long + llgtr %r4,%r4 # struct iocb ** + jg compat_sys_io_submit + +ENTRY(sys32_io_cancel_wrapper) + llgfr %r2,%r2 # (aio_context_t) u32 + llgtr %r3,%r3 # struct iocb * + llgtr %r4,%r4 # struct io_event * + jg sys_io_cancel + +ENTRY(compat_sys_statfs64_wrapper) + llgtr %r2,%r2 # const char * + llgfr %r3,%r3 # compat_size_t + llgtr %r4,%r4 # struct compat_statfs64 * + jg compat_sys_statfs64 + +ENTRY(compat_sys_fstatfs64_wrapper) + llgfr %r2,%r2 # unsigned int fd + llgfr %r3,%r3 # compat_size_t + llgtr %r4,%r4 # struct compat_statfs64 * + jg compat_sys_fstatfs64 + +ENTRY(compat_sys_mq_open_wrapper) + llgtr %r2,%r2 # const char * + lgfr %r3,%r3 # int + llgfr %r4,%r4 # mode_t + llgtr %r5,%r5 # struct compat_mq_attr * + jg compat_sys_mq_open + +ENTRY(sys32_mq_unlink_wrapper) + llgtr %r2,%r2 # const char * + jg sys_mq_unlink + +ENTRY(compat_sys_mq_timedsend_wrapper) + lgfr %r2,%r2 # mqd_t + llgtr %r3,%r3 # const char * + llgfr %r4,%r4 # size_t + llgfr %r5,%r5 # unsigned int + llgtr %r6,%r6 # const struct compat_timespec * + jg compat_sys_mq_timedsend + +ENTRY(compat_sys_mq_timedreceive_wrapper) + lgfr %r2,%r2 # mqd_t + llgtr %r3,%r3 # char * + llgfr %r4,%r4 # size_t + llgtr %r5,%r5 # unsigned int * + llgtr %r6,%r6 # const struct compat_timespec * + jg compat_sys_mq_timedreceive + +ENTRY(compat_sys_mq_notify_wrapper) + lgfr %r2,%r2 # mqd_t + llgtr %r3,%r3 # struct 
compat_sigevent * + jg compat_sys_mq_notify + +ENTRY(compat_sys_mq_getsetattr_wrapper) + lgfr %r2,%r2 # mqd_t + llgtr %r3,%r3 # struct compat_mq_attr * + llgtr %r4,%r4 # struct compat_mq_attr * + jg compat_sys_mq_getsetattr + +ENTRY(compat_sys_add_key_wrapper) + llgtr %r2,%r2 # const char * + llgtr %r3,%r3 # const char * + llgtr %r4,%r4 # const void * + llgfr %r5,%r5 # size_t + llgfr %r6,%r6 # (key_serial_t) u32 + jg sys_add_key + +ENTRY(compat_sys_request_key_wrapper) + llgtr %r2,%r2 # const char * + llgtr %r3,%r3 # const char * + llgtr %r4,%r4 # const void * + llgfr %r5,%r5 # (key_serial_t) u32 + jg sys_request_key + +ENTRY(sys32_remap_file_pages_wrapper) + llgfr %r2,%r2 # unsigned long + llgfr %r3,%r3 # unsigned long + llgfr %r4,%r4 # unsigned long + llgfr %r5,%r5 # unsigned long + llgfr %r6,%r6 # unsigned long + jg sys_remap_file_pages + +ENTRY(compat_sys_waitid_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # pid_t + llgtr %r4,%r4 # siginfo_emu31_t * + lgfr %r5,%r5 # int + llgtr %r6,%r6 # struct rusage_emu31 * + jg compat_sys_waitid + +ENTRY(compat_sys_kexec_load_wrapper) + llgfr %r2,%r2 # unsigned long + llgfr %r3,%r3 # unsigned long + llgtr %r4,%r4 # struct kexec_segment * + llgfr %r5,%r5 # unsigned long + jg compat_sys_kexec_load + +ENTRY(sys_ioprio_set_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + lgfr %r4,%r4 # int + jg sys_ioprio_set + +ENTRY(sys_ioprio_get_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + jg sys_ioprio_get + +ENTRY(sys_inotify_add_watch_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const char * + llgfr %r4,%r4 # u32 + jg sys_inotify_add_watch + +ENTRY(sys_inotify_rm_watch_wrapper) + lgfr %r2,%r2 # int + llgfr %r3,%r3 # u32 + jg sys_inotify_rm_watch + +ENTRY(compat_sys_openat_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # const char * + lgfr %r4,%r4 # int + lgfr %r5,%r5 # int + jg compat_sys_openat + +ENTRY(sys_mkdirat_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const char * + lgfr %r4,%r4 # int + jg sys_mkdirat 
+ +ENTRY(sys_mknodat_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const char * + lgfr %r4,%r4 # int + llgfr %r5,%r5 # unsigned int + jg sys_mknodat + +ENTRY(sys_fchownat_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const char * + llgfr %r4,%r4 # uid_t + llgfr %r5,%r5 # gid_t + lgfr %r6,%r6 # int + jg sys_fchownat + +ENTRY(compat_sys_futimesat_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # char * + llgtr %r4,%r4 # struct timeval * + jg compat_sys_futimesat + +ENTRY(sys32_fstatat64_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # char * + llgtr %r4,%r4 # struct stat64 * + lgfr %r5,%r5 # int + jg sys32_fstatat64 + +ENTRY(sys_unlinkat_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const char * + lgfr %r4,%r4 # int + jg sys_unlinkat + +ENTRY(sys_renameat_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const char * + lgfr %r4,%r4 # int + llgtr %r5,%r5 # const char * + jg sys_renameat + +ENTRY(sys_linkat_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const char * + lgfr %r4,%r4 # int + llgtr %r5,%r5 # const char * + lgfr %r6,%r6 # int + jg sys_linkat + +ENTRY(sys_symlinkat_wrapper) + llgtr %r2,%r2 # const char * + lgfr %r3,%r3 # int + llgtr %r4,%r4 # const char * + jg sys_symlinkat + +ENTRY(sys_readlinkat_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const char * + llgtr %r4,%r4 # char * + lgfr %r5,%r5 # int + jg sys_readlinkat + +ENTRY(sys_fchmodat_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const char * + llgfr %r4,%r4 # mode_t + jg sys_fchmodat + +ENTRY(sys_faccessat_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const char * + lgfr %r4,%r4 # int + jg sys_faccessat + +ENTRY(compat_sys_pselect6_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # fd_set * + llgtr %r4,%r4 # fd_set * + llgtr %r5,%r5 # fd_set * + llgtr %r6,%r6 # struct timespec * + llgt %r0,164(%r15) # void * + stg %r0,160(%r15) + jg compat_sys_pselect6 + +ENTRY(compat_sys_ppoll_wrapper) + llgtr %r2,%r2 # struct pollfd * + llgfr %r3,%r3 # unsigned int + llgtr %r4,%r4 # struct 
timespec * + llgtr %r5,%r5 # const sigset_t * + llgfr %r6,%r6 # size_t + jg compat_sys_ppoll + +ENTRY(sys_unshare_wrapper) + llgfr %r2,%r2 # unsigned long + jg sys_unshare + +ENTRY(compat_sys_set_robust_list_wrapper) + llgtr %r2,%r2 # struct compat_robust_list_head * + llgfr %r3,%r3 # size_t + jg compat_sys_set_robust_list + +ENTRY(compat_sys_get_robust_list_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # compat_uptr_t_t * + llgtr %r4,%r4 # compat_size_t * + jg compat_sys_get_robust_list + +ENTRY(sys_splice_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # loff_t * + lgfr %r4,%r4 # int + llgtr %r5,%r5 # loff_t * + llgfr %r6,%r6 # size_t + llgf %r0,164(%r15) # unsigned int + stg %r0,160(%r15) + jg sys_splice + +ENTRY(sys_sync_file_range_wrapper) + lgfr %r2,%r2 # int + sllg %r3,%r3,32 # get high word of 64bit loff_t + or %r3,%r4 # get low word of 64bit loff_t + sllg %r4,%r5,32 # get high word of 64bit loff_t + or %r4,%r6 # get low word of 64bit loff_t + llgf %r5,164(%r15) # unsigned int + jg sys_sync_file_range + +ENTRY(sys_tee_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + llgfr %r4,%r4 # size_t + llgfr %r5,%r5 # unsigned int + jg sys_tee + +ENTRY(compat_sys_vmsplice_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # compat_iovec * + llgfr %r4,%r4 # unsigned int + llgfr %r5,%r5 # unsigned int + jg compat_sys_vmsplice + +ENTRY(sys_getcpu_wrapper) + llgtr %r2,%r2 # unsigned * + llgtr %r3,%r3 # unsigned * + llgtr %r4,%r4 # struct getcpu_cache * + jg sys_getcpu + +ENTRY(compat_sys_epoll_pwait_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # struct compat_epoll_event * + lgfr %r4,%r4 # int + lgfr %r5,%r5 # int + llgtr %r6,%r6 # compat_sigset_t * + llgf %r0,164(%r15) # compat_size_t + stg %r0,160(%r15) + jg compat_sys_epoll_pwait + +ENTRY(compat_sys_utimes_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # struct compat_timeval * + jg compat_sys_utimes + +ENTRY(compat_sys_utimensat_wrapper) + llgfr %r2,%r2 # unsigned int + llgtr %r3,%r3 # char * + llgtr %r4,%r4 # struct 
compat_timespec * + lgfr %r5,%r5 # int + jg compat_sys_utimensat + +ENTRY(compat_sys_signalfd_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # compat_sigset_t * + llgfr %r4,%r4 # compat_size_t + jg compat_sys_signalfd + +ENTRY(sys_eventfd_wrapper) + llgfr %r2,%r2 # unsigned int + jg sys_eventfd + +ENTRY(sys_fallocate_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + sllg %r4,%r4,32 # get high word of 64bit loff_t + lr %r4,%r5 # get low word of 64bit loff_t + sllg %r5,%r6,32 # get high word of 64bit loff_t + l %r5,164(%r15) # get low word of 64bit loff_t + jg sys_fallocate + +ENTRY(sys_timerfd_create_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + jg sys_timerfd_create + +ENTRY(compat_sys_timerfd_settime_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + llgtr %r4,%r4 # struct compat_itimerspec * + llgtr %r5,%r5 # struct compat_itimerspec * + jg compat_sys_timerfd_settime + +ENTRY(compat_sys_timerfd_gettime_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # struct compat_itimerspec * + jg compat_sys_timerfd_gettime + +ENTRY(compat_sys_signalfd4_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # compat_sigset_t * + llgfr %r4,%r4 # compat_size_t + lgfr %r5,%r5 # int + jg compat_sys_signalfd4 + +ENTRY(sys_eventfd2_wrapper) + llgfr %r2,%r2 # unsigned int + lgfr %r3,%r3 # int + jg sys_eventfd2 + +ENTRY(sys_inotify_init1_wrapper) + lgfr %r2,%r2 # int + jg sys_inotify_init1 + +ENTRY(sys_pipe2_wrapper) + llgtr %r2,%r2 # u32 * + lgfr %r3,%r3 # int + jg sys_pipe2 # branch to system call + +ENTRY(sys_dup3_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # unsigned int + lgfr %r4,%r4 # int + jg sys_dup3 # branch to system call + +ENTRY(sys_epoll_create1_wrapper) + lgfr %r2,%r2 # int + jg sys_epoll_create1 # branch to system call + +ENTRY(sys32_readahead_wrapper) + lgfr %r2,%r2 # int + llgfr %r3,%r3 # u32 + llgfr %r4,%r4 # u32 + lgfr %r5,%r5 # s32 + jg sys32_readahead # branch to system call + +ENTRY(sys32_sendfile64_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + 
llgtr %r4,%r4 # compat_loff_t * + lgfr %r5,%r5 # s32 + jg sys32_sendfile64 # branch to system call + +ENTRY(sys_tkill_wrapper) + lgfr %r2,%r2 # pid_t + lgfr %r3,%r3 # int + jg sys_tkill # branch to system call + +ENTRY(sys_tgkill_wrapper) + lgfr %r2,%r2 # pid_t + lgfr %r3,%r3 # pid_t + lgfr %r4,%r4 # int + jg sys_tgkill # branch to system call + +ENTRY(compat_sys_keyctl_wrapper) + llgfr %r2,%r2 # u32 + llgfr %r3,%r3 # u32 + llgfr %r4,%r4 # u32 + llgfr %r5,%r5 # u32 + llgfr %r6,%r6 # u32 + jg compat_sys_keyctl # branch to system call + +ENTRY(compat_sys_preadv_wrapper) + llgfr %r2,%r2 # unsigned long + llgtr %r3,%r3 # compat_iovec * + llgfr %r4,%r4 # unsigned long + llgfr %r5,%r5 # u32 + llgfr %r6,%r6 # u32 + jg compat_sys_preadv # branch to system call + +ENTRY(compat_sys_pwritev_wrapper) + llgfr %r2,%r2 # unsigned long + llgtr %r3,%r3 # compat_iovec * + llgfr %r4,%r4 # unsigned long + llgfr %r5,%r5 # u32 + llgfr %r6,%r6 # u32 + jg compat_sys_pwritev # branch to system call + +ENTRY(compat_sys_rt_tgsigqueueinfo_wrapper) + lgfr %r2,%r2 # compat_pid_t + lgfr %r3,%r3 # compat_pid_t + lgfr %r4,%r4 # int + llgtr %r5,%r5 # struct compat_siginfo * + jg compat_sys_rt_tgsigqueueinfo # branch to system call (not back to this wrapper) + +ENTRY(sys_perf_event_open_wrapper) + llgtr %r2,%r2 # const struct perf_event_attr * + lgfr %r3,%r3 # pid_t + lgfr %r4,%r4 # int + lgfr %r5,%r5 # int + llgfr %r6,%r6 # unsigned long + jg sys_perf_event_open # branch to system call + +ENTRY(sys_clone_wrapper) + llgfr %r2,%r2 # unsigned long + llgfr %r3,%r3 # unsigned long + llgtr %r4,%r4 # int * + llgtr %r5,%r5 # int * + jg sys_clone # branch to system call + +ENTRY(sys32_execve_wrapper) + llgtr %r2,%r2 # char * + llgtr %r3,%r3 # compat_uptr_t * + llgtr %r4,%r4 # compat_uptr_t * + jg sys32_execve # branch to system call + +ENTRY(sys_fanotify_init_wrapper) + llgfr %r2,%r2 # unsigned int + llgfr %r3,%r3 # unsigned int + jg sys_fanotify_init # branch to system call + +ENTRY(sys_fanotify_mark_wrapper) + lgfr %r2,%r2
# int + llgfr %r3,%r3 # unsigned int + sllg %r4,%r4,32 # get high word of 64bit mask + lr %r4,%r5 # get low word of 64bit mask + llgfr %r5,%r6 # unsigned int + llgt %r6,164(%r15) # char * + jg sys_fanotify_mark # branch to system call + +ENTRY(sys_prlimit64_wrapper) + lgfr %r2,%r2 # pid_t + llgfr %r3,%r3 # unsigned int + llgtr %r4,%r4 # const struct rlimit64 __user * + llgtr %r5,%r5 # struct rlimit64 __user * + jg sys_prlimit64 # branch to system call + +ENTRY(sys_name_to_handle_at_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const char __user * + llgtr %r4,%r4 # struct file_handle __user * + llgtr %r5,%r5 # int __user * + lgfr %r6,%r6 # int + jg sys_name_to_handle_at + +ENTRY(compat_sys_open_by_handle_at_wrapper) + lgfr %r2,%r2 # int + llgtr %r3,%r3 # struct file_handle __user * + lgfr %r4,%r4 # int + jg compat_sys_open_by_handle_at + +ENTRY(compat_sys_clock_adjtime_wrapper) + lgfr %r2,%r2 # clockid_t (int) + llgtr %r3,%r3 # struct compat_timex __user * + jg compat_sys_clock_adjtime + +ENTRY(sys_syncfs_wrapper) + lgfr %r2,%r2 # int + jg sys_syncfs + +ENTRY(sys_setns_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + jg sys_setns + +ENTRY(compat_sys_process_vm_readv_wrapper) + lgfr %r2,%r2 # compat_pid_t + llgtr %r3,%r3 # struct compat_iovec __user * + llgfr %r4,%r4 # unsigned long + llgtr %r5,%r5 # struct compat_iovec __user * + llgfr %r6,%r6 # unsigned long + llgf %r0,164(%r15) # unsigned long + stg %r0,160(%r15) + jg compat_sys_process_vm_readv # compat handler: iovecs are compat_iovec + +ENTRY(compat_sys_process_vm_writev_wrapper) + lgfr %r2,%r2 # compat_pid_t + llgtr %r3,%r3 # struct compat_iovec __user * + llgfr %r4,%r4 # unsigned long + llgtr %r5,%r5 # struct compat_iovec __user * + llgfr %r6,%r6 # unsigned long + llgf %r0,164(%r15) # unsigned long + stg %r0,160(%r15) + jg compat_sys_process_vm_writev # compat handler: iovecs are compat_iovec diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c new file mode 100644 index 00000000..e3dd886e --- /dev/null +++ b/arch/s390/kernel/cpcmd.c @@ -0,0 +1,125 @@ +/* + * arch/s390/kernel/cpcmd.c + *
+ * S390 version + * Copyright IBM Corp. 1999,2007 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Christian Borntraeger (cborntra@de.ibm.com), + */ + +#define KMSG_COMPONENT "cpcmd" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <asm/ebcdic.h> +#include <asm/cpcmd.h> +#include <asm/io.h> + +static DEFINE_SPINLOCK(cpcmd_lock); +static char cpcmd_buf[241]; + +static int diag8_noresponse(int cmdlen) +{ + register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; + register unsigned long reg3 asm ("3") = cmdlen; + + asm volatile( +#ifndef CONFIG_64BIT + " diag %1,%0,0x8\n" +#else /* CONFIG_64BIT */ + " sam31\n" + " diag %1,%0,0x8\n" + " sam64\n" +#endif /* CONFIG_64BIT */ + : "+d" (reg3) : "d" (reg2) : "cc"); + return reg3; +} + +static int diag8_response(int cmdlen, char *response, int *rlen) +{ + register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; + register unsigned long reg3 asm ("3") = (addr_t) response; + register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L; + register unsigned long reg5 asm ("5") = *rlen; + + asm volatile( +#ifndef CONFIG_64BIT + " diag %2,%0,0x8\n" + " brc 8,1f\n" + " ar %1,%4\n" +#else /* CONFIG_64BIT */ + " sam31\n" + " diag %2,%0,0x8\n" + " sam64\n" + " brc 8,1f\n" + " agr %1,%4\n" +#endif /* CONFIG_64BIT */ + "1:\n" + : "+d" (reg4), "+d" (reg5) + : "d" (reg2), "d" (reg3), "d" (*rlen) : "cc"); + *rlen = reg5; + return reg4; +} + +/* + * __cpcmd has some restrictions over cpcmd + * - the response buffer must reside below 2GB (if any) + * - __cpcmd is unlocked and therefore not SMP-safe + */ +int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) +{ + int cmdlen; + int rc; + int response_len; + + cmdlen = strlen(cmd); + BUG_ON(cmdlen > 240); + memcpy(cpcmd_buf, cmd, cmdlen); + ASCEBC(cpcmd_buf, cmdlen); + + if (response) 
{ + memset(response, 0, rlen); + response_len = rlen; + rc = diag8_response(cmdlen, response, &rlen); + EBCASC(response, response_len); + } else { + rc = diag8_noresponse(cmdlen); + } + if (response_code) + *response_code = rc; + return rlen; +} +EXPORT_SYMBOL(__cpcmd); + +int cpcmd(const char *cmd, char *response, int rlen, int *response_code) +{ + char *lowbuf; + int len; + unsigned long flags; + + if ((virt_to_phys(response) != (unsigned long) response) || + (((unsigned long)response + rlen) >> 31)) { + lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); + if (!lowbuf) { + pr_warning("The cpcmd kernel function failed to " + "allocate a response buffer\n"); + return -ENOMEM; + } + spin_lock_irqsave(&cpcmd_lock, flags); + len = __cpcmd(cmd, lowbuf, rlen, response_code); + spin_unlock_irqrestore(&cpcmd_lock, flags); + memcpy(response, lowbuf, rlen); + kfree(lowbuf); + } else { + spin_lock_irqsave(&cpcmd_lock, flags); + len = __cpcmd(cmd, response, rlen, response_code); + spin_unlock_irqrestore(&cpcmd_lock, flags); + } + return len; +} +EXPORT_SYMBOL(cpcmd); diff --git a/arch/s390/kernel/crash.c b/arch/s390/kernel/crash.c new file mode 100644 index 00000000..8cc7c9fa --- /dev/null +++ b/arch/s390/kernel/crash.c @@ -0,0 +1,16 @@ +/* + * arch/s390/kernel/crash.c + * + * (C) Copyright IBM Corp. 2005 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + * + */ + +#include <linux/threads.h> +#include <linux/kexec.h> +#include <linux/reboot.h> + +void machine_crash_shutdown(struct pt_regs *regs) +{ +} diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c new file mode 100644 index 00000000..cc1172b2 --- /dev/null +++ b/arch/s390/kernel/crash_dump.c @@ -0,0 +1,442 @@ +/* + * S390 kdump implementation + * + * Copyright IBM Corp. 
2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/crash_dump.h> +#include <asm/lowcore.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/bootmem.h> +#include <linux/elf.h> +#include <asm/ipl.h> +#include <asm/os_info.h> + +#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) +#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) +#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) + +/* + * Copy one page from "oldmem" + * + * For the kdump reserved memory this functions performs a swap operation: + * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. + * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + unsigned long src; + + if (!csize) + return 0; + + src = (pfn << PAGE_SHIFT) + offset; + if (src < OLDMEM_SIZE) + src += OLDMEM_BASE; + else if (src > OLDMEM_BASE && + src < OLDMEM_BASE + OLDMEM_SIZE) + src -= OLDMEM_BASE; + if (userbuf) + copy_to_user_real((void __force __user *) buf, (void *) src, + csize); + else + memcpy_real(buf, (void *) src, csize); + return csize; +} + +/* + * Copy memory from old kernel + */ +int copy_from_oldmem(void *dest, void *src, size_t count) +{ + unsigned long copied = 0; + int rc; + + if ((unsigned long) src < OLDMEM_SIZE) { + copied = min(count, OLDMEM_SIZE - (unsigned long) src); + rc = memcpy_real(dest, src + OLDMEM_BASE, copied); + if (rc) + return rc; + } + return memcpy_real(dest + copied, src + copied, count - copied); +} + +/* + * Alloc memory and panic in case of ENOMEM + */ +static void *kzalloc_panic(int len) +{ + void *rc; + + rc = kzalloc(len, GFP_KERNEL); + if (!rc) + panic("s390 kdump kzalloc (%d) failed", len); + return rc; +} + +/* + * Get memory layout and create hole for oldmem + */ +static 
struct mem_chunk *get_memory_layout(void) +{ + struct mem_chunk *chunk_array; + + chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk)); + detect_memory_layout(chunk_array); + create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK); + return chunk_array; +} + +/* + * Initialize ELF note + */ +static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) +{ + Elf64_Nhdr *note; + u64 len; + + note = (Elf64_Nhdr *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = d_len; + note->n_type = type; + len = sizeof(Elf64_Nhdr); + + memcpy(buf + len, name, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + memcpy(buf + len, desc, note->n_descsz); + len = roundup(len + note->n_descsz, 4); + + return PTR_ADD(buf, len); +} + +/* + * Initialize prstatus note + */ +static void *nt_prstatus(void *ptr, struct save_area *sa) +{ + struct elf_prstatus nt_prstatus; + static int cpu_nr = 1; + + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); + nt_prstatus.pr_pid = cpu_nr; + cpu_nr++; + + return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), + "CORE"); +} + +/* + * Initialize fpregset (floating point) note + */ +static void *nt_fpregset(void *ptr, struct save_area *sa) +{ + elf_fpregset_t nt_fpregset; + + memset(&nt_fpregset, 0, sizeof(nt_fpregset)); + memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); + memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); + + return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), + "CORE"); +} + +/* + * Initialize timer note + */ +static void *nt_s390_timer(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD clock 
comparator note + */ +static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, + sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD programmable register note + */ +static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, + sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize control register note + */ +static void *nt_s390_ctrs(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, + sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize prefix register note + */ +static void *nt_s390_prefix(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, + sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Fill ELF notes for one CPU with save area registers + */ +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) +{ + ptr = nt_prstatus(ptr, sa); + ptr = nt_fpregset(ptr, sa); + ptr = nt_s390_timer(ptr, sa); + ptr = nt_s390_tod_cmp(ptr, sa); + ptr = nt_s390_tod_preg(ptr, sa); + ptr = nt_s390_ctrs(ptr, sa); + ptr = nt_s390_prefix(ptr, sa); + return ptr; +} + +/* + * Initialize prpsinfo note (new kernel) + */ +static void *nt_prpsinfo(void *ptr) +{ + struct elf_prpsinfo prpsinfo; + + memset(&prpsinfo, 0, sizeof(prpsinfo)); + prpsinfo.pr_sname = 'R'; + strcpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Get vmcoreinfo using lowcore->vmcore_info (new kernel) + */ +static void *get_vmcoreinfo_old(unsigned long *size) +{ + char nt_name[11], *vmcoreinfo; + Elf64_Nhdr note; + void *addr; + + if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return NULL; + memset(nt_name, 0, sizeof(nt_name)); + if (copy_from_oldmem(&note, addr, sizeof(note))) + return NULL; + if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) -
1)) + return NULL; + if (strcmp(nt_name, "VMCOREINFO") != 0) + return NULL; + vmcoreinfo = kzalloc_panic(note.n_descsz); + if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + return NULL; + *size = note.n_descsz; + return vmcoreinfo; +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void *nt_vmcoreinfo(void *ptr) +{ + unsigned long size; + void *vmcoreinfo; + + vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); + if (!vmcoreinfo) + vmcoreinfo = get_vmcoreinfo_old(&size); + if (!vmcoreinfo) + return ptr; + return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); +} + +/* + * Initialize ELF header (new kernel) + */ +static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) +{ + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2MSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = EM_S390; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + ehdr->e_phnum = mem_chunk_cnt + 1; + return ehdr + 1; +} + +/* + * Return CPU count for ELF header (new kernel) + */ +static int get_cpu_cnt(void) +{ + int i, cpus = 0; + + for (i = 0; zfcpdump_save_areas[i]; i++) { + if (zfcpdump_save_areas[i]->pref_reg == 0) + continue; + cpus++; + } + return cpus; +} + +/* + * Return memory chunk count for ELF header (new kernel) + */ +static int get_mem_chunk_cnt(void) +{ + struct mem_chunk *chunk_array, *mem_chunk; + int i, cnt = 0; + + chunk_array = get_memory_layout(); + for (i = 0; i < MEMORY_CHUNKS; i++) { + mem_chunk = &chunk_array[i]; + if (chunk_array[i].type != CHUNK_READ_WRITE && + chunk_array[i].type != CHUNK_READ_ONLY) + continue; + if (mem_chunk->size == 0) + continue; + cnt++; + } + kfree(chunk_array); + return cnt; +} + +/* + * Relocate pointer in order 
to allow vmcore code access the data + */ +static inline unsigned long relocate(unsigned long addr) +{ + return OLDMEM_BASE + addr; +} + +/* + * Initialize ELF loads (new kernel) + */ +static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) +{ + struct mem_chunk *chunk_array, *mem_chunk; + int i; + + chunk_array = get_memory_layout(); + for (i = 0; i < MEMORY_CHUNKS; i++) { + mem_chunk = &chunk_array[i]; + if (mem_chunk->size == 0) + break; + if (chunk_array[i].type != CHUNK_READ_WRITE && + chunk_array[i].type != CHUNK_READ_ONLY) + continue; + else + phdr->p_filesz = mem_chunk->size; + phdr->p_type = PT_LOAD; + phdr->p_offset = mem_chunk->addr; + phdr->p_vaddr = mem_chunk->addr; + phdr->p_paddr = mem_chunk->addr; + phdr->p_memsz = mem_chunk->size; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; + phdr++; + } + kfree(chunk_array); + return i; +} + +/* + * Initialize notes (new kernel) + */ +static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) +{ + struct save_area *sa; + void *ptr_start = ptr; + int i; + + ptr = nt_prpsinfo(ptr); + + for (i = 0; zfcpdump_save_areas[i]; i++) { + sa = zfcpdump_save_areas[i]; + if (sa->pref_reg == 0) + continue; + ptr = fill_cpu_elf_notes(ptr, sa); + } + ptr = nt_vmcoreinfo(ptr); + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = PT_NOTE; + phdr->p_offset = relocate(notes_offset); + phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); + phdr->p_memsz = phdr->p_filesz; + return ptr; +} + +/* + * Create ELF core header (new kernel) + */ +static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) +{ + Elf64_Phdr *phdr_notes, *phdr_loads; + int mem_chunk_cnt; + void *ptr, *hdr; + u32 alloc_size; + u64 hdr_off; + + mem_chunk_cnt = get_mem_chunk_cnt(); + + alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + + mem_chunk_cnt * sizeof(Elf64_Phdr); + hdr = kzalloc_panic(alloc_size); + /* Init elf header */ + ptr = ehdr_init(hdr, mem_chunk_cnt); + /* Init program headers */ + phdr_notes = 
ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); + phdr_loads = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); + /* Init notes */ + hdr_off = PTR_DIFF(ptr, hdr); + ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); + /* Init loads */ + hdr_off = PTR_DIFF(ptr, hdr); + loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off); + *elfcorebuf_sz = hdr_off; + *elfcorebuf = (void *) relocate((unsigned long) hdr); + BUG_ON(*elfcorebuf_sz > alloc_size); +} + +/* + * Create kdump ELF core header in new kernel, if it has not been passed via + * the "elfcorehdr" kernel parameter + */ +static int setup_kdump_elfcorehdr(void) +{ + size_t elfcorebuf_sz; + char *elfcorebuf; + + if (!OLDMEM_BASE || is_kdump_kernel()) + return -EINVAL; + s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz); + elfcorehdr_addr = (unsigned long long) elfcorebuf; + elfcorehdr_size = elfcorebuf_sz; + return 0; +} + +subsys_initcall(setup_kdump_elfcorehdr); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c new file mode 100644 index 00000000..19e5e9eb --- /dev/null +++ b/arch/s390/kernel/debug.c @@ -0,0 +1,1566 @@ +/* + * arch/s390/kernel/debug.c + * S/390 debug facility + * + * Copyright IBM Corp. 
1999, 2012 + * + * Author(s): Michael Holzheu (holzheu@de.ibm.com), + * Holger Smolinski (Holger.Smolinski@de.ibm.com) + * + * Bugreports to: <Linux390@de.ibm.com> + */ + +#define KMSG_COMPONENT "s390dbf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/sysctl.h> +#include <asm/uaccess.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/debugfs.h> + +#include <asm/debug.h> + +#define DEBUG_PROLOG_ENTRY -1 + +#define ALL_AREAS 0 /* copy all debug areas */ +#define NO_AREAS 1 /* copy no debug areas */ + +/* typedefs */ + +typedef struct file_private_info { + loff_t offset; /* offset of last read in file */ + int act_area; /* number of last formated area */ + int act_page; /* act page in given area */ + int act_entry; /* last formated entry (offset */ + /* relative to beginning of last */ + /* formated page) */ + size_t act_entry_offset; /* up to this offset we copied */ + /* in last read the last formated */ + /* entry to userland */ + char temp_buf[2048]; /* buffer for output */ + debug_info_t *debug_info_org; /* original debug information */ + debug_info_t *debug_info_snap; /* snapshot of debug information */ + struct debug_view *view; /* used view of debug info */ +} file_private_info_t; + +typedef struct +{ + char *string; + /* + * This assumes that all args are converted into longs + * on L/390 this is the case for all types of parameter + * except of floats, and long long (32 bit) + * + */ + long args[0]; +} debug_sprintf_entry_t; + + +/* internal function prototyes */ + +static int debug_init(void); +static ssize_t debug_output(struct file *file, char __user *user_buf, + size_t user_len, loff_t * offset); +static ssize_t debug_input(struct file *file, const char __user *user_buf, + size_t user_len, loff_t * offset); +static int debug_open(struct 
inode *inode, struct file *file); +static int debug_close(struct inode *inode, struct file *file); +static debug_info_t *debug_info_create(const char *name, int pages_per_area, + int nr_areas, int buf_size, umode_t mode); +static void debug_info_get(debug_info_t *); +static void debug_info_put(debug_info_t *); +static int debug_prolog_level_fn(debug_info_t * id, + struct debug_view *view, char *out_buf); +static int debug_input_level_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t * offset); +static int debug_prolog_pages_fn(debug_info_t * id, + struct debug_view *view, char *out_buf); +static int debug_input_pages_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t * offset); +static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t * offset); +static int debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, + char *out_buf, const char *in_buf); +static int debug_raw_format_fn(debug_info_t * id, + struct debug_view *view, char *out_buf, + const char *in_buf); +static int debug_raw_header_fn(debug_info_t * id, struct debug_view *view, + int area, debug_entry_t * entry, char *out_buf); + +static int debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, + char *out_buf, debug_sprintf_entry_t *curr_event); + +/* globals */ + +struct debug_view debug_raw_view = { + "raw", + NULL, + &debug_raw_header_fn, + &debug_raw_format_fn, + NULL, + NULL +}; + +struct debug_view debug_hex_ascii_view = { + "hex_ascii", + NULL, + &debug_dflt_header_fn, + &debug_hex_ascii_format_fn, + NULL, + NULL +}; + +static struct debug_view debug_level_view = { + "level", + &debug_prolog_level_fn, + NULL, + NULL, + &debug_input_level_fn, + NULL +}; + +static struct debug_view debug_pages_view = { + "pages", + 
&debug_prolog_pages_fn, + NULL, + NULL, + &debug_input_pages_fn, + NULL +}; + +static struct debug_view debug_flush_view = { + "flush", + NULL, + NULL, + NULL, + &debug_input_flush_fn, + NULL +}; + +struct debug_view debug_sprintf_view = { + "sprintf", + NULL, + &debug_dflt_header_fn, + (debug_format_proc_t*)&debug_sprintf_format_fn, + NULL, + NULL +}; + +/* used by dump analysis tools to determine version of debug feature */ +static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION; + +/* static globals */ + +static debug_info_t *debug_area_first = NULL; +static debug_info_t *debug_area_last = NULL; +static DEFINE_MUTEX(debug_mutex); + +static int initialized; +static int debug_critical; + +static const struct file_operations debug_file_ops = { + .owner = THIS_MODULE, + .read = debug_output, + .write = debug_input, + .open = debug_open, + .release = debug_close, + .llseek = no_llseek, +}; + +static struct dentry *debug_debugfs_root_entry; + +/* functions */ + +/* + * debug_areas_alloc + * - Debug areas are implemented as a threedimensonal array: + * areas[areanumber][pagenumber][pageoffset] + */ + +static debug_entry_t*** +debug_areas_alloc(int pages_per_area, int nr_areas) +{ + debug_entry_t*** areas; + int i,j; + + areas = kmalloc(nr_areas * + sizeof(debug_entry_t**), + GFP_KERNEL); + if (!areas) + goto fail_malloc_areas; + for (i = 0; i < nr_areas; i++) { + areas[i] = kmalloc(pages_per_area * + sizeof(debug_entry_t*),GFP_KERNEL); + if (!areas[i]) { + goto fail_malloc_areas2; + } + for(j = 0; j < pages_per_area; j++) { + areas[i][j] = kzalloc(PAGE_SIZE, GFP_KERNEL); + if(!areas[i][j]) { + for(j--; j >=0 ; j--) { + kfree(areas[i][j]); + } + kfree(areas[i]); + goto fail_malloc_areas2; + } + } + } + return areas; + +fail_malloc_areas2: + for(i--; i >= 0; i--){ + for(j=0; j < pages_per_area;j++){ + kfree(areas[i][j]); + } + kfree(areas[i]); + } + kfree(areas); +fail_malloc_areas: + return NULL; + +} + + +/* + * debug_info_alloc + * - alloc new 
debug-info + */ + +static debug_info_t* +debug_info_alloc(const char *name, int pages_per_area, int nr_areas, + int buf_size, int level, int mode) +{ + debug_info_t* rc; + + /* alloc everything */ + + rc = kmalloc(sizeof(debug_info_t), GFP_KERNEL); + if(!rc) + goto fail_malloc_rc; + rc->active_entries = kcalloc(nr_areas, sizeof(int), GFP_KERNEL); + if(!rc->active_entries) + goto fail_malloc_active_entries; + rc->active_pages = kcalloc(nr_areas, sizeof(int), GFP_KERNEL); + if(!rc->active_pages) + goto fail_malloc_active_pages; + if((mode == ALL_AREAS) && (pages_per_area != 0)){ + rc->areas = debug_areas_alloc(pages_per_area, nr_areas); + if(!rc->areas) + goto fail_malloc_areas; + } else { + rc->areas = NULL; + } + + /* initialize members */ + + spin_lock_init(&rc->lock); + rc->pages_per_area = pages_per_area; + rc->nr_areas = nr_areas; + rc->active_area = 0; + rc->level = level; + rc->buf_size = buf_size; + rc->entry_size = sizeof(debug_entry_t) + buf_size; + strlcpy(rc->name, name, sizeof(rc->name)); + memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); + memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS * + sizeof(struct dentry*)); + atomic_set(&(rc->ref_count), 0); + + return rc; + +fail_malloc_areas: + kfree(rc->active_pages); +fail_malloc_active_pages: + kfree(rc->active_entries); +fail_malloc_active_entries: + kfree(rc); +fail_malloc_rc: + return NULL; +} + +/* + * debug_areas_free + * - free all debug areas + */ + +static void +debug_areas_free(debug_info_t* db_info) +{ + int i,j; + + if(!db_info->areas) + return; + for (i = 0; i < db_info->nr_areas; i++) { + for(j = 0; j < db_info->pages_per_area; j++) { + kfree(db_info->areas[i][j]); + } + kfree(db_info->areas[i]); + } + kfree(db_info->areas); + db_info->areas = NULL; +} + +/* + * debug_info_free + * - free memory debug-info + */ + +static void +debug_info_free(debug_info_t* db_info){ + debug_areas_free(db_info); + kfree(db_info->active_entries); + kfree(db_info->active_pages); + 
kfree(db_info); +} + +/* + * debug_info_create + * - create new debug-info + */ + +static debug_info_t* +debug_info_create(const char *name, int pages_per_area, int nr_areas, + int buf_size, umode_t mode) +{ + debug_info_t* rc; + + rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size, + DEBUG_DEFAULT_LEVEL, ALL_AREAS); + if(!rc) + goto out; + + rc->mode = mode & ~S_IFMT; + + /* create root directory */ + rc->debugfs_root_entry = debugfs_create_dir(rc->name, + debug_debugfs_root_entry); + + /* append new element to linked list */ + if (!debug_area_first) { + /* first element in list */ + debug_area_first = rc; + rc->prev = NULL; + } else { + /* append element to end of list */ + debug_area_last->next = rc; + rc->prev = debug_area_last; + } + debug_area_last = rc; + rc->next = NULL; + + debug_info_get(rc); +out: + return rc; +} + +/* + * debug_info_copy + * - copy debug-info + */ + +static debug_info_t* +debug_info_copy(debug_info_t* in, int mode) +{ + int i,j; + debug_info_t* rc; + unsigned long flags; + + /* get a consistent copy of the debug areas */ + do { + rc = debug_info_alloc(in->name, in->pages_per_area, + in->nr_areas, in->buf_size, in->level, mode); + spin_lock_irqsave(&in->lock, flags); + if(!rc) + goto out; + /* has something changed in the meantime ? 
*/ + if((rc->pages_per_area == in->pages_per_area) && + (rc->nr_areas == in->nr_areas)) { + break; + } + spin_unlock_irqrestore(&in->lock, flags); + debug_info_free(rc); + } while (1); + + if (mode == NO_AREAS) + goto out; + + for(i = 0; i < in->nr_areas; i++){ + for(j = 0; j < in->pages_per_area; j++) { + memcpy(rc->areas[i][j], in->areas[i][j],PAGE_SIZE); + } + } +out: + spin_unlock_irqrestore(&in->lock, flags); + return rc; +} + +/* + * debug_info_get + * - increments reference count for debug-info + */ + +static void +debug_info_get(debug_info_t * db_info) +{ + if (db_info) + atomic_inc(&db_info->ref_count); +} + +/* + * debug_info_put: + * - decreases reference count for debug-info and frees it if necessary + */ + +static void +debug_info_put(debug_info_t *db_info) +{ + int i; + + if (!db_info) + return; + if (atomic_dec_and_test(&db_info->ref_count)) { + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (!db_info->views[i]) + continue; + debugfs_remove(db_info->debugfs_entries[i]); + } + debugfs_remove(db_info->debugfs_root_entry); + if(db_info == debug_area_first) + debug_area_first = db_info->next; + if(db_info == debug_area_last) + debug_area_last = db_info->prev; + if(db_info->prev) db_info->prev->next = db_info->next; + if(db_info->next) db_info->next->prev = db_info->prev; + debug_info_free(db_info); + } +} + +/* + * debug_format_entry: + * - format one debug entry and return size of formated data + */ + +static int +debug_format_entry(file_private_info_t *p_info) +{ + debug_info_t *id_snap = p_info->debug_info_snap; + struct debug_view *view = p_info->view; + debug_entry_t *act_entry; + size_t len = 0; + if(p_info->act_entry == DEBUG_PROLOG_ENTRY){ + /* print prolog */ + if (view->prolog_proc) + len += view->prolog_proc(id_snap,view,p_info->temp_buf); + goto out; + } + if (!id_snap->areas) /* this is true, if we have a prolog only view */ + goto out; /* or if 'pages_per_area' is 0 */ + act_entry = (debug_entry_t *) ((char*)id_snap->areas[p_info->act_area] + 
[p_info->act_page] + p_info->act_entry); + + if (act_entry->id.stck == 0LL) + goto out; /* empty entry */ + if (view->header_proc) + len += view->header_proc(id_snap, view, p_info->act_area, + act_entry, p_info->temp_buf + len); + if (view->format_proc) + len += view->format_proc(id_snap, view, p_info->temp_buf + len, + DEBUG_DATA(act_entry)); +out: + return len; +} + +/* + * debug_next_entry: + * - goto next entry in p_info + */ + +static inline int +debug_next_entry(file_private_info_t *p_info) +{ + debug_info_t *id; + + id = p_info->debug_info_snap; + if(p_info->act_entry == DEBUG_PROLOG_ENTRY){ + p_info->act_entry = 0; + p_info->act_page = 0; + goto out; + } + if(!id->areas) + return 1; + p_info->act_entry += id->entry_size; + /* switch to next page, if we reached the end of the page */ + if (p_info->act_entry > (PAGE_SIZE - id->entry_size)){ + /* next page */ + p_info->act_entry = 0; + p_info->act_page += 1; + if((p_info->act_page % id->pages_per_area) == 0) { + /* next area */ + p_info->act_area++; + p_info->act_page=0; + } + if(p_info->act_area >= id->nr_areas) + return 1; + } +out: + return 0; +} + +/* + * debug_output: + * - called for user read() + * - copies formated debug entries to the user buffer + */ + +static ssize_t +debug_output(struct file *file, /* file descriptor */ + char __user *user_buf, /* user buffer */ + size_t len, /* length of buffer */ + loff_t *offset) /* offset in the file */ +{ + size_t count = 0; + size_t entry_offset; + file_private_info_t *p_info; + + p_info = ((file_private_info_t *) file->private_data); + if (*offset != p_info->offset) + return -EPIPE; + if(p_info->act_area >= p_info->debug_info_snap->nr_areas) + return 0; + entry_offset = p_info->act_entry_offset; + while(count < len){ + int formatted_line_size; + int formatted_line_residue; + int user_buf_residue; + size_t copy_size; + + formatted_line_size = debug_format_entry(p_info); + formatted_line_residue = formatted_line_size - entry_offset; + user_buf_residue = 
len-count; + copy_size = min(user_buf_residue, formatted_line_residue); + if(copy_size){ + if (copy_to_user(user_buf + count, p_info->temp_buf + + entry_offset, copy_size)) + return -EFAULT; + count += copy_size; + entry_offset += copy_size; + } + if(copy_size == formatted_line_residue){ + entry_offset = 0; + if(debug_next_entry(p_info)) + goto out; + } + } +out: + p_info->offset = *offset + count; + p_info->act_entry_offset = entry_offset; + *offset = p_info->offset; + return count; +} + +/* + * debug_input: + * - called for user write() + * - calls input function of view + */ + +static ssize_t +debug_input(struct file *file, const char __user *user_buf, size_t length, + loff_t *offset) +{ + int rc = 0; + file_private_info_t *p_info; + + mutex_lock(&debug_mutex); + p_info = ((file_private_info_t *) file->private_data); + if (p_info->view->input_proc) + rc = p_info->view->input_proc(p_info->debug_info_org, + p_info->view, file, user_buf, + length, offset); + else + rc = -EPERM; + mutex_unlock(&debug_mutex); + return rc; /* number of input characters */ +} + +/* + * debug_open: + * - called for user open() + * - copies formated output to private_data area of the file + * handle + */ + +static int +debug_open(struct inode *inode, struct file *file) +{ + int i, rc = 0; + file_private_info_t *p_info; + debug_info_t *debug_info, *debug_info_snapshot; + + mutex_lock(&debug_mutex); + debug_info = file->f_path.dentry->d_inode->i_private; + /* find debug view */ + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (!debug_info->views[i]) + continue; + else if (debug_info->debugfs_entries[i] == + file->f_path.dentry) { + goto found; /* found view ! */ + } + } + /* no entry found */ + rc = -EINVAL; + goto out; + +found: + + /* Make snapshot of current debug areas to get it consistent. */ + /* To copy all the areas is only needed, if we have a view which */ + /* formats the debug areas. 
*/ + + if(!debug_info->views[i]->format_proc && + !debug_info->views[i]->header_proc){ + debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); + } else { + debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); + } + + if(!debug_info_snapshot){ + rc = -ENOMEM; + goto out; + } + p_info = kmalloc(sizeof(file_private_info_t), + GFP_KERNEL); + if(!p_info){ + debug_info_free(debug_info_snapshot); + rc = -ENOMEM; + goto out; + } + p_info->offset = 0; + p_info->debug_info_snap = debug_info_snapshot; + p_info->debug_info_org = debug_info; + p_info->view = debug_info->views[i]; + p_info->act_area = 0; + p_info->act_page = 0; + p_info->act_entry = DEBUG_PROLOG_ENTRY; + p_info->act_entry_offset = 0; + file->private_data = p_info; + debug_info_get(debug_info); + nonseekable_open(inode, file); +out: + mutex_unlock(&debug_mutex); + return rc; +} + +/* + * debug_close: + * - called for user close() + * - deletes private_data area of the file handle + */ + +static int +debug_close(struct inode *inode, struct file *file) +{ + file_private_info_t *p_info; + p_info = (file_private_info_t *) file->private_data; + if(p_info->debug_info_snap) + debug_info_free(p_info->debug_info_snap); + debug_info_put(p_info->debug_info_org); + kfree(file->private_data); + return 0; /* success */ +} + +/* + * debug_register_mode: + * - Creates and initializes debug area for the caller + * The mode parameter allows to specify access rights for the s390dbf files + * - Returns handle for debug area + */ + +debug_info_t *debug_register_mode(const char *name, int pages_per_area, + int nr_areas, int buf_size, umode_t mode, + uid_t uid, gid_t gid) +{ + debug_info_t *rc = NULL; + + /* Since debugfs currently does not support uid/gid other than root, */ + /* we do not allow gid/uid != 0 until we get support for that. 
*/ + if ((uid != 0) || (gid != 0)) + pr_warning("Root becomes the owner of all s390dbf files " + "in sysfs\n"); + BUG_ON(!initialized); + mutex_lock(&debug_mutex); + + /* create new debug_info */ + + rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode); + if(!rc) + goto out; + debug_register_view(rc, &debug_level_view); + debug_register_view(rc, &debug_flush_view); + debug_register_view(rc, &debug_pages_view); +out: + if (!rc){ + pr_err("Registering debug feature %s failed\n", name); + } + mutex_unlock(&debug_mutex); + return rc; +} +EXPORT_SYMBOL(debug_register_mode); + +/* + * debug_register: + * - creates and initializes debug area for the caller + * - returns handle for debug area + */ + +debug_info_t *debug_register(const char *name, int pages_per_area, + int nr_areas, int buf_size) +{ + return debug_register_mode(name, pages_per_area, nr_areas, buf_size, + S_IRUSR | S_IWUSR, 0, 0); +} + +/* + * debug_unregister: + * - give back debug area + */ + +void +debug_unregister(debug_info_t * id) +{ + if (!id) + goto out; + mutex_lock(&debug_mutex); + debug_info_put(id); + mutex_unlock(&debug_mutex); + +out: + return; +} + +/* + * debug_set_size: + * - set area size (number of pages) and number of areas + */ +static int +debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area) +{ + unsigned long flags; + debug_entry_t *** new_areas; + int rc=0; + + if(!id || (nr_areas <= 0) || (pages_per_area < 0)) + return -EINVAL; + if(pages_per_area > 0){ + new_areas = debug_areas_alloc(pages_per_area, nr_areas); + if(!new_areas) { + pr_info("Allocating memory for %i pages failed\n", + pages_per_area); + rc = -ENOMEM; + goto out; + } + } else { + new_areas = NULL; + } + spin_lock_irqsave(&id->lock,flags); + debug_areas_free(id); + id->areas = new_areas; + id->nr_areas = nr_areas; + id->pages_per_area = pages_per_area; + id->active_area = 0; + memset(id->active_entries,0,sizeof(int)*id->nr_areas); + memset(id->active_pages, 0, sizeof(int)*id->nr_areas); + 
spin_unlock_irqrestore(&id->lock,flags); + pr_info("%s: set new size (%i pages)\n" ,id->name, pages_per_area); +out: + return rc; +} + +/* + * debug_set_level: + * - set actual debug level + */ + +void +debug_set_level(debug_info_t* id, int new_level) +{ + unsigned long flags; + if(!id) + return; + spin_lock_irqsave(&id->lock,flags); + if(new_level == DEBUG_OFF_LEVEL){ + id->level = DEBUG_OFF_LEVEL; + pr_info("%s: switched off\n",id->name); + } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) { + pr_info("%s: level %i is out of range (%i - %i)\n", + id->name, new_level, 0, DEBUG_MAX_LEVEL); + } else { + id->level = new_level; + } + spin_unlock_irqrestore(&id->lock,flags); +} + + +/* + * proceed_active_entry: + * - set active entry to next in the ring buffer + */ + +static inline void +proceed_active_entry(debug_info_t * id) +{ + if ((id->active_entries[id->active_area] += id->entry_size) + > (PAGE_SIZE - id->entry_size)){ + id->active_entries[id->active_area] = 0; + id->active_pages[id->active_area] = + (id->active_pages[id->active_area] + 1) % + id->pages_per_area; + } +} + +/* + * proceed_active_area: + * - set active area to next in the ring buffer + */ + +static inline void +proceed_active_area(debug_info_t * id) +{ + id->active_area++; + id->active_area = id->active_area % id->nr_areas; +} + +/* + * get_active_entry: + */ + +static inline debug_entry_t* +get_active_entry(debug_info_t * id) +{ + return (debug_entry_t *) (((char *) id->areas[id->active_area] + [id->active_pages[id->active_area]]) + + id->active_entries[id->active_area]); +} + +/* + * debug_finish_entry: + * - set timestamp, caller address, cpu number etc. 
+ */ + +static inline void +debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, + int exception) +{ + active->id.stck = get_clock(); + active->id.fields.cpuid = smp_processor_id(); + active->caller = __builtin_return_address(0); + active->id.fields.exception = exception; + active->id.fields.level = level; + proceed_active_entry(id); + if(exception) + proceed_active_area(id); +} + +static int debug_stoppable=1; +static int debug_active=1; + +#define CTL_S390DBF_STOPPABLE 5678 +#define CTL_S390DBF_ACTIVE 5679 + +/* + * proc handler for the running debug_active sysctl + * always allow read, allow write only if debug_stoppable is set or + * if debug_active is already off + */ +static int +s390dbf_procactive(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + if (!write || debug_stoppable || !debug_active) + return proc_dointvec(table, write, buffer, lenp, ppos); + else + return 0; +} + + +static struct ctl_table s390dbf_table[] = { + { + .procname = "debug_stoppable", + .data = &debug_stoppable, + .maxlen = sizeof(int), + .mode = S_IRUGO | S_IWUSR, + .proc_handler = proc_dointvec, + }, + { + .procname = "debug_active", + .data = &debug_active, + .maxlen = sizeof(int), + .mode = S_IRUGO | S_IWUSR, + .proc_handler = s390dbf_procactive, + }, + { } +}; + +static struct ctl_table s390dbf_dir_table[] = { + { + .procname = "s390dbf", + .maxlen = 0, + .mode = S_IRUGO | S_IXUGO, + .child = s390dbf_table, + }, + { } +}; + +static struct ctl_table_header *s390dbf_sysctl_header; + +void +debug_stop_all(void) +{ + if (debug_stoppable) + debug_active = 0; +} + + +void debug_set_critical(void) +{ + debug_critical = 1; +} + +/* + * debug_event_common: + * - write debug entry with given size + */ + +debug_entry_t* +debug_event_common(debug_info_t * id, int level, const void *buf, int len) +{ + unsigned long flags; + debug_entry_t *active; + + if (!debug_active || !id->areas) + return NULL; + if (debug_critical) { + if 
(!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); + active = get_active_entry(id); + memset(DEBUG_DATA(active), 0, id->buf_size); + memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); + debug_finish_entry(id, active, level, 0); + spin_unlock_irqrestore(&id->lock, flags); + + return active; +} + +/* + * debug_exception_common: + * - write debug entry with given size and switch to next debug area + */ + +debug_entry_t +*debug_exception_common(debug_info_t * id, int level, const void *buf, int len) +{ + unsigned long flags; + debug_entry_t *active; + + if (!debug_active || !id->areas) + return NULL; + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); + active = get_active_entry(id); + memset(DEBUG_DATA(active), 0, id->buf_size); + memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); + debug_finish_entry(id, active, level, 1); + spin_unlock_irqrestore(&id->lock, flags); + + return active; +} + +/* + * counts arguments in format string for sprintf view + */ + +static inline int +debug_count_numargs(char *string) +{ + int numargs=0; + + while(*string) { + if(*string++=='%') + numargs++; + } + return(numargs); +} + +/* + * debug_sprintf_event: + */ + +debug_entry_t* +debug_sprintf_event(debug_info_t* id, int level,char *string,...) 
+{ + va_list ap; + int numargs,idx; + unsigned long flags; + debug_sprintf_entry_t *curr_event; + debug_entry_t *active; + + if((!id) || (level > id->level)) + return NULL; + if (!debug_active || !id->areas) + return NULL; + numargs=debug_count_numargs(string); + + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); + active = get_active_entry(id); + curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active); + va_start(ap,string); + curr_event->string=string; + for(idx=0;idx<min(numargs,(int)(id->buf_size / sizeof(long))-1);idx++) + curr_event->args[idx]=va_arg(ap,long); + va_end(ap); + debug_finish_entry(id, active, level, 0); + spin_unlock_irqrestore(&id->lock, flags); + + return active; +} + +/* + * debug_sprintf_exception: + */ + +debug_entry_t* +debug_sprintf_exception(debug_info_t* id, int level,char *string,...) +{ + va_list ap; + int numargs,idx; + unsigned long flags; + debug_sprintf_entry_t *curr_event; + debug_entry_t *active; + + if((!id) || (level > id->level)) + return NULL; + if (!debug_active || !id->areas) + return NULL; + + numargs=debug_count_numargs(string); + + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); + active = get_active_entry(id); + curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active); + va_start(ap,string); + curr_event->string=string; + for(idx=0;idx<min(numargs,(int)(id->buf_size / sizeof(long))-1);idx++) + curr_event->args[idx]=va_arg(ap,long); + va_end(ap); + debug_finish_entry(id, active, level, 1); + spin_unlock_irqrestore(&id->lock, flags); + + return active; +} + +/* + * debug_init: + * - is called exactly once to initialize the debug feature + */ + +static int +__init debug_init(void) +{ + int rc = 0; + + s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table); + mutex_lock(&debug_mutex); + debug_debugfs_root_entry = 
debugfs_create_dir(DEBUG_DIR_ROOT,NULL); + initialized = 1; + mutex_unlock(&debug_mutex); + + return rc; +} + +/* + * debug_register_view: + */ + +int +debug_register_view(debug_info_t * id, struct debug_view *view) +{ + int rc = 0; + int i; + unsigned long flags; + umode_t mode; + struct dentry *pde; + + if (!id) + goto out; + mode = (id->mode | S_IFREG) & ~S_IXUGO; + if (!(view->prolog_proc || view->format_proc || view->header_proc)) + mode &= ~(S_IRUSR | S_IRGRP | S_IROTH); + if (!view->input_proc) + mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry, + id , &debug_file_ops); + if (!pde){ + pr_err("Registering view %s/%s failed due to out of " + "memory\n", id->name,view->name); + rc = -1; + goto out; + } + spin_lock_irqsave(&id->lock, flags); + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (!id->views[i]) + break; + } + if (i == DEBUG_MAX_VIEWS) { + pr_err("Registering view %s/%s would exceed the maximum " + "number of views %i\n", id->name, view->name, i); + debugfs_remove(pde); + rc = -1; + } else { + id->views[i] = view; + id->debugfs_entries[i] = pde; + } + spin_unlock_irqrestore(&id->lock, flags); +out: + return rc; +} + +/* + * debug_unregister_view: + */ + +int +debug_unregister_view(debug_info_t * id, struct debug_view *view) +{ + int rc = 0; + int i; + unsigned long flags; + + if (!id) + goto out; + spin_lock_irqsave(&id->lock, flags); + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (id->views[i] == view) + break; + } + if (i == DEBUG_MAX_VIEWS) + rc = -1; + else { + debugfs_remove(id->debugfs_entries[i]); + id->views[i] = NULL; + } + spin_unlock_irqrestore(&id->lock, flags); +out: + return rc; +} + +static inline char * +debug_get_user_string(const char __user *user_buf, size_t user_len) +{ + char* buffer; + + buffer = kmalloc(user_len + 1, GFP_KERNEL); + if (!buffer) + return ERR_PTR(-ENOMEM); + if (copy_from_user(buffer, user_buf, user_len) != 0) { + kfree(buffer); + return ERR_PTR(-EFAULT); + 
} + /* got the string, now strip linefeed. */ + if (buffer[user_len - 1] == '\n') + buffer[user_len - 1] = 0; + else + buffer[user_len] = 0; + return buffer; +} + +static inline int +debug_get_uint(char *buf) +{ + int rc; + + buf = skip_spaces(buf); + rc = simple_strtoul(buf, &buf, 10); + if(*buf){ + rc = -EINVAL; + } + return rc; +} + +/* + * functions for debug-views + *********************************** +*/ + +/* + * prints out actual debug level + */ + +static int +debug_prolog_pages_fn(debug_info_t * id, + struct debug_view *view, char *out_buf) +{ + return sprintf(out_buf, "%i\n", id->pages_per_area); +} + +/* + * reads new size (number of pages per debug area) + */ + +static int +debug_input_pages_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t * offset) +{ + char *str; + int rc,new_pages; + + if (user_len > 0x10000) + user_len = 0x10000; + if (*offset != 0){ + rc = -EPIPE; + goto out; + } + str = debug_get_user_string(user_buf,user_len); + if(IS_ERR(str)){ + rc = PTR_ERR(str); + goto out; + } + new_pages = debug_get_uint(str); + if(new_pages < 0){ + rc = -EINVAL; + goto free_str; + } + rc = debug_set_size(id,id->nr_areas, new_pages); + if(rc != 0){ + rc = -EINVAL; + goto free_str; + } + rc = user_len; +free_str: + kfree(str); +out: + *offset += user_len; + return rc; /* number of input characters */ +} + +/* + * prints out actual debug level + */ + +static int +debug_prolog_level_fn(debug_info_t * id, struct debug_view *view, char *out_buf) +{ + int rc = 0; + + if(id->level == DEBUG_OFF_LEVEL) { + rc = sprintf(out_buf,"-\n"); + } + else { + rc = sprintf(out_buf, "%i\n", id->level); + } + return rc; +} + +/* + * reads new debug level + */ + +static int +debug_input_level_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t * offset) +{ + char *str; + int rc,new_level; + + if (user_len > 0x10000) + user_len = 
0x10000; + if (*offset != 0){ + rc = -EPIPE; + goto out; + } + str = debug_get_user_string(user_buf,user_len); + if(IS_ERR(str)){ + rc = PTR_ERR(str); + goto out; + } + if(str[0] == '-'){ + debug_set_level(id, DEBUG_OFF_LEVEL); + rc = user_len; + goto free_str; + } else { + new_level = debug_get_uint(str); + } + if(new_level < 0) { + pr_warning("%s is not a valid level for a debug " + "feature\n", str); + rc = -EINVAL; + } else { + debug_set_level(id, new_level); + rc = user_len; + } +free_str: + kfree(str); +out: + *offset += user_len; + return rc; /* number of input characters */ +} + + +/* + * flushes debug areas + */ + +static void debug_flush(debug_info_t* id, int area) +{ + unsigned long flags; + int i,j; + + if(!id || !id->areas) + return; + spin_lock_irqsave(&id->lock,flags); + if(area == DEBUG_FLUSH_ALL){ + id->active_area = 0; + memset(id->active_entries, 0, id->nr_areas * sizeof(int)); + for (i = 0; i < id->nr_areas; i++) { + id->active_pages[i] = 0; + for(j = 0; j < id->pages_per_area; j++) { + memset(id->areas[i][j], 0, PAGE_SIZE); + } + } + } else if(area >= 0 && area < id->nr_areas) { + id->active_entries[area] = 0; + id->active_pages[area] = 0; + for(i = 0; i < id->pages_per_area; i++) { + memset(id->areas[area][i],0,PAGE_SIZE); + } + } + spin_unlock_irqrestore(&id->lock,flags); +} + +/* + * view function: flushes debug areas + */ + +static int +debug_input_flush_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t * offset) +{ + char input_buf[1]; + int rc = user_len; + + if (user_len > 0x10000) + user_len = 0x10000; + if (*offset != 0){ + rc = -EPIPE; + goto out; + } + if (copy_from_user(input_buf, user_buf, 1)){ + rc = -EFAULT; + goto out; + } + if(input_buf[0] == '-') { + debug_flush(id, DEBUG_FLUSH_ALL); + goto out; + } + if (isdigit(input_buf[0])) { + int area = ((int) input_buf[0] - (int) '0'); + debug_flush(id, area); + goto out; + } + + pr_info("Flushing debug data failed 
because %c is not a valid " + "area\n", input_buf[0]); + +out: + *offset += user_len; + return rc; /* number of input characters */ +} + +/* + * prints debug header in raw format + */ + +static int +debug_raw_header_fn(debug_info_t * id, struct debug_view *view, + int area, debug_entry_t * entry, char *out_buf) +{ + int rc; + + rc = sizeof(debug_entry_t); + memcpy(out_buf,entry,sizeof(debug_entry_t)); + return rc; +} + +/* + * prints debug data in raw format + */ + +static int +debug_raw_format_fn(debug_info_t * id, struct debug_view *view, + char *out_buf, const char *in_buf) +{ + int rc; + + rc = id->buf_size; + memcpy(out_buf, in_buf, id->buf_size); + return rc; +} + +/* + * prints debug data in hex/ascii format + */ + +static int +debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, + char *out_buf, const char *in_buf) +{ + int i, rc = 0; + + for (i = 0; i < id->buf_size; i++) { + rc += sprintf(out_buf + rc, "%02x ", + ((unsigned char *) in_buf)[i]); + } + rc += sprintf(out_buf + rc, "| "); + for (i = 0; i < id->buf_size; i++) { + unsigned char c = in_buf[i]; + if (isascii(c) && isprint(c)) + rc += sprintf(out_buf + rc, "%c", c); + else + rc += sprintf(out_buf + rc, "."); + } + rc += sprintf(out_buf + rc, "\n"); + return rc; +} + +/* + * prints header for debug entry + */ + +int +debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, + int area, debug_entry_t * entry, char *out_buf) +{ + struct timespec time_spec; + char *except_str; + unsigned long caller; + int rc = 0; + unsigned int level; + + level = entry->id.fields.level; + stck_to_timespec(entry->id.stck, &time_spec); + + if (entry->id.fields.exception) + except_str = "*"; + else + except_str = "-"; + caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN; + rc += sprintf(out_buf, "%02i %011lu:%06lu %1u %1s %02i %p ", + area, time_spec.tv_sec, time_spec.tv_nsec / 1000, level, + except_str, entry->id.fields.cpuid, (void *) caller); + return rc; +} + +/* + * prints debug data 
sprintf-formated: + * debug_sprinf_event/exception calls must be used together with this view + */ + +#define DEBUG_SPRINTF_MAX_ARGS 10 + +static int +debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, + char *out_buf, debug_sprintf_entry_t *curr_event) +{ + int num_longs, num_used_args = 0,i, rc = 0; + int index[DEBUG_SPRINTF_MAX_ARGS]; + + /* count of longs fit into one entry */ + num_longs = id->buf_size / sizeof(long); + + if(num_longs < 1) + goto out; /* bufsize of entry too small */ + if(num_longs == 1) { + /* no args, we use only the string */ + strcpy(out_buf, curr_event->string); + rc = strlen(curr_event->string); + goto out; + } + + /* number of arguments used for sprintf (without the format string) */ + num_used_args = min(DEBUG_SPRINTF_MAX_ARGS, (num_longs - 1)); + + memset(index,0, DEBUG_SPRINTF_MAX_ARGS * sizeof(int)); + + for(i = 0; i < num_used_args; i++) + index[i] = i; + + rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]], + curr_event->args[index[1]], curr_event->args[index[2]], + curr_event->args[index[3]], curr_event->args[index[4]], + curr_event->args[index[5]], curr_event->args[index[6]], + curr_event->args[index[7]], curr_event->args[index[8]], + curr_event->args[index[9]]); + +out: + + return rc; +} + +/* + * clean up module + */ +static void __exit debug_exit(void) +{ + debugfs_remove(debug_debugfs_root_entry); + unregister_sysctl_table(s390dbf_sysctl_header); + return; +} + +/* + * module definitions + */ +postcore_initcall(debug_init); +module_exit(debug_exit); +MODULE_LICENSE("GPL"); + +EXPORT_SYMBOL(debug_register); +EXPORT_SYMBOL(debug_unregister); +EXPORT_SYMBOL(debug_set_level); +EXPORT_SYMBOL(debug_stop_all); +EXPORT_SYMBOL(debug_register_view); +EXPORT_SYMBOL(debug_unregister_view); +EXPORT_SYMBOL(debug_event_common); +EXPORT_SYMBOL(debug_exception_common); +EXPORT_SYMBOL(debug_hex_ascii_view); +EXPORT_SYMBOL(debug_raw_view); +EXPORT_SYMBOL(debug_dflt_header_fn); 
+EXPORT_SYMBOL(debug_sprintf_view); +EXPORT_SYMBOL(debug_sprintf_exception); +EXPORT_SYMBOL(debug_sprintf_event); diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c new file mode 100644 index 00000000..8237fc07 --- /dev/null +++ b/arch/s390/kernel/diag.c @@ -0,0 +1,81 @@ +/* + * Implementation of s390 diagnose codes + * + * Copyright IBM Corp. 2007 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#include <linux/module.h> +#include <asm/diag.h> + +/* + * Diagnose 14: Input spool file manipulation + * + * Issues diag rx,2,0x14 with ry1 pinned to register 2 and subcode pinned + * to register 3. Register 3 is a read-write operand of the asm, so the + * instruction may overwrite it; that value is not handed back to the + * caller. On CONFIG_64BIT kernels the diag is bracketed by sam31 and + * sam64 (presumably a temporary switch to 31-bit addressing - confirm). + * + * Returns the value left in rc by the ipm and srl 28 sequence that + * follows the diag; presumably this is the condition code of the diag - + * confirm against the Principles of Operation. + */ +int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) +{ + register unsigned long _ry1 asm("2") = ry1; + register unsigned long _ry2 asm("3") = subcode; + int rc = 0; + + asm volatile( +#ifdef CONFIG_64BIT + " sam31\n" + " diag %2,2,0x14\n" + " sam64\n" +#else + " diag %2,2,0x14\n" +#endif + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc), "+d" (_ry2) + : "d" (rx), "d" (_ry1) + : "cc"); + + return rc; +} +EXPORT_SYMBOL(diag14); + +/* + * Diagnose 210: Get information about a virtual device + * + * Copies *addr into a function-static buffer, issues diag 0x210 with the + * address of that buffer as operand, then copies the buffer back to + * *addr. The static buffer is shared by all callers, so diag210_lock is + * taken with spin_lock_irqsave() around the whole copy-in, diag, copy-out + * sequence. + * + * Returns ccode. It is preset to -1 by lhi and then overwritten by the + * ipm and srl 28 sequence. EX_TABLE(0b, 1b) pairs label 0 (the ipm) with + * label 1 (just past the srl), so a return value of -1 presumably means + * the diag faulted and the ipm and srl were skipped - confirm against the + * exception-table handling. In the CONFIG_64BIT variant label 1 sits on + * the sam64, so sam64 is executed on both paths. + */ +int diag210(struct diag210 *addr) +{ + /* + * diag 210 needs its data below the 2GB border, so we + * use a static data area to be sure + */ + static struct diag210 diag210_tmp; + static DEFINE_SPINLOCK(diag210_lock); + unsigned long flags; + int ccode; + + spin_lock_irqsave(&diag210_lock, flags); + diag210_tmp = *addr; + +#ifdef CONFIG_64BIT + asm volatile( + " lhi %0,-1\n" + " sam31\n" + " diag %1,0,0x210\n" + "0: ipm %0\n" + " srl %0,28\n" + "1: sam64\n" + EX_TABLE(0b, 1b) + : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory"); +#else + asm volatile( + " lhi %0,-1\n" + " diag %1,0,0x210\n" + "0: ipm %0\n" + " srl %0,28\n" + "1:\n" + EX_TABLE(0b, 1b) + : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory"); +#endif + + *addr = diag210_tmp; + spin_unlock_irqrestore(&diag210_lock, flags); + + return ccode; +} +EXPORT_SYMBOL(diag210); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c new file mode 100644 index 00000000..3221c6fc 
--- /dev/null +++ b/arch/s390/kernel/dis.c @@ -0,0 +1,1604 @@ +/* + * arch/s390/kernel/dis.c + * + * Disassemble s390 instructions. + * + * Copyright IBM Corp. 2007 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/reboot.h> +#include <linux/kprobes.h> +#include <linux/kdebug.h> + +#include <asm/uaccess.h> +#include <asm/io.h> +#include <linux/atomic.h> +#include <asm/mathemu.h> +#include <asm/cpcmd.h> +#include <asm/lowcore.h> +#include <asm/debug.h> +#include <asm/irq.h> + +#ifndef CONFIG_64BIT +#define ONELONG "%08lx: " +#else /* CONFIG_64BIT */ +#define ONELONG "%016lx: " +#endif /* CONFIG_64BIT */ + +#define OPERAND_GPR 0x1 /* Operand printed as %rx */ +#define OPERAND_FPR 0x2 /* Operand printed as %fx */ +#define OPERAND_AR 0x4 /* Operand printed as %ax */ +#define OPERAND_CR 0x8 /* Operand printed as %cx */ +#define OPERAND_DISP 0x10 /* Operand printed as displacement */ +#define OPERAND_BASE 0x20 /* Operand printed as base register */ +#define OPERAND_INDEX 0x40 /* Operand printed as index register */ +#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */ +#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */ +#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */ + +enum { + UNUSED, /* Indicates the end of the operand list */ + R_8, /* GPR starting at position 8 */ + R_12, /* GPR starting at position 12 */ + R_16, /* GPR starting at position 16 */ + R_20, /* GPR starting at position 20 */ + R_24, /* GPR starting at position 24 */ + R_28, /* GPR starting at position 28 */ + R_32, /* GPR starting at position 32 */ + F_8, /* FPR starting at position 
8 */ + F_12, /* FPR starting at position 12 */ + F_16, /* FPR starting at position 16 */ + F_20, /* FPR starting at position 16 */ + F_24, /* FPR starting at position 24 */ + F_28, /* FPR starting at position 28 */ + F_32, /* FPR starting at position 32 */ + A_8, /* Access reg. starting at position 8 */ + A_12, /* Access reg. starting at position 12 */ + A_24, /* Access reg. starting at position 24 */ + A_28, /* Access reg. starting at position 28 */ + C_8, /* Control reg. starting at position 8 */ + C_12, /* Control reg. starting at position 12 */ + B_16, /* Base register starting at position 16 */ + B_32, /* Base register starting at position 32 */ + X_12, /* Index register starting at position 12 */ + D_20, /* Displacement starting at position 20 */ + D_36, /* Displacement starting at position 36 */ + D20_20, /* 20 bit displacement starting at 20 */ + L4_8, /* 4 bit length starting at position 8 */ + L4_12, /* 4 bit length starting at position 12 */ + L8_8, /* 8 bit length starting at position 8 */ + U4_8, /* 4 bit unsigned value starting at 8 */ + U4_12, /* 4 bit unsigned value starting at 12 */ + U4_16, /* 4 bit unsigned value starting at 16 */ + U4_20, /* 4 bit unsigned value starting at 20 */ + U4_32, /* 4 bit unsigned value starting at 32 */ + U8_8, /* 8 bit unsigned value starting at 8 */ + U8_16, /* 8 bit unsigned value starting at 16 */ + U8_24, /* 8 bit unsigned value starting at 24 */ + U8_32, /* 8 bit unsigned value starting at 32 */ + I8_8, /* 8 bit signed value starting at 8 */ + I8_32, /* 8 bit signed value starting at 32 */ + I16_16, /* 16 bit signed value starting at 16 */ + I16_32, /* 32 bit signed value starting at 16 */ + U16_16, /* 16 bit unsigned value starting at 16 */ + U16_32, /* 32 bit unsigned value starting at 16 */ + J16_16, /* PC relative jump offset at 16 */ + J32_16, /* PC relative long offset at 16 */ + I32_16, /* 32 bit signed value starting at 16 */ + U32_16, /* 32 bit unsigned value starting at 16 */ + M_16, /* 4 bit optional 
mask starting at 16 */ + RO_28, /* optional GPR starting at position 28 */ +}; + +/* + * Enumeration of the different instruction formats. + * For details consult the principles of operation. + */ +enum { + INSTR_INVALID, + INSTR_E, + INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU, + INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0, + INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP, + INSTR_RIS_R0RDU, INSTR_RIS_R0UU, INSTR_RIS_RURDI, INSTR_RIS_RURDU, + INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP, + INSTR_RRE_00, INSTR_RRE_0R, INSTR_RRE_AA, INSTR_RRE_AR, INSTR_RRE_F0, + INSTR_RRE_FF, INSTR_RRE_FR, INSTR_RRE_R0, INSTR_RRE_RA, INSTR_RRE_RF, + INSTR_RRE_RR, INSTR_RRE_RR_OPT, + INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR, + INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_M0RR, INSTR_RRF_R0RR, + INSTR_RRF_R0RR2, INSTR_RRF_RURR, INSTR_RRF_U0FF, INSTR_RRF_U0RF, + INSTR_RRF_U0RR, INSTR_RRF_UUFF, INSTR_RRR_F0FF, INSTR_RRS_RRRDU, + INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR, + INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD, + INSTR_RSI_RRP, + INSTR_RSL_R0RD, + INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD, + INSTR_RSY_RDRM, + INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, + INSTR_RS_RURD, + INSTR_RXE_FRRD, INSTR_RXE_RRRD, + INSTR_RXF_FRRDF, + INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD, + INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD, + INSTR_SIL_RDI, INSTR_SIL_RDU, + INSTR_SIY_IRD, INSTR_SIY_URD, + INSTR_SI_URD, + INSTR_SSE_RDRD, + INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2, + INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, + INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3, + INSTR_S_00, INSTR_S_RD, +}; + +struct operand { + int bits; /* The number of bits in the operand. */ + int shift; /* The number of bits to shift. */ + int flags; /* One bit syntax flags. 
*/ +}; + +struct insn { + const char name[5]; + unsigned char opfrag; + unsigned char format; +}; + +static const struct operand operands[] = +{ + [UNUSED] = { 0, 0, 0 }, + [R_8] = { 4, 8, OPERAND_GPR }, + [R_12] = { 4, 12, OPERAND_GPR }, + [R_16] = { 4, 16, OPERAND_GPR }, + [R_20] = { 4, 20, OPERAND_GPR }, + [R_24] = { 4, 24, OPERAND_GPR }, + [R_28] = { 4, 28, OPERAND_GPR }, + [R_32] = { 4, 32, OPERAND_GPR }, + [F_8] = { 4, 8, OPERAND_FPR }, + [F_12] = { 4, 12, OPERAND_FPR }, + [F_16] = { 4, 16, OPERAND_FPR }, + [F_20] = { 4, 16, OPERAND_FPR }, + [F_24] = { 4, 24, OPERAND_FPR }, + [F_28] = { 4, 28, OPERAND_FPR }, + [F_32] = { 4, 32, OPERAND_FPR }, + [A_8] = { 4, 8, OPERAND_AR }, + [A_12] = { 4, 12, OPERAND_AR }, + [A_24] = { 4, 24, OPERAND_AR }, + [A_28] = { 4, 28, OPERAND_AR }, + [C_8] = { 4, 8, OPERAND_CR }, + [C_12] = { 4, 12, OPERAND_CR }, + [B_16] = { 4, 16, OPERAND_BASE | OPERAND_GPR }, + [B_32] = { 4, 32, OPERAND_BASE | OPERAND_GPR }, + [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR }, + [D_20] = { 12, 20, OPERAND_DISP }, + [D_36] = { 12, 36, OPERAND_DISP }, + [D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED }, + [L4_8] = { 4, 8, OPERAND_LENGTH }, + [L4_12] = { 4, 12, OPERAND_LENGTH }, + [L8_8] = { 8, 8, OPERAND_LENGTH }, + [U4_8] = { 4, 8, 0 }, + [U4_12] = { 4, 12, 0 }, + [U4_16] = { 4, 16, 0 }, + [U4_20] = { 4, 20, 0 }, + [U4_32] = { 4, 32, 0 }, + [U8_8] = { 8, 8, 0 }, + [U8_16] = { 8, 16, 0 }, + [U8_24] = { 8, 24, 0 }, + [U8_32] = { 8, 32, 0 }, + [I16_16] = { 16, 16, OPERAND_SIGNED }, + [U16_16] = { 16, 16, 0 }, + [U16_32] = { 16, 32, 0 }, + [J16_16] = { 16, 16, OPERAND_PCREL }, + [I16_32] = { 16, 32, OPERAND_SIGNED }, + [J32_16] = { 32, 16, OPERAND_PCREL }, + [I32_16] = { 32, 16, OPERAND_SIGNED }, + [U32_16] = { 32, 16, 0 }, + [M_16] = { 4, 16, 0 }, + [RO_28] = { 4, 28, OPERAND_GPR } +}; + +static const unsigned char formats[][7] = { + [INSTR_E] = { 0xff, 0,0,0,0,0,0 }, + [INSTR_RIE_R0UU] = { 0xff, R_8,U16_16,U4_32,0,0,0 }, + [INSTR_RIE_RRPU] = { 0xff, 
R_8,R_12,U4_32,J16_16,0,0 }, + [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, + [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 }, + [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 }, + [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 }, + [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 }, + [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 }, + [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 }, + [INSTR_RIL_UP] = { 0x0f, U4_8,J32_16,0,0,0,0 }, + [INSTR_RIS_R0RDU] = { 0xff, R_8,U8_32,D_20,B_16,0,0 }, + [INSTR_RIS_RURDI] = { 0xff, R_8,I8_32,U4_12,D_20,B_16,0 }, + [INSTR_RIS_RURDU] = { 0xff, R_8,U8_32,U4_12,D_20,B_16,0 }, + [INSTR_RI_RI] = { 0x0f, R_8,I16_16,0,0,0,0 }, + [INSTR_RI_RP] = { 0x0f, R_8,J16_16,0,0,0,0 }, + [INSTR_RI_RU] = { 0x0f, R_8,U16_16,0,0,0,0 }, + [INSTR_RI_UP] = { 0x0f, U4_8,J16_16,0,0,0,0 }, + [INSTR_RRE_00] = { 0xff, 0,0,0,0,0,0 }, + [INSTR_RRE_0R] = { 0xff, R_28,0,0,0,0,0 }, + [INSTR_RRE_AA] = { 0xff, A_24,A_28,0,0,0,0 }, + [INSTR_RRE_AR] = { 0xff, A_24,R_28,0,0,0,0 }, + [INSTR_RRE_F0] = { 0xff, F_24,0,0,0,0,0 }, + [INSTR_RRE_FF] = { 0xff, F_24,F_28,0,0,0,0 }, + [INSTR_RRE_FR] = { 0xff, F_24,R_28,0,0,0,0 }, + [INSTR_RRE_R0] = { 0xff, R_24,0,0,0,0,0 }, + [INSTR_RRE_RA] = { 0xff, R_24,A_28,0,0,0,0 }, + [INSTR_RRE_RF] = { 0xff, R_24,F_28,0,0,0,0 }, + [INSTR_RRE_RR] = { 0xff, R_24,R_28,0,0,0,0 }, + [INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 }, + [INSTR_RRF_0UFF] = { 0xff, F_24,F_28,U4_20,0,0,0 }, + [INSTR_RRF_F0FF2] = { 0xff, F_24,F_16,F_28,0,0,0 }, + [INSTR_RRF_F0FF] = { 0xff, F_16,F_24,F_28,0,0,0 }, + [INSTR_RRF_F0FR] = { 0xff, F_24,F_16,R_28,0,0,0 }, + [INSTR_RRF_FFRU] = { 0xff, F_24,F_16,R_28,U4_20,0,0 }, + [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 }, + [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 }, + [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 }, + [INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 }, + [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 }, + [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 }, + 
[INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 }, + [INSTR_RRF_U0RR] = { 0xff, R_24,R_28,U4_16,0,0,0 }, + [INSTR_RRF_UUFF] = { 0xff, F_24,U4_16,F_28,U4_20,0,0 }, + [INSTR_RRR_F0FF] = { 0xff, F_24,F_28,F_16,0,0,0 }, + [INSTR_RRS_RRRDU] = { 0xff, R_8,R_12,U4_32,D_20,B_16,0 }, + [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 }, + [INSTR_RR_R0] = { 0xff, R_8, 0,0,0,0,0 }, + [INSTR_RR_RR] = { 0xff, R_8,R_12,0,0,0,0 }, + [INSTR_RR_U0] = { 0xff, U8_8, 0,0,0,0,0 }, + [INSTR_RR_UR] = { 0xff, U4_8,R_12,0,0,0,0 }, + [INSTR_RSE_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, + [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, + [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, + [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, + [INSTR_RSL_R0RD] = { 0xff, D_20,L4_8,B_16,0,0,0 }, + [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 }, + [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 }, + [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 }, + [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, + [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 }, + [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, + [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, + [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, + [INSTR_RS_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, + [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, + [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, + [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, + [INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 }, + [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 }, + [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 }, + [INSTR_RXY_URRD] = { 0xff, U4_8,D20_20,X_12,B_16,0,0 }, + [INSTR_RX_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, + [INSTR_RX_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, + [INSTR_RX_URRD] = { 0xff, U4_8,D_20,X_12,B_16,0,0 }, + [INSTR_SIL_RDI] = { 0xff, D_20,B_16,I16_32,0,0,0 }, + [INSTR_SIL_RDU] = { 0xff, D_20,B_16,U16_32,0,0,0 }, + [INSTR_SIY_IRD] = { 0xff, D20_20,B_16,I8_8,0,0,0 }, + 
[INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 }, + [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 }, + [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 }, + [INSTR_SSF_RRDRD] = { 0x00, D_20,B_16,D_36,B_32,R_8,0 }, + [INSTR_SSF_RRDRD2]= { 0x00, R_8,D_20,B_16,D_36,B_32,0 }, + [INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 }, + [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 }, + [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 }, + [INSTR_SS_RRRDRD2]= { 0xff, R_8,D_20,B_16,R_12,D_36,B_32 }, + [INSTR_SS_RRRDRD3]= { 0xff, R_8,R_12,D_20,B_16,D_36,B_32 }, + [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 }, + [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 }, + [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, +}; + +enum { + LONG_INSN_ALGHSIK, + LONG_INSN_ALHSIK, + LONG_INSN_CLFHSI, + LONG_INSN_CLGFRL, + LONG_INSN_CLGHRL, + LONG_INSN_CLGHSI, + LONG_INSN_CLHHSI, + LONG_INSN_LLGFRL, + LONG_INSN_LLGHRL, + LONG_INSN_POPCNT, + LONG_INSN_RISBHG, + LONG_INSN_RISBLG, +}; + +static char *long_insn_name[] = { + [LONG_INSN_ALGHSIK] = "alghsik", + [LONG_INSN_ALHSIK] = "alhsik", + [LONG_INSN_CLFHSI] = "clfhsi", + [LONG_INSN_CLGFRL] = "clgfrl", + [LONG_INSN_CLGHRL] = "clghrl", + [LONG_INSN_CLGHSI] = "clghsi", + [LONG_INSN_CLHHSI] = "clhhsi", + [LONG_INSN_LLGFRL] = "llgfrl", + [LONG_INSN_LLGHRL] = "llghrl", + [LONG_INSN_POPCNT] = "popcnt", + [LONG_INSN_RISBHG] = "risbhg", + [LONG_INSN_RISBLG] = "risblk", +}; + +static struct insn opcode[] = { +#ifdef CONFIG_64BIT + { "lmd", 0xef, INSTR_SS_RRRDRD3 }, +#endif + { "spm", 0x04, INSTR_RR_R0 }, + { "balr", 0x05, INSTR_RR_RR }, + { "bctr", 0x06, INSTR_RR_RR }, + { "bcr", 0x07, INSTR_RR_UR }, + { "svc", 0x0a, INSTR_RR_U0 }, + { "bsm", 0x0b, INSTR_RR_RR }, + { "bassm", 0x0c, INSTR_RR_RR }, + { "basr", 0x0d, INSTR_RR_RR }, + { "mvcl", 0x0e, INSTR_RR_RR }, + { "clcl", 0x0f, INSTR_RR_RR }, + { "lpr", 0x10, INSTR_RR_RR }, + { "lnr", 0x11, INSTR_RR_RR }, + { "ltr", 0x12, INSTR_RR_RR }, + { "lcr", 0x13, INSTR_RR_RR }, + { 
"nr", 0x14, INSTR_RR_RR }, + { "clr", 0x15, INSTR_RR_RR }, + { "or", 0x16, INSTR_RR_RR }, + { "xr", 0x17, INSTR_RR_RR }, + { "lr", 0x18, INSTR_RR_RR }, + { "cr", 0x19, INSTR_RR_RR }, + { "ar", 0x1a, INSTR_RR_RR }, + { "sr", 0x1b, INSTR_RR_RR }, + { "mr", 0x1c, INSTR_RR_RR }, + { "dr", 0x1d, INSTR_RR_RR }, + { "alr", 0x1e, INSTR_RR_RR }, + { "slr", 0x1f, INSTR_RR_RR }, + { "lpdr", 0x20, INSTR_RR_FF }, + { "lndr", 0x21, INSTR_RR_FF }, + { "ltdr", 0x22, INSTR_RR_FF }, + { "lcdr", 0x23, INSTR_RR_FF }, + { "hdr", 0x24, INSTR_RR_FF }, + { "ldxr", 0x25, INSTR_RR_FF }, + { "lrdr", 0x25, INSTR_RR_FF }, + { "mxr", 0x26, INSTR_RR_FF }, + { "mxdr", 0x27, INSTR_RR_FF }, + { "ldr", 0x28, INSTR_RR_FF }, + { "cdr", 0x29, INSTR_RR_FF }, + { "adr", 0x2a, INSTR_RR_FF }, + { "sdr", 0x2b, INSTR_RR_FF }, + { "mdr", 0x2c, INSTR_RR_FF }, + { "ddr", 0x2d, INSTR_RR_FF }, + { "awr", 0x2e, INSTR_RR_FF }, + { "swr", 0x2f, INSTR_RR_FF }, + { "lper", 0x30, INSTR_RR_FF }, + { "lner", 0x31, INSTR_RR_FF }, + { "lter", 0x32, INSTR_RR_FF }, + { "lcer", 0x33, INSTR_RR_FF }, + { "her", 0x34, INSTR_RR_FF }, + { "ledr", 0x35, INSTR_RR_FF }, + { "lrer", 0x35, INSTR_RR_FF }, + { "axr", 0x36, INSTR_RR_FF }, + { "sxr", 0x37, INSTR_RR_FF }, + { "ler", 0x38, INSTR_RR_FF }, + { "cer", 0x39, INSTR_RR_FF }, + { "aer", 0x3a, INSTR_RR_FF }, + { "ser", 0x3b, INSTR_RR_FF }, + { "mder", 0x3c, INSTR_RR_FF }, + { "mer", 0x3c, INSTR_RR_FF }, + { "der", 0x3d, INSTR_RR_FF }, + { "aur", 0x3e, INSTR_RR_FF }, + { "sur", 0x3f, INSTR_RR_FF }, + { "sth", 0x40, INSTR_RX_RRRD }, + { "la", 0x41, INSTR_RX_RRRD }, + { "stc", 0x42, INSTR_RX_RRRD }, + { "ic", 0x43, INSTR_RX_RRRD }, + { "ex", 0x44, INSTR_RX_RRRD }, + { "bal", 0x45, INSTR_RX_RRRD }, + { "bct", 0x46, INSTR_RX_RRRD }, + { "bc", 0x47, INSTR_RX_URRD }, + { "lh", 0x48, INSTR_RX_RRRD }, + { "ch", 0x49, INSTR_RX_RRRD }, + { "ah", 0x4a, INSTR_RX_RRRD }, + { "sh", 0x4b, INSTR_RX_RRRD }, + { "mh", 0x4c, INSTR_RX_RRRD }, + { "bas", 0x4d, INSTR_RX_RRRD }, + { "cvd", 0x4e, 
INSTR_RX_RRRD }, + { "cvb", 0x4f, INSTR_RX_RRRD }, + { "st", 0x50, INSTR_RX_RRRD }, + { "lae", 0x51, INSTR_RX_RRRD }, + { "n", 0x54, INSTR_RX_RRRD }, + { "cl", 0x55, INSTR_RX_RRRD }, + { "o", 0x56, INSTR_RX_RRRD }, + { "x", 0x57, INSTR_RX_RRRD }, + { "l", 0x58, INSTR_RX_RRRD }, + { "c", 0x59, INSTR_RX_RRRD }, + { "a", 0x5a, INSTR_RX_RRRD }, + { "s", 0x5b, INSTR_RX_RRRD }, + { "m", 0x5c, INSTR_RX_RRRD }, + { "d", 0x5d, INSTR_RX_RRRD }, + { "al", 0x5e, INSTR_RX_RRRD }, + { "sl", 0x5f, INSTR_RX_RRRD }, + { "std", 0x60, INSTR_RX_FRRD }, + { "mxd", 0x67, INSTR_RX_FRRD }, + { "ld", 0x68, INSTR_RX_FRRD }, + { "cd", 0x69, INSTR_RX_FRRD }, + { "ad", 0x6a, INSTR_RX_FRRD }, + { "sd", 0x6b, INSTR_RX_FRRD }, + { "md", 0x6c, INSTR_RX_FRRD }, + { "dd", 0x6d, INSTR_RX_FRRD }, + { "aw", 0x6e, INSTR_RX_FRRD }, + { "sw", 0x6f, INSTR_RX_FRRD }, + { "ste", 0x70, INSTR_RX_FRRD }, + { "ms", 0x71, INSTR_RX_RRRD }, + { "le", 0x78, INSTR_RX_FRRD }, + { "ce", 0x79, INSTR_RX_FRRD }, + { "ae", 0x7a, INSTR_RX_FRRD }, + { "se", 0x7b, INSTR_RX_FRRD }, + { "mde", 0x7c, INSTR_RX_FRRD }, + { "me", 0x7c, INSTR_RX_FRRD }, + { "de", 0x7d, INSTR_RX_FRRD }, + { "au", 0x7e, INSTR_RX_FRRD }, + { "su", 0x7f, INSTR_RX_FRRD }, + { "ssm", 0x80, INSTR_S_RD }, + { "lpsw", 0x82, INSTR_S_RD }, + { "diag", 0x83, INSTR_RS_RRRD }, + { "brxh", 0x84, INSTR_RSI_RRP }, + { "brxle", 0x85, INSTR_RSI_RRP }, + { "bxh", 0x86, INSTR_RS_RRRD }, + { "bxle", 0x87, INSTR_RS_RRRD }, + { "srl", 0x88, INSTR_RS_R0RD }, + { "sll", 0x89, INSTR_RS_R0RD }, + { "sra", 0x8a, INSTR_RS_R0RD }, + { "sla", 0x8b, INSTR_RS_R0RD }, + { "srdl", 0x8c, INSTR_RS_R0RD }, + { "sldl", 0x8d, INSTR_RS_R0RD }, + { "srda", 0x8e, INSTR_RS_R0RD }, + { "slda", 0x8f, INSTR_RS_R0RD }, + { "stm", 0x90, INSTR_RS_RRRD }, + { "tm", 0x91, INSTR_SI_URD }, + { "mvi", 0x92, INSTR_SI_URD }, + { "ts", 0x93, INSTR_S_RD }, + { "ni", 0x94, INSTR_SI_URD }, + { "cli", 0x95, INSTR_SI_URD }, + { "oi", 0x96, INSTR_SI_URD }, + { "xi", 0x97, INSTR_SI_URD }, + { "lm", 0x98, 
INSTR_RS_RRRD }, + { "trace", 0x99, INSTR_RS_RRRD }, + { "lam", 0x9a, INSTR_RS_AARD }, + { "stam", 0x9b, INSTR_RS_AARD }, + { "mvcle", 0xa8, INSTR_RS_RRRD }, + { "clcle", 0xa9, INSTR_RS_RRRD }, + { "stnsm", 0xac, INSTR_SI_URD }, + { "stosm", 0xad, INSTR_SI_URD }, + { "sigp", 0xae, INSTR_RS_RRRD }, + { "mc", 0xaf, INSTR_SI_URD }, + { "lra", 0xb1, INSTR_RX_RRRD }, + { "stctl", 0xb6, INSTR_RS_CCRD }, + { "lctl", 0xb7, INSTR_RS_CCRD }, + { "cs", 0xba, INSTR_RS_RRRD }, + { "cds", 0xbb, INSTR_RS_RRRD }, + { "clm", 0xbd, INSTR_RS_RURD }, + { "stcm", 0xbe, INSTR_RS_RURD }, + { "icm", 0xbf, INSTR_RS_RURD }, + { "mvn", 0xd1, INSTR_SS_L0RDRD }, + { "mvc", 0xd2, INSTR_SS_L0RDRD }, + { "mvz", 0xd3, INSTR_SS_L0RDRD }, + { "nc", 0xd4, INSTR_SS_L0RDRD }, + { "clc", 0xd5, INSTR_SS_L0RDRD }, + { "oc", 0xd6, INSTR_SS_L0RDRD }, + { "xc", 0xd7, INSTR_SS_L0RDRD }, + { "mvck", 0xd9, INSTR_SS_RRRDRD }, + { "mvcp", 0xda, INSTR_SS_RRRDRD }, + { "mvcs", 0xdb, INSTR_SS_RRRDRD }, + { "tr", 0xdc, INSTR_SS_L0RDRD }, + { "trt", 0xdd, INSTR_SS_L0RDRD }, + { "ed", 0xde, INSTR_SS_L0RDRD }, + { "edmk", 0xdf, INSTR_SS_L0RDRD }, + { "pku", 0xe1, INSTR_SS_L0RDRD }, + { "unpku", 0xe2, INSTR_SS_L0RDRD }, + { "mvcin", 0xe8, INSTR_SS_L0RDRD }, + { "pka", 0xe9, INSTR_SS_L0RDRD }, + { "unpka", 0xea, INSTR_SS_L0RDRD }, + { "plo", 0xee, INSTR_SS_RRRDRD2 }, + { "srp", 0xf0, INSTR_SS_LIRDRD }, + { "mvo", 0xf1, INSTR_SS_LLRDRD }, + { "pack", 0xf2, INSTR_SS_LLRDRD }, + { "unpk", 0xf3, INSTR_SS_LLRDRD }, + { "zap", 0xf8, INSTR_SS_LLRDRD }, + { "cp", 0xf9, INSTR_SS_LLRDRD }, + { "ap", 0xfa, INSTR_SS_LLRDRD }, + { "sp", 0xfb, INSTR_SS_LLRDRD }, + { "mp", 0xfc, INSTR_SS_LLRDRD }, + { "dp", 0xfd, INSTR_SS_LLRDRD }, + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_01[] = { +#ifdef CONFIG_64BIT + { "sam64", 0x0e, INSTR_E }, + { "pfpo", 0x0a, INSTR_E }, + { "ptff", 0x04, INSTR_E }, +#endif + { "pr", 0x01, INSTR_E }, + { "upt", 0x02, INSTR_E }, + { "sckpf", 0x07, INSTR_E }, + { "tam", 0x0b, INSTR_E }, + { 
"sam24", 0x0c, INSTR_E }, + { "sam31", 0x0d, INSTR_E }, + { "trap2", 0xff, INSTR_E }, + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_a5[] = { +#ifdef CONFIG_64BIT + { "iihh", 0x00, INSTR_RI_RU }, + { "iihl", 0x01, INSTR_RI_RU }, + { "iilh", 0x02, INSTR_RI_RU }, + { "iill", 0x03, INSTR_RI_RU }, + { "nihh", 0x04, INSTR_RI_RU }, + { "nihl", 0x05, INSTR_RI_RU }, + { "nilh", 0x06, INSTR_RI_RU }, + { "nill", 0x07, INSTR_RI_RU }, + { "oihh", 0x08, INSTR_RI_RU }, + { "oihl", 0x09, INSTR_RI_RU }, + { "oilh", 0x0a, INSTR_RI_RU }, + { "oill", 0x0b, INSTR_RI_RU }, + { "llihh", 0x0c, INSTR_RI_RU }, + { "llihl", 0x0d, INSTR_RI_RU }, + { "llilh", 0x0e, INSTR_RI_RU }, + { "llill", 0x0f, INSTR_RI_RU }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_a7[] = { +#ifdef CONFIG_64BIT + { "tmhh", 0x02, INSTR_RI_RU }, + { "tmhl", 0x03, INSTR_RI_RU }, + { "brctg", 0x07, INSTR_RI_RP }, + { "lghi", 0x09, INSTR_RI_RI }, + { "aghi", 0x0b, INSTR_RI_RI }, + { "mghi", 0x0d, INSTR_RI_RI }, + { "cghi", 0x0f, INSTR_RI_RI }, +#endif + { "tmlh", 0x00, INSTR_RI_RU }, + { "tmll", 0x01, INSTR_RI_RU }, + { "brc", 0x04, INSTR_RI_UP }, + { "bras", 0x05, INSTR_RI_RP }, + { "brct", 0x06, INSTR_RI_RP }, + { "lhi", 0x08, INSTR_RI_RI }, + { "ahi", 0x0a, INSTR_RI_RI }, + { "mhi", 0x0c, INSTR_RI_RI }, + { "chi", 0x0e, INSTR_RI_RI }, + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_b2[] = { +#ifdef CONFIG_64BIT + { "sske", 0x2b, INSTR_RRF_M0RR }, + { "stckf", 0x7c, INSTR_S_RD }, + { "cu21", 0xa6, INSTR_RRF_M0RR }, + { "cuutf", 0xa6, INSTR_RRF_M0RR }, + { "cu12", 0xa7, INSTR_RRF_M0RR }, + { "cutfu", 0xa7, INSTR_RRF_M0RR }, + { "stfle", 0xb0, INSTR_S_RD }, + { "lpswe", 0xb2, INSTR_S_RD }, + { "srnmt", 0xb9, INSTR_S_RD }, + { "lfas", 0xbd, INSTR_S_RD }, +#endif + { "stidp", 0x02, INSTR_S_RD }, + { "sck", 0x04, INSTR_S_RD }, + { "stck", 0x05, INSTR_S_RD }, + { "sckc", 0x06, INSTR_S_RD }, + { "stckc", 0x07, INSTR_S_RD }, + { "spt", 0x08, INSTR_S_RD }, + { "stpt", 0x09, 
INSTR_S_RD }, + { "spka", 0x0a, INSTR_S_RD }, + { "ipk", 0x0b, INSTR_S_00 }, + { "ptlb", 0x0d, INSTR_S_00 }, + { "spx", 0x10, INSTR_S_RD }, + { "stpx", 0x11, INSTR_S_RD }, + { "stap", 0x12, INSTR_S_RD }, + { "sie", 0x14, INSTR_S_RD }, + { "pc", 0x18, INSTR_S_RD }, + { "sac", 0x19, INSTR_S_RD }, + { "cfc", 0x1a, INSTR_S_RD }, + { "ipte", 0x21, INSTR_RRE_RR }, + { "ipm", 0x22, INSTR_RRE_R0 }, + { "ivsk", 0x23, INSTR_RRE_RR }, + { "iac", 0x24, INSTR_RRE_R0 }, + { "ssar", 0x25, INSTR_RRE_R0 }, + { "epar", 0x26, INSTR_RRE_R0 }, + { "esar", 0x27, INSTR_RRE_R0 }, + { "pt", 0x28, INSTR_RRE_RR }, + { "iske", 0x29, INSTR_RRE_RR }, + { "rrbe", 0x2a, INSTR_RRE_RR }, + { "sske", 0x2b, INSTR_RRE_RR }, + { "tb", 0x2c, INSTR_RRE_0R }, + { "dxr", 0x2d, INSTR_RRE_F0 }, + { "pgin", 0x2e, INSTR_RRE_RR }, + { "pgout", 0x2f, INSTR_RRE_RR }, + { "csch", 0x30, INSTR_S_00 }, + { "hsch", 0x31, INSTR_S_00 }, + { "msch", 0x32, INSTR_S_RD }, + { "ssch", 0x33, INSTR_S_RD }, + { "stsch", 0x34, INSTR_S_RD }, + { "tsch", 0x35, INSTR_S_RD }, + { "tpi", 0x36, INSTR_S_RD }, + { "sal", 0x37, INSTR_S_00 }, + { "rsch", 0x38, INSTR_S_00 }, + { "stcrw", 0x39, INSTR_S_RD }, + { "stcps", 0x3a, INSTR_S_RD }, + { "rchp", 0x3b, INSTR_S_00 }, + { "schm", 0x3c, INSTR_S_00 }, + { "bakr", 0x40, INSTR_RRE_RR }, + { "cksm", 0x41, INSTR_RRE_RR }, + { "sqdr", 0x44, INSTR_RRE_F0 }, + { "sqer", 0x45, INSTR_RRE_F0 }, + { "stura", 0x46, INSTR_RRE_RR }, + { "msta", 0x47, INSTR_RRE_R0 }, + { "palb", 0x48, INSTR_RRE_00 }, + { "ereg", 0x49, INSTR_RRE_RR }, + { "esta", 0x4a, INSTR_RRE_RR }, + { "lura", 0x4b, INSTR_RRE_RR }, + { "tar", 0x4c, INSTR_RRE_AR }, + { "cpya", 0x4d, INSTR_RRE_AA }, + { "sar", 0x4e, INSTR_RRE_AR }, + { "ear", 0x4f, INSTR_RRE_RA }, + { "csp", 0x50, INSTR_RRE_RR }, + { "msr", 0x52, INSTR_RRE_RR }, + { "mvpg", 0x54, INSTR_RRE_RR }, + { "mvst", 0x55, INSTR_RRE_RR }, + { "cuse", 0x57, INSTR_RRE_RR }, + { "bsg", 0x58, INSTR_RRE_RR }, + { "bsa", 0x5a, INSTR_RRE_RR }, + { "clst", 0x5d, INSTR_RRE_RR }, + { 
"srst", 0x5e, INSTR_RRE_RR }, + { "cmpsc", 0x63, INSTR_RRE_RR }, + { "siga", 0x74, INSTR_S_RD }, + { "xsch", 0x76, INSTR_S_00 }, + { "rp", 0x77, INSTR_S_RD }, + { "stcke", 0x78, INSTR_S_RD }, + { "sacf", 0x79, INSTR_S_RD }, + { "spp", 0x80, INSTR_S_RD }, + { "stsi", 0x7d, INSTR_S_RD }, + { "srnm", 0x99, INSTR_S_RD }, + { "stfpc", 0x9c, INSTR_S_RD }, + { "lfpc", 0x9d, INSTR_S_RD }, + { "tre", 0xa5, INSTR_RRE_RR }, + { "cuutf", 0xa6, INSTR_RRE_RR }, + { "cutfu", 0xa7, INSTR_RRE_RR }, + { "stfl", 0xb1, INSTR_S_RD }, + { "trap4", 0xff, INSTR_S_RD }, + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_b3[] = { +#ifdef CONFIG_64BIT + { "maylr", 0x38, INSTR_RRF_F0FF }, + { "mylr", 0x39, INSTR_RRF_F0FF }, + { "mayr", 0x3a, INSTR_RRF_F0FF }, + { "myr", 0x3b, INSTR_RRF_F0FF }, + { "mayhr", 0x3c, INSTR_RRF_F0FF }, + { "myhr", 0x3d, INSTR_RRF_F0FF }, + { "cegbr", 0xa4, INSTR_RRE_RR }, + { "cdgbr", 0xa5, INSTR_RRE_RR }, + { "cxgbr", 0xa6, INSTR_RRE_RR }, + { "cgebr", 0xa8, INSTR_RRF_U0RF }, + { "cgdbr", 0xa9, INSTR_RRF_U0RF }, + { "cgxbr", 0xaa, INSTR_RRF_U0RF }, + { "cfer", 0xb8, INSTR_RRF_U0RF }, + { "cfdr", 0xb9, INSTR_RRF_U0RF }, + { "cfxr", 0xba, INSTR_RRF_U0RF }, + { "cegr", 0xc4, INSTR_RRE_RR }, + { "cdgr", 0xc5, INSTR_RRE_RR }, + { "cxgr", 0xc6, INSTR_RRE_RR }, + { "cger", 0xc8, INSTR_RRF_U0RF }, + { "cgdr", 0xc9, INSTR_RRF_U0RF }, + { "cgxr", 0xca, INSTR_RRF_U0RF }, + { "lpdfr", 0x70, INSTR_RRE_FF }, + { "lndfr", 0x71, INSTR_RRE_FF }, + { "cpsdr", 0x72, INSTR_RRF_F0FF2 }, + { "lcdfr", 0x73, INSTR_RRE_FF }, + { "ldgr", 0xc1, INSTR_RRE_FR }, + { "lgdr", 0xcd, INSTR_RRE_RF }, + { "adtr", 0xd2, INSTR_RRR_F0FF }, + { "axtr", 0xda, INSTR_RRR_F0FF }, + { "cdtr", 0xe4, INSTR_RRE_FF }, + { "cxtr", 0xec, INSTR_RRE_FF }, + { "kdtr", 0xe0, INSTR_RRE_FF }, + { "kxtr", 0xe8, INSTR_RRE_FF }, + { "cedtr", 0xf4, INSTR_RRE_FF }, + { "cextr", 0xfc, INSTR_RRE_FF }, + { "cdgtr", 0xf1, INSTR_RRE_FR }, + { "cxgtr", 0xf9, INSTR_RRE_FR }, + { "cdstr", 0xf3, INSTR_RRE_FR }, + { "cxstr", 
0xfb, INSTR_RRE_FR }, + { "cdutr", 0xf2, INSTR_RRE_FR }, + { "cxutr", 0xfa, INSTR_RRE_FR }, + { "cgdtr", 0xe1, INSTR_RRF_U0RF }, + { "cgxtr", 0xe9, INSTR_RRF_U0RF }, + { "csdtr", 0xe3, INSTR_RRE_RF }, + { "csxtr", 0xeb, INSTR_RRE_RF }, + { "cudtr", 0xe2, INSTR_RRE_RF }, + { "cuxtr", 0xea, INSTR_RRE_RF }, + { "ddtr", 0xd1, INSTR_RRR_F0FF }, + { "dxtr", 0xd9, INSTR_RRR_F0FF }, + { "eedtr", 0xe5, INSTR_RRE_RF }, + { "eextr", 0xed, INSTR_RRE_RF }, + { "esdtr", 0xe7, INSTR_RRE_RF }, + { "esxtr", 0xef, INSTR_RRE_RF }, + { "iedtr", 0xf6, INSTR_RRF_F0FR }, + { "iextr", 0xfe, INSTR_RRF_F0FR }, + { "ltdtr", 0xd6, INSTR_RRE_FF }, + { "ltxtr", 0xde, INSTR_RRE_FF }, + { "fidtr", 0xd7, INSTR_RRF_UUFF }, + { "fixtr", 0xdf, INSTR_RRF_UUFF }, + { "ldetr", 0xd4, INSTR_RRF_0UFF }, + { "lxdtr", 0xdc, INSTR_RRF_0UFF }, + { "ledtr", 0xd5, INSTR_RRF_UUFF }, + { "ldxtr", 0xdd, INSTR_RRF_UUFF }, + { "mdtr", 0xd0, INSTR_RRR_F0FF }, + { "mxtr", 0xd8, INSTR_RRR_F0FF }, + { "qadtr", 0xf5, INSTR_RRF_FUFF }, + { "qaxtr", 0xfd, INSTR_RRF_FUFF }, + { "rrdtr", 0xf7, INSTR_RRF_FFRU }, + { "rrxtr", 0xff, INSTR_RRF_FFRU }, + { "sfasr", 0x85, INSTR_RRE_R0 }, + { "sdtr", 0xd3, INSTR_RRR_F0FF }, + { "sxtr", 0xdb, INSTR_RRR_F0FF }, +#endif + { "lpebr", 0x00, INSTR_RRE_FF }, + { "lnebr", 0x01, INSTR_RRE_FF }, + { "ltebr", 0x02, INSTR_RRE_FF }, + { "lcebr", 0x03, INSTR_RRE_FF }, + { "ldebr", 0x04, INSTR_RRE_FF }, + { "lxdbr", 0x05, INSTR_RRE_FF }, + { "lxebr", 0x06, INSTR_RRE_FF }, + { "mxdbr", 0x07, INSTR_RRE_FF }, + { "kebr", 0x08, INSTR_RRE_FF }, + { "cebr", 0x09, INSTR_RRE_FF }, + { "aebr", 0x0a, INSTR_RRE_FF }, + { "sebr", 0x0b, INSTR_RRE_FF }, + { "mdebr", 0x0c, INSTR_RRE_FF }, + { "debr", 0x0d, INSTR_RRE_FF }, + { "maebr", 0x0e, INSTR_RRF_F0FF }, + { "msebr", 0x0f, INSTR_RRF_F0FF }, + { "lpdbr", 0x10, INSTR_RRE_FF }, + { "lndbr", 0x11, INSTR_RRE_FF }, + { "ltdbr", 0x12, INSTR_RRE_FF }, + { "lcdbr", 0x13, INSTR_RRE_FF }, + { "sqebr", 0x14, INSTR_RRE_FF }, + { "sqdbr", 0x15, INSTR_RRE_FF }, + { 
"sqxbr", 0x16, INSTR_RRE_FF }, + { "meebr", 0x17, INSTR_RRE_FF }, + { "kdbr", 0x18, INSTR_RRE_FF }, + { "cdbr", 0x19, INSTR_RRE_FF }, + { "adbr", 0x1a, INSTR_RRE_FF }, + { "sdbr", 0x1b, INSTR_RRE_FF }, + { "mdbr", 0x1c, INSTR_RRE_FF }, + { "ddbr", 0x1d, INSTR_RRE_FF }, + { "madbr", 0x1e, INSTR_RRF_F0FF }, + { "msdbr", 0x1f, INSTR_RRF_F0FF }, + { "lder", 0x24, INSTR_RRE_FF }, + { "lxdr", 0x25, INSTR_RRE_FF }, + { "lxer", 0x26, INSTR_RRE_FF }, + { "maer", 0x2e, INSTR_RRF_F0FF }, + { "mser", 0x2f, INSTR_RRF_F0FF }, + { "sqxr", 0x36, INSTR_RRE_FF }, + { "meer", 0x37, INSTR_RRE_FF }, + { "madr", 0x3e, INSTR_RRF_F0FF }, + { "msdr", 0x3f, INSTR_RRF_F0FF }, + { "lpxbr", 0x40, INSTR_RRE_FF }, + { "lnxbr", 0x41, INSTR_RRE_FF }, + { "ltxbr", 0x42, INSTR_RRE_FF }, + { "lcxbr", 0x43, INSTR_RRE_FF }, + { "ledbr", 0x44, INSTR_RRE_FF }, + { "ldxbr", 0x45, INSTR_RRE_FF }, + { "lexbr", 0x46, INSTR_RRE_FF }, + { "fixbr", 0x47, INSTR_RRF_U0FF }, + { "kxbr", 0x48, INSTR_RRE_FF }, + { "cxbr", 0x49, INSTR_RRE_FF }, + { "axbr", 0x4a, INSTR_RRE_FF }, + { "sxbr", 0x4b, INSTR_RRE_FF }, + { "mxbr", 0x4c, INSTR_RRE_FF }, + { "dxbr", 0x4d, INSTR_RRE_FF }, + { "tbedr", 0x50, INSTR_RRF_U0FF }, + { "tbdr", 0x51, INSTR_RRF_U0FF }, + { "diebr", 0x53, INSTR_RRF_FUFF }, + { "fiebr", 0x57, INSTR_RRF_U0FF }, + { "thder", 0x58, INSTR_RRE_RR }, + { "thdr", 0x59, INSTR_RRE_RR }, + { "didbr", 0x5b, INSTR_RRF_FUFF }, + { "fidbr", 0x5f, INSTR_RRF_U0FF }, + { "lpxr", 0x60, INSTR_RRE_FF }, + { "lnxr", 0x61, INSTR_RRE_FF }, + { "ltxr", 0x62, INSTR_RRE_FF }, + { "lcxr", 0x63, INSTR_RRE_FF }, + { "lxr", 0x65, INSTR_RRE_RR }, + { "lexr", 0x66, INSTR_RRE_FF }, + { "fixr", 0x67, INSTR_RRF_U0FF }, + { "cxr", 0x69, INSTR_RRE_FF }, + { "lzer", 0x74, INSTR_RRE_R0 }, + { "lzdr", 0x75, INSTR_RRE_R0 }, + { "lzxr", 0x76, INSTR_RRE_R0 }, + { "fier", 0x77, INSTR_RRF_U0FF }, + { "fidr", 0x7f, INSTR_RRF_U0FF }, + { "sfpc", 0x84, INSTR_RRE_RR_OPT }, + { "efpc", 0x8c, INSTR_RRE_RR_OPT }, + { "cefbr", 0x94, INSTR_RRE_RF }, + { 
"cdfbr", 0x95, INSTR_RRE_RF }, + { "cxfbr", 0x96, INSTR_RRE_RF }, + { "cfebr", 0x98, INSTR_RRF_U0RF }, + { "cfdbr", 0x99, INSTR_RRF_U0RF }, + { "cfxbr", 0x9a, INSTR_RRF_U0RF }, + { "cefr", 0xb4, INSTR_RRE_RF }, + { "cdfr", 0xb5, INSTR_RRE_RF }, + { "cxfr", 0xb6, INSTR_RRE_RF }, + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_b9[] = { +#ifdef CONFIG_64BIT + { "lpgr", 0x00, INSTR_RRE_RR }, + { "lngr", 0x01, INSTR_RRE_RR }, + { "ltgr", 0x02, INSTR_RRE_RR }, + { "lcgr", 0x03, INSTR_RRE_RR }, + { "lgr", 0x04, INSTR_RRE_RR }, + { "lurag", 0x05, INSTR_RRE_RR }, + { "lgbr", 0x06, INSTR_RRE_RR }, + { "lghr", 0x07, INSTR_RRE_RR }, + { "agr", 0x08, INSTR_RRE_RR }, + { "sgr", 0x09, INSTR_RRE_RR }, + { "algr", 0x0a, INSTR_RRE_RR }, + { "slgr", 0x0b, INSTR_RRE_RR }, + { "msgr", 0x0c, INSTR_RRE_RR }, + { "dsgr", 0x0d, INSTR_RRE_RR }, + { "eregg", 0x0e, INSTR_RRE_RR }, + { "lrvgr", 0x0f, INSTR_RRE_RR }, + { "lpgfr", 0x10, INSTR_RRE_RR }, + { "lngfr", 0x11, INSTR_RRE_RR }, + { "ltgfr", 0x12, INSTR_RRE_RR }, + { "lcgfr", 0x13, INSTR_RRE_RR }, + { "lgfr", 0x14, INSTR_RRE_RR }, + { "llgfr", 0x16, INSTR_RRE_RR }, + { "llgtr", 0x17, INSTR_RRE_RR }, + { "agfr", 0x18, INSTR_RRE_RR }, + { "sgfr", 0x19, INSTR_RRE_RR }, + { "algfr", 0x1a, INSTR_RRE_RR }, + { "slgfr", 0x1b, INSTR_RRE_RR }, + { "msgfr", 0x1c, INSTR_RRE_RR }, + { "dsgfr", 0x1d, INSTR_RRE_RR }, + { "cgr", 0x20, INSTR_RRE_RR }, + { "clgr", 0x21, INSTR_RRE_RR }, + { "sturg", 0x25, INSTR_RRE_RR }, + { "lbr", 0x26, INSTR_RRE_RR }, + { "lhr", 0x27, INSTR_RRE_RR }, + { "cgfr", 0x30, INSTR_RRE_RR }, + { "clgfr", 0x31, INSTR_RRE_RR }, + { "bctgr", 0x46, INSTR_RRE_RR }, + { "ngr", 0x80, INSTR_RRE_RR }, + { "ogr", 0x81, INSTR_RRE_RR }, + { "xgr", 0x82, INSTR_RRE_RR }, + { "flogr", 0x83, INSTR_RRE_RR }, + { "llgcr", 0x84, INSTR_RRE_RR }, + { "llghr", 0x85, INSTR_RRE_RR }, + { "mlgr", 0x86, INSTR_RRE_RR }, + { "dlgr", 0x87, INSTR_RRE_RR }, + { "alcgr", 0x88, INSTR_RRE_RR }, + { "slbgr", 0x89, INSTR_RRE_RR }, + { "cspg", 0x8a, 
INSTR_RRE_RR }, + { "idte", 0x8e, INSTR_RRF_R0RR }, + { "llcr", 0x94, INSTR_RRE_RR }, + { "llhr", 0x95, INSTR_RRE_RR }, + { "esea", 0x9d, INSTR_RRE_R0 }, + { "lptea", 0xaa, INSTR_RRF_RURR }, + { "cu14", 0xb0, INSTR_RRF_M0RR }, + { "cu24", 0xb1, INSTR_RRF_M0RR }, + { "cu41", 0xb2, INSTR_RRF_M0RR }, + { "cu42", 0xb3, INSTR_RRF_M0RR }, + { "crt", 0x72, INSTR_RRF_U0RR }, + { "cgrt", 0x60, INSTR_RRF_U0RR }, + { "clrt", 0x73, INSTR_RRF_U0RR }, + { "clgrt", 0x61, INSTR_RRF_U0RR }, + { "ptf", 0xa2, INSTR_RRE_R0 }, + { "pfmf", 0xaf, INSTR_RRE_RR }, + { "trte", 0xbf, INSTR_RRF_M0RR }, + { "trtre", 0xbd, INSTR_RRF_M0RR }, + { "ahhhr", 0xc8, INSTR_RRF_R0RR2 }, + { "shhhr", 0xc9, INSTR_RRF_R0RR2 }, + { "alhhh", 0xca, INSTR_RRF_R0RR2 }, + { "alhhl", 0xca, INSTR_RRF_R0RR2 }, + { "slhhh", 0xcb, INSTR_RRF_R0RR2 }, + { "chhr ", 0xcd, INSTR_RRE_RR }, + { "clhhr", 0xcf, INSTR_RRE_RR }, + { "ahhlr", 0xd8, INSTR_RRF_R0RR2 }, + { "shhlr", 0xd9, INSTR_RRF_R0RR2 }, + { "slhhl", 0xdb, INSTR_RRF_R0RR2 }, + { "chlr", 0xdd, INSTR_RRE_RR }, + { "clhlr", 0xdf, INSTR_RRE_RR }, + { { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR }, + { "locgr", 0xe2, INSTR_RRF_M0RR }, + { "ngrk", 0xe4, INSTR_RRF_R0RR2 }, + { "ogrk", 0xe6, INSTR_RRF_R0RR2 }, + { "xgrk", 0xe7, INSTR_RRF_R0RR2 }, + { "agrk", 0xe8, INSTR_RRF_R0RR2 }, + { "sgrk", 0xe9, INSTR_RRF_R0RR2 }, + { "algrk", 0xea, INSTR_RRF_R0RR2 }, + { "slgrk", 0xeb, INSTR_RRF_R0RR2 }, + { "locr", 0xf2, INSTR_RRF_M0RR }, + { "nrk", 0xf4, INSTR_RRF_R0RR2 }, + { "ork", 0xf6, INSTR_RRF_R0RR2 }, + { "xrk", 0xf7, INSTR_RRF_R0RR2 }, + { "ark", 0xf8, INSTR_RRF_R0RR2 }, + { "srk", 0xf9, INSTR_RRF_R0RR2 }, + { "alrk", 0xfa, INSTR_RRF_R0RR2 }, + { "slrk", 0xfb, INSTR_RRF_R0RR2 }, +#endif + { "kmac", 0x1e, INSTR_RRE_RR }, + { "lrvr", 0x1f, INSTR_RRE_RR }, + { "km", 0x2e, INSTR_RRE_RR }, + { "kmc", 0x2f, INSTR_RRE_RR }, + { "kimd", 0x3e, INSTR_RRE_RR }, + { "klmd", 0x3f, INSTR_RRE_RR }, + { "epsw", 0x8d, INSTR_RRE_RR }, + { "trtt", 0x90, INSTR_RRE_RR }, + { "trtt", 0x90, 
INSTR_RRF_M0RR }, + { "trto", 0x91, INSTR_RRE_RR }, + { "trto", 0x91, INSTR_RRF_M0RR }, + { "trot", 0x92, INSTR_RRE_RR }, + { "trot", 0x92, INSTR_RRF_M0RR }, + { "troo", 0x93, INSTR_RRE_RR }, + { "troo", 0x93, INSTR_RRF_M0RR }, + { "mlr", 0x96, INSTR_RRE_RR }, + { "dlr", 0x97, INSTR_RRE_RR }, + { "alcr", 0x98, INSTR_RRE_RR }, + { "slbr", 0x99, INSTR_RRE_RR }, + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_c0[] = { +#ifdef CONFIG_64BIT + { "lgfi", 0x01, INSTR_RIL_RI }, + { "xihf", 0x06, INSTR_RIL_RU }, + { "xilf", 0x07, INSTR_RIL_RU }, + { "iihf", 0x08, INSTR_RIL_RU }, + { "iilf", 0x09, INSTR_RIL_RU }, + { "nihf", 0x0a, INSTR_RIL_RU }, + { "nilf", 0x0b, INSTR_RIL_RU }, + { "oihf", 0x0c, INSTR_RIL_RU }, + { "oilf", 0x0d, INSTR_RIL_RU }, + { "llihf", 0x0e, INSTR_RIL_RU }, + { "llilf", 0x0f, INSTR_RIL_RU }, +#endif + { "larl", 0x00, INSTR_RIL_RP }, + { "brcl", 0x04, INSTR_RIL_UP }, + { "brasl", 0x05, INSTR_RIL_RP }, + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_c2[] = { +#ifdef CONFIG_64BIT + { "slgfi", 0x04, INSTR_RIL_RU }, + { "slfi", 0x05, INSTR_RIL_RU }, + { "agfi", 0x08, INSTR_RIL_RI }, + { "afi", 0x09, INSTR_RIL_RI }, + { "algfi", 0x0a, INSTR_RIL_RU }, + { "alfi", 0x0b, INSTR_RIL_RU }, + { "cgfi", 0x0c, INSTR_RIL_RI }, + { "cfi", 0x0d, INSTR_RIL_RI }, + { "clgfi", 0x0e, INSTR_RIL_RU }, + { "clfi", 0x0f, INSTR_RIL_RU }, + { "msfi", 0x01, INSTR_RIL_RI }, + { "msgfi", 0x00, INSTR_RIL_RI }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_c4[] = { +#ifdef CONFIG_64BIT + { "lrl", 0x0d, INSTR_RIL_RP }, + { "lgrl", 0x08, INSTR_RIL_RP }, + { "lgfrl", 0x0c, INSTR_RIL_RP }, + { "lhrl", 0x05, INSTR_RIL_RP }, + { "lghrl", 0x04, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP }, + { "llhrl", 0x02, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP }, + { "strl", 0x0f, INSTR_RIL_RP }, + { "stgrl", 0x0b, INSTR_RIL_RP }, + { "sthrl", 0x07, INSTR_RIL_RP }, +#endif + { "", 0, INSTR_INVALID } +}; + +static 
struct insn opcode_c6[] = { +#ifdef CONFIG_64BIT + { "crl", 0x0d, INSTR_RIL_RP }, + { "cgrl", 0x08, INSTR_RIL_RP }, + { "cgfrl", 0x0c, INSTR_RIL_RP }, + { "chrl", 0x05, INSTR_RIL_RP }, + { "cghrl", 0x04, INSTR_RIL_RP }, + { "clrl", 0x0f, INSTR_RIL_RP }, + { "clgrl", 0x0a, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP }, + { "clhrl", 0x07, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP }, + { "pfdrl", 0x02, INSTR_RIL_UP }, + { "exrl", 0x00, INSTR_RIL_RP }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_c8[] = { +#ifdef CONFIG_64BIT + { "mvcos", 0x00, INSTR_SSF_RRDRD }, + { "ectg", 0x01, INSTR_SSF_RRDRD }, + { "csst", 0x02, INSTR_SSF_RRDRD }, + { "lpd", 0x04, INSTR_SSF_RRDRD2 }, + { "lpdg ", 0x05, INSTR_SSF_RRDRD2 }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_cc[] = { +#ifdef CONFIG_64BIT + { "brcth", 0x06, INSTR_RIL_RP }, + { "aih", 0x08, INSTR_RIL_RI }, + { "alsih", 0x0a, INSTR_RIL_RI }, + { "alsih", 0x0b, INSTR_RIL_RI }, + { "cih", 0x0d, INSTR_RIL_RI }, + { "clih ", 0x0f, INSTR_RIL_RI }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_e3[] = { +#ifdef CONFIG_64BIT + { "ltg", 0x02, INSTR_RXY_RRRD }, + { "lrag", 0x03, INSTR_RXY_RRRD }, + { "lg", 0x04, INSTR_RXY_RRRD }, + { "cvby", 0x06, INSTR_RXY_RRRD }, + { "ag", 0x08, INSTR_RXY_RRRD }, + { "sg", 0x09, INSTR_RXY_RRRD }, + { "alg", 0x0a, INSTR_RXY_RRRD }, + { "slg", 0x0b, INSTR_RXY_RRRD }, + { "msg", 0x0c, INSTR_RXY_RRRD }, + { "dsg", 0x0d, INSTR_RXY_RRRD }, + { "cvbg", 0x0e, INSTR_RXY_RRRD }, + { "lrvg", 0x0f, INSTR_RXY_RRRD }, + { "lt", 0x12, INSTR_RXY_RRRD }, + { "lray", 0x13, INSTR_RXY_RRRD }, + { "lgf", 0x14, INSTR_RXY_RRRD }, + { "lgh", 0x15, INSTR_RXY_RRRD }, + { "llgf", 0x16, INSTR_RXY_RRRD }, + { "llgt", 0x17, INSTR_RXY_RRRD }, + { "agf", 0x18, INSTR_RXY_RRRD }, + { "sgf", 0x19, INSTR_RXY_RRRD }, + { "algf", 0x1a, INSTR_RXY_RRRD }, + { "slgf", 0x1b, INSTR_RXY_RRRD }, + { "msgf", 0x1c, INSTR_RXY_RRRD }, + { 
"dsgf", 0x1d, INSTR_RXY_RRRD }, + { "cg", 0x20, INSTR_RXY_RRRD }, + { "clg", 0x21, INSTR_RXY_RRRD }, + { "stg", 0x24, INSTR_RXY_RRRD }, + { "cvdy", 0x26, INSTR_RXY_RRRD }, + { "cvdg", 0x2e, INSTR_RXY_RRRD }, + { "strvg", 0x2f, INSTR_RXY_RRRD }, + { "cgf", 0x30, INSTR_RXY_RRRD }, + { "clgf", 0x31, INSTR_RXY_RRRD }, + { "strvh", 0x3f, INSTR_RXY_RRRD }, + { "bctg", 0x46, INSTR_RXY_RRRD }, + { "sty", 0x50, INSTR_RXY_RRRD }, + { "msy", 0x51, INSTR_RXY_RRRD }, + { "ny", 0x54, INSTR_RXY_RRRD }, + { "cly", 0x55, INSTR_RXY_RRRD }, + { "oy", 0x56, INSTR_RXY_RRRD }, + { "xy", 0x57, INSTR_RXY_RRRD }, + { "ly", 0x58, INSTR_RXY_RRRD }, + { "cy", 0x59, INSTR_RXY_RRRD }, + { "ay", 0x5a, INSTR_RXY_RRRD }, + { "sy", 0x5b, INSTR_RXY_RRRD }, + { "aly", 0x5e, INSTR_RXY_RRRD }, + { "sly", 0x5f, INSTR_RXY_RRRD }, + { "sthy", 0x70, INSTR_RXY_RRRD }, + { "lay", 0x71, INSTR_RXY_RRRD }, + { "stcy", 0x72, INSTR_RXY_RRRD }, + { "icy", 0x73, INSTR_RXY_RRRD }, + { "lb", 0x76, INSTR_RXY_RRRD }, + { "lgb", 0x77, INSTR_RXY_RRRD }, + { "lhy", 0x78, INSTR_RXY_RRRD }, + { "chy", 0x79, INSTR_RXY_RRRD }, + { "ahy", 0x7a, INSTR_RXY_RRRD }, + { "shy", 0x7b, INSTR_RXY_RRRD }, + { "ng", 0x80, INSTR_RXY_RRRD }, + { "og", 0x81, INSTR_RXY_RRRD }, + { "xg", 0x82, INSTR_RXY_RRRD }, + { "mlg", 0x86, INSTR_RXY_RRRD }, + { "dlg", 0x87, INSTR_RXY_RRRD }, + { "alcg", 0x88, INSTR_RXY_RRRD }, + { "slbg", 0x89, INSTR_RXY_RRRD }, + { "stpq", 0x8e, INSTR_RXY_RRRD }, + { "lpq", 0x8f, INSTR_RXY_RRRD }, + { "llgc", 0x90, INSTR_RXY_RRRD }, + { "llgh", 0x91, INSTR_RXY_RRRD }, + { "llc", 0x94, INSTR_RXY_RRRD }, + { "llh", 0x95, INSTR_RXY_RRRD }, + { "cgh", 0x34, INSTR_RXY_RRRD }, + { "laey", 0x75, INSTR_RXY_RRRD }, + { "ltgf", 0x32, INSTR_RXY_RRRD }, + { "mfy", 0x5c, INSTR_RXY_RRRD }, + { "mhy", 0x7c, INSTR_RXY_RRRD }, + { "pfd", 0x36, INSTR_RXY_URRD }, + { "lbh", 0xc0, INSTR_RXY_RRRD }, + { "llch", 0xc2, INSTR_RXY_RRRD }, + { "stch", 0xc3, INSTR_RXY_RRRD }, + { "lhh", 0xc4, INSTR_RXY_RRRD }, + { "llhh", 0xc6, INSTR_RXY_RRRD }, 
+ { "sthh", 0xc7, INSTR_RXY_RRRD }, + { "lfh", 0xca, INSTR_RXY_RRRD }, + { "stfh", 0xcb, INSTR_RXY_RRRD }, + { "chf", 0xcd, INSTR_RXY_RRRD }, + { "clhf", 0xcf, INSTR_RXY_RRRD }, +#endif + { "lrv", 0x1e, INSTR_RXY_RRRD }, + { "lrvh", 0x1f, INSTR_RXY_RRRD }, + { "strv", 0x3e, INSTR_RXY_RRRD }, + { "ml", 0x96, INSTR_RXY_RRRD }, + { "dl", 0x97, INSTR_RXY_RRRD }, + { "alc", 0x98, INSTR_RXY_RRRD }, + { "slb", 0x99, INSTR_RXY_RRRD }, + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_e5[] = { +#ifdef CONFIG_64BIT + { "strag", 0x02, INSTR_SSE_RDRD }, + { "chhsi", 0x54, INSTR_SIL_RDI }, + { "chsi", 0x5c, INSTR_SIL_RDI }, + { "cghsi", 0x58, INSTR_SIL_RDI }, + { { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU }, + { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU }, + { { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU }, + { "mvhhi", 0x44, INSTR_SIL_RDI }, + { "mvhi", 0x4c, INSTR_SIL_RDI }, + { "mvghi", 0x48, INSTR_SIL_RDI }, +#endif + { "lasp", 0x00, INSTR_SSE_RDRD }, + { "tprot", 0x01, INSTR_SSE_RDRD }, + { "mvcsk", 0x0e, INSTR_SSE_RDRD }, + { "mvcdk", 0x0f, INSTR_SSE_RDRD }, + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_eb[] = { +#ifdef CONFIG_64BIT + { "lmg", 0x04, INSTR_RSY_RRRD }, + { "srag", 0x0a, INSTR_RSY_RRRD }, + { "slag", 0x0b, INSTR_RSY_RRRD }, + { "srlg", 0x0c, INSTR_RSY_RRRD }, + { "sllg", 0x0d, INSTR_RSY_RRRD }, + { "tracg", 0x0f, INSTR_RSY_RRRD }, + { "csy", 0x14, INSTR_RSY_RRRD }, + { "rllg", 0x1c, INSTR_RSY_RRRD }, + { "clmh", 0x20, INSTR_RSY_RURD }, + { "clmy", 0x21, INSTR_RSY_RURD }, + { "stmg", 0x24, INSTR_RSY_RRRD }, + { "stctg", 0x25, INSTR_RSY_CCRD }, + { "stmh", 0x26, INSTR_RSY_RRRD }, + { "stcmh", 0x2c, INSTR_RSY_RURD }, + { "stcmy", 0x2d, INSTR_RSY_RURD }, + { "lctlg", 0x2f, INSTR_RSY_CCRD }, + { "csg", 0x30, INSTR_RSY_RRRD }, + { "cdsy", 0x31, INSTR_RSY_RRRD }, + { "cdsg", 0x3e, INSTR_RSY_RRRD }, + { "bxhg", 0x44, INSTR_RSY_RRRD }, + { "bxleg", 0x45, INSTR_RSY_RRRD }, + { "tmy", 0x51, INSTR_SIY_URD }, + { "mviy", 0x52, INSTR_SIY_URD 
}, + { "niy", 0x54, INSTR_SIY_URD }, + { "cliy", 0x55, INSTR_SIY_URD }, + { "oiy", 0x56, INSTR_SIY_URD }, + { "xiy", 0x57, INSTR_SIY_URD }, + { "icmh", 0x80, INSTR_RSE_RURD }, + { "icmh", 0x80, INSTR_RSY_RURD }, + { "icmy", 0x81, INSTR_RSY_RURD }, + { "clclu", 0x8f, INSTR_RSY_RRRD }, + { "stmy", 0x90, INSTR_RSY_RRRD }, + { "lmh", 0x96, INSTR_RSY_RRRD }, + { "lmy", 0x98, INSTR_RSY_RRRD }, + { "lamy", 0x9a, INSTR_RSY_AARD }, + { "stamy", 0x9b, INSTR_RSY_AARD }, + { "asi", 0x6a, INSTR_SIY_IRD }, + { "agsi", 0x7a, INSTR_SIY_IRD }, + { "alsi", 0x6e, INSTR_SIY_IRD }, + { "algsi", 0x7e, INSTR_SIY_IRD }, + { "ecag", 0x4c, INSTR_RSY_RRRD }, + { "srak", 0xdc, INSTR_RSY_RRRD }, + { "slak", 0xdd, INSTR_RSY_RRRD }, + { "srlk", 0xde, INSTR_RSY_RRRD }, + { "sllk", 0xdf, INSTR_RSY_RRRD }, + { "locg", 0xe2, INSTR_RSY_RDRM }, + { "stocg", 0xe3, INSTR_RSY_RDRM }, + { "lang", 0xe4, INSTR_RSY_RRRD }, + { "laog", 0xe6, INSTR_RSY_RRRD }, + { "laxg", 0xe7, INSTR_RSY_RRRD }, + { "laag", 0xe8, INSTR_RSY_RRRD }, + { "laalg", 0xea, INSTR_RSY_RRRD }, + { "loc", 0xf2, INSTR_RSY_RDRM }, + { "stoc", 0xf3, INSTR_RSY_RDRM }, + { "lan", 0xf4, INSTR_RSY_RRRD }, + { "lao", 0xf6, INSTR_RSY_RRRD }, + { "lax", 0xf7, INSTR_RSY_RRRD }, + { "laa", 0xf8, INSTR_RSY_RRRD }, + { "laal", 0xfa, INSTR_RSY_RRRD }, +#endif + { "rll", 0x1d, INSTR_RSY_RRRD }, + { "mvclu", 0x8e, INSTR_RSY_RRRD }, + { "tp", 0xc0, INSTR_RSL_R0RD }, + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_ec[] = { +#ifdef CONFIG_64BIT + { "brxhg", 0x44, INSTR_RIE_RRP }, + { "brxlg", 0x45, INSTR_RIE_RRP }, + { "crb", 0xf6, INSTR_RRS_RRRDU }, + { "cgrb", 0xe4, INSTR_RRS_RRRDU }, + { "crj", 0x76, INSTR_RIE_RRPU }, + { "cgrj", 0x64, INSTR_RIE_RRPU }, + { "cib", 0xfe, INSTR_RIS_RURDI }, + { "cgib", 0xfc, INSTR_RIS_RURDI }, + { "cij", 0x7e, INSTR_RIE_RUPI }, + { "cgij", 0x7c, INSTR_RIE_RUPI }, + { "cit", 0x72, INSTR_RIE_R0IU }, + { "cgit", 0x70, INSTR_RIE_R0IU }, + { "clrb", 0xf7, INSTR_RRS_RRRDU }, + { "clgrb", 0xe5, INSTR_RRS_RRRDU }, + { 
"clrj", 0x77, INSTR_RIE_RRPU }, + { "clgrj", 0x65, INSTR_RIE_RRPU }, + { "clib", 0xff, INSTR_RIS_RURDU }, + { "clgib", 0xfd, INSTR_RIS_RURDU }, + { "clij", 0x7f, INSTR_RIE_RUPU }, + { "clgij", 0x7d, INSTR_RIE_RUPU }, + { "clfit", 0x73, INSTR_RIE_R0UU }, + { "clgit", 0x71, INSTR_RIE_R0UU }, + { "rnsbg", 0x54, INSTR_RIE_RRUUU }, + { "rxsbg", 0x57, INSTR_RIE_RRUUU }, + { "rosbg", 0x56, INSTR_RIE_RRUUU }, + { "risbg", 0x55, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU }, + { "ahik", 0xd8, INSTR_RIE_RRI0 }, + { "aghik", 0xd9, INSTR_RIE_RRI0 }, + { { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 }, + { { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_ed[] = { +#ifdef CONFIG_64BIT + { "mayl", 0x38, INSTR_RXF_FRRDF }, + { "myl", 0x39, INSTR_RXF_FRRDF }, + { "may", 0x3a, INSTR_RXF_FRRDF }, + { "my", 0x3b, INSTR_RXF_FRRDF }, + { "mayh", 0x3c, INSTR_RXF_FRRDF }, + { "myh", 0x3d, INSTR_RXF_FRRDF }, + { "ley", 0x64, INSTR_RXY_FRRD }, + { "ldy", 0x65, INSTR_RXY_FRRD }, + { "stey", 0x66, INSTR_RXY_FRRD }, + { "stdy", 0x67, INSTR_RXY_FRRD }, + { "sldt", 0x40, INSTR_RXF_FRRDF }, + { "slxt", 0x48, INSTR_RXF_FRRDF }, + { "srdt", 0x41, INSTR_RXF_FRRDF }, + { "srxt", 0x49, INSTR_RXF_FRRDF }, + { "tdcet", 0x50, INSTR_RXE_FRRD }, + { "tdcdt", 0x54, INSTR_RXE_FRRD }, + { "tdcxt", 0x58, INSTR_RXE_FRRD }, + { "tdget", 0x51, INSTR_RXE_FRRD }, + { "tdgdt", 0x55, INSTR_RXE_FRRD }, + { "tdgxt", 0x59, INSTR_RXE_FRRD }, +#endif + { "ldeb", 0x04, INSTR_RXE_FRRD }, + { "lxdb", 0x05, INSTR_RXE_FRRD }, + { "lxeb", 0x06, INSTR_RXE_FRRD }, + { "mxdb", 0x07, INSTR_RXE_FRRD }, + { "keb", 0x08, INSTR_RXE_FRRD }, + { "ceb", 0x09, INSTR_RXE_FRRD }, + { "aeb", 0x0a, INSTR_RXE_FRRD }, + { "seb", 0x0b, INSTR_RXE_FRRD }, + { "mdeb", 0x0c, INSTR_RXE_FRRD }, + { "deb", 0x0d, INSTR_RXE_FRRD }, + { "maeb", 0x0e, INSTR_RXF_FRRDF }, + { "mseb", 0x0f, INSTR_RXF_FRRDF }, + { 
"tceb", 0x10, INSTR_RXE_FRRD }, + { "tcdb", 0x11, INSTR_RXE_FRRD }, + { "tcxb", 0x12, INSTR_RXE_FRRD }, + { "sqeb", 0x14, INSTR_RXE_FRRD }, + { "sqdb", 0x15, INSTR_RXE_FRRD }, + { "meeb", 0x17, INSTR_RXE_FRRD }, + { "kdb", 0x18, INSTR_RXE_FRRD }, + { "cdb", 0x19, INSTR_RXE_FRRD }, + { "adb", 0x1a, INSTR_RXE_FRRD }, + { "sdb", 0x1b, INSTR_RXE_FRRD }, + { "mdb", 0x1c, INSTR_RXE_FRRD }, + { "ddb", 0x1d, INSTR_RXE_FRRD }, + { "madb", 0x1e, INSTR_RXF_FRRDF }, + { "msdb", 0x1f, INSTR_RXF_FRRDF }, + { "lde", 0x24, INSTR_RXE_FRRD }, + { "lxd", 0x25, INSTR_RXE_FRRD }, + { "lxe", 0x26, INSTR_RXE_FRRD }, + { "mae", 0x2e, INSTR_RXF_FRRDF }, + { "mse", 0x2f, INSTR_RXF_FRRDF }, + { "sqe", 0x34, INSTR_RXE_FRRD }, + { "sqd", 0x35, INSTR_RXE_FRRD }, + { "mee", 0x37, INSTR_RXE_FRRD }, + { "mad", 0x3e, INSTR_RXF_FRRDF }, + { "msd", 0x3f, INSTR_RXF_FRRDF }, + { "", 0, INSTR_INVALID } +}; + +/* Extracts an operand value from an instruction. */ +static unsigned int extract_operand(unsigned char *code, + const struct operand *operand) +{ + unsigned int val; + int bits; + + /* Extract fragments of the operand byte for byte. */ + code += operand->shift / 8; + bits = (operand->shift & 7) + operand->bits; + val = 0; + do { + val <<= 8; + val |= (unsigned int) *code++; + bits -= 8; + } while (bits > 0); + val >>= -bits; + val &= ((1U << (operand->bits - 1)) << 1) - 1; + + /* Check for special long displacement case. */ + if (operand->bits == 20 && operand->shift == 20) + val = (val & 0xff) << 12 | (val & 0xfff00) >> 8; + + /* Sign extend value if the operand is signed or pc relative. */ + if ((operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) && + (val & (1U << (operand->bits - 1)))) + val |= (-1U << (operand->bits - 1)) << 1; + + /* Double value if the operand is pc relative. */ + if (operand->flags & OPERAND_PCREL) + val <<= 1; + + /* Length x in an instructions has real length x + 1. 
*/ + if (operand->flags & OPERAND_LENGTH) + val++; + return val; +} + +static inline int insn_length(unsigned char code) +{ + return ((((int) code + 64) >> 7) + 1) << 1; +} + +static struct insn *find_insn(unsigned char *code) +{ + unsigned char opfrag = code[1]; + unsigned char opmask; + struct insn *table; + + switch (code[0]) { + case 0x01: + table = opcode_01; + break; + case 0xa5: + table = opcode_a5; + break; + case 0xa7: + table = opcode_a7; + break; + case 0xb2: + table = opcode_b2; + break; + case 0xb3: + table = opcode_b3; + break; + case 0xb9: + table = opcode_b9; + break; + case 0xc0: + table = opcode_c0; + break; + case 0xc2: + table = opcode_c2; + break; + case 0xc4: + table = opcode_c4; + break; + case 0xc6: + table = opcode_c6; + break; + case 0xc8: + table = opcode_c8; + break; + case 0xcc: + table = opcode_cc; + break; + case 0xe3: + table = opcode_e3; + opfrag = code[5]; + break; + case 0xe5: + table = opcode_e5; + break; + case 0xeb: + table = opcode_eb; + opfrag = code[5]; + break; + case 0xec: + table = opcode_ec; + opfrag = code[5]; + break; + case 0xed: + table = opcode_ed; + opfrag = code[5]; + break; + default: + table = opcode; + opfrag = code[0]; + break; + } + while (table->format != INSTR_INVALID) { + opmask = formats[table->format][0]; + if (table->opfrag == (opfrag & opmask)) + return table; + table++; + } + return NULL; +} + +static int print_insn(char *buffer, unsigned char *code, unsigned long addr) +{ + struct insn *insn; + const unsigned char *ops; + const struct operand *operand; + unsigned int value; + char separator; + char *ptr; + int i; + + ptr = buffer; + insn = find_insn(code); + if (insn) { + if (insn->name[0] == '\0') + ptr += sprintf(ptr, "%s\t", + long_insn_name[(int) insn->name[1]]); + else + ptr += sprintf(ptr, "%.5s\t", insn->name); + /* Extract the operands. 
*/ + separator = 0; + for (ops = formats[insn->format] + 1, i = 0; + *ops != 0 && i < 6; ops++, i++) { + operand = operands + *ops; + value = extract_operand(code, operand); + if ((operand->flags & OPERAND_INDEX) && value == 0) + continue; + if ((operand->flags & OPERAND_BASE) && + value == 0 && separator == '(') { + separator = ','; + continue; + } + if (separator) + ptr += sprintf(ptr, "%c", separator); + if (operand->flags & OPERAND_GPR) + ptr += sprintf(ptr, "%%r%i", value); + else if (operand->flags & OPERAND_FPR) + ptr += sprintf(ptr, "%%f%i", value); + else if (operand->flags & OPERAND_AR) + ptr += sprintf(ptr, "%%a%i", value); + else if (operand->flags & OPERAND_CR) + ptr += sprintf(ptr, "%%c%i", value); + else if (operand->flags & OPERAND_PCREL) + ptr += sprintf(ptr, "%lx", (signed int) value + + addr); + else if (operand->flags & OPERAND_SIGNED) + ptr += sprintf(ptr, "%i", value); + else + ptr += sprintf(ptr, "%u", value); + if (operand->flags & OPERAND_DISP) + separator = '('; + else if (operand->flags & OPERAND_BASE) { + ptr += sprintf(ptr, ")"); + separator = ','; + } else + separator = ','; + } + } else + ptr += sprintf(ptr, "unknown"); + return (int) (ptr - buffer); +} + +/* + * Print a disassembly of the code around the PSW address in regs. + * Up to 32 bytes before and 32 bytes after the PSW address are copied + * (from user or kernel space, depending on the problem state bit). + * The instruction ending at the PSW address is marked with '#', the one + * starting at it with '>'. At most 8 instructions are printed. + */ +void show_code(struct pt_regs *regs) +{ + char *mode = (regs->psw.mask & PSW_MASK_PSTATE) ? "User" : "Krnl"; + unsigned char code[64]; + /* + * One output line: prefix, address, up to 12 hex digits, tabs, + * mnemonic and operands. 64 bytes are not enough for the longest + * lines, so the buffer is sized generously. + */ + char buffer[128], *ptr; + mm_segment_t old_fs; + unsigned long addr; + int start, end, opsize, hops, i; + + /* Get a snapshot of the 64 bytes surrounding the fault address. */ + old_fs = get_fs(); + set_fs((regs->psw.mask & PSW_MASK_PSTATE) ? USER_DS : KERNEL_DS); + for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) { + addr = regs->psw.addr - 34 + start; + if (__copy_from_user(code + start - 2, + (char __user *) addr, 2)) + break; + } + for (end = 32; end < 64; end += 2) { + addr = regs->psw.addr + end - 32; + if (__copy_from_user(code + end, + (char __user *) addr, 2)) + break; + } + set_fs(old_fs); + /* Code snapshot useable ? 
*/ + if ((regs->psw.addr & 1) || start >= end) { + printk("%s Code: Bad PSW.\n", mode); + return; + } + /* Find a starting point for the disassembly. */ + while (start < 32) { + for (i = 0, hops = 0; start + i < 32 && hops < 3; hops++) { + if (!find_insn(code + start + i)) + break; + i += insn_length(code[start + i]); + } + if (start + i == 32) + /* Looks good, sequence ends at PSW. */ + break; + start += 2; + } + /* Decode the instructions. */ + ptr = buffer; + ptr += sprintf(ptr, "%s Code:", mode); + hops = 0; + while (start < end && hops < 8) { + opsize = insn_length(code[start]); + if (start + opsize == 32) + *ptr++ = '#'; + else if (start == 32) + *ptr++ = '>'; + else + *ptr++ = ' '; + addr = regs->psw.addr + start - 32; + ptr += sprintf(ptr, ONELONG, addr); + if (start + opsize >= end) + break; + for (i = 0; i < opsize; i++) + ptr += sprintf(ptr, "%02x", code[start + i]); + *ptr++ = '\t'; + if (i < 6) + *ptr++ = '\t'; + ptr += print_insn(ptr, code + start, addr); + start += opsize; + /* + * The buffer contains register operands such as "%r1" or + * "%c0", so it must never be used as the format string. + */ + printk("%s", buffer); + ptr = buffer; + ptr += sprintf(ptr, "\n "); + hops++; + } + printk("\n"); +} diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c new file mode 100644 index 00000000..9475e682 --- /dev/null +++ b/arch/s390/kernel/early.c @@ -0,0 +1,482 @@ +/* + * arch/s390/kernel/early.c + * + * Copyright IBM Corp. 
2007, 2009 + * Author(s): Hongjie Yang <hongjie@us.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#define KMSG_COMPONENT "setup" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/compiler.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/ftrace.h> +#include <linux/lockdep.h> +#include <linux/module.h> +#include <linux/pfn.h> +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <asm/ebcdic.h> +#include <asm/ipl.h> +#include <asm/lowcore.h> +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/sysinfo.h> +#include <asm/cpcmd.h> +#include <asm/sclp.h> +#include <asm/facility.h> +#include "entry.h" + +/* + * Create a Kernel NSS if the SAVESYS= parameter is defined. + * The two sizes below are the buffer sizes for the DEFSYS and the + * SAVESYS/IPL command strings built in create_kernel_nss(). + */ +#define DEFSYS_CMD_SIZE 128 +#define SAVESYS_CMD_SIZE 32 + +char kernel_nss_name[NSS_NAME_SIZE + 1]; + +static void __init setup_boot_command_line(void); + +/* + * Get the TOD clock running. + * Nothing is done if store_clock() succeeds. Otherwise the clock is set + * to the Unix epoch; if setting or re-reading the clock fails the + * machine enters a disabled wait. After a reset the scheduler clock base + * and the lowcore last_update_clock are set to the Unix epoch as well. + */ +static void __init reset_tod_clock(void) +{ + u64 time; + + if (store_clock(&time) == 0) + return; + /* TOD clock not running. Set the clock to Unix Epoch. 
*/ + if (set_clock(TOD_UNIX_EPOCH) != 0 || store_clock(&time) != 0) + disabled_wait(0); + + sched_clock_base_cc = TOD_UNIX_EPOCH; + S390_lowcore.last_update_clock = sched_clock_base_cc; +} + +#ifdef CONFIG_SHARED_KERNEL +int __init savesys_ipl_nss(char *cmd, const int cmdlen); + +asm( + " .section .init.text,\"ax\",@progbits\n" + " .align 4\n" + " .type savesys_ipl_nss, @function\n" + "savesys_ipl_nss:\n" +#ifdef CONFIG_64BIT + " stmg 6,15,48(15)\n" + " lgr 14,3\n" + " sam31\n" + " diag 2,14,0x8\n" + " sam64\n" + " lgr 2,14\n" + " lmg 6,15,48(15)\n" +#else + " stm 6,15,24(15)\n" + " lr 14,3\n" + " diag 2,14,0x8\n" + " lr 2,14\n" + " lm 6,15,24(15)\n" +#endif + " br 14\n" + " .size savesys_ipl_nss, .-savesys_ipl_nss\n" + " .previous\n"); + +static __initdata char upper_command_line[COMMAND_LINE_SIZE]; + +static noinline __init void create_kernel_nss(void) +{ + unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size; +#ifdef CONFIG_BLK_DEV_INITRD + unsigned int sinitrd_pfn, einitrd_pfn; +#endif + int response; + int hlen; + size_t len; + char *savesys_ptr; + char defsys_cmd[DEFSYS_CMD_SIZE]; + char savesys_cmd[SAVESYS_CMD_SIZE]; + + /* Do nothing if we are not running under VM */ + if (!MACHINE_IS_VM) + return; + + /* Convert COMMAND_LINE to upper case */ + for (i = 0; i < strlen(boot_command_line); i++) + upper_command_line[i] = toupper(boot_command_line[i]); + + savesys_ptr = strstr(upper_command_line, "SAVESYS="); + + if (!savesys_ptr) + return; + + savesys_ptr += 8; /* Point to the beginning of the NSS name */ + for (i = 0; i < NSS_NAME_SIZE; i++) { + if (savesys_ptr[i] == ' ' || savesys_ptr[i] == '\0') + break; + kernel_nss_name[i] = savesys_ptr[i]; + } + + stext_pfn = PFN_DOWN(__pa(&_stext)); + eshared_pfn = PFN_DOWN(__pa(&_eshared)); + end_pfn = PFN_UP(__pa(&_end)); + min_size = end_pfn << 2; + + hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE, + "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", + kernel_nss_name, stext_pfn - 1, stext_pfn, + eshared_pfn - 1, 
eshared_pfn, end_pfn); + +#ifdef CONFIG_BLK_DEV_INITRD + if (INITRD_START && INITRD_SIZE) { + sinitrd_pfn = PFN_DOWN(__pa(INITRD_START)); + einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE)); + min_size = einitrd_pfn << 2; + hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, + " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn); + } +#endif + + snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, + " EW MINSIZE=%.7iK PARMREGS=0-13", min_size); + defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0'; + snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s", + kernel_nss_name, kernel_nss_name); + savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0'; + + __cpcmd(defsys_cmd, NULL, 0, &response); + + if (response != 0) { + pr_err("Defining the Linux kernel NSS failed with rc=%d\n", + response); + kernel_nss_name[0] = '\0'; + return; + } + + len = strlen(savesys_cmd); + ASCEBC(savesys_cmd, len); + response = savesys_ipl_nss(savesys_cmd, len); + + /* On success: response is equal to the command size, + * max SAVESYS_CMD_SIZE + * On error: response contains the numeric portion of cp error message. + * for SAVESYS it will be >= 263 + * for missing privilege class, it will be 1 + */ + if (response > SAVESYS_CMD_SIZE || response == 1) { + pr_err("Saving the Linux kernel NSS failed with rc=%d\n", + response); + kernel_nss_name[0] = '\0'; + return; + } + + /* re-initialize cputime accounting. 
*/ + sched_clock_base_cc = get_clock(); + S390_lowcore.last_update_clock = sched_clock_base_cc; + S390_lowcore.last_update_timer = 0x7fffffffffffffffULL; + S390_lowcore.user_timer = 0; + S390_lowcore.system_timer = 0; + /* set the cpu timer to the value just stored in last_update_timer */ + asm volatile("SPT 0(%0)" : : "a" (&S390_lowcore.last_update_timer)); + + /* re-setup boot command line with new ipl vm parms */ + ipl_update_parameters(); + setup_boot_command_line(); + + ipl_flags = IPL_NSS_VALID; +} + +#else /* CONFIG_SHARED_KERNEL */ + +static inline void create_kernel_nss(void) { } + +#endif /* CONFIG_SHARED_KERNEL */ + +/* + * Clear bss memory: zero everything from __bss_start up to __bss_stop + */ +static noinline __init void clear_bss_section(void) +{ + memset(__bss_start, 0, __bss_stop - __bss_start); +} + +/* + * Initialize storage key for kernel pages: every page frame from 0 up + * to the page that contains _end gets PAGE_DEFAULT_KEY. + */ +static noinline __init void init_kernel_storage_key(void) +{ + unsigned long end_pfn, init_pfn; + + end_pfn = PFN_UP(__pa(&_end)); + + for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) + page_set_storage_key(init_pfn << PAGE_SHIFT, + PAGE_DEFAULT_KEY, 0); +} + +/* Page aligned buffer for the stsi 3.2.2 query in detect_machine_type() */ +static __initdata struct sysinfo_3_2_2 vmms __aligned(PAGE_SIZE); + +/* + * Set MACHINE_FLAG_LPAR, MACHINE_FLAG_KVM or MACHINE_FLAG_VM in the + * lowcore machine flags. A configuration level of 2 or less means LPAR. + * Otherwise the cpi field of the first vm entry returned by stsi 3.2.2 + * decides between KVM and z/VM; no flag is set if that query is not + * available or returns no entries. + */ +static noinline __init void detect_machine_type(void) +{ + /* Check current-configuration-level */ + if ((stsi(NULL, 0, 0, 0) >> 28) <= 2) { + S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR; + return; + } + /* Get virtual-machine cpu information. */ + if (stsi(&vmms, 3, 2, 2) == -ENOSYS || !vmms.count) + return; + + /* Running under KVM? 
If not we assume z/VM */ + /* "\xd2\xe5\xd4" is "KVM" in EBCDIC */ + if (!memcmp(vmms.vm[0].cpi, "\xd2\xe5\xd4", 3)) + S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; + else + S390_lowcore.machine_flags |= MACHINE_FLAG_VM; +} + +/* + * Program check handler used during early boot: look up the old program + * PSW address in the exception tables and continue at the fixup address. + * Without a fixup entry the machine enters a disabled wait. + */ +static __init void early_pgm_check_handler(void) +{ + unsigned long addr; + const struct exception_table_entry *fixup; + + addr = S390_lowcore.program_old_psw.addr; + fixup = search_exception_tables(addr & PSW_ADDR_INSN); + if (!fixup) + disabled_wait(0); + S390_lowcore.program_old_psw.addr = fixup->fixup | PSW_ADDR_AMODE; +} + +/* + * Install the base external and program check new PSWs in the lowcore + * and route program checks to early_pgm_check_handler(). + */ +static noinline __init void setup_lowcore_early(void) +{ + psw_t psw; + + psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; + psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; + S390_lowcore.external_new_psw = psw; + psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + S390_lowcore.program_new_psw = psw; + s390_base_pgm_handler_fn = early_pgm_check_handler; +} + +/* Store the facility list into the lowcore stfle_fac_list array */ +static noinline __init void setup_facility_list(void) +{ + stfle(S390_lowcore.stfle_fac_list, + ARRAY_SIZE(S390_lowcore.stfle_fac_list)); +} + +/* + * Set MACHINE_FLAG_HPAGE and bit 23 of control register 0 if facilities + * 2 and 8 are both present. Compiled out with CONFIG_DEBUG_PAGEALLOC. + */ +static noinline __init void setup_hpage(void) +{ +#ifndef CONFIG_DEBUG_PAGEALLOC + if (!test_facility(2) || !test_facility(8)) + return; + S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE; + __ctl_set_bit(0, 23); +#endif +} + +/* + * The detect_*() probes below share one pattern: rc starts out as + * -EOPNOTSUPP and is only set to 0 by the "la %0,0" that follows the + * probed instruction. The EX_TABLE(0b,1b) entry presumably makes a + * program check on the probed instruction resume at label 1, skipping + * that "la" - NOTE(review): confirm against the EX_TABLE definition. + */ +static __init void detect_mvpg(void) +{ +#ifndef CONFIG_64BIT + int rc; + + asm volatile( + " la 0,0\n" + " mvpg %2,%2\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG; +#endif +} + +static __init void detect_ieee(void) +{ +#ifndef CONFIG_64BIT + int rc, tmp; + + asm volatile( + " efpc %1,0\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE; +#endif +} + +static __init void detect_csp(void) +{ +#ifndef CONFIG_64BIT + int rc; + + asm volatile( + " 
la 0,0\n" + " la 1,0\n" + " la 2,4\n" + " csp 0,2\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_CSP; +#endif +} + +/* + * Probe diagnose 0x9c with the address of the current cpu (from stap()) + * and set MACHINE_FLAG_DIAG9C if rc was set to 0. + */ +static __init void detect_diag9c(void) +{ + unsigned int cpu_address; + int rc; + + cpu_address = stap(); + asm volatile( + " diag %2,0,0x9c\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; +} + +/* Probe diagnose 0x44 (64 bit only) and set MACHINE_FLAG_DIAG44 */ +static __init void detect_diag44(void) +{ +#ifdef CONFIG_64BIT + int rc; + + asm volatile( + " diag 0,0,0x44\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44; +#endif +} + +/* + * Translate facility numbers reported by test_facility() into machine + * flags (64 bit only): 3 IDTE, 8 PFMF, 11 TOPOLOGY, 27 MVCOS, 40 SPP, + * 25 STCKF. + */ +static __init void detect_machine_facilities(void) +{ +#ifdef CONFIG_64BIT + if (test_facility(3)) + S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; + if (test_facility(8)) + S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF; + if (test_facility(11)) + S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; + if (test_facility(27)) + S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; + if (test_facility(40)) + S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; + if (test_facility(25)) + S390_lowcore.machine_flags |= MACHINE_FLAG_STCKF; +#endif +} + +/* + * Move the initrd up if it starts less than 4MB behind the end of the + * kernel image, and update INITRD_START accordingly. + */ +static __init void rescue_initrd(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); + /* + * Just like in case of IPL from VM reader we make sure there is a + * gap of 4MB between end of kernel and start of initrd. + * That way we can also be sure that saving an NSS will succeed, + * which however only requires different segments. 
+ */ + if (!INITRD_START || !INITRD_SIZE) + return; + if (INITRD_START >= min_initrd_addr) + return; + memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); + INITRD_START = min_initrd_addr; +#endif +} + +/* + * Set up boot command line. + * ipl_data() is asked to write its parameters right behind the '\0' that + * terminates boot_command_line and returns a nonzero count if it wrote + * something. Data that starts with '=' replaces the command line (the + * '=' itself is dropped), anything else is appended after a space. + */ +static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) +{ + char *parm, *delim; + size_t rc, len; + + len = strlen(boot_command_line); + + delim = boot_command_line + len; /* '\0' character position */ + parm = boot_command_line + len + 1; /* append right after '\0' */ + + rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1); + if (rc) { + if (*parm == '=') + memmove(boot_command_line, parm + 1, rc); + else + *delim = ' '; /* replace '\0' with space */ + } +} + +/* + * Return 1 if any byte of str has its high bit (0x80) set, else 0. + * The caller uses this to decide whether the string needs an EBCDIC to + * ASCII conversion. + */ +static inline int has_ebcdic_char(const char *str) +{ + int i; + + for (i = 0; str[i]; i++) + if (str[i] & 0x80) + return 1; + return 0; +} + +/* + * Build boot_command_line from the arch command line: terminate it, + * convert it to ASCII if needed, copy it with surrounding white space + * stripped and append the IPL parameters (vmparm only under VM, then + * scpdata). + */ +static void __init setup_boot_command_line(void) +{ + COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; + /* convert arch command line to ascii if necessary */ + if (has_ebcdic_char(COMMAND_LINE)) + EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); + /* copy arch command line */ + strlcpy(boot_command_line, strstrip(COMMAND_LINE), + ARCH_COMMAND_LINE_SIZE); + + /* append IPL PARM data to the boot command line */ + if (MACHINE_IS_VM) + append_to_cmdline(append_ipl_vmparm); + + append_to_cmdline(append_ipl_scpdata); +} + + +/* + * Save ipl parameters, clear bss memory, initialize storage keys + * and create a kernel NSS at startup if the SAVESYS= parm is defined + */ +void __init startup_init(void) +{ + reset_tod_clock(); + ipl_save_parameters(); + rescue_initrd(); + clear_bss_section(); + init_kernel_storage_key(); + lockdep_init(); + lockdep_off(); + sort_main_extable(); + setup_lowcore_early(); + setup_facility_list(); + detect_machine_type(); + ipl_update_parameters(); + setup_boot_command_line(); + create_kernel_nss(); + detect_mvpg(); + detect_ieee(); + detect_csp(); + detect_diag9c(); + detect_diag44(); + 
detect_machine_facilities(); + setup_hpage(); + sclp_facilities_detect(); + detect_memory_layout(memory_chunk); +#ifdef CONFIG_DYNAMIC_FTRACE + S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; +#endif + lockdep_on(); +} diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c new file mode 100644 index 00000000..cc0dc609 --- /dev/null +++ b/arch/s390/kernel/ebcdic.c @@ -0,0 +1,401 @@ +/* + * arch/s390/kernel/ebcdic.c + * EBCDIC -> ASCII, ASCII -> EBCDIC, + * upper to lower case (EBCDIC) conversion tables. + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + * Martin Peschke <peschke@fh-brandenburg.de> + */ + +#include <linux/module.h> +#include <asm/types.h> +#include <asm/ebcdic.h> + +/* + * ASCII (IBM PC 437) -> EBCDIC 037 + */ +__u8 _ascebc[256] = +{ + /*00 NUL SOH STX ETX EOT ENQ ACK BEL */ + 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, + /*08 BS HT LF VT FF CR SO SI */ + /* ->NL */ + 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + /*10 DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ + 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, + /*18 CAN EM SUB ESC FS GS RS US */ + /* ->IGS ->IRS ->IUS */ + 0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F, + /*20 SP ! " # $ % & ' */ + 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, + /*28 ( ) * + , - . / */ + 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, + /*30 0 1 2 3 4 5 6 7 */ + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + /*38 8 9 : ; < = > ? 
*/ + 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, + /*40 @ A B C D E F G */ + 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + /*48 H I J K L M N O */ + 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, + /*50 P Q R S T U V W */ + 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, + /*58 X Y Z [ \ ] ^ _ */ + 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D, + /*60 ` a b c d e f g */ + 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + /*68 h i j k l m n o */ + 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, + /*70 p q r s t u v w */ + 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, + /*78 x y z { | } ~ DL */ + 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07, + /*80*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*88*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*90*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*98*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*A0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*A8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*B0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*B8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*C0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*C8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*D0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*D8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*E0 sz */ + 0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*E8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*F0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*F8*/ + 0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF +}; + +/* + * EBCDIC 037 -> ASCII (IBM PC 437) + */ +__u8 _ebcasc[256] = +{ + /* 0x00 NUL SOH STX ETX *SEL HT *RNL DEL */ + 0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F, + /* 0x08 -GE -SPS -RPT VT FF CR SO SI */ + 0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + /* 0x10 DLE DC1 DC2 DC3 -RES -NL BS -POC + -ENP ->LF */ + 0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07, + /* 0x18 CAN EM 
-UBS -CU1 -IFS -IGS -IRS -ITB + -IUS */ + 0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + /* 0x20 -DS -SOS FS -WUS -BYP LF ETB ESC + -INP */ + 0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B, + /* 0x28 -SA -SFE -SM -CSP -MFA ENQ ACK BEL + -SW */ + 0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07, + /* 0x30 ---- ---- SYN -IR -PP -TRN -NBS EOT */ + 0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04, + /* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */ + 0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A, + /* 0x40 SP RSP ä ---- */ + 0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86, + /* 0x48 . < ( + | */ + 0x87, 0xA4, 0x9B, 0x2E, 0x3C, 0x28, 0x2B, 0x7C, + /* 0x50 & ---- */ + 0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07, + /* 0x58 ß ! $ * ) ; */ + 0x8D, 0xE1, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAA, + /* 0x60 - / ---- Ä ---- ---- ---- */ + 0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F, + /* 0x68 ---- , % _ > ? */ + 0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, + /* 0x70 ---- ---- ---- ---- ---- ---- ---- */ + 0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + /* 0x78 * ` : # @ ' = " */ + 0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, + /* 0x80 * a b c d e f g */ + 0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + /* 0x88 h i ---- ---- ---- */ + 0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1, + /* 0x90 ° j k l m n o p */ + 0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, + /* 0x98 q r ---- ---- */ + 0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07, + /* 0xA0 ~ s t u v w x */ + 0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + /* 0xA8 y z ---- ---- ---- ---- */ + 0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07, + /* 0xB0 ^ ---- § ---- */ + 0x5E, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC, + /* 0xB8 ---- [ ] ---- ---- ---- ---- */ + 0xAB, 0x07, 0x5B, 0x5D, 0x07, 0x07, 0x07, 0x07, + /* 0xC0 { A B C D E F G */ + 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + /* 0xC8 H I ---- ö ---- */ + 0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07, + /* 0xD0 } J K L M N O P */ + 0x7D, 0x4A, 0x4B, 0x4C, 
0x4D, 0x4E, 0x4F, 0x50, + /* 0xD8 Q R ---- ü */ + 0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98, + /* 0xE0 \ S T U V W X */ + 0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + /* 0xE8 Y Z ---- Ö ---- ---- ---- */ + 0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07, + /* 0xF0 0 1 2 3 4 5 6 7 */ + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */ + 0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07 +}; + + +/* + * ASCII (IBM PC 437) -> EBCDIC 500 + */ +__u8 _ascebc_500[256] = +{ + /*00 NUL SOH STX ETX EOT ENQ ACK BEL */ + 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, + /*08 BS HT LF VT FF CR SO SI */ + /* ->NL */ + 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + /*10 DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ + 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, + /*18 CAN EM SUB ESC FS GS RS US */ + /* ->IGS ->IRS ->IUS */ + 0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F, + /*20 SP ! " # $ % & ' */ + 0x40, 0x4F, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, + /*28 ( ) * + , - . / */ + 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, + /*30 0 1 2 3 4 5 6 7 */ + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + /*38 8 9 : ; < = > ? 
*/ + 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, + /*40 @ A B C D E F G */ + 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + /*48 H I J K L M N O */ + 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, + /*50 P Q R S T U V W */ + 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, + /*58 X Y Z [ \ ] ^ _ */ + 0xE7, 0xE8, 0xE9, 0x4A, 0xE0, 0x5A, 0x5F, 0x6D, + /*60 ` a b c d e f g */ + 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + /*68 h i j k l m n o */ + 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, + /*70 p q r s t u v w */ + 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, + /*78 x y z { | } ~ DL */ + 0xA7, 0xA8, 0xA9, 0xC0, 0xBB, 0xD0, 0xA1, 0x07, + /*80*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*88*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*90*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*98*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*A0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*A8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*B0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*B8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*C0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*C8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*D0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*D8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*E0 sz */ + 0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*E8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*F0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*F8*/ + 0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF +}; + +/* + * EBCDIC 500 -> ASCII (IBM PC 437) + */ +__u8 _ebcasc_500[256] = +{ + /* 0x00 NUL SOH STX ETX *SEL HT *RNL DEL */ + 0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F, + /* 0x08 -GE -SPS -RPT VT FF CR SO SI */ + 0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + /* 0x10 DLE DC1 DC2 DC3 -RES -NL BS -POC + -ENP ->LF */ + 0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07, + /* 0x18 CAN 
EM -UBS -CU1 -IFS -IGS -IRS -ITB + -IUS */ + 0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + /* 0x20 -DS -SOS FS -WUS -BYP LF ETB ESC + -INP */ + 0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B, + /* 0x28 -SA -SFE -SM -CSP -MFA ENQ ACK BEL + -SW */ + 0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07, + /* 0x30 ---- ---- SYN -IR -PP -TRN -NBS EOT */ + 0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04, + /* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */ + 0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A, + /* 0x40 SP RSP ä ---- */ + 0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86, + /* 0x48 [ . < ( + ! */ + 0x87, 0xA4, 0x5B, 0x2E, 0x3C, 0x28, 0x2B, 0x21, + /* 0x50 & ---- */ + 0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07, + /* 0x58 ß ] $ * ) ; ^ */ + 0x8D, 0xE1, 0x5D, 0x24, 0x2A, 0x29, 0x3B, 0x5E, + /* 0x60 - / ---- Ä ---- ---- ---- */ + 0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F, + /* 0x68 ---- , % _ > ? */ + 0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, + /* 0x70 ---- ---- ---- ---- ---- ---- ---- */ + 0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + /* 0x78 * ` : # @ ' = " */ + 0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, + /* 0x80 * a b c d e f g */ + 0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + /* 0x88 h i ---- ---- ---- */ + 0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1, + /* 0x90 ° j k l m n o p */ + 0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, + /* 0x98 q r ---- ---- */ + 0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07, + /* 0xA0 ~ s t u v w x */ + 0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + /* 0xA8 y z ---- ---- ---- ---- */ + 0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07, + /* 0xB0 ---- § ---- */ + 0x9B, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC, + /* 0xB8 ---- | ---- ---- ---- ---- */ + 0xAB, 0x07, 0xAA, 0x7C, 0x07, 0x07, 0x07, 0x07, + /* 0xC0 { A B C D E F G */ + 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + /* 0xC8 H I ---- ö ---- */ + 0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07, + /* 0xD0 } J K L M N O P */ + 0x7D, 0x4A, 0x4B, 0x4C, 
0x4D, 0x4E, 0x4F, 0x50, + /* 0xD8 Q R ---- ü */ + 0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98, + /* 0xE0 \ S T U V W X */ + 0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + /* 0xE8 Y Z ---- Ö ---- ---- ---- */ + 0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07, + /* 0xF0 0 1 2 3 4 5 6 7 */ + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */ + 0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07 +}; + + +/* + * EBCDIC 037/500 conversion table: + * from upper to lower case + */ +__u8 _ebc_tolower[256] = +{ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F, + 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0x8D, 0x8E, 0xAF, + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, + 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 
0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFF +}; + + +/* + * EBCDIC 037/500 conversion table: + * from lower to upper case + */ +__u8 _ebc_toupper[256] = +{ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xAD, 0xAE, 0x8F, + 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F, + 0xA0, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, + 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF +}; + +EXPORT_SYMBOL(_ascebc_500); +EXPORT_SYMBOL(_ebcasc_500); +EXPORT_SYMBOL(_ascebc); +EXPORT_SYMBOL(_ebcasc); 
+EXPORT_SYMBOL(_ebc_tolower); +EXPORT_SYMBOL(_ebc_toupper); + diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S new file mode 100644 index 00000000..74ee563f --- /dev/null +++ b/arch/s390/kernel/entry.S @@ -0,0 +1,967 @@ +/* + * arch/s390/kernel/entry.S + * S390 low-level entry points. + * + * Copyright (C) IBM Corp. 1999,2012 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Hartmut Penner (hp@de.ibm.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/cache.h> +#include <asm/errno.h> +#include <asm/ptrace.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> +#include <asm/page.h> + +/* + * Offsets of the saved general purpose registers relative to __PT_GPRS. + * Each register takes 4 bytes in this 31 bit entry code, so register n + * is at __PT_GPRS + 4 * n. + */ +__PT_R0 = __PT_GPRS +__PT_R1 = __PT_GPRS + 4 +__PT_R2 = __PT_GPRS + 8 +__PT_R3 = __PT_GPRS + 12 +__PT_R4 = __PT_GPRS + 16 +__PT_R5 = __PT_GPRS + 20 +__PT_R6 = __PT_GPRS + 24 +__PT_R7 = __PT_GPRS + 28 +__PT_R8 = __PT_GPRS + 32 +__PT_R9 = __PT_GPRS + 36 +__PT_R10 = __PT_GPRS + 40 +__PT_R11 = __PT_GPRS + 44 +__PT_R12 = __PT_GPRS + 48 +__PT_R13 = __PT_GPRS + 52 +__PT_R14 = __PT_GPRS + 56 +__PT_R15 = __PT_GPRS + 60 + +_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ + _TIF_MCCK_PENDING | _TIF_PER_TRAP ) +_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ + _TIF_MCCK_PENDING) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) + +STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER +STACK_SIZE = 1 << STACK_SHIFT + +#define BASED(name) name-system_call(%r13) + + .macro TRACE_IRQS_ON +#ifdef CONFIG_TRACE_IRQFLAGS + basr %r2,%r0 + l %r1,BASED(.Lhardirqs_on) + basr %r14,%r1 # call trace_hardirqs_on_caller +#endif + .endm + + .macro TRACE_IRQS_OFF +#ifdef CONFIG_TRACE_IRQFLAGS + basr %r2,%r0 + l %r1,BASED(.Lhardirqs_off) + basr %r14,%r1 # call trace_hardirqs_off_caller +#endif + .endm + + .macro 
LOCKDEP_SYS_EXIT +#ifdef CONFIG_LOCKDEP + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jz .+10 + l %r1,BASED(.Llockdep_sys_exit) + basr %r14,%r1 # call lockdep_sys_exit +#endif + .endm + +/* + * CHECK_STACK: with CONFIG_CHECK_STACK, branch to stack_overflow with the + * save area address in %r14 if none of the stack pointer bits selected + * by (stacksize - CONFIG_STACK_GUARD) is set. + */ + .macro CHECK_STACK stacksize,savearea +#ifdef CONFIG_CHECK_STACK + tml %r15,\stacksize - CONFIG_STACK_GUARD + la %r14,\savearea + jz stack_overflow +#endif + .endm + +/* + * SWITCH_ASYNC: choose the stack for an asynchronous interrupt and set + * up %r15 (stack frame) and %r11 (pt_regs). If the interrupted code was + * inside the critical section, cleanup_critical is called first. + */ + .macro SWITCH_ASYNC savearea,stack,shift + tmh %r8,0x0001 # interrupting from user ? + jnz 1f + lr %r14,%r9 + sl %r14,BASED(.Lcritical_start) + cl %r14,BASED(.Lcritical_length) + jhe 0f + la %r11,\savearea # inside critical section, do cleanup + bras %r14,cleanup_critical + tmh %r8,0x0001 # retest problem state after cleanup + jnz 1f +0: l %r14,\stack # are we already on the target stack? + slr %r14,%r15 + sra %r14,\shift + jnz 1f + CHECK_STACK 1<<\shift,\savearea + j 2f +1: l %r15,\stack # load target stack +2: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + .endm + +/* + * ADD64: add the 64 bit value at \timer to the register pair \high,\low. + * brc 12 skips the increment of \high if the low word add had no carry. + */ + .macro ADD64 high,low,timer + al \high,\timer + al \low,4+\timer + brc 12,.+8 + ahi \high,1 + .endm + +/* + * SUB64: subtract the 64 bit value at \timer from the register pair + * \high,\low. brc 3 skips the decrement of \high if the low word + * subtract had no borrow. + */ + .macro SUB64 high,low,timer + sl \high,\timer + sl \low,4+\timer + brc 3,.+8 + ahi \high,-1 + .endm + +/* + * UPDATE_VTIME: + * user_timer += exit_timer - \enter_timer + * system_timer += last_update_timer - exit_timer + * last_update_timer = \enter_timer + */ + .macro UPDATE_VTIME high,low,enter_timer + lm \high,\low,__LC_EXIT_TIMER + SUB64 \high,\low,\enter_timer + ADD64 \high,\low,__LC_USER_TIMER + stm \high,\low,__LC_USER_TIMER + lm \high,\low,__LC_LAST_UPDATE_TIMER + SUB64 \high,\low,__LC_EXIT_TIMER + ADD64 \high,\low,__LC_SYSTEM_TIMER + stm \high,\low,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer + .endm + +/* + * REENABLE_IRQS: store %r8 to __LC_RETURN_PSW, clear bit 0x40 of its + * first byte and load that byte as the new system mask. + */ + .macro REENABLE_IRQS + st %r8,__LC_RETURN_PSW + ni __LC_RETURN_PSW,0xbf + ssm __LC_RETURN_PSW + .endm + + .section .kprobes.text, "ax" + +/* + * Scheduler resume function, called by switch_to + * gpr2 = (task_struct *) prev + * gpr3 = (task_struct *) next + * Returns: + * gpr2 = prev + */ +ENTRY(__switch_to) + l %r4,__THREAD_info(%r2) # get thread_info of prev + l %r5,__THREAD_info(%r3) # get thread_info of next + tm 
__TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending? + jz 0f + ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev + oi __TI_flags+3(%r5),_TIF_MCCK_PENDING # set it in next +0: stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + st %r15,__THREAD_ksp(%r2) # store kernel stack of prev + l %r15,__THREAD_ksp(%r3) # load kernel stack of next + lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task + st %r3,__LC_CURRENT # store task struct of next + mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next + st %r5,__LC_THREAD_INFO # store thread info of next + ahi %r5,STACK_SIZE # end of kernel stack of next + st %r5,__LC_KERNEL_STACK # store end of kernel stack + br %r14 + +__critical_start: +/* + * SVC interrupt handler routine. System calls are synchronous events and + * are executed with interrupts enabled. + */ + +ENTRY(system_call) + stpt __LC_SYNC_ENTER_TIMER +sysc_stm: + stm %r8,%r15,__LC_SAVE_AREA_SYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 +sysc_per: + l %r15,__LC_KERNEL_STACK + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs +sysc_vtime: + UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(8,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC +sysc_do_svc: + oi __TI_flags+3(%r12),_TIF_SYSCALL + lh %r8,__PT_INT_CODE+2(%r11) + sla %r8,2 # shift and test for svc0 + jnz sysc_nr_ok + # svc 0: system call number in %r1 + cl %r1,BASED(.Lnr_syscalls) + jnl sysc_nr_ok + sth %r1,__PT_INT_CODE+2(%r11) + lr %r8,%r1 + sla %r8,2 +sysc_nr_ok: + l %r10,BASED(.Lsys_call_table) # 31 bit system call table + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + st %r2,__PT_ORIG_GPR2(%r11) + st %r7,STACK_FRAME_OVERHEAD(%r15) + l %r9,0(%r8,%r10) # get system call addr. 
+ tm __TI_flags+2(%r12),_TIF_TRACE >> 8 + jnz sysc_tracesys + basr %r14,%r9 # call sys_xxxx + st %r2,__PT_R2(%r11) # store return value + +sysc_return: + LOCKDEP_SYS_EXIT +sysc_tif: + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno sysc_restore + tm __TI_flags+3(%r12),_TIF_WORK_SVC + jnz sysc_work # check for work + ni __TI_flags+3(%r12),255-_TIF_SYSCALL +sysc_restore: + mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_PSW +sysc_done: + +# +# One of the work bits is on. Find out which one. +# +sysc_work: + tm __TI_flags+3(%r12),_TIF_MCCK_PENDING + jo sysc_mcck_pending + tm __TI_flags+3(%r12),_TIF_NEED_RESCHED + jo sysc_reschedule + tm __TI_flags+3(%r12),_TIF_SIGPENDING + jo sysc_sigpending + tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME + jo sysc_notify_resume + tm __TI_flags+3(%r12),_TIF_PER_TRAP + jo sysc_singlestep + j sysc_return # beware of critical section cleanup + +# +# _TIF_NEED_RESCHED is set, call schedule +# +sysc_reschedule: + l %r1,BASED(.Lschedule) + la %r14,BASED(sysc_return) + br %r1 # call schedule + +# +# _TIF_MCCK_PENDING is set, call handler +# +sysc_mcck_pending: + l %r1,BASED(.Lhandle_mcck) + la %r14,BASED(sysc_return) + br %r1 # TIF bit will be cleared by handler + +# +# _TIF_SIGPENDING is set, call do_signal +# +sysc_sigpending: + ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP + lr %r2,%r11 # pass pointer to pt_regs + l %r1,BASED(.Ldo_signal) + basr %r14,%r1 # call do_signal + tm __TI_flags+3(%r12),_TIF_SYSCALL + jno sysc_return + lm %r2,%r7,__PT_R2(%r11) # load svc arguments + xr %r8,%r8 # svc 0 returns -ENOSYS + clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2) + jnl sysc_nr_ok # invalid svc number -> do svc 0 + lh %r8,__PT_INT_CODE+2(%r11) # load new svc number + sla %r8,2 + j sysc_nr_ok # restart svc + +# +# _TIF_NOTIFY_RESUME is set, call do_notify_resume +# +sysc_notify_resume: + lr %r2,%r11 # pass pointer to pt_regs + l %r1,BASED(.Ldo_notify_resume) + la 
%r14,BASED(sysc_return) + br %r1 # call do_notify_resume + +# +# _TIF_PER_TRAP is set, call do_per_trap +# +sysc_singlestep: + ni __TI_flags+3(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) + lr %r2,%r11 # pass pointer to pt_regs + l %r1,BASED(.Ldo_per_trap) + la %r14,BASED(sysc_return) + br %r1 # call do_per_trap + +# +# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before +# and after the system call +# +sysc_tracesys: + l %r1,BASED(.Ltrace_enter) + lr %r2,%r11 # pass pointer to pt_regs + la %r3,0 + xr %r0,%r0 + icm %r0,3,__PT_INT_CODE+2(%r11) + st %r0,__PT_R2(%r11) + basr %r14,%r1 # call do_syscall_trace_enter + cl %r2,BASED(.Lnr_syscalls) + jnl sysc_tracenogo + lr %r8,%r2 + sll %r8,2 + l %r9,0(%r8,%r10) +sysc_tracego: + lm %r3,%r7,__PT_R3(%r11) + st %r7,STACK_FRAME_OVERHEAD(%r15) + l %r2,__PT_ORIG_GPR2(%r11) + basr %r14,%r9 # call sys_xxx + st %r2,__PT_R2(%r11) # store return value +sysc_tracenogo: + tm __TI_flags+2(%r12),_TIF_TRACE >> 8 + jz sysc_return + l %r1,BASED(.Ltrace_exit) + lr %r2,%r11 # pass pointer to pt_regs + la %r14,BASED(sysc_return) + br %r1 # call do_syscall_trace_exit + +# +# a new process exits the kernel with ret_from_fork +# +ENTRY(ret_from_fork) + la %r11,STACK_FRAME_OVERHEAD(%r15) + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? 
+ jo 0f + st %r15,__PT_R15(%r11) # store stack pointer for new kthread +0: l %r1,BASED(.Lschedule_tail) + basr %r14,%r1 # call schedule_tail + TRACE_IRQS_ON + ssm __LC_SVC_NEW_PSW # reenable interrupts + j sysc_tracenogo + +# +# kernel_execve function needs to deal with pt_regs that is not +# at the usual place +# +ENTRY(kernel_execve) + stm %r12,%r15,48(%r15) + lr %r14,%r15 + l %r13,__LC_SVC_NEW_PSW+4 + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + st %r14,__SF_BACKCHAIN(%r15) + la %r12,STACK_FRAME_OVERHEAD(%r15) + xc 0(__PT_SIZE,%r12),0(%r12) + l %r1,BASED(.Ldo_execve) + lr %r5,%r12 + basr %r14,%r1 # call do_execve + ltr %r2,%r2 + je 0f + ahi %r15,(STACK_FRAME_OVERHEAD + __PT_SIZE) + lm %r12,%r15,48(%r15) + br %r14 + # execve succeeded. +0: ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + l %r15,__LC_KERNEL_STACK # load ksp + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + mvc 0(__PT_SIZE,%r11),0(%r12) # copy pt_regs + l %r12,__LC_THREAD_INFO + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + ssm __LC_SVC_NEW_PSW # reenable interrupts + l %r1,BASED(.Lexecve_tail) + basr %r14,%r1 # call execve_tail + j sysc_return + +/* + * Program check handler routine + */ + +ENTRY(pgm_check_handler) + stpt __LC_SYNC_ENTER_TIMER + stm %r8,%r15,__LC_SAVE_AREA_SYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_PGM_OLD_PSW + tmh %r8,0x0001 # test problem state bit + jnz 1f # -> fault in user space + tmh %r8,0x4000 # PER bit set in old PSW ? 
+ jnz 0f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz pgm_svcper # -> single stepped svc +0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + j 2f +1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER + l %r15,__LC_KERNEL_STACK +2: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC + mvc __PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE + tm __LC_PGM_ILC+3,0x80 # check for per exception + jz 0f + l %r1,__TI_task(%r12) + tmh %r8,0x0001 # kernel per event ? + jz pgm_kprobe + oi __TI_flags+3(%r12),_TIF_PER_TRAP + mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE + mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID +0: REENABLE_IRQS + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ljump_table) + la %r10,0x7f + n %r10,__PT_INT_CODE(%r11) + je sysc_return + sll %r10,2 + l %r1,0(%r10,%r1) # load address of handler routine + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # branch to interrupt-handler + j sysc_return + +# +# PER event in supervisor state, must be kprobes +# +pgm_kprobe: + REENABLE_IRQS + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_per_trap) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_per_trap + j sysc_return + +# +# single stepped system call +# +pgm_svcper: + oi __TI_flags+3(%r12),_TIF_PER_TRAP + mvc __LC_RETURN_PSW(4),__LC_SVC_NEW_PSW + mvc __LC_RETURN_PSW+4(4),BASED(.Lsysc_per) + lpsw __LC_RETURN_PSW # branch to sysc_per and enable irqs + +/* + * IO interrupt handler routine + */ + +ENTRY(io_int_handler) + stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER + stm %r8,%r15,__LC_SAVE_AREA_ASYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_IO_OLD_PSW + tmh %r8,0x0001 # interrupting from user ? 
+ jz io_skip + UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER +io_skip: + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC + stm %r8,%r9,__PT_PSW(%r11) + TRACE_IRQS_OFF + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_IRQ) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_IRQ +io_return: + LOCKDEP_SYS_EXIT + TRACE_IRQS_ON +io_tif: + tm __TI_flags+3(%r12),_TIF_WORK_INT + jnz io_work # there is work to do (signals etc.) +io_restore: + mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_PSW +io_done: + +# +# There is work todo, find out in which context we have been interrupted: +# 1) if we return to user space we can do all _TIF_WORK_INT work +# 2) if we return to kernel code and preemptive scheduling is enabled check +# the preemption counter and if it is zero call preempt_schedule_irq +# Before any work can be done, a switch to the kernel stack is required. +# +io_work: + tm __PT_PSW+1(%r11),0x01 # returning to user ? 
+ jo io_work_user # yes -> do resched & signal +#ifdef CONFIG_PREEMPT + # check for preemptive scheduling + icm %r0,15,__TI_precount(%r12) + jnz io_restore # preemption disabled + tm __TI_flags+3(%r12),_TIF_NEED_RESCHED + jno io_restore + # switch to kernel stack + l %r1,__PT_R15(%r11) + ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) + lr %r15,%r1 + # TRACE_IRQS_ON already done at io_return, call + # TRACE_IRQS_OFF to keep things symmetrical + TRACE_IRQS_OFF + l %r1,BASED(.Lpreempt_irq) + basr %r14,%r1 # call preempt_schedule_irq + j io_return +#else + j io_restore +#endif + +# +# Need to do work before returning to userspace, switch to kernel stack +# +io_work_user: + l %r1,__LC_KERNEL_STACK + ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) + lr %r15,%r1 + +# +# One of the work bits is on. Find out which one. +# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED +# and _TIF_MCCK_PENDING +# +io_work_tif: + tm __TI_flags+3(%r12),_TIF_MCCK_PENDING + jo io_mcck_pending + tm __TI_flags+3(%r12),_TIF_NEED_RESCHED + jo io_reschedule + tm __TI_flags+3(%r12),_TIF_SIGPENDING + jo io_sigpending + tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME + jo io_notify_resume + j io_return # beware of critical section cleanup + +# +# _TIF_MCCK_PENDING is set, call handler +# +io_mcck_pending: + # TRACE_IRQS_ON already done at io_return + l %r1,BASED(.Lhandle_mcck) + basr %r14,%r1 # TIF bit will be cleared by handler + TRACE_IRQS_OFF + j io_return + +# +# _TIF_NEED_RESCHED is set, call schedule +# +io_reschedule: + # TRACE_IRQS_ON already done at io_return + l %r1,BASED(.Lschedule) + ssm __LC_SVC_NEW_PSW # reenable interrupts + basr %r14,%r1 # call scheduler + ssm __LC_PGM_NEW_PSW # disable I/O and ext. 
interrupts + TRACE_IRQS_OFF + j io_return + +# +# _TIF_SIGPENDING is set, call do_signal +# +io_sigpending: + # TRACE_IRQS_ON already done at io_return + l %r1,BASED(.Ldo_signal) + ssm __LC_SVC_NEW_PSW # reenable interrupts + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_signal + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j io_return + +# +# _TIF_NOTIFY_RESUME is set, call do_notify_resume +# +io_notify_resume: + # TRACE_IRQS_ON already done at io_return + l %r1,BASED(.Ldo_notify_resume) + ssm __LC_SVC_NEW_PSW # reenable interrupts + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_notify_resume + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j io_return + +/* + * External interrupt handler routine + */ + +ENTRY(ext_int_handler) + stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER + stm %r8,%r15,__LC_SAVE_AREA_ASYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_EXT_OLD_PSW + tmh %r8,0x0001 # interrupting from user ? + jz ext_skip + UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER +ext_skip: + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC + stm %r8,%r9,__PT_PSW(%r11) + TRACE_IRQS_OFF + lr %r2,%r11 # pass pointer to pt_regs + l %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code + l %r4,__LC_EXT_PARAMS # get external parameters + l %r1,BASED(.Ldo_extint) + basr %r14,%r1 # call do_extint + j io_return + +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. 
+ */ +ENTRY(psw_idle) + st %r4,__SF_EMPTY(%r15) + basr %r1,0 + la %r1,psw_idle_lpsw+4-.(%r1) + st %r1,__SF_EMPTY+4(%r15) + oi __SF_EMPTY+4(%r15),0x80 + la %r1,.Lvtimer_max-psw_idle_lpsw-4(%r1) + stck __IDLE_ENTER(%r2) + ltr %r5,%r5 + stpt __VQ_IDLE_ENTER(%r3) + jz psw_idle_lpsw + spt 0(%r1) +psw_idle_lpsw: + lpsw __SF_EMPTY(%r15) + br %r14 +psw_idle_end: + +__critical_end: + +/* + * Machine check handler routines + */ + +ENTRY(mcck_int_handler) + stck __LC_MCCK_CLOCK + spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer + lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_MCK_OLD_PSW + tm __LC_MCCK_CODE,0x80 # system damage? + jo mcck_panic # yes -> rest of mcck code invalid + la %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) + tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? + jo 3f + la %r14,__LC_SYNC_ENTER_TIMER + clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER + jl 0f + la %r14,__LC_ASYNC_ENTER_TIMER +0: clc 0(8,%r14),__LC_EXIT_TIMER + jl 1f + la %r14,__LC_EXIT_TIMER +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f + la %r14,__LC_LAST_UPDATE_TIMER +2: spt 0(%r14) + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) +3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_panic # no -> skip cleanup critical + tmh %r8,0x0001 # interrupting from user ? + jz mcck_skip + UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER +mcck_skip: + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT + mvc __PT_R0(64,%r11),__LC_GPREGS_SAVE_AREA + stm %r8,%r9,__PT_PSW(%r11) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_machine_check) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call s390_do_machine_check + tm __PT_PSW+1(%r11),0x01 # returning to user ? 
+ jno mcck_return + l %r1,__LC_KERNEL_STACK # switch to kernel stack + ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) + lr %r15,%r1 + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off + tm __TI_flags+3(%r12),_TIF_MCCK_PENDING + jno mcck_return + TRACE_IRQS_OFF + l %r1,BASED(.Lhandle_mcck) + basr %r14,%r1 # call s390_handle_mcck + TRACE_IRQS_ON +mcck_return: + mvc __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW + tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? + jno 0f + lm %r0,%r15,__PT_R0(%r11) + stpt __LC_EXIT_TIMER + lpsw __LC_RETURN_MCCK_PSW +0: lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_MCCK_PSW + +mcck_panic: + l %r14,__LC_PANIC_STACK + slr %r14,%r15 + sra %r14,PAGE_SHIFT + jz 0f + l %r15,__LC_PANIC_STACK +0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j mcck_skip + +# +# PSW restart interrupt handler +# +ENTRY(restart_int_handler) + st %r15,__LC_SAVE_AREA_RESTART + l %r15,__LC_RESTART_STACK + ahi %r15,-__PT_SIZE # create pt_regs on stack + xc 0(__PT_SIZE,%r15),0(%r15) + stm %r0,%r14,__PT_R0(%r15) + mvc __PT_R15(4,%r15),__LC_SAVE_AREA_RESTART + mvc __PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw + ahi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + lm %r1,%r3,__LC_RESTART_FN # load fn, parm & source cpu + ltr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,1 # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + lh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,5 # sigp stop to current cpu + brc 2,2b +3: j 3b + + .section .kprobes.text, "ax" + +#ifdef CONFIG_CHECK_STACK +/* + * The synchronous or the asynchronous stack overflowed. We are dead. + * No need to properly save the registers, we are going to panic anyway. 
+ * Setup a pt_regs so that show_trace can provide a good call trace. + */ +stack_overflow: + l %r11,__LC_PANIC_STACK # change to panic stack + ahi %r11,-__PT_SIZE # create pt_regs + stm %r0,%r7,__PT_R0(%r11) + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_R8(32,%r11),0(%r14) + lr %r15,%r11 + ahi %r15,-STACK_FRAME_OVERHEAD + l %r1,BASED(1f) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + lr %r2,%r11 # pass pointer to pt_regs + br %r1 # branch to kernel_stack_overflow +1: .long kernel_stack_overflow +#endif + +cleanup_table: + .long system_call + 0x80000000 + .long sysc_do_svc + 0x80000000 + .long sysc_tif + 0x80000000 + .long sysc_restore + 0x80000000 + .long sysc_done + 0x80000000 + .long io_tif + 0x80000000 + .long io_restore + 0x80000000 + .long io_done + 0x80000000 + .long psw_idle + 0x80000000 + .long psw_idle_end + 0x80000000 + +cleanup_critical: + cl %r9,BASED(cleanup_table) # system_call + jl 0f + cl %r9,BASED(cleanup_table+4) # sysc_do_svc + jl cleanup_system_call + cl %r9,BASED(cleanup_table+8) # sysc_tif + jl 0f + cl %r9,BASED(cleanup_table+12) # sysc_restore + jl cleanup_sysc_tif + cl %r9,BASED(cleanup_table+16) # sysc_done + jl cleanup_sysc_restore + cl %r9,BASED(cleanup_table+20) # io_tif + jl 0f + cl %r9,BASED(cleanup_table+24) # io_restore + jl cleanup_io_tif + cl %r9,BASED(cleanup_table+28) # io_done + jl cleanup_io_restore + cl %r9,BASED(cleanup_table+32) # psw_idle + jl 0f + cl %r9,BASED(cleanup_table+36) # psw_idle_end + jl cleanup_idle +0: br %r14 + +cleanup_system_call: + # check if stpt has been executed + cl %r9,BASED(cleanup_system_call_insn) + jh 0f + mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER + chi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER +0: # check if stm has been executed + cl %r9,BASED(cleanup_system_call_insn+4) + jh 0f + mvc __LC_SAVE_AREA_SYNC(32),0(%r11) +0: # set up saved registers r12, and r13 + st %r12,16(%r11) # r12 thread-info pointer + st %r13,20(%r11) # r13 literal-pool 
pointer + # check if the user time calculation has been done + cl %r9,BASED(cleanup_system_call_insn+8) + jh 0f + l %r10,__LC_EXIT_TIMER + l %r15,__LC_EXIT_TIMER+4 + SUB64 %r10,%r15,__LC_SYNC_ENTER_TIMER + ADD64 %r10,%r15,__LC_USER_TIMER + st %r10,__LC_USER_TIMER + st %r15,__LC_USER_TIMER+4 +0: # check if the system time calculation has been done + cl %r9,BASED(cleanup_system_call_insn+12) + jh 0f + l %r10,__LC_LAST_UPDATE_TIMER + l %r15,__LC_LAST_UPDATE_TIMER+4 + SUB64 %r10,%r15,__LC_EXIT_TIMER + ADD64 %r10,%r15,__LC_SYSTEM_TIMER + st %r10,__LC_SYSTEM_TIMER + st %r15,__LC_SYSTEM_TIMER+4 +0: # update accounting time stamp + mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + # set up saved register 11 + l %r15,__LC_KERNEL_STACK + ahi %r15,-__PT_SIZE + st %r15,12(%r11) # r11 pt_regs pointer + # fill pt_regs + mvc __PT_R8(32,%r15),__LC_SAVE_AREA_SYNC + stm %r0,%r7,__PT_R0(%r15) + mvc __PT_PSW(8,%r15),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r15),__LC_SVC_ILC + # setup saved register 15 + ahi %r15,-STACK_FRAME_OVERHEAD + st %r15,28(%r11) # r15 stack pointer + # set new psw address and exit + l %r9,BASED(cleanup_table+4) # sysc_do_svc + 0x80000000 + br %r14 +cleanup_system_call_insn: + .long system_call + 0x80000000 + .long sysc_stm + 0x80000000 + .long sysc_vtime + 0x80000000 + 36 + .long sysc_vtime + 0x80000000 + 76 + +cleanup_sysc_tif: + l %r9,BASED(cleanup_table+8) # sysc_tif + 0x80000000 + br %r14 + +cleanup_sysc_restore: + cl %r9,BASED(cleanup_sysc_restore_insn) + jhe 0f + l %r9,12(%r11) # get saved pointer to pt_regs + mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) + mvc 0(32,%r11),__PT_R8(%r9) + lm %r0,%r7,__PT_R0(%r9) +0: lm %r8,%r9,__LC_RETURN_PSW + br %r14 +cleanup_sysc_restore_insn: + .long sysc_done - 4 + 0x80000000 + +cleanup_io_tif: + l %r9,BASED(cleanup_table+20) # io_tif + 0x80000000 + br %r14 + +cleanup_io_restore: + cl %r9,BASED(cleanup_io_restore_insn) + jhe 0f + l %r9,12(%r11) # get saved r11 pointer to pt_regs + mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) + mvc 
0(32,%r11),__PT_R8(%r9) + lm %r0,%r7,__PT_R0(%r9) +0: lm %r8,%r9,__LC_RETURN_PSW + br %r14 +cleanup_io_restore_insn: + .long io_done - 4 + 0x80000000 + +cleanup_idle: + # copy interrupt clock & cpu timer + mvc __IDLE_EXIT(8,%r2),__LC_INT_CLOCK + mvc __VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER + chi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK + mvc __VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER +0: # check if stck has been executed + cl %r9,BASED(cleanup_idle_insn) + jhe 1f + mvc __IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2) + mvc __VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3) + j 2f +1: # check if the cpu timer has been reprogrammed + ltr %r5,%r5 + jz 2f + spt __VQ_IDLE_ENTER(%r3) +2: # account system time going idle + lm %r9,%r10,__LC_STEAL_TIMER + ADD64 %r9,%r10,__IDLE_ENTER(%r2) + SUB64 %r9,%r10,__LC_LAST_UPDATE_CLOCK + stm %r9,%r10,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2) + lm %r9,%r10,__LC_SYSTEM_TIMER + ADD64 %r9,%r10,__LC_LAST_UPDATE_TIMER + SUB64 %r9,%r10,__VQ_IDLE_ENTER(%r3) + stm %r9,%r10,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3) + # prepare return psw + n %r8,BASED(cleanup_idle_wait) # clear wait state bit + l %r9,24(%r11) # return from psw_idle + br %r14 +cleanup_idle_insn: + .long psw_idle_lpsw + 0x80000000 +cleanup_idle_wait: + .long 0xfffdffff + +/* + * Integer constants + */ + .align 4 +.Lnr_syscalls: + .long NR_syscalls +.Lvtimer_max: + .quad 0x7fffffffffffffff + +/* + * Symbol constants + */ +.Ldo_machine_check: .long s390_do_machine_check +.Lhandle_mcck: .long s390_handle_mcck +.Ldo_IRQ: .long do_IRQ +.Ldo_extint: .long do_extint +.Ldo_signal: .long do_signal +.Ldo_notify_resume: .long do_notify_resume +.Ldo_per_trap: .long do_per_trap +.Ldo_execve: .long do_execve +.Lexecve_tail: .long execve_tail +.Ljump_table: .long pgm_check_table +.Lschedule: .long schedule +#ifdef CONFIG_PREEMPT +.Lpreempt_irq: .long preempt_schedule_irq +#endif +.Ltrace_enter: .long do_syscall_trace_enter 
+.Ltrace_exit: .long do_syscall_trace_exit +.Lschedule_tail: .long schedule_tail +.Lsys_call_table: .long sys_call_table +.Lsysc_per: .long sysc_per + 0x80000000 +#ifdef CONFIG_TRACE_IRQFLAGS +.Lhardirqs_on: .long trace_hardirqs_on_caller +.Lhardirqs_off: .long trace_hardirqs_off_caller +#endif +#ifdef CONFIG_LOCKDEP +.Llockdep_sys_exit: .long lockdep_sys_exit +#endif +.Lcritical_start: .long __critical_start + 0x80000000 +.Lcritical_length: .long __critical_end - __critical_start + + .section .rodata, "a" +#define SYSCALL(esa,esame,emu) .long esa + .globl sys_call_table +sys_call_table: +#include "syscalls.S" +#undef SYSCALL diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h new file mode 100644 index 00000000..6cdddac9 --- /dev/null +++ b/arch/s390/kernel/entry.h @@ -0,0 +1,75 @@ +#ifndef _ENTRY_H +#define _ENTRY_H + +#include <linux/types.h> +#include <linux/signal.h> +#include <asm/ptrace.h> +#include <asm/cputime.h> +#include <asm/timer.h> + +extern void (*pgm_check_table[128])(struct pt_regs *); +extern void *restart_stack; + +void system_call(void); +void pgm_check_handler(void); +void ext_int_handler(void); +void io_int_handler(void); +void mcck_int_handler(void); +void restart_int_handler(void); +void restart_call_handler(void); +void psw_idle(struct s390_idle_data *, struct vtimer_queue *, + unsigned long, int); + +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); + +void do_protection_exception(struct pt_regs *regs); +void do_dat_exception(struct pt_regs *regs); +void do_asce_exception(struct pt_regs *regs); + +void do_per_trap(struct pt_regs *regs); +void syscall_trace(struct pt_regs *regs, int entryexit); +void kernel_stack_overflow(struct pt_regs * regs); +void do_signal(struct pt_regs *regs); +int handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); +void do_notify_resume(struct pt_regs *regs); + 
+struct ext_code; +void do_extint(struct pt_regs *regs, struct ext_code, unsigned int, unsigned long); +void do_restart(void); +void __init startup_init(void); +void die(struct pt_regs *regs, const char *str); + +void __init time_init(void); + +struct s390_mmap_arg_struct; +struct fadvise64_64_args; +struct old_sigaction; + +long sys_mmap2(struct s390_mmap_arg_struct __user *arg); +long sys_s390_ipc(uint call, int first, unsigned long second, + unsigned long third, void __user *ptr); +long sys_s390_personality(unsigned int personality); +long sys_s390_fadvise64(int fd, u32 offset_high, u32 offset_low, + size_t len, int advice); +long sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); +long sys_s390_fallocate(int fd, int mode, loff_t offset, u32 len_high, + u32 len_low); +long sys_fork(void); +long sys_clone(unsigned long newsp, unsigned long clone_flags, + int __user *parent_tidptr, int __user *child_tidptr); +long sys_vfork(void); +void execve_tail(void); +long sys_execve(const char __user *name, const char __user *const __user *argv, + const char __user *const __user *envp); +long sys_sigsuspend(int history0, int history1, old_sigset_t mask); +long sys_sigaction(int sig, const struct old_sigaction __user *act, + struct old_sigaction __user *oact); +long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss); +long sys_sigreturn(void); +long sys_rt_sigreturn(void); +long sys32_sigreturn(void); +long sys32_rt_sigreturn(void); + +#endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S new file mode 100644 index 00000000..4e1c292f --- /dev/null +++ b/arch/s390/kernel/entry64.S @@ -0,0 +1,1030 @@ +/* + * arch/s390/kernel/entry64.S + * S390 low-level entry points. + * + * Copyright (C) IBM Corp. 
1999,2012 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Hartmut Penner (hp@de.ibm.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/cache.h> +#include <asm/errno.h> +#include <asm/ptrace.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> +#include <asm/page.h> + +__PT_R0 = __PT_GPRS +__PT_R1 = __PT_GPRS + 8 +__PT_R2 = __PT_GPRS + 16 +__PT_R3 = __PT_GPRS + 24 +__PT_R4 = __PT_GPRS + 32 +__PT_R5 = __PT_GPRS + 40 +__PT_R6 = __PT_GPRS + 48 +__PT_R7 = __PT_GPRS + 56 +__PT_R8 = __PT_GPRS + 64 +__PT_R9 = __PT_GPRS + 72 +__PT_R10 = __PT_GPRS + 80 +__PT_R11 = __PT_GPRS + 88 +__PT_R12 = __PT_GPRS + 96 +__PT_R13 = __PT_GPRS + 104 +__PT_R14 = __PT_GPRS + 112 +__PT_R15 = __PT_GPRS + 120 + +STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER +STACK_SIZE = 1 << STACK_SHIFT + +_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ + _TIF_MCCK_PENDING | _TIF_PER_TRAP ) +_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ + _TIF_MCCK_PENDING) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) +_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) + +#define BASED(name) name-system_call(%r13) + + .macro TRACE_IRQS_ON +#ifdef CONFIG_TRACE_IRQFLAGS + basr %r2,%r0 + brasl %r14,trace_hardirqs_on_caller +#endif + .endm + + .macro TRACE_IRQS_OFF +#ifdef CONFIG_TRACE_IRQFLAGS + basr %r2,%r0 + brasl %r14,trace_hardirqs_off_caller +#endif + .endm + + .macro LOCKDEP_SYS_EXIT +#ifdef CONFIG_LOCKDEP + tm __PT_PSW+1(%r11),0x01 # returning to user ? 
+ jz .+10 + brasl %r14,lockdep_sys_exit +#endif + .endm + + .macro SPP newpp +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_SPP + jz .+8 + .insn s,0xb2800000,\newpp +#endif + .endm + + .macro HANDLE_SIE_INTERCEPT scratch +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + tm __TI_flags+6(%r12),_TIF_SIE>>8 + jz .+42 + tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_SPP + jz .+8 + .insn s,0xb2800000,BASED(.Lhost_id) # set host id + lgr \scratch,%r9 + slg \scratch,BASED(.Lsie_loop) + clg \scratch,BASED(.Lsie_length) + jhe .+10 + lg %r9,BASED(.Lsie_loop) +#endif + .endm + + .macro CHECK_STACK stacksize,savearea +#ifdef CONFIG_CHECK_STACK + tml %r15,\stacksize - CONFIG_STACK_GUARD + lghi %r14,\savearea + jz stack_overflow +#endif + .endm + + .macro SWITCH_ASYNC savearea,stack,shift + tmhh %r8,0x0001 # interrupting from user ? + jnz 1f + lgr %r14,%r9 + slg %r14,BASED(.Lcritical_start) + clg %r14,BASED(.Lcritical_length) + jhe 0f + lghi %r11,\savearea # inside critical section, do cleanup + brasl %r14,cleanup_critical + tmhh %r8,0x0001 # retest problem state after cleanup + jnz 1f +0: lg %r14,\stack # are we already on the target stack? 
+ slgr %r14,%r15 + srag %r14,%r14,\shift + jnz 1f + CHECK_STACK 1<<\shift,\savearea + j 2f +1: lg %r15,\stack # load target stack +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + .endm + + .macro UPDATE_VTIME scratch,enter_timer + lg \scratch,__LC_EXIT_TIMER + slg \scratch,\enter_timer + alg \scratch,__LC_USER_TIMER + stg \scratch,__LC_USER_TIMER + lg \scratch,__LC_LAST_UPDATE_TIMER + slg \scratch,__LC_EXIT_TIMER + alg \scratch,__LC_SYSTEM_TIMER + stg \scratch,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer + .endm + + .macro LAST_BREAK scratch + srag \scratch,%r10,23 + jz .+10 + stg %r10,__TI_last_break(%r12) + .endm + + .macro REENABLE_IRQS + stg %r8,__LC_RETURN_PSW + ni __LC_RETURN_PSW,0xbf + ssm __LC_RETURN_PSW + .endm + + .section .kprobes.text, "ax" + +/* + * Scheduler resume function, called by switch_to + * gpr2 = (task_struct *) prev + * gpr3 = (task_struct *) next + * Returns: + * gpr2 = prev + */ +ENTRY(__switch_to) + lg %r4,__THREAD_info(%r2) # get thread_info of prev + lg %r5,__THREAD_info(%r3) # get thread_info of next + tm __TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending? + jz 0f + ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev + oi __TI_flags+7(%r5),_TIF_MCCK_PENDING # set it in next +0: stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev + lg %r15,__THREAD_ksp(%r3) # load kernel stack of next + lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task + stg %r3,__LC_CURRENT # store task struct of next + mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next + stg %r5,__LC_THREAD_INFO # store thread info of next + aghi %r5,STACK_SIZE # end of kernel stack of next + stg %r5,__LC_KERNEL_STACK # store end of kernel stack + br %r14 + +__critical_start: +/* + * SVC interrupt handler routine. 
System calls are synchronous events and + * are executed with interrupts enabled. + */ + +ENTRY(system_call) + stpt __LC_SYNC_ENTER_TIMER +sysc_stmg: + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call +sysc_per: + lg %r15,__LC_KERNEL_STACK + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs +sysc_vtime: + UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER + LAST_BREAK %r13 + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC +sysc_do_svc: + oi __TI_flags+7(%r12),_TIF_SYSCALL + llgh %r8,__PT_INT_CODE+2(%r11) + slag %r8,%r8,2 # shift and test for svc 0 + jnz sysc_nr_ok + # svc 0: system call number in %r1 + llgfr %r1,%r1 # clear high word in r1 + cghi %r1,NR_syscalls + jnl sysc_nr_ok + sth %r1,__PT_INT_CODE+2(%r11) + slag %r8,%r1,2 +sysc_nr_ok: + larl %r10,sys_call_table # 64 bit system call table +#ifdef CONFIG_COMPAT + tm __TI_flags+5(%r12),(_TIF_31BIT>>16) + jno sysc_noemu + larl %r10,sys_call_table_emu # 31 bit system call table +sysc_noemu: +#endif + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stg %r2,__PT_ORIG_GPR2(%r11) + stg %r7,STACK_FRAME_OVERHEAD(%r15) + lgf %r9,0(%r8,%r10) # get system call add. + tm __TI_flags+6(%r12),_TIF_TRACE >> 8 + jnz sysc_tracesys + basr %r14,%r9 # call sys_xxxx + stg %r2,__PT_R2(%r11) # store return value + +sysc_return: + LOCKDEP_SYS_EXIT +sysc_tif: + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno sysc_restore + tm __TI_flags+7(%r12),_TIF_WORK_SVC + jnz sysc_work # check for work + ni __TI_flags+7(%r12),255-_TIF_SYSCALL +sysc_restore: + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_PSW +sysc_done: + +# +# One of the work bits is on. 
Find out which one. +# +sysc_work: + tm __TI_flags+7(%r12),_TIF_MCCK_PENDING + jo sysc_mcck_pending + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED + jo sysc_reschedule + tm __TI_flags+7(%r12),_TIF_SIGPENDING + jo sysc_sigpending + tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME + jo sysc_notify_resume + tm __TI_flags+7(%r12),_TIF_PER_TRAP + jo sysc_singlestep + j sysc_return # beware of critical section cleanup + +# +# _TIF_NEED_RESCHED is set, call schedule +# +sysc_reschedule: + larl %r14,sysc_return + jg schedule + +# +# _TIF_MCCK_PENDING is set, call handler +# +sysc_mcck_pending: + larl %r14,sysc_return + jg s390_handle_mcck # TIF bit will be cleared by handler + +# +# _TIF_SIGPENDING is set, call do_signal +# +sysc_sigpending: + ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_signal + tm __TI_flags+7(%r12),_TIF_SYSCALL + jno sysc_return + lmg %r2,%r7,__PT_R2(%r11) # load svc arguments + lghi %r8,0 # svc 0 returns -ENOSYS + lh %r1,__PT_INT_CODE+2(%r11) # load new svc number + cghi %r1,NR_syscalls + jnl sysc_nr_ok # invalid svc number -> do svc 0 + slag %r8,%r1,2 + j sysc_nr_ok # restart svc + +# +# _TIF_NOTIFY_RESUME is set, call do_notify_resume +# +sysc_notify_resume: + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return + jg do_notify_resume + +# +# _TIF_PER_TRAP is set, call do_per_trap +# +sysc_singlestep: + ni __TI_flags+7(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return + jg do_per_trap + +# +# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before +# and after the system call +# +sysc_tracesys: + lgr %r2,%r11 # pass pointer to pt_regs + la %r3,0 + llgh %r0,__PT_INT_CODE+2(%r11) + stg %r0,__PT_R2(%r11) + brasl %r14,do_syscall_trace_enter + lghi %r0,NR_syscalls + clgr %r0,%r2 + jnh sysc_tracenogo + sllg %r8,%r2,2 + lgf %r9,0(%r8,%r10) +sysc_tracego: + lmg %r3,%r7,__PT_R3(%r11) + stg %r7,STACK_FRAME_OVERHEAD(%r15) 
+ lg %r2,__PT_ORIG_GPR2(%r11) + basr %r14,%r9 # call sys_xxx + stg %r2,__PT_R2(%r11) # store return value +sysc_tracenogo: + tm __TI_flags+6(%r12),_TIF_TRACE >> 8 + jz sysc_return + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return + jg do_syscall_trace_exit + +# +# a new process exits the kernel with ret_from_fork +# +ENTRY(ret_from_fork) + la %r11,STACK_FRAME_OVERHEAD(%r15) + lg %r12,__LC_THREAD_INFO + tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? + jo 0f + stg %r15,__PT_R15(%r11) # store stack pointer for new kthread +0: brasl %r14,schedule_tail + TRACE_IRQS_ON + ssm __LC_SVC_NEW_PSW # reenable interrupts + j sysc_tracenogo + +# +# kernel_execve function needs to deal with pt_regs that is not +# at the usual place +# +ENTRY(kernel_execve) + stmg %r12,%r15,96(%r15) + lgr %r14,%r15 + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + stg %r14,__SF_BACKCHAIN(%r15) + la %r12,STACK_FRAME_OVERHEAD(%r15) + xc 0(__PT_SIZE,%r12),0(%r12) + lgr %r5,%r12 + brasl %r14,do_execve + ltgfr %r2,%r2 + je 0f + aghi %r15,(STACK_FRAME_OVERHEAD + __PT_SIZE) + lmg %r12,%r15,96(%r15) + br %r14 + # execve succeeded. +0: ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + lg %r15,__LC_KERNEL_STACK # load ksp + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + mvc 0(__PT_SIZE,%r11),0(%r12) # copy pt_regs + lg %r12,__LC_THREAD_INFO + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + ssm __LC_SVC_NEW_PSW # reenable interrupts + brasl %r14,execve_tail + j sysc_return + +/* + * Program check handler routine + */ + +ENTRY(pgm_check_handler) + stpt __LC_SYNC_ENTER_TIMER + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_PGM_OLD_PSW + HANDLE_SIE_INTERCEPT %r14 + tmhh %r8,0x0001 # test problem state bit + jnz 1f # -> fault in user space + tmhh %r8,0x4000 # PER bit set in old PSW ? 
+ jnz 0f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz pgm_svcper # -> single stepped svc +0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + j 2f +1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER + LAST_BREAK %r14 + lg %r15,__LC_KERNEL_STACK +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC + mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE + stg %r10,__PT_ARGS(%r11) + tm __LC_PGM_ILC+3,0x80 # check for per exception + jz 0f + lg %r1,__TI_task(%r12) + tmhh %r8,0x0001 # kernel per event ? + jz pgm_kprobe + oi __TI_flags+7(%r12),_TIF_PER_TRAP + mvc __THREAD_per_address(8,%r1),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE + mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID +0: REENABLE_IRQS + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + larl %r1,pgm_check_table + llgh %r10,__PT_INT_CODE+2(%r11) + nill %r10,0x007f + sll %r10,3 + je sysc_return + lg %r1,0(%r10,%r1) # load address of handler routine + lgr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # branch to interrupt-handler + j sysc_return + +# +# PER event in supervisor state, must be kprobes +# +pgm_kprobe: + REENABLE_IRQS + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_per_trap + j sysc_return + +# +# single stepped system call +# +pgm_svcper: + oi __TI_flags+7(%r12),_TIF_PER_TRAP + mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW + larl %r14,sysc_per + stg %r14,__LC_RETURN_PSW+8 + lpswe __LC_RETURN_PSW # branch to sysc_per and enable irqs + +/* + * IO interrupt handler routine + */ +ENTRY(io_int_handler) + stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_IO_OLD_PSW + HANDLE_SIE_INTERCEPT %r14 
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + tmhh %r8,0x0001 # interrupting from user? + jz io_skip + UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER + LAST_BREAK %r14 +io_skip: + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) + TRACE_IRQS_OFF + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_IRQ +io_return: + LOCKDEP_SYS_EXIT + TRACE_IRQS_ON +io_tif: + tm __TI_flags+7(%r12),_TIF_WORK_INT + jnz io_work # there is work to do (signals etc.) +io_restore: + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_PSW +io_done: + +# +# There is work todo, find out in which context we have been interrupted: +# 1) if we return to user space we can do all _TIF_WORK_INT work +# 2) if we return to kernel code and kvm is enabled check if we need to +# modify the psw to leave SIE +# 3) if we return to kernel code and preemptive scheduling is enabled check +# the preemption counter and if it is zero call preempt_schedule_irq +# Before any work can be done, a switch to the kernel stack is required. +# +io_work: + tm __PT_PSW+1(%r11),0x01 # returning to user ? 
+ jo io_work_user # yes -> do resched & signal +#ifdef CONFIG_PREEMPT + # check for preemptive scheduling + icm %r0,15,__TI_precount(%r12) + jnz io_restore # preemption is disabled + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED + jno io_restore + # switch to kernel stack + lg %r1,__PT_R15(%r11) + aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) + lgr %r15,%r1 + # TRACE_IRQS_ON already done at io_return, call + # TRACE_IRQS_OFF to keep things symmetrical + TRACE_IRQS_OFF + brasl %r14,preempt_schedule_irq + j io_return +#else + j io_restore +#endif + +# +# Need to do work before returning to userspace, switch to kernel stack +# +io_work_user: + lg %r1,__LC_KERNEL_STACK + aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) + lgr %r15,%r1 + +# +# One of the work bits is on. Find out which one. +# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED +# and _TIF_MCCK_PENDING +# +io_work_tif: + tm __TI_flags+7(%r12),_TIF_MCCK_PENDING + jo io_mcck_pending + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED + jo io_reschedule + tm __TI_flags+7(%r12),_TIF_SIGPENDING + jo io_sigpending + tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME + jo io_notify_resume + j io_return # beware of critical section cleanup + +# +# _TIF_MCCK_PENDING is set, call handler +# +io_mcck_pending: + # TRACE_IRQS_ON already done at io_return + brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler + TRACE_IRQS_OFF + j io_return + +# +# _TIF_NEED_RESCHED is set, call schedule +# +io_reschedule: + # TRACE_IRQS_ON already done at io_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + brasl %r14,schedule # call scheduler + ssm __LC_PGM_NEW_PSW # disable I/O and ext. 
interrupts + TRACE_IRQS_OFF + j io_return + +# +# _TIF_SIGPENDING is set, call do_signal +# +io_sigpending: + # TRACE_IRQS_ON already done at io_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_signal + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j io_return + +# +# _TIF_NOTIFY_RESUME is set, call do_notify_resume +# +io_notify_resume: + # TRACE_IRQS_ON already done at io_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_notify_resume + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j io_return + +/* + * External interrupt handler routine + */ +ENTRY(ext_int_handler) + stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_EXT_OLD_PSW + HANDLE_SIE_INTERCEPT %r14 + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + tmhh %r8,0x0001 # interrupting from user ? + jz ext_skip + UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER + LAST_BREAK %r14 +ext_skip: + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) + TRACE_IRQS_OFF + lghi %r1,4096 + lgr %r2,%r11 # pass pointer to pt_regs + llgf %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code + llgf %r4,__LC_EXT_PARAMS # get external parameter + lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter + brasl %r14,do_extint + j io_return + +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle.
+ */ +ENTRY(psw_idle) + stg %r4,__SF_EMPTY(%r15) + larl %r1,psw_idle_lpsw+4 + stg %r1,__SF_EMPTY+8(%r15) + larl %r1,.Lvtimer_max + stck __IDLE_ENTER(%r2) + ltr %r5,%r5 + stpt __VQ_IDLE_ENTER(%r3) + jz psw_idle_lpsw + spt 0(%r1) +psw_idle_lpsw: + lpswe __SF_EMPTY(%r15) + br %r14 +psw_idle_end: + +__critical_end: + +/* + * Machine check handler routines + */ +ENTRY(mcck_int_handler) + stck __LC_MCCK_CLOCK + la %r1,4095 # revalidate r1 + spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_MCK_OLD_PSW + HANDLE_SIE_INTERCEPT %r14 + tm __LC_MCCK_CODE,0x80 # system damage? + jo mcck_panic # yes -> rest of mcck code invalid + lghi %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) + tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? + jo 3f + la %r14,__LC_SYNC_ENTER_TIMER + clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER + jl 0f + la %r14,__LC_ASYNC_ENTER_TIMER +0: clc 0(8,%r14),__LC_EXIT_TIMER + jl 1f + la %r14,__LC_EXIT_TIMER +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f + la %r14,__LC_LAST_UPDATE_TIMER +2: spt 0(%r14) + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) +3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_panic # no -> skip cleanup critical + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT + tm %r8,0x0001 # interrupting from user ? + jz mcck_skip + UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER + LAST_BREAK %r14 +mcck_skip: + lghi %r14,__LC_GPREGS_SAVE_AREA + mvc __PT_R0(128,%r11),0(%r14) + stmg %r8,%r9,__PT_PSW(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,s390_do_machine_check + tm __PT_PSW+1(%r11),0x01 # returning to user ? 
+ jno mcck_return + lg %r1,__LC_KERNEL_STACK # switch to kernel stack + aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) + lgr %r15,%r1 + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off + tm __TI_flags+7(%r12),_TIF_MCCK_PENDING + jno mcck_return + TRACE_IRQS_OFF + brasl %r14,s390_handle_mcck + TRACE_IRQS_ON +mcck_return: + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW + tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? + jno 0f + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER +0: lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_MCCK_PSW + +mcck_panic: + lg %r14,__LC_PANIC_STACK + slgr %r14,%r15 + srag %r14,%r14,PAGE_SHIFT + jz 0f + lg %r15,__LC_PANIC_STACK +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j mcck_skip + +# +# PSW restart interrupt handler +# +ENTRY(restart_int_handler) + stg %r15,__LC_SAVE_AREA_RESTART + lg %r15,__LC_RESTART_STACK + aghi %r15,-__PT_SIZE # create pt_regs on stack + xc 0(__PT_SIZE,%r15),0(%r15) + stmg %r0,%r14,__PT_R0(%r15) + mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART + mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw + aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + lmg %r1,%r3,__LC_RESTART_FN # load fn, parm & source cpu + ltgr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,1 # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + llgh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,5 # sigp stop to current cpu + brc 2,2b +3: j 3b + + .section .kprobes.text, "ax" + +#ifdef CONFIG_CHECK_STACK +/* + * The synchronous or the asynchronous stack overflowed. We are dead. 
+ * No need to properly save the registers, we are going to panic anyway. + * Setup a pt_regs so that show_trace can provide a good call trace. + */ +stack_overflow: + lg %r11,__LC_PANIC_STACK # change to panic stack + aghi %r11,-__PT_SIZE # create pt_regs + stmg %r0,%r7,__PT_R0(%r11) + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_R8(64,%r11),0(%r14) + stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + lgr %r15,%r11 + aghi %r15,-STACK_FRAME_OVERHEAD + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + jg kernel_stack_overflow +#endif + + .align 8 +cleanup_table: + .quad system_call + .quad sysc_do_svc + .quad sysc_tif + .quad sysc_restore + .quad sysc_done + .quad io_tif + .quad io_restore + .quad io_done + .quad psw_idle + .quad psw_idle_end + +cleanup_critical: + clg %r9,BASED(cleanup_table) # system_call + jl 0f + clg %r9,BASED(cleanup_table+8) # sysc_do_svc + jl cleanup_system_call + clg %r9,BASED(cleanup_table+16) # sysc_tif + jl 0f + clg %r9,BASED(cleanup_table+24) # sysc_restore + jl cleanup_sysc_tif + clg %r9,BASED(cleanup_table+32) # sysc_done + jl cleanup_sysc_restore + clg %r9,BASED(cleanup_table+40) # io_tif + jl 0f + clg %r9,BASED(cleanup_table+48) # io_restore + jl cleanup_io_tif + clg %r9,BASED(cleanup_table+56) # io_done + jl cleanup_io_restore + clg %r9,BASED(cleanup_table+64) # psw_idle + jl 0f + clg %r9,BASED(cleanup_table+72) # psw_idle_end + jl cleanup_idle +0: br %r14 + + +cleanup_system_call: + # check if stpt has been executed + clg %r9,BASED(cleanup_system_call_insn) + jh 0f + mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER + cghi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER +0: # check if stmg has been executed + clg %r9,BASED(cleanup_system_call_insn+8) + jh 0f + mvc __LC_SAVE_AREA_SYNC(64),0(%r11) +0: # check if base register setup + TIF bit load has been done + clg %r9,BASED(cleanup_system_call_insn+16) + jhe 0f + # set up saved registers r10 and 
r12 + stg %r10,16(%r11) # r10 last break + stg %r12,32(%r11) # r12 thread-info pointer +0: # check if the user time update has been done + clg %r9,BASED(cleanup_system_call_insn+24) + jh 0f + lg %r15,__LC_EXIT_TIMER + slg %r15,__LC_SYNC_ENTER_TIMER + alg %r15,__LC_USER_TIMER + stg %r15,__LC_USER_TIMER +0: # check if the system time update has been done + clg %r9,BASED(cleanup_system_call_insn+32) + jh 0f + lg %r15,__LC_LAST_UPDATE_TIMER + slg %r15,__LC_EXIT_TIMER + alg %r15,__LC_SYSTEM_TIMER + stg %r15,__LC_SYSTEM_TIMER +0: # update accounting time stamp + mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + # do LAST_BREAK + lg %r9,16(%r11) + srag %r9,%r9,23 + jz 0f + mvc __TI_last_break(8,%r12),16(%r11) +0: # set up saved register r11 + lg %r15,__LC_KERNEL_STACK + aghi %r15,-__PT_SIZE + stg %r15,24(%r11) # r11 pt_regs pointer + # fill pt_regs + mvc __PT_R8(64,%r15),__LC_SAVE_AREA_SYNC + stmg %r0,%r7,__PT_R0(%r15) + mvc __PT_PSW(16,%r15),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r15),__LC_SVC_ILC + # setup saved register r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r15,56(%r11) # r15 stack pointer + # set new psw address and exit + larl %r9,sysc_do_svc + br %r14 +cleanup_system_call_insn: + .quad system_call + .quad sysc_stmg + .quad sysc_per + .quad sysc_vtime+18 + .quad sysc_vtime+42 + +cleanup_sysc_tif: + larl %r9,sysc_tif + br %r14 + +cleanup_sysc_restore: + clg %r9,BASED(cleanup_sysc_restore_insn) + je 0f + lg %r9,24(%r11) # get saved pointer to pt_regs + mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) + mvc 0(64,%r11),__PT_R8(%r9) + lmg %r0,%r7,__PT_R0(%r9) +0: lmg %r8,%r9,__LC_RETURN_PSW + br %r14 +cleanup_sysc_restore_insn: + .quad sysc_done - 4 + +cleanup_io_tif: + larl %r9,io_tif + br %r14 + +cleanup_io_restore: + clg %r9,BASED(cleanup_io_restore_insn) + je 0f + lg %r9,24(%r11) # get saved r11 pointer to pt_regs + mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) + mvc 0(64,%r11),__PT_R8(%r9) + lmg %r0,%r7,__PT_R0(%r9) +0: lmg %r8,%r9,__LC_RETURN_PSW + br %r14 
+cleanup_io_restore_insn: + .quad io_done - 4 + +cleanup_idle: + # copy interrupt clock & cpu timer + mvc __IDLE_EXIT(8,%r2),__LC_INT_CLOCK + mvc __VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER + cghi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK + mvc __VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER +0: # check if stck & stpt have been executed + clg %r9,BASED(cleanup_idle_insn) + jhe 1f + mvc __IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2) + mvc __VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3) + j 2f +1: # check if the cpu timer has been reprogrammed + ltr %r5,%r5 + jz 2f + spt __VQ_IDLE_ENTER(%r3) +2: # account system time going idle + lg %r9,__LC_STEAL_TIMER + alg %r9,__IDLE_ENTER(%r2) + slg %r9,__LC_LAST_UPDATE_CLOCK + stg %r9,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2) + lg %r9,__LC_SYSTEM_TIMER + alg %r9,__LC_LAST_UPDATE_TIMER + slg %r9,__VQ_IDLE_ENTER(%r3) + stg %r9,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3) + # prepare return psw + nihh %r8,0xfffd # clear wait state bit + lg %r9,48(%r11) # return from psw_idle + br %r14 +cleanup_idle_insn: + .quad psw_idle_lpsw + +/* + * Integer constants + */ + .align 8 +.Lcritical_start: + .quad __critical_start +.Lcritical_length: + .quad __critical_end - __critical_start +.Lvtimer_max: + .quad 0x7fffffffffffffff + + +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +/* + * sie64a calling convention: + * %r2 pointer to sie control block + * %r3 guest register save area + */ +ENTRY(sie64a) + stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers + stg %r2,__SF_EMPTY(%r15) # save control block pointer + stg %r3,__SF_EMPTY+8(%r15) # save guest register save area + xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0 + lmg %r0,%r13,0(%r3) # load guest gprs 0-13 + lg %r14,__LC_THREAD_INFO # pointer thread_info struct + oi __TI_flags+6(%r14),_TIF_SIE>>8 +sie_loop: + lg %r14,__LC_THREAD_INFO # pointer thread_info struct + tm __TI_flags+7(%r14),_TIF_EXIT_SIE + jnz 
sie_exit + lg %r14,__LC_GMAP # get gmap pointer + ltgr %r14,%r14 + jz sie_gmap + lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce +sie_gmap: + lg %r14,__SF_EMPTY(%r15) # get control block pointer + SPP __SF_EMPTY(%r15) # set guest id + sie 0(%r14) +sie_done: + SPP __SF_EMPTY+16(%r15) # set host id + lg %r14,__LC_THREAD_INFO # pointer thread_info struct +sie_exit: + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + ni __TI_flags+6(%r14),255-(_TIF_SIE>>8) + lg %r14,__SF_EMPTY+8(%r15) # load guest register save area + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers + lghi %r2,0 + br %r14 +sie_fault: + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + lg %r14,__LC_THREAD_INFO # pointer thread_info struct + ni __TI_flags+6(%r14),255-(_TIF_SIE>>8) + lg %r14,__SF_EMPTY+8(%r15) # load guest register save area + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers + lghi %r2,-EFAULT + br %r14 + + .align 8 +.Lsie_loop: + .quad sie_loop +.Lsie_length: + .quad sie_done - sie_loop +.Lhost_id: + .quad 0 + + .section __ex_table,"a" + .quad sie_loop,sie_fault + .previous +#endif + + .section .rodata, "a" +#define SYSCALL(esa,esame,emu) .long esame + .globl sys_call_table +sys_call_table: +#include "syscalls.S" +#undef SYSCALL + +#ifdef CONFIG_COMPAT + +#define SYSCALL(esa,esame,emu) .long emu +sys_call_table_emu: +#include "syscalls.S" +#undef SYSCALL +#endif diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c new file mode 100644 index 00000000..78bdf0e5 --- /dev/null +++ b/arch/s390/kernel/ftrace.c @@ -0,0 +1,198 @@ +/* + * Dynamic function tracer architecture backend. + * + * Copyright IBM Corp. 
2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/hardirq.h> +#include <linux/uaccess.h> +#include <linux/ftrace.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/kprobes.h> +#include <trace/syscall.h> +#include <asm/asm-offsets.h> + +#ifdef CONFIG_64BIT +#define MCOUNT_OFFSET_RET 12 +#else +#define MCOUNT_OFFSET_RET 22 +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE + +void ftrace_disable_code(void); +void ftrace_enable_insn(void); + +#ifdef CONFIG_64BIT +/* + * The 64-bit mcount code looks like this: + * stg %r14,8(%r15) # offset 0 + * > larl %r1,<&counter> # offset 6 + * > brasl %r14,_mcount # offset 12 + * lg %r14,8(%r15) # offset 18 + * Total length is 24 bytes. The middle two instructions of the mcount + * block get overwritten by ftrace_make_nop / ftrace_make_call. + * The 64-bit enabled ftrace code block looks like this: + * stg %r14,8(%r15) # offset 0 + * > lg %r1,__LC_FTRACE_FUNC # offset 6 + * > lgr %r0,%r0 # offset 12 + * > basr %r14,%r1 # offset 16 + * lg %r14,8(%15) # offset 18 + * The return points of the mcount/ftrace function have the same offset 18. + * The 64-bit disable ftrace code block looks like this: + * stg %r14,8(%r15) # offset 0 + * > jg .+18 # offset 6 + * > lgr %r0,%r0 # offset 12 + * > basr %r14,%r1 # offset 16 + * lg %r14,8(%15) # offset 18 + * The jg instruction branches to offset 24 to skip as many instructions + * as possible. 
+ */ +asm( + " .align 4\n" + "ftrace_disable_code:\n" + " jg 0f\n" + " lgr %r0,%r0\n" + " basr %r14,%r1\n" + "0:\n" + " .align 4\n" + "ftrace_enable_insn:\n" + " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n"); + +#define FTRACE_INSN_SIZE 6 + +#else /* CONFIG_64BIT */ +/* + * The 31-bit mcount code looks like this: + * st %r14,4(%r15) # offset 0 + * > bras %r1,0f # offset 4 + * > .long _mcount # offset 8 + * > .long <&counter> # offset 12 + * > 0: l %r14,0(%r1) # offset 16 + * > l %r1,4(%r1) # offset 20 + * basr %r14,%r14 # offset 24 + * l %r14,4(%r15) # offset 26 + * Total length is 30 bytes. The twenty bytes starting from offset 4 + * to offset 24 get overwritten by ftrace_make_nop / ftrace_make_call. + * The 31-bit enabled ftrace code block looks like this: + * st %r14,4(%r15) # offset 0 + * > l %r14,__LC_FTRACE_FUNC # offset 4 + * > j 0f # offset 8 + * > .fill 12,1,0x07 # offset 12 + * 0: basr %r14,%r14 # offset 24 + * l %r14,4(%r14) # offset 26 + * The return points of the mcount/ftrace function have the same offset 26. + * The 31-bit disabled ftrace code block looks like this: + * st %r14,4(%r15) # offset 0 + * > j .+26 # offset 4 + * > j 0f # offset 8 + * > .fill 12,1,0x07 # offset 12 + * 0: basr %r14,%r14 # offset 24 + * l %r14,4(%r14) # offset 26 + * The j instruction branches to offset 30 to skip as many instructions + * as possible. 
+ */ +asm( + " .align 4\n" + "ftrace_disable_code:\n" + " j 1f\n" + " j 0f\n" + " .fill 12,1,0x07\n" + "0: basr %r14,%r14\n" + "1:\n" + " .align 4\n" + "ftrace_enable_insn:\n" + " l %r14,"__stringify(__LC_FTRACE_FUNC)"\n"); + +#define FTRACE_INSN_SIZE 4 + +#endif /* CONFIG_64BIT */ + + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + if (probe_kernel_write((void *) rec->ip, ftrace_disable_code, + MCOUNT_INSN_SIZE)) + return -EPERM; + return 0; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn, + FTRACE_INSN_SIZE)) + return -EPERM; + return 0; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + return 0; +} + +int __init ftrace_dyn_arch_init(void *data) +{ + *(unsigned long *) data = 0; + return 0; +} + +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * Hook the return address and push it in the stack of return addresses + * in current thread info. + */ +unsigned long __kprobes prepare_ftrace_return(unsigned long parent, + unsigned long ip) +{ + struct ftrace_graph_ent trace; + + if (unlikely(atomic_read(&current->tracing_graph_pause))) + goto out; + if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) + goto out; + trace.func = (ip & PSW_ADDR_INSN) - MCOUNT_OFFSET_RET; + /* Only trace if the calling function expects to. */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + goto out; + } + parent = (unsigned long) return_to_handler; +out: + return parent; +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Patch the kernel code at ftrace_graph_caller location. The instruction + * there is branch relative and save to prepare_ftrace_return. To disable + * the call to prepare_ftrace_return we patch the bras offset to point + * directly after the instructions. To enable the call we calculate + * the original offset to prepare_ftrace_return and put it back.
+ */ +int ftrace_enable_ftrace_graph_caller(void) +{ + unsigned short offset; + + offset = ((void *) prepare_ftrace_return - + (void *) ftrace_graph_caller) / 2; + return probe_kernel_write(ftrace_graph_caller + 2, + &offset, sizeof(offset)); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + static unsigned short offset = 0x0002; + + return probe_kernel_write(ftrace_graph_caller + 2, + &offset, sizeof(offset)); +} + +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S new file mode 100644 index 00000000..adccd908 --- /dev/null +++ b/arch/s390/kernel/head.S @@ -0,0 +1,571 @@ +/* + * Copyright IBM Corp. 1999,2010 + * + * Author(s): Hartmut Penner <hp@de.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * Rob van der Heij <rvdhei@iae.nl> + * Heiko Carstens <heiko.carstens@de.ibm.com> + * + * There are 5 different IPL methods + * 1) load the image directly into ram at address 0 and do an PSW restart + * 2) linload will load the image from address 0x10000 to memory 0x10000 + * and start the code thru LPSW 0x0008000080010000 (VM only, deprecated) + * 3) generate the tape ipl header, store the generated image on a tape + * and ipl from it + * In case of SL tape you need to IPL 5 times to get past VOL1 etc + * 4) generate the vm reader ipl header, move the generated image to the + * VM reader (use option NOH!) 
and do a ipl from reader (VM only) + * 5) direct call of start by the SALIPL loader + * We use the cpuid to distinguish between VM and native ipl + * params for kernel are pushed to 0x10400 (see setup.h) + * + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page.h> + +#ifdef CONFIG_64BIT +#define ARCH_OFFSET 4 +#else +#define ARCH_OFFSET 0 +#endif + +__HEAD +#ifndef CONFIG_IPL + .org 0 + .long 0x00080000,0x80000000+startup # Just a restart PSW +#else +#ifdef CONFIG_IPL_TAPE +#define IPL_BS 1024 + .org 0 + .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded + .long 0x27000000,0x60000001 # by ipl to addresses 0-23. + .long 0x02000000,0x20000000+IPL_BS # (a PSW and two CCWs). + .long 0x00000000,0x00000000 # external old psw + .long 0x00000000,0x00000000 # svc old psw + .long 0x00000000,0x00000000 # program check old psw + .long 0x00000000,0x00000000 # machine check old psw + .long 0x00000000,0x00000000 # io old psw + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x000a0000,0x00000058 # external new psw + .long 0x000a0000,0x00000060 # svc new psw + .long 0x000a0000,0x00000068 # program check new psw + .long 0x000a0000,0x00000070 # machine check new psw + .long 0x00080000,0x80000000+.Lioint # io new psw + + .org 0x100 +# +# subroutine for loading from tape +# Parameters: +# R1 = device number +# R2 = load address +.Lloader: + st %r14,.Lldret + la %r3,.Lorbread # r3 = address of orb + la %r5,.Lirb # r5 = address of irb + st %r2,.Lccwread+4 # initialize CCW data addresses + lctl %c6,%c6,.Lcr6 + slr %r2,%r2 +.Lldlp: + la %r6,3 # 3 retries +.Lssch: + ssch 0(%r3) # load chunk of IPL_BS bytes + bnz .Llderr +.Lw4end: + bas %r14,.Lwait4io + tm 8(%r5),0x82 # do we have a problem ? 
+ bnz .Lrecov + slr %r7,%r7 + icm %r7,3,10(%r5) # get residual count + lcr %r7,%r7 + la %r7,IPL_BS(%r7) # IPL_BS-residual=#bytes read + ar %r2,%r7 # add to total size + tm 8(%r5),0x01 # found a tape mark ? + bnz .Ldone + l %r0,.Lccwread+4 # update CCW data addresses + ar %r0,%r7 + st %r0,.Lccwread+4 + b .Lldlp +.Ldone: + l %r14,.Lldret + br %r14 # r2 contains the total size +.Lrecov: + bas %r14,.Lsense # do the sensing + bct %r6,.Lssch # dec. retry count & branch + b .Llderr +# +# Sense subroutine +# +.Lsense: + st %r14,.Lsnsret + la %r7,.Lorbsense + ssch 0(%r7) # start sense command + bnz .Llderr + bas %r14,.Lwait4io + l %r14,.Lsnsret + tm 8(%r5),0x82 # do we have a problem ? + bnz .Llderr + br %r14 +# +# Wait for interrupt subroutine +# +.Lwait4io: + lpsw .Lwaitpsw +.Lioint: + c %r1,0xb8 # compare subchannel number + bne .Lwait4io + tsch 0(%r5) + slr %r0,%r0 + tm 8(%r5),0x82 # do we have a problem ? + bnz .Lwtexit + tm 8(%r5),0x04 # got device end ? + bz .Lwait4io +.Lwtexit: + br %r14 +.Llderr: + lpsw .Lcrash + + .align 8 +.Lorbread: + .long 0x00000000,0x0080ff00,.Lccwread + .align 8 +.Lorbsense: + .long 0x00000000,0x0080ff00,.Lccwsense + .align 8 +.Lccwread: + .long 0x02200000+IPL_BS,0x00000000 +.Lccwsense: + .long 0x04200001,0x00000000 +.Lwaitpsw: + .long 0x020a0000,0x80000000+.Lioint + +.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.Lcr6: .long 0xff000000 + .align 8 +.Lcrash:.long 0x000a0000,0x00000000 +.Lldret:.long 0 +.Lsnsret: .long 0 +#endif /* CONFIG_IPL_TAPE */ + +#ifdef CONFIG_IPL_VM +#define IPL_BS 0x730 + .org 0 + .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded + .long 0x02000018,0x60000050 # by ipl to addresses 0-23. + .long 0x02000068,0x60000050 # (a PSW and two CCWs). + .fill 80-24,1,0x40 # bytes 24-79 are discarded !! 
+ .long 0x020000f0,0x60000050 # The next 160 bytes are loaded + .long 0x02000140,0x60000050 # to addresses 0x18-0xb7 + .long 0x02000190,0x60000050 # They form the continuation + .long 0x020001e0,0x60000050 # of the CCW program started + .long 0x02000230,0x60000050 # by ipl and load the range + .long 0x02000280,0x60000050 # 0x0f0-0x730 from the image + .long 0x020002d0,0x60000050 # to the range 0x0f0-0x730 + .long 0x02000320,0x60000050 # in memory. At the end of + .long 0x02000370,0x60000050 # the channel program the PSW + .long 0x020003c0,0x60000050 # at location 0 is loaded. + .long 0x02000410,0x60000050 # Initial processing starts + .long 0x02000460,0x60000050 # at 0xf0 = iplstart. + .long 0x020004b0,0x60000050 + .long 0x02000500,0x60000050 + .long 0x02000550,0x60000050 + .long 0x020005a0,0x60000050 + .long 0x020005f0,0x60000050 + .long 0x02000640,0x60000050 + .long 0x02000690,0x60000050 + .long 0x020006e0,0x20000050 + + .org 0xf0 +# +# subroutine for loading cards from the reader +# +.Lloader: + la %r3,.Lorb # r3 = address of orb + la %r5,.Lirb # r5 = address of irb + la %r6,.Lccws + la %r7,20 +.Linit: + st %r2,4(%r6) # initialize CCW data addresses + la %r2,0x50(%r2) + la %r6,8(%r6) + bct 7,.Linit + + lctl %c6,%c6,.Lcr6 # set IO subclass mask + slr %r2,%r2 +.Lldlp: + ssch 0(%r3) # load chunk of 1600 bytes + bnz .Llderr +.Lwait4irq: + mvc 0x78(8),.Lnewpsw # set up IO interrupt psw + lpsw .Lwaitpsw +.Lioint: + c %r1,0xb8 # compare subchannel number + bne .Lwait4irq + tsch 0(%r5) + + slr %r0,%r0 + ic %r0,8(%r5) # get device status + chi %r0,8 # channel end ? + be .Lcont + chi %r0,12 # channel end + device end ? 
+ be .Lcont + + l %r0,4(%r5) + s %r0,8(%r3) # r0/8 = number of ccws executed + mhi %r0,10 # *10 = number of bytes in ccws + lh %r3,10(%r5) # get residual count + sr %r0,%r3 # #ccws*80-residual=#bytes read + ar %r2,%r0 + + br %r14 # r2 contains the total size + +.Lcont: + ahi %r2,0x640 # add 0x640 to total size + la %r6,.Lccws + la %r7,20 +.Lincr: + l %r0,4(%r6) # update CCW data addresses + ahi %r0,0x640 + st %r0,4(%r6) + ahi %r6,8 + bct 7,.Lincr + + b .Lldlp +.Llderr: + lpsw .Lcrash + + .align 8 +.Lorb: .long 0x00000000,0x0080ff00,.Lccws +.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.Lcr6: .long 0xff000000 +.Lloadp:.long 0,0 + .align 8 +.Lcrash:.long 0x000a0000,0x00000000 +.Lnewpsw: + .long 0x00080000,0x80000000+.Lioint +.Lwaitpsw: + .long 0x020a0000,0x80000000+.Lioint + + .align 8 +.Lccws: .rept 19 + .long 0x02600050,0x00000000 + .endr + .long 0x02200050,0x00000000 +#endif /* CONFIG_IPL_VM */ + +iplstart: + lh %r1,0xb8 # test if subchannel number + bct %r1,.Lnoload # is valid + l %r1,0xb8 # load ipl subchannel number + la %r2,IPL_BS # load start address + bas %r14,.Lloader # load rest of ipl image + l %r12,.Lparm # pointer to parameter area + st %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number + +# +# load parameter file from ipl device +# +.Lagain1: + l %r2,.Linitrd # ramdisk loc. is temp + bas %r14,.Lloader # load parameter file + ltr %r2,%r2 # got anything ? + bz .Lnopf + chi %r2,895 + bnh .Lnotrunc + la %r2,895 +.Lnotrunc: + l %r4,.Linitrd + clc 0(3,%r4),.L_hdr # if it is HDRx + bz .Lagain1 # skip dataset header + clc 0(3,%r4),.L_eof # if it is EOFx + bz .Lagain1 # skip dataset trailer + la %r5,0(%r4,%r2) + lr %r3,%r2 + la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line + mvc 0(256,%r3),0(%r4) + mvc 256(256,%r3),256(%r4) + mvc 512(256,%r3),512(%r4) + mvc 768(122,%r3),768(%r4) + slr %r0,%r0 + b .Lcntlp +.Ldelspc: + ic %r0,0(%r2,%r3) + chi %r0,0x20 # is it a space ? 
+ be .Lcntlp + ahi %r2,1 + b .Leolp +.Lcntlp: + brct %r2,.Ldelspc +.Leolp: + slr %r0,%r0 + stc %r0,0(%r2,%r3) # terminate buffer +.Lnopf: + +# +# load ramdisk from ipl device +# +.Lagain2: + l %r2,.Linitrd # addr of ramdisk + st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) + bas %r14,.Lloader # load ramdisk + st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd + ltr %r2,%r2 + bnz .Lrdcont + st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found +.Lrdcont: + l %r2,.Linitrd + + clc 0(3,%r2),.L_hdr # skip HDRx and EOFx + bz .Lagain2 + clc 0(3,%r2),.L_eof + bz .Lagain2 + +#ifdef CONFIG_IPL_VM +# +# reset files in VM reader +# + stidp __LC_SAVE_AREA_SYNC # store cpuid + tm __LC_SAVE_AREA_SYNC,0xff# running VM ? + bno .Lnoreset + la %r2,.Lreset + lhi %r3,26 + diag %r2,%r3,8 + la %r5,.Lirb + stsch 0(%r5) # check if irq is pending + tm 30(%r5),0x0f # by verifying if any of the + bnz .Lwaitforirq # activity or status control + tm 31(%r5),0xff # bits is set in the schib + bz .Lnoreset +.Lwaitforirq: + mvc 0x78(8),.Lrdrnewpsw # set up IO interrupt psw +.Lwaitrdrirq: + lpsw .Lrdrwaitpsw +.Lrdrint: + c %r1,0xb8 # compare subchannel number + bne .Lwaitrdrirq + la %r5,.Lirb + tsch 0(%r5) +.Lnoreset: + b .Lnoload + + .align 8 +.Lrdrnewpsw: + .long 0x00080000,0x80000000+.Lrdrint +.Lrdrwaitpsw: + .long 0x020a0000,0x80000000+.Lrdrint +#endif + +# +# everything loaded, go for it +# +.Lnoload: + l %r1,.Lstartup + br %r1 + +.Linitrd:.long _end # default address of initrd +.Lparm: .long PARMAREA +.Lstartup: .long startup +.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40 + .byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6 + .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold" +.L_eof: .long 0xc5d6c600 /* C'EOF' */ +.L_hdr: .long 0xc8c4d900 /* C'HDR' */ + +#endif /* CONFIG_IPL */ + +# +# SALIPL loader support. Based on a patch by Rob van der Heij. 
+# This entry point is called directly from the SALIPL loader and +# doesn't need a builtin ipl record. +# + .org 0x800 +ENTRY(start) + stm %r0,%r15,0x07b0 # store registers + basr %r12,%r0 +.base: + l %r11,.parm + l %r8,.cmd # pointer to command buffer + + ltr %r9,%r9 # do we have SALIPL parameters? + bp .sk8x8 + + mvc 0(64,%r8),0x00b0 # copy saved registers + xc 64(240-64,%r8),0(%r8) # remainder of buffer + tr 0(64,%r8),.lowcase + b .gotr +.sk8x8: + mvc 0(240,%r8),0(%r9) # copy iplparms into buffer +.gotr: + slr %r0,%r0 + st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11) + st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11) + j startup # continue with startup +.cmd: .long COMMAND_LINE # address of command line buffer +.parm: .long PARMAREA +.lowcase: + .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07 + .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f + .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17 + .byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f + .byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27 + .byte 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f + .byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37 + .byte 0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f + .byte 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47 + .byte 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f + .byte 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57 + .byte 0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f + .byte 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67 + .byte 0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f + .byte 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77 + .byte 0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f + + .byte 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87 + .byte 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f + .byte 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97 + .byte 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f + .byte 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 + .byte 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf + .byte 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7 + .byte 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf + .byte 0xc0,0x81,0x82,0x83,0x84,0x85,0x86,0x87 # .abcdefg + .byte 
0x88,0x89,0xca,0xcb,0xcc,0xcd,0xce,0xcf # hi + .byte 0xd0,0x91,0x92,0x93,0x94,0x95,0x96,0x97 # .jklmnop + .byte 0x98,0x99,0xda,0xdb,0xdc,0xdd,0xde,0xdf # qr + .byte 0xe0,0xe1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 # ..stuvwx + .byte 0xa8,0xa9,0xea,0xeb,0xec,0xed,0xee,0xef # yz + .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7 + .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff + +# +# startup-code at 0x10000, running in absolute addressing mode +# this is called either by the ipl loader or directly by PSW restart +# or linload or SALIPL +# + .org 0x10000 +ENTRY(startup) + j .Lep_startup_normal + .org 0x10008 +# +# This is a list of s390 kernel entry points. At address 0x1000f the number of +# valid entry points is stored. +# +# IMPORTANT: Do not change this table, it is s390 kernel ABI! +# + .ascii "S390EP" + .byte 0x00,0x01 +# +# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode +# + .org 0x10010 +ENTRY(startup_kdump) + j .Lep_startup_kdump +.Lep_startup_normal: + basr %r13,0 # get base +.LPG0: + xc 0x200(256),0x200 # partially clear lowcore + xc 0x300(256),0x300 + xc 0xe00(256),0xe00 + stck __LC_LAST_UPDATE_CLOCK + spt 5f-.LPG0(%r13) + mvc __LC_LAST_UPDATE_TIMER(8),5f-.LPG0(%r13) + xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST +#ifndef CONFIG_MARCH_G5 + # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} + .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list + tm __LC_STFL_FAC_LIST,0x01 # stfle available ? 
+ jz 0f + la %r0,0 + .insn s,0xb2b00000,__LC_STFL_FAC_LIST # store facility list extended +0: l %r0,__LC_STFL_FAC_LIST + n %r0,2f+8-.LPG0(%r13) + cl %r0,2f+8-.LPG0(%r13) + jne 1f + l %r0,__LC_STFL_FAC_LIST+4 + n %r0,2f+12-.LPG0(%r13) + cl %r0,2f+12-.LPG0(%r13) + je 3f +1: l %r15,.Lstack-.LPG0(%r13) + ahi %r15,-96 + la %r2,.Lals_string-.LPG0(%r13) + l %r3,.Lsclp_print-.LPG0(%r13) + basr %r14,%r3 + lpsw 2f-.LPG0(%r13) # machine type not good enough, crash +.Lals_string: + .asciz "The Linux kernel requires more recent processor hardware" +.Lsclp_print: + .long _sclp_print_early +.Lstack: + .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER)) + .align 16 +2: .long 0x000a0000,0x8badcccc +#if defined(CONFIG_64BIT) +#if defined(CONFIG_MARCH_Z196) + .long 0xc100efe3, 0xf46c0000 +#elif defined(CONFIG_MARCH_Z10) + .long 0xc100efe3, 0xf0680000 +#elif defined(CONFIG_MARCH_Z9_109) + .long 0xc100efc3, 0x00000000 +#elif defined(CONFIG_MARCH_Z990) + .long 0xc0002000, 0x00000000 +#elif defined(CONFIG_MARCH_Z900) + .long 0xc0000000, 0x00000000 +#endif +#else +#if defined(CONFIG_MARCH_Z196) + .long 0x8100c880, 0x00000000 +#elif defined(CONFIG_MARCH_Z10) + .long 0x8100c880, 0x00000000 +#elif defined(CONFIG_MARCH_Z9_109) + .long 0x8100c880, 0x00000000 +#elif defined(CONFIG_MARCH_Z990) + .long 0x80002000, 0x00000000 +#elif defined(CONFIG_MARCH_Z900) + .long 0x80000000, 0x00000000 +#endif +#endif +3: +#endif + +#ifdef CONFIG_64BIT + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + sam64 # switch to 64 bit mode + larl %r13,4f + lmh %r0,%r15,0(%r13) # clear high-order half + jg startup_continue +4: .fill 16,4,0x0 +#else + mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) + l %r13,4f-.LPG0(%r13) + b 0(%r13) + .align 8 +4: .long startup_continue +#endif + .align 8 +5: .long 0x7fffffff,0xffffffff + +#include "head_kdump.S" + +# +# params at 10400 (setup.h) +# + .org PARMAREA + .long 0,0 # IPL_DEVICE 
+ .long 0,0 # INITRD_START + .long 0,0 # INITRD_SIZE + .long 0,0 # OLDMEM_BASE + .long 0,0 # OLDMEM_SIZE + + .org COMMAND_LINE + .byte "root=/dev/ram0 ro" + .byte 0 + + .org 0x11000 diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S new file mode 100644 index 00000000..d3f1ab7d --- /dev/null +++ b/arch/s390/kernel/head31.S @@ -0,0 +1,112 @@ +/* + * arch/s390/kernel/head31.S + * + * Copyright (C) IBM Corp. 2005,2010 + * + * Author(s): Hartmut Penner <hp@de.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * Rob van der Heij <rvdhei@iae.nl> + * Heiko Carstens <heiko.carstens@de.ibm.com> + * + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page.h> + +__HEAD +ENTRY(startup_continue) + basr %r13,0 # get base +.LPG1: + + l %r1,.Lbase_cc-.LPG1(%r13) + mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK + lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers + l %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area + # move IPL device to lowcore +# +# Setup stack +# + l %r15,.Linittu-.LPG1(%r13) + st %r15,__LC_THREAD_INFO # cache thread info in lowcore + mvc __LC_CURRENT(4),__TI_task(%r15) + ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_thread_union+THREAD_SIZE + st %r15,__LC_KERNEL_STACK # set end of kernel stack + ahi %r15,-96 +# +# Save ipl parameters, clear bss memory, initialize storage key for kernel pages, +# and create a kernel NSS if the SAVESYS= parm is defined +# + l %r14,.Lstartup_init-.LPG1(%r13) + basr %r14,%r14 + lpsw .Lentry-.LPG1(13) # jump to _stext in primary-space, + # virtual and never return ... 
+ .align 8 +.Lentry:.long 0x00080000,0x80000000 + _stext +.Lctl: .long 0x04b50000 # cr0: various things + .long 0 # cr1: primary space segment table + .long .Lduct # cr2: dispatchable unit control table + .long 0 # cr3: instruction authorization + .long 0 # cr4: instruction authorization + .long .Lduct # cr5: primary-aste origin + .long 0 # cr6: I/O interrupts + .long 0 # cr7: secondary space segment table + .long 0 # cr8: access registers translation + .long 0 # cr9: tracing off + .long 0 # cr10: tracing off + .long 0 # cr11: tracing off + .long 0 # cr12: tracing off + .long 0 # cr13: home space segment table + .long 0xc0000000 # cr14: machine check handling off + .long 0 # cr15: linkage stack operations +.Lmchunk:.long memory_chunk +.Lbss_bgn: .long __bss_start +.Lbss_end: .long _end +.Lparmaddr: .long PARMAREA +.Linittu: .long init_thread_union +.Lstartup_init: + .long startup_init + .align 64 +.Lduct: .long 0,0,0,0,.Lduald,0,0,0 + .long 0,0,0,0,0,0,0,0 + .align 128 +.Lduald:.rept 8 + .long 0x80000000,0,0,0 # invalid access-list entries + .endr +.Lbase_cc: + .long sched_clock_base_cc + +ENTRY(_ehead) + +#ifdef CONFIG_SHARED_KERNEL + .org 0x100000 - 0x11000 # head.o ends at 0x11000 +#endif + +# +# startup-code, running in absolute addressing mode +# +ENTRY(_stext) + basr %r13,0 # get base +.LPG3: +# check control registers + stctl %c0,%c15,0(%r15) + oi 2(%r15),0x60 # enable sigp emergency & external call + oi 0(%r15),0x10 # switch on low address protection + lctl %c0,%c15,0(%r15) + +# + lam 0,15,.Laregs-.LPG3(%r13) # load access regs needed by uaccess + l %r14,.Lstart-.LPG3(%r13) + basr %r14,%r14 # call start_kernel +# +# We returned from start_kernel ?!? 
PANIK +# + basr %r13,0 + lpsw .Ldw-.(%r13) # load disabled wait psw +# + .align 8 +.Ldw: .long 0x000a0000,0x00000000 +.Lstart:.long start_kernel +.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S new file mode 100644 index 00000000..99348c0e --- /dev/null +++ b/arch/s390/kernel/head64.S @@ -0,0 +1,107 @@ +/* + * arch/s390/kernel/head64.S + * + * Copyright (C) IBM Corp. 1999,2010 + * + * Author(s): Hartmut Penner <hp@de.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * Rob van der Heij <rvdhei@iae.nl> + * Heiko Carstens <heiko.carstens@de.ibm.com> + * + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page.h> + +__HEAD +ENTRY(startup_continue) + larl %r1,sched_clock_base_cc + mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK + larl %r13,.LPG1 # get base + lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers + lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area + # move IPL device to lowcore + lghi %r0,__LC_PASTE + stg %r0,__LC_VDSO_PER_CPU +# +# Setup stack +# + larl %r15,init_thread_union + stg %r15,__LC_THREAD_INFO # cache thread info in lowcore + lg %r14,__TI_task(%r15) # cache current in lowcore + stg %r14,__LC_CURRENT + aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_thread_union + THREAD_SIZE + stg %r15,__LC_KERNEL_STACK # set end of kernel stack + aghi %r15,-160 +# +# Save ipl parameters, clear bss memory, initialize storage key for kernel pages, +# and create a kernel NSS if the SAVESYS= parm is defined +# + brasl %r14,startup_init + lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space, + # virtual and never return ... 
+ .align 16 +.LPG1: +.Lentry:.quad 0x0000000180000000,_stext +.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space + .quad 0 # cr1: primary space segment table + .quad .Lduct # cr2: dispatchable unit control table + .quad 0 # cr3: instruction authorization + .quad 0 # cr4: instruction authorization + .quad .Lduct # cr5: primary-aste origin + .quad 0 # cr6: I/O interrupts + .quad 0 # cr7: secondary space segment table + .quad 0 # cr8: access registers translation + .quad 0 # cr9: tracing off + .quad 0 # cr10: tracing off + .quad 0 # cr11: tracing off + .quad 0 # cr12: tracing off + .quad 0 # cr13: home space segment table + .quad 0xc0000000 # cr14: machine check handling off + .quad 0 # cr15: linkage stack operations +.Lpcmsk:.quad 0x0000000180000000 +.L4malign:.quad 0xffffffffffc00000 +.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8 +.Lnop: .long 0x07000700 +.Lparmaddr: + .quad PARMAREA + .align 64 +.Lduct: .long 0,0,0,0,.Lduald,0,0,0 + .long 0,0,0,0,0,0,0,0 + .align 128 +.Lduald:.rept 8 + .long 0x80000000,0,0,0 # invalid access-list entries + .endr + +ENTRY(_ehead) + +#ifdef CONFIG_SHARED_KERNEL + .org 0x100000 - 0x11000 # head.o ends at 0x11000 +#endif + +# +# startup-code, running in absolute addressing mode +# +ENTRY(_stext) + basr %r13,0 # get base +.LPG3: +# check control registers + stctg %c0,%c15,0(%r15) + oi 6(%r15),0x60 # enable sigp emergency & external call + oi 4(%r15),0x10 # switch on low address protection + lctlg %c0,%c15,0(%r15) + + lam 0,15,.Laregs-.LPG3(%r13) # load acrs needed by uaccess + brasl %r14,start_kernel # go to C code +# +# We returned from start_kernel ?!? 
PANIK +# + basr %r13,0 + lpswe .Ldw-.(%r13) # load disabled wait psw + + .align 8 +.Ldw: .quad 0x0002000180000000,0x0000000000000000 +.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S new file mode 100644 index 00000000..e1ac3893 --- /dev/null +++ b/arch/s390/kernel/head_kdump.S @@ -0,0 +1,119 @@ +/* + * S390 kdump lowlevel functions (new kernel) + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#define DATAMOVER_ADDR 0x4000 +#define COPY_PAGE_ADDR 0x6000 + +#ifdef CONFIG_CRASH_DUMP + +# +# kdump entry (new kernel - not yet relocated) +# +# Note: This code has to be position independent +# + +.align 2 +.Lep_startup_kdump: + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # Switch to esame mode + sam64 # Switch to 64 bit addressing + basr %r13,0 +.Lbase: + larl %r2,.Lbase_addr # Check, if we have been + lg %r2,0(%r2) # already relocated: + clgr %r2,%r13 # + jne .Lrelocate # No : Start data mover + lghi %r2,0 # Yes: Start kdump kernel + brasl %r14,startup_kdump_relocated + +.Lrelocate: + larl %r4,startup + lg %r2,0x418(%r4) # Get kdump base + lg %r3,0x420(%r4) # Get kdump size + + larl %r10,.Lcopy_start # Source of data mover + lghi %r8,DATAMOVER_ADDR # Target of data mover + mvc 0(256,%r8),0(%r10) # Copy data mover code + + agr %r8,%r2 # Copy data mover to + mvc 0(256,%r8),0(%r10) # reserved mem + + lghi %r14,DATAMOVER_ADDR # Jump to copied data mover + basr %r14,%r14 +.Lbase_addr: + .quad .Lbase + +# +# kdump data mover code (runs at address DATAMOVER_ADDR) +# +# r2: kdump base address +# r3: kdump size +# +.Lcopy_start: + basr %r13,0 # Base +0: + lgr %r11,%r2 # Save kdump base address + lgr %r12,%r2 + agr %r12,%r3 # Compute kdump end address + + lghi %r5,0 + lghi %r10,COPY_PAGE_ADDR # Load copy page address +1: + mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp + mvc 0(256,%r5),0(%r11) # Copy new kernel to old + mvc 0(256,%r11),0(%r10) # 
Copy tmp to new + aghi %r11,256 + aghi %r5,256 + clgr %r11,%r12 + jl 1b + + lg %r14,.Lstartup_kdump-0b(%r13) + basr %r14,%r14 # Start relocated kernel +.Lstartup_kdump: + .long 0x00000000,0x00000000 + startup_kdump_relocated +.Lcopy_end: + +# +# Startup of kdump (relocated new kernel) +# +.align 2 +startup_kdump_relocated: + basr %r13,0 +0: + mvc 0(8,%r0),.Lrestart_psw-0b(%r13) # Setup restart PSW + mvc 464(16,%r0),.Lpgm_psw-0b(%r13) # Setup pgm check PSW + lhi %r1,1 # Start new kernel + diag %r1,%r1,0x308 # with diag 308 + +.Lno_diag308: # No diag 308 + sam31 # Switch to 31 bit addr mode + sr %r1,%r1 # Erase register r1 + sr %r2,%r2 # Erase register r2 + sigp %r1,%r2,0x12 # Switch to 31 bit arch mode + lpsw 0 # Start new kernel... +.align 8 +.Lrestart_psw: + .long 0x00080000,0x80000000 + startup +.Lpgm_psw: + .quad 0x0000000180000000,0x0000000000000000 + .Lno_diag308 +#else +.align 2 +.Lep_startup_kdump: +#ifdef CONFIG_64BIT + larl %r13,startup_kdump_crash + lpswe 0(%r13) +.align 8 +startup_kdump_crash: + .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash +#else + basr %r13,0 +0: lpsw startup_kdump_crash-0b(%r13) +.align 8 +startup_kdump_crash: + .long 0x000a0000,0x00000000 + startup_kdump_crash +#endif /* CONFIG_64BIT */ +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c new file mode 100644 index 00000000..4d1c9fb0 --- /dev/null +++ b/arch/s390/kernel/init_task.c @@ -0,0 +1,38 @@ +/* + * arch/s390/kernel/init_task.c + * + * S390 version + * + * Derived from "arch/i386/kernel/init_task.c" + */ + +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/init_task.h> +#include <linux/mqueue.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> + +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +/* + * Initial thread structure. 
+ * + * We need to make sure that this is THREAD_SIZE aligned due to the + * way process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; + +/* + * Initial task structure. + * + * All other task structs will be allocated on slabs in fork.c + */ +struct task_struct init_task = INIT_TASK(init_task); + +EXPORT_SYMBOL(init_task); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c new file mode 100644 index 00000000..8342e65a --- /dev/null +++ b/arch/s390/kernel/ipl.c @@ -0,0 +1,2069 @@ +/* + * arch/s390/kernel/ipl.c + * ipl/reipl/dump support for Linux on s390. + * + * Copyright IBM Corp. 2005,2012 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + * Heiko Carstens <heiko.carstens@de.ibm.com> + * Volker Sameske <sameske@de.ibm.com> + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/ctype.h> +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/crash_dump.h> +#include <linux/debug_locks.h> +#include <asm/ipl.h> +#include <asm/smp.h> +#include <asm/setup.h> +#include <asm/cpcmd.h> +#include <asm/cio.h> +#include <asm/ebcdic.h> +#include <asm/reset.h> +#include <asm/sclp.h> +#include <asm/checksum.h> +#include <asm/debug.h> +#include <asm/os_info.h> +#include "entry.h" + +#define IPL_PARM_BLOCK_VERSION 0 + +#define IPL_UNKNOWN_STR "unknown" +#define IPL_CCW_STR "ccw" +#define IPL_FCP_STR "fcp" +#define IPL_FCP_DUMP_STR "fcp_dump" +#define IPL_NSS_STR "nss" + +#define DUMP_CCW_STR "ccw" +#define DUMP_FCP_STR "fcp" +#define DUMP_NONE_STR "none" + +/* + * Five shutdown trigger types are supported: + * - panic + * - halt + * - power off + * - reipl + * - restart + */ +#define ON_PANIC_STR "on_panic" +#define ON_HALT_STR "on_halt" +#define ON_POFF_STR "on_poff" +#define ON_REIPL_STR "on_reboot" +#define 
ON_RESTART_STR "on_restart" + +struct shutdown_action; +struct shutdown_trigger { + char *name; + struct shutdown_action *action; +}; + +/* + * The following shutdown action types are supported: + */ +#define SHUTDOWN_ACTION_IPL_STR "ipl" +#define SHUTDOWN_ACTION_REIPL_STR "reipl" +#define SHUTDOWN_ACTION_DUMP_STR "dump" +#define SHUTDOWN_ACTION_VMCMD_STR "vmcmd" +#define SHUTDOWN_ACTION_STOP_STR "stop" +#define SHUTDOWN_ACTION_DUMP_REIPL_STR "dump_reipl" + +struct shutdown_action { + char *name; + void (*fn) (struct shutdown_trigger *trigger); + int (*init) (void); + int init_rc; +}; + +static char *ipl_type_str(enum ipl_type type) +{ + switch (type) { + case IPL_TYPE_CCW: + return IPL_CCW_STR; + case IPL_TYPE_FCP: + return IPL_FCP_STR; + case IPL_TYPE_FCP_DUMP: + return IPL_FCP_DUMP_STR; + case IPL_TYPE_NSS: + return IPL_NSS_STR; + case IPL_TYPE_UNKNOWN: + default: + return IPL_UNKNOWN_STR; + } +} + +enum dump_type { + DUMP_TYPE_NONE = 1, + DUMP_TYPE_CCW = 2, + DUMP_TYPE_FCP = 4, +}; + +static char *dump_type_str(enum dump_type type) +{ + switch (type) { + case DUMP_TYPE_NONE: + return DUMP_NONE_STR; + case DUMP_TYPE_CCW: + return DUMP_CCW_STR; + case DUMP_TYPE_FCP: + return DUMP_FCP_STR; + default: + return NULL; + } +} + +/* + * Must be in data section since the bss section + * is not cleared when these are accessed. 
+ */ +static u16 ipl_devno __attribute__((__section__(".data"))) = 0; +u32 ipl_flags __attribute__((__section__(".data"))) = 0; + +enum ipl_method { + REIPL_METHOD_CCW_CIO, + REIPL_METHOD_CCW_DIAG, + REIPL_METHOD_CCW_VM, + REIPL_METHOD_FCP_RO_DIAG, + REIPL_METHOD_FCP_RW_DIAG, + REIPL_METHOD_FCP_RO_VM, + REIPL_METHOD_FCP_DUMP, + REIPL_METHOD_NSS, + REIPL_METHOD_NSS_DIAG, + REIPL_METHOD_DEFAULT, +}; + +enum dump_method { + DUMP_METHOD_NONE, + DUMP_METHOD_CCW_CIO, + DUMP_METHOD_CCW_DIAG, + DUMP_METHOD_CCW_VM, + DUMP_METHOD_FCP_DIAG, +}; + +static int diag308_set_works = 0; + +static struct ipl_parameter_block ipl_block; + +static int reipl_capabilities = IPL_TYPE_UNKNOWN; + +static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN; +static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT; +static struct ipl_parameter_block *reipl_block_fcp; +static struct ipl_parameter_block *reipl_block_ccw; +static struct ipl_parameter_block *reipl_block_nss; +static struct ipl_parameter_block *reipl_block_actual; + +static int dump_capabilities = DUMP_TYPE_NONE; +static enum dump_type dump_type = DUMP_TYPE_NONE; +static enum dump_method dump_method = DUMP_METHOD_NONE; +static struct ipl_parameter_block *dump_block_fcp; +static struct ipl_parameter_block *dump_block_ccw; + +static struct sclp_ipl_info sclp_ipl_info; + +int diag308(unsigned long subcode, void *addr) +{ + register unsigned long _addr asm("0") = (unsigned long) addr; + register unsigned long _rc asm("1") = 0; + + asm volatile( + " diag %0,%2,0x308\n" + "0:\n" + EX_TABLE(0b,0b) + : "+d" (_addr), "+d" (_rc) + : "d" (subcode) : "cc", "memory"); + return _rc; +} +EXPORT_SYMBOL_GPL(diag308); + +/* SYSFS */ + +#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \ +static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + return sprintf(page, _format, _value); \ +} \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name, 
S_IRUGO, sys_##_prefix##_##_name##_show, NULL); + +#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \ +static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + return sprintf(page, _fmt_out, \ + (unsigned long long) _value); \ +} \ +static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + unsigned long long value; \ + if (sscanf(buf, _fmt_in, &value) != 1) \ + return -EINVAL; \ + _value = value; \ + return len; \ +} \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name,(S_IRUGO | S_IWUSR), \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store); + +#define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\ +static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + return sprintf(page, _fmt_out, _value); \ +} \ +static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + strncpy(_value, buf, sizeof(_value) - 1); \ + strim(_value); \ + return len; \ +} \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name,(S_IRUGO | S_IWUSR), \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store); + +static void make_attrs_ro(struct attribute **attrs) +{ + while (*attrs) { + (*attrs)->mode = S_IRUGO; + attrs++; + } +} + +/* + * ipl section + */ + +static __init enum ipl_type get_ipl_type(void) +{ + struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; + + if (ipl_flags & IPL_NSS_VALID) + return IPL_TYPE_NSS; + if (!(ipl_flags & IPL_DEVNO_VALID)) + return IPL_TYPE_UNKNOWN; + if (!(ipl_flags & IPL_PARMBLOCK_VALID)) + return IPL_TYPE_CCW; + if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION) + return IPL_TYPE_UNKNOWN; + if (ipl->hdr.pbt != 
DIAG308_IPL_TYPE_FCP) + return IPL_TYPE_UNKNOWN; + if (ipl->ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP) + return IPL_TYPE_FCP_DUMP; + return IPL_TYPE_FCP; +} + +struct ipl_info ipl_info; +EXPORT_SYMBOL_GPL(ipl_info); + +static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sprintf(page, "%s\n", ipl_type_str(ipl_info.type)); +} + +static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); + +/* VM IPL PARM routines */ +static size_t reipl_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + int i; + size_t len; + char has_lowercase = 0; + + len = 0; + if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) && + (ipb->ipl_info.ccw.vm_parm_len > 0)) { + + len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len); + memcpy(dest, ipb->ipl_info.ccw.vm_parm, len); + /* If at least one character is lowercase, we assume mixed + * case; otherwise we convert everything to lowercase. + */ + for (i = 0; i < len; i++) + if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */ + (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */ + (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */ + has_lowercase = 1; + break; + } + if (!has_lowercase) + EBC_TOLOWER(dest, len); + EBCASC(dest, len); + } + dest[len] = 0; + + return len; +} + +size_t append_ipl_vmparm(char *dest, size_t size) +{ + size_t rc; + + rc = 0; + if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)) + rc = reipl_get_ascii_vmparm(dest, size, &ipl_block); + else + dest[0] = 0; + return rc; +} + +static ssize_t ipl_vm_parm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + char parm[DIAG308_VMPARM_SIZE + 1] = {}; + + append_ipl_vmparm(parm, sizeof(parm)); + return sprintf(page, "%s\n", parm); +} + +static size_t scpdata_length(const char* buf, size_t count) +{ + while (count) { + if (buf[count - 1] != '\0' && buf[count - 1] != ' ') + break; + count--; + } + return count; +} + +static size_t 
reipl_append_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + size_t count; + size_t i; + int has_lowercase; + + count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data, + ipb->ipl_info.fcp.scp_data_len)); + if (!count) + goto out; + + has_lowercase = 0; + for (i = 0; i < count; i++) { + if (!isascii(ipb->ipl_info.fcp.scp_data[i])) { + count = 0; + goto out; + } + if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i])) + has_lowercase = 1; + } + + if (has_lowercase) + memcpy(dest, ipb->ipl_info.fcp.scp_data, count); + else + for (i = 0; i < count; i++) + dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]); +out: + dest[count] = '\0'; + return count; +} + +size_t append_ipl_scpdata(char *dest, size_t len) +{ + size_t rc; + + rc = 0; + if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP) + rc = reipl_append_ascii_scpdata(dest, len, &ipl_block); + else + dest[0] = 0; + return rc; +} + + +static struct kobj_attribute sys_ipl_vm_parm_attr = + __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL); + +static ssize_t sys_ipl_device_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; + + switch (ipl_info.type) { + case IPL_TYPE_CCW: + return sprintf(page, "0.0.%04x\n", ipl_devno); + case IPL_TYPE_FCP: + case IPL_TYPE_FCP_DUMP: + return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno); + default: + return 0; + } +} + +static struct kobj_attribute sys_ipl_device_attr = + __ATTR(device, S_IRUGO, sys_ipl_device_show, NULL); + +static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START, + IPL_PARMBLOCK_SIZE); +} + +static struct bin_attribute ipl_parameter_attr = { + .attr = { + .name = "binary_parameter", + .mode = S_IRUGO, + }, + .size = PAGE_SIZE, + .read = &ipl_parameter_read, +}; + +static ssize_t 
ipl_scp_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len; + void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static struct bin_attribute ipl_scp_data_attr = { + .attr = { + .name = "scp_data", + .mode = S_IRUGO, + }, + .size = PAGE_SIZE, + .read = ipl_scp_data_read, +}; + +/* FCP ipl device attributes */ + +DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", (unsigned long long) + IPL_PARMBLOCK_START->ipl_info.fcp.wwpn); +DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", (unsigned long long) + IPL_PARMBLOCK_START->ipl_info.fcp.lun); +DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long) + IPL_PARMBLOCK_START->ipl_info.fcp.bootprog); +DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long) + IPL_PARMBLOCK_START->ipl_info.fcp.br_lba); + +static struct attribute *ipl_fcp_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_device_attr.attr, + &sys_ipl_fcp_wwpn_attr.attr, + &sys_ipl_fcp_lun_attr.attr, + &sys_ipl_fcp_bootprog_attr.attr, + &sys_ipl_fcp_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group ipl_fcp_attr_group = { + .attrs = ipl_fcp_attrs, +}; + +/* CCW ipl device attributes */ + +static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + char loadparm[LOADPARM_LEN + 1] = {}; + + if (!sclp_ipl_info.is_valid) + return sprintf(page, "#unknown#\n"); + memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN); + EBCASC(loadparm, LOADPARM_LEN); + strim(loadparm); + return sprintf(page, "%s\n", loadparm); +} + +static struct kobj_attribute sys_ipl_ccw_loadparm_attr = + __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL); + +static struct attribute *ipl_ccw_attrs_vm[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_device_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + 
&sys_ipl_vm_parm_attr.attr, + NULL, +}; + +static struct attribute *ipl_ccw_attrs_lpar[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_device_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group ipl_ccw_attr_group_vm = { + .attrs = ipl_ccw_attrs_vm, +}; + +static struct attribute_group ipl_ccw_attr_group_lpar = { + .attrs = ipl_ccw_attrs_lpar +}; + +/* NSS ipl device attributes */ + +DEFINE_IPL_ATTR_RO(ipl_nss, name, "%s\n", kernel_nss_name); + +static struct attribute *ipl_nss_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_nss_name_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_vm_parm_attr.attr, + NULL, +}; + +static struct attribute_group ipl_nss_attr_group = { + .attrs = ipl_nss_attrs, +}; + +/* UNKNOWN ipl device attributes */ + +static struct attribute *ipl_unknown_attrs[] = { + &sys_ipl_type_attr.attr, + NULL, +}; + +static struct attribute_group ipl_unknown_attr_group = { + .attrs = ipl_unknown_attrs, +}; + +static struct kset *ipl_kset; + +static int __init ipl_register_fcp_files(void) +{ + int rc; + + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group); + if (rc) + goto out; + rc = sysfs_create_bin_file(&ipl_kset->kobj, &ipl_parameter_attr); + if (rc) + goto out_ipl_parm; + rc = sysfs_create_bin_file(&ipl_kset->kobj, &ipl_scp_data_attr); + if (!rc) + goto out; + + sysfs_remove_bin_file(&ipl_kset->kobj, &ipl_parameter_attr); + +out_ipl_parm: + sysfs_remove_group(&ipl_kset->kobj, &ipl_fcp_attr_group); +out: + return rc; +} + +static void __ipl_run(void *unused) +{ + diag308(DIAG308_IPL, NULL); + if (MACHINE_IS_VM) + __cpcmd("IPL", NULL, 0, NULL); + else if (ipl_info.type == IPL_TYPE_CCW) + reipl_ccw_dev(&ipl_info.data.ccw.dev_id); +} + +static void ipl_run(struct shutdown_trigger *trigger) +{ + smp_call_ipl_cpu(__ipl_run, NULL); +} + +static int __init ipl_init(void) +{ + int rc; + + ipl_kset = kset_create_and_add("ipl", NULL, firmware_kobj); + if (!ipl_kset) { + rc = -ENOMEM; + goto out; + } + switch 
(ipl_info.type) { + case IPL_TYPE_CCW: + if (MACHINE_IS_VM) + rc = sysfs_create_group(&ipl_kset->kobj, + &ipl_ccw_attr_group_vm); + else + rc = sysfs_create_group(&ipl_kset->kobj, + &ipl_ccw_attr_group_lpar); + break; + case IPL_TYPE_FCP: + case IPL_TYPE_FCP_DUMP: + rc = ipl_register_fcp_files(); + break; + case IPL_TYPE_NSS: + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nss_attr_group); + break; + default: + rc = sysfs_create_group(&ipl_kset->kobj, + &ipl_unknown_attr_group); + break; + } +out: + if (rc) + panic("ipl_init failed: rc = %i\n", rc); + + return 0; +} + +static struct shutdown_action __refdata ipl_action = { + .name = SHUTDOWN_ACTION_IPL_STR, + .fn = ipl_run, + .init = ipl_init, +}; + +/* + * reipl shutdown action: Reboot Linux on shutdown. + */ + +/* VM IPL PARM attributes */ +static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb, + char *page) +{ + char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; + + reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); + return sprintf(page, "%s\n", vmparm); +} + +static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb, + size_t vmparm_max, + const char *buf, size_t len) +{ + int i, ip_len; + + /* ignore trailing newline */ + ip_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + ip_len--; + + if (ip_len > vmparm_max) + return -EINVAL; + + /* parm is used to store kernel options, check for common chars */ + for (i = 0; i < ip_len; i++) + if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i]))) + return -EINVAL; + + memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE); + ipb->ipl_info.ccw.vm_parm_len = ip_len; + if (ip_len > 0) { + ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; + memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len); + ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len); + } else { + ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID; + } + + return len; +} + +/* NSS wrapper */ +static ssize_t reipl_nss_vmparm_show(struct kobject *kobj, + struct 
kobj_attribute *attr, char *page) +{ + return reipl_generic_vmparm_show(reipl_block_nss, page); +} + +static ssize_t reipl_nss_vmparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_vmparm_store(reipl_block_nss, 56, buf, len); +} + +/* CCW wrapper */ +static ssize_t reipl_ccw_vmparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_vmparm_show(reipl_block_ccw, page); +} + +static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_vmparm_store(reipl_block_ccw, 64, buf, len); +} + +static struct kobj_attribute sys_reipl_nss_vmparm_attr = + __ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show, + reipl_nss_vmparm_store); +static struct kobj_attribute sys_reipl_ccw_vmparm_attr = + __ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show, + reipl_ccw_vmparm_store); + +/* FCP reipl device attributes */ + +static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len; + void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t padding; + size_t scpdata_len; + + if (off < 0) + return -EINVAL; + + if (off >= DIAG308_SCPDATA_SIZE) + return -ENOSPC; + + if (count > DIAG308_SCPDATA_SIZE - off) + count = DIAG308_SCPDATA_SIZE - off; + + memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count); + scpdata_len = off + count; + + if (scpdata_len % 8) { + padding = 8 - (scpdata_len % 8); + memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len, + 0, padding); + scpdata_len += padding; + } + + 
reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len; + reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len; + reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len; + + return count; +} + +static struct bin_attribute sys_reipl_fcp_scp_data_attr = { + .attr = { + .name = "scp_data", + .mode = S_IRUGO | S_IWUSR, + }, + .size = PAGE_SIZE, + .read = reipl_fcp_scpdata_read, + .write = reipl_fcp_scpdata_write, +}; + +DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n", + reipl_block_fcp->ipl_info.fcp.wwpn); +DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n", + reipl_block_fcp->ipl_info.fcp.lun); +DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", + reipl_block_fcp->ipl_info.fcp.bootprog); +DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n", + reipl_block_fcp->ipl_info.fcp.br_lba); +DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", + reipl_block_fcp->ipl_info.fcp.devno); + +static struct attribute *reipl_fcp_attrs[] = { + &sys_reipl_fcp_device_attr.attr, + &sys_reipl_fcp_wwpn_attr.attr, + &sys_reipl_fcp_lun_attr.attr, + &sys_reipl_fcp_bootprog_attr.attr, + &sys_reipl_fcp_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group reipl_fcp_attr_group = { + .attrs = reipl_fcp_attrs, +}; + +/* CCW reipl device attributes */ + +DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", + reipl_block_ccw->ipl_info.ccw.devno); + +static void reipl_get_ascii_loadparm(char *loadparm, + struct ipl_parameter_block *ibp) +{ + memcpy(loadparm, ibp->ipl_info.ccw.load_parm, LOADPARM_LEN); + EBCASC(loadparm, LOADPARM_LEN); + loadparm[LOADPARM_LEN] = 0; + strim(loadparm); +} + +static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb, + char *page) +{ + char buf[LOADPARM_LEN + 1]; + + reipl_get_ascii_loadparm(buf, ipb); + return sprintf(page, "%s\n", buf); +} + +static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, + const char *buf, size_t len) +{ + 
int i, lp_len; + + /* ignore trailing newline */ + lp_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + lp_len--; + /* loadparm can have max 8 characters and must not start with a blank */ + if ((lp_len > LOADPARM_LEN) || ((lp_len > 0) && (buf[0] == ' '))) + return -EINVAL; + /* loadparm can only contain "a-z,A-Z,0-9,SP,." */ + for (i = 0; i < lp_len; i++) { + if (isalpha(buf[i]) || isdigit(buf[i]) || (buf[i] == ' ') || + (buf[i] == '.')) + continue; + return -EINVAL; + } + /* initialize loadparm with blanks */ + memset(ipb->ipl_info.ccw.load_parm, ' ', LOADPARM_LEN); + /* copy and convert to ebcdic */ + memcpy(ipb->ipl_info.ccw.load_parm, buf, lp_len); + ASCEBC(ipb->ipl_info.ccw.load_parm, LOADPARM_LEN); + return len; +} + +/* NSS wrapper */ +static ssize_t reipl_nss_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_loadparm_show(reipl_block_nss, page); +} + +static ssize_t reipl_nss_loadparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_loadparm_store(reipl_block_nss, buf, len); +} + +/* CCW wrapper */ +static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_loadparm_show(reipl_block_ccw, page); +} + +static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_loadparm_store(reipl_block_ccw, buf, len); +} + +static struct kobj_attribute sys_reipl_ccw_loadparm_attr = + __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show, + reipl_ccw_loadparm_store); + +static struct attribute *reipl_ccw_attrs_vm[] = { + &sys_reipl_ccw_device_attr.attr, + &sys_reipl_ccw_loadparm_attr.attr, + &sys_reipl_ccw_vmparm_attr.attr, + NULL, +}; + +static struct attribute *reipl_ccw_attrs_lpar[] = { + &sys_reipl_ccw_device_attr.attr, + &sys_reipl_ccw_loadparm_attr.attr, + NULL, +}; + +static 
struct attribute_group reipl_ccw_attr_group_vm = { + .name = IPL_CCW_STR, + .attrs = reipl_ccw_attrs_vm, +}; + +static struct attribute_group reipl_ccw_attr_group_lpar = { + .name = IPL_CCW_STR, + .attrs = reipl_ccw_attrs_lpar, +}; + + +/* NSS reipl device attributes */ +static void reipl_get_ascii_nss_name(char *dst, + struct ipl_parameter_block *ipb) +{ + memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE); + EBCASC(dst, NSS_NAME_SIZE); + dst[NSS_NAME_SIZE] = 0; +} + +static ssize_t reipl_nss_name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + char nss_name[NSS_NAME_SIZE + 1] = {}; + + reipl_get_ascii_nss_name(nss_name, reipl_block_nss); + return sprintf(page, "%s\n", nss_name); +} + +static ssize_t reipl_nss_name_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int nss_len; + + /* ignore trailing newline */ + nss_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + nss_len--; + + if (nss_len > NSS_NAME_SIZE) + return -EINVAL; + + memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE); + if (nss_len > 0) { + reipl_block_nss->ipl_info.ccw.vm_flags |= + DIAG308_VM_FLAGS_NSS_VALID; + memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len); + ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len); + EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len); + } else { + reipl_block_nss->ipl_info.ccw.vm_flags &= + ~DIAG308_VM_FLAGS_NSS_VALID; + } + + return len; +} + +static struct kobj_attribute sys_reipl_nss_name_attr = + __ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show, + reipl_nss_name_store); + +static struct kobj_attribute sys_reipl_nss_loadparm_attr = + __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show, + reipl_nss_loadparm_store); + +static struct attribute *reipl_nss_attrs[] = { + &sys_reipl_nss_name_attr.attr, + &sys_reipl_nss_loadparm_attr.attr, + &sys_reipl_nss_vmparm_attr.attr, + NULL, +}; + +static struct attribute_group reipl_nss_attr_group 
= { + .name = IPL_NSS_STR, + .attrs = reipl_nss_attrs, +}; + +static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block) +{ + reipl_block_actual = reipl_block; + os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual, + reipl_block->hdr.len); +} + +/* reipl type */ + +static int reipl_set_type(enum ipl_type type) +{ + if (!(reipl_capabilities & type)) + return -EINVAL; + + switch(type) { + case IPL_TYPE_CCW: + if (diag308_set_works) + reipl_method = REIPL_METHOD_CCW_DIAG; + else if (MACHINE_IS_VM) + reipl_method = REIPL_METHOD_CCW_VM; + else + reipl_method = REIPL_METHOD_CCW_CIO; + set_reipl_block_actual(reipl_block_ccw); + break; + case IPL_TYPE_FCP: + if (diag308_set_works) + reipl_method = REIPL_METHOD_FCP_RW_DIAG; + else if (MACHINE_IS_VM) + reipl_method = REIPL_METHOD_FCP_RO_VM; + else + reipl_method = REIPL_METHOD_FCP_RO_DIAG; + set_reipl_block_actual(reipl_block_fcp); + break; + case IPL_TYPE_FCP_DUMP: + reipl_method = REIPL_METHOD_FCP_DUMP; + break; + case IPL_TYPE_NSS: + if (diag308_set_works) + reipl_method = REIPL_METHOD_NSS_DIAG; + else + reipl_method = REIPL_METHOD_NSS; + set_reipl_block_actual(reipl_block_nss); + break; + case IPL_TYPE_UNKNOWN: + reipl_method = REIPL_METHOD_DEFAULT; + break; + default: + BUG(); + } + reipl_type = type; + return 0; +} + +static ssize_t reipl_type_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", ipl_type_str(reipl_type)); +} + +static ssize_t reipl_type_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int rc = -EINVAL; + + if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_CCW); + else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_FCP); + else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_NSS); + return (rc != 0) ? 
rc : len; +} + +static struct kobj_attribute reipl_type_attr = + __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store); + +static struct kset *reipl_kset; +static struct kset *reipl_fcp_kset; + +static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb, + const enum ipl_method m) +{ + char loadparm[LOADPARM_LEN + 1] = {}; + char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; + char nss_name[NSS_NAME_SIZE + 1] = {}; + size_t pos = 0; + + reipl_get_ascii_loadparm(loadparm, ipb); + reipl_get_ascii_nss_name(nss_name, ipb); + reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); + + switch (m) { + case REIPL_METHOD_CCW_VM: + pos = sprintf(dst, "IPL %X CLEAR", ipb->ipl_info.ccw.devno); + break; + case REIPL_METHOD_NSS: + pos = sprintf(dst, "IPL %s", nss_name); + break; + default: + break; + } + if (strlen(loadparm) > 0) + pos += sprintf(dst + pos, " LOADPARM '%s'", loadparm); + if (strlen(vmparm) > 0) + sprintf(dst + pos, " PARM %s", vmparm); +} + +static void __reipl_run(void *unused) +{ + struct ccw_dev_id devid; + static char buf[128]; + + switch (reipl_method) { + case REIPL_METHOD_CCW_CIO: + devid.devno = reipl_block_ccw->ipl_info.ccw.devno; + devid.ssid = 0; + reipl_ccw_dev(&devid); + break; + case REIPL_METHOD_CCW_VM: + get_ipl_string(buf, reipl_block_ccw, REIPL_METHOD_CCW_VM); + __cpcmd(buf, NULL, 0, NULL); + break; + case REIPL_METHOD_CCW_DIAG: + diag308(DIAG308_SET, reipl_block_ccw); + diag308(DIAG308_IPL, NULL); + break; + case REIPL_METHOD_FCP_RW_DIAG: + diag308(DIAG308_SET, reipl_block_fcp); + diag308(DIAG308_IPL, NULL); + break; + case REIPL_METHOD_FCP_RO_DIAG: + diag308(DIAG308_IPL, NULL); + break; + case REIPL_METHOD_FCP_RO_VM: + __cpcmd("IPL", NULL, 0, NULL); + break; + case REIPL_METHOD_NSS_DIAG: + diag308(DIAG308_SET, reipl_block_nss); + diag308(DIAG308_IPL, NULL); + break; + case REIPL_METHOD_NSS: + get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS); + __cpcmd(buf, NULL, 0, NULL); + break; + case REIPL_METHOD_DEFAULT: + if (MACHINE_IS_VM) 
+ __cpcmd("IPL", NULL, 0, NULL); + diag308(DIAG308_IPL, NULL); + break; + case REIPL_METHOD_FCP_DUMP: + break; + } + disabled_wait((unsigned long) __builtin_return_address(0)); +} + +static void reipl_run(struct shutdown_trigger *trigger) +{ + smp_call_ipl_cpu(__reipl_run, NULL); +} + +static void reipl_block_ccw_init(struct ipl_parameter_block *ipb) +{ + ipb->hdr.len = IPL_PARM_BLK_CCW_LEN; + ipb->hdr.version = IPL_PARM_BLOCK_VERSION; + ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN; + ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW; +} + +static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) +{ + /* LOADPARM */ + /* check if read scp info worked and set loadparm */ + if (sclp_ipl_info.is_valid) + memcpy(ipb->ipl_info.ccw.load_parm, + &sclp_ipl_info.loadparm, LOADPARM_LEN); + else + /* read scp info failed: set empty loadparm (EBCDIC blanks) */ + memset(ipb->ipl_info.ccw.load_parm, 0x40, LOADPARM_LEN); + ipb->hdr.flags = DIAG308_FLAGS_LP_VALID; + + /* VM PARM */ + if (MACHINE_IS_VM && diag308_set_works && + (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) { + + ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; + ipb->ipl_info.ccw.vm_parm_len = + ipl_block.ipl_info.ccw.vm_parm_len; + memcpy(ipb->ipl_info.ccw.vm_parm, + ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE); + } +} + +static int __init reipl_nss_init(void) +{ + int rc; + + if (!MACHINE_IS_VM) + return 0; + + reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_nss) + return -ENOMEM; + + if (!diag308_set_works) + sys_reipl_nss_vmparm_attr.attr.mode = S_IRUGO; + + rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group); + if (rc) + return rc; + + reipl_block_ccw_init(reipl_block_nss); + if (ipl_info.type == IPL_TYPE_NSS) { + memset(reipl_block_nss->ipl_info.ccw.nss_name, + ' ', NSS_NAME_SIZE); + memcpy(reipl_block_nss->ipl_info.ccw.nss_name, + kernel_nss_name, strlen(kernel_nss_name)); + ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, NSS_NAME_SIZE); 
+ reipl_block_nss->ipl_info.ccw.vm_flags |= + DIAG308_VM_FLAGS_NSS_VALID; + + reipl_block_ccw_fill_parms(reipl_block_nss); + } + + reipl_capabilities |= IPL_TYPE_NSS; + return 0; +} + +static int __init reipl_ccw_init(void) +{ + int rc; + + reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_ccw) + return -ENOMEM; + + if (MACHINE_IS_VM) { + if (!diag308_set_works) + sys_reipl_ccw_vmparm_attr.attr.mode = S_IRUGO; + rc = sysfs_create_group(&reipl_kset->kobj, + &reipl_ccw_attr_group_vm); + } else { + if(!diag308_set_works) + sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO; + rc = sysfs_create_group(&reipl_kset->kobj, + &reipl_ccw_attr_group_lpar); + } + if (rc) + return rc; + + reipl_block_ccw_init(reipl_block_ccw); + if (ipl_info.type == IPL_TYPE_CCW) { + reipl_block_ccw->ipl_info.ccw.devno = ipl_devno; + reipl_block_ccw_fill_parms(reipl_block_ccw); + } + + reipl_capabilities |= IPL_TYPE_CCW; + return 0; +} + +static int __init reipl_fcp_init(void) +{ + int rc; + + if (!diag308_set_works) { + if (ipl_info.type == IPL_TYPE_FCP) { + make_attrs_ro(reipl_fcp_attrs); + sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO; + } else + return 0; + } + + reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_fcp) + return -ENOMEM; + + /* sysfs: create fcp kset for mixing attr group and bin attrs */ + reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL, + &reipl_kset->kobj); + if (!reipl_fcp_kset) { + free_page((unsigned long) reipl_block_fcp); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); + if (rc) { + kset_unregister(reipl_fcp_kset); + free_page((unsigned long) reipl_block_fcp); + return rc; + } + + rc = sysfs_create_bin_file(&reipl_fcp_kset->kobj, + &sys_reipl_fcp_scp_data_attr); + if (rc) { + sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); + kset_unregister(reipl_fcp_kset); + free_page((unsigned long) reipl_block_fcp); + return rc; + } + + if (ipl_info.type == 
IPL_TYPE_FCP) + memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE); + else { + reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; + reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; + reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP; + reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL; + } + reipl_capabilities |= IPL_TYPE_FCP; + return 0; +} + +static int __init reipl_type_init(void) +{ + enum ipl_type reipl_type = ipl_info.type; + struct ipl_parameter_block *reipl_block; + unsigned long size; + + reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size); + if (!reipl_block) + goto out; + /* + * If we have an OS info reipl block, this will be used + */ + if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) { + memcpy(reipl_block_fcp, reipl_block, size); + reipl_type = IPL_TYPE_FCP; + } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) { + memcpy(reipl_block_ccw, reipl_block, size); + reipl_type = IPL_TYPE_CCW; + } +out: + return reipl_set_type(reipl_type); +} + +static int __init reipl_init(void) +{ + int rc; + + reipl_kset = kset_create_and_add("reipl", NULL, firmware_kobj); + if (!reipl_kset) + return -ENOMEM; + rc = sysfs_create_file(&reipl_kset->kobj, &reipl_type_attr.attr); + if (rc) { + kset_unregister(reipl_kset); + return rc; + } + rc = reipl_ccw_init(); + if (rc) + return rc; + rc = reipl_fcp_init(); + if (rc) + return rc; + rc = reipl_nss_init(); + if (rc) + return rc; + return reipl_type_init(); +} + +static struct shutdown_action __refdata reipl_action = { + .name = SHUTDOWN_ACTION_REIPL_STR, + .fn = reipl_run, + .init = reipl_init, +}; + +/* + * dump shutdown action: Dump Linux on shutdown. 
+ */ + +/* FCP dump device attributes */ + +DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n", + dump_block_fcp->ipl_info.fcp.wwpn); +DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n", + dump_block_fcp->ipl_info.fcp.lun); +DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", + dump_block_fcp->ipl_info.fcp.bootprog); +DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n", + dump_block_fcp->ipl_info.fcp.br_lba); +DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", + dump_block_fcp->ipl_info.fcp.devno); + +static struct attribute *dump_fcp_attrs[] = { + &sys_dump_fcp_device_attr.attr, + &sys_dump_fcp_wwpn_attr.attr, + &sys_dump_fcp_lun_attr.attr, + &sys_dump_fcp_bootprog_attr.attr, + &sys_dump_fcp_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group dump_fcp_attr_group = { + .name = IPL_FCP_STR, + .attrs = dump_fcp_attrs, +}; + +/* CCW dump device attributes */ + +DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", + dump_block_ccw->ipl_info.ccw.devno); + +static struct attribute *dump_ccw_attrs[] = { + &sys_dump_ccw_device_attr.attr, + NULL, +}; + +static struct attribute_group dump_ccw_attr_group = { + .name = IPL_CCW_STR, + .attrs = dump_ccw_attrs, +}; + +/* dump type */ + +static int dump_set_type(enum dump_type type) +{ + if (!(dump_capabilities & type)) + return -EINVAL; + switch (type) { + case DUMP_TYPE_CCW: + if (diag308_set_works) + dump_method = DUMP_METHOD_CCW_DIAG; + else if (MACHINE_IS_VM) + dump_method = DUMP_METHOD_CCW_VM; + else + dump_method = DUMP_METHOD_CCW_CIO; + break; + case DUMP_TYPE_FCP: + dump_method = DUMP_METHOD_FCP_DIAG; + break; + default: + dump_method = DUMP_METHOD_NONE; + } + dump_type = type; + return 0; +} + +static ssize_t dump_type_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", dump_type_str(dump_type)); +} + +static ssize_t dump_type_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char 
*buf, size_t len) +{ + int rc = -EINVAL; + + if (strncmp(buf, DUMP_NONE_STR, strlen(DUMP_NONE_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_NONE); + else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_CCW); + else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_FCP); + return (rc != 0) ? rc : len; +} + +static struct kobj_attribute dump_type_attr = + __ATTR(dump_type, 0644, dump_type_show, dump_type_store); + +static struct kset *dump_kset; + +static void __dump_run(void *unused) +{ + struct ccw_dev_id devid; + static char buf[100]; + + switch (dump_method) { + case DUMP_METHOD_CCW_CIO: + devid.devno = dump_block_ccw->ipl_info.ccw.devno; + devid.ssid = 0; + reipl_ccw_dev(&devid); + break; + case DUMP_METHOD_CCW_VM: + sprintf(buf, "STORE STATUS"); + __cpcmd(buf, NULL, 0, NULL); + sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno); + __cpcmd(buf, NULL, 0, NULL); + break; + case DUMP_METHOD_CCW_DIAG: + diag308(DIAG308_SET, dump_block_ccw); + diag308(DIAG308_DUMP, NULL); + break; + case DUMP_METHOD_FCP_DIAG: + diag308(DIAG308_SET, dump_block_fcp); + diag308(DIAG308_DUMP, NULL); + break; + default: + break; + } +} + +static void dump_run(struct shutdown_trigger *trigger) +{ + if (dump_method == DUMP_METHOD_NONE) + return; + smp_send_stop(); + smp_call_ipl_cpu(__dump_run, NULL); +} + +static int __init dump_ccw_init(void) +{ + int rc; + + dump_block_ccw = (void *) get_zeroed_page(GFP_KERNEL); + if (!dump_block_ccw) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_ccw_attr_group); + if (rc) { + free_page((unsigned long)dump_block_ccw); + return rc; + } + dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN; + dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN; + dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW; + dump_capabilities |= DUMP_TYPE_CCW; + return 0; +} + +static int __init dump_fcp_init(void) +{ + int rc; 
+ + if (!sclp_ipl_info.has_dump) + return 0; /* LDIPL DUMP is not installed */ + if (!diag308_set_works) + return 0; + dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); + if (!dump_block_fcp) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_fcp_attr_group); + if (rc) { + free_page((unsigned long)dump_block_fcp); + return rc; + } + dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; + dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; + dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP; + dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP; + dump_capabilities |= DUMP_TYPE_FCP; + return 0; +} + +static int __init dump_init(void) +{ + int rc; + + dump_kset = kset_create_and_add("dump", NULL, firmware_kobj); + if (!dump_kset) + return -ENOMEM; + rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr); + if (rc) { + kset_unregister(dump_kset); + return rc; + } + rc = dump_ccw_init(); + if (rc) + return rc; + rc = dump_fcp_init(); + if (rc) + return rc; + dump_set_type(DUMP_TYPE_NONE); + return 0; +} + +static struct shutdown_action __refdata dump_action = { + .name = SHUTDOWN_ACTION_DUMP_STR, + .fn = dump_run, + .init = dump_init, +}; + +static void dump_reipl_run(struct shutdown_trigger *trigger) +{ + u32 csum; + + csum = csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); + copy_to_absolute_zero(&S390_lowcore.ipib_checksum, &csum, sizeof(csum)); + copy_to_absolute_zero(&S390_lowcore.ipib, &reipl_block_actual, + sizeof(reipl_block_actual)); + dump_run(trigger); +} + +static int __init dump_reipl_init(void) +{ + if (!diag308_set_works) + return -EOPNOTSUPP; + else + return 0; +} + +static struct shutdown_action __refdata dump_reipl_action = { + .name = SHUTDOWN_ACTION_DUMP_REIPL_STR, + .fn = dump_reipl_run, + .init = dump_reipl_init, +}; + +/* + * vmcmd shutdown action: Trigger vm command on shutdown. 
+ */ + +static char vmcmd_on_reboot[128]; +static char vmcmd_on_panic[128]; +static char vmcmd_on_halt[128]; +static char vmcmd_on_poff[128]; +static char vmcmd_on_restart[128]; + +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart); + +static struct attribute *vmcmd_attrs[] = { + &sys_vmcmd_on_reboot_attr.attr, + &sys_vmcmd_on_panic_attr.attr, + &sys_vmcmd_on_halt_attr.attr, + &sys_vmcmd_on_poff_attr.attr, + &sys_vmcmd_on_restart_attr.attr, + NULL, +}; + +static struct attribute_group vmcmd_attr_group = { + .attrs = vmcmd_attrs, +}; + +static struct kset *vmcmd_kset; + +static void vmcmd_run(struct shutdown_trigger *trigger) +{ + char *cmd, *next_cmd; + + if (strcmp(trigger->name, ON_REIPL_STR) == 0) + cmd = vmcmd_on_reboot; + else if (strcmp(trigger->name, ON_PANIC_STR) == 0) + cmd = vmcmd_on_panic; + else if (strcmp(trigger->name, ON_HALT_STR) == 0) + cmd = vmcmd_on_halt; + else if (strcmp(trigger->name, ON_POFF_STR) == 0) + cmd = vmcmd_on_poff; + else if (strcmp(trigger->name, ON_RESTART_STR) == 0) + cmd = vmcmd_on_restart; + else + return; + + if (strlen(cmd) == 0) + return; + do { + next_cmd = strchr(cmd, '\n'); + if (next_cmd) { + next_cmd[0] = 0; + next_cmd += 1; + } + __cpcmd(cmd, NULL, 0, NULL); + cmd = next_cmd; + } while (cmd != NULL); +} + +static int vmcmd_init(void) +{ + if (!MACHINE_IS_VM) + return -EOPNOTSUPP; + vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj); + if (!vmcmd_kset) + return -ENOMEM; + return sysfs_create_group(&vmcmd_kset->kobj, &vmcmd_attr_group); +} + +static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR, + vmcmd_run, vmcmd_init}; + +/* + * stop shutdown action: Stop Linux on shutdown. 
+ */ + +static void stop_run(struct shutdown_trigger *trigger) +{ + if (strcmp(trigger->name, ON_PANIC_STR) == 0 || + strcmp(trigger->name, ON_RESTART_STR) == 0) + disabled_wait((unsigned long) __builtin_return_address(0)); + smp_stop_cpu(); +} + +static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR, + stop_run, NULL}; + +/* action list */ + +static struct shutdown_action *shutdown_actions_list[] = { + &ipl_action, &reipl_action, &dump_reipl_action, &dump_action, + &vmcmd_action, &stop_action}; +#define SHUTDOWN_ACTIONS_COUNT (sizeof(shutdown_actions_list) / sizeof(void *)) + +/* + * Trigger section + */ + +static struct kset *shutdown_actions_kset; + +static int set_trigger(const char *buf, struct shutdown_trigger *trigger, + size_t len) +{ + int i; + + for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) { + if (sysfs_streq(buf, shutdown_actions_list[i]->name)) { + if (shutdown_actions_list[i]->init_rc) { + return shutdown_actions_list[i]->init_rc; + } else { + trigger->action = shutdown_actions_list[i]; + return len; + } + } + } + return -EINVAL; +} + +/* on reipl */ + +static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR, + &reipl_action}; + +static ssize_t on_reboot_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_reboot_trigger.action->name); +} + +static ssize_t on_reboot_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_reboot_trigger, len); +} + +static struct kobj_attribute on_reboot_attr = + __ATTR(on_reboot, 0644, on_reboot_show, on_reboot_store); + +static void do_machine_restart(char *__unused) +{ + smp_send_stop(); + on_reboot_trigger.action->fn(&on_reboot_trigger); + reipl_run(NULL); +} +void (*_machine_restart)(char *command) = do_machine_restart; + +/* on panic */ + +static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action}; + +static ssize_t on_panic_show(struct kobject *kobj, 
+ struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_panic_trigger.action->name); +} + +static ssize_t on_panic_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_panic_trigger, len); +} + +static struct kobj_attribute on_panic_attr = + __ATTR(on_panic, 0644, on_panic_show, on_panic_store); +/* + * Panic path: calls lgr_info_log(), runs the action selected for the panic + * trigger and, if that action function returns, ends in stop_run(). + */ +static void do_panic(void) +{ + lgr_info_log(); + on_panic_trigger.action->fn(&on_panic_trigger); + stop_run(&on_panic_trigger); +} + +/* on restart */ + +static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR, + &stop_action}; + +static ssize_t on_restart_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_restart_trigger.action->name); +} + +static ssize_t on_restart_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_restart_trigger, len); +} + +static struct kobj_attribute on_restart_attr = + __ATTR(on_restart, 0644, on_restart_show, on_restart_store); +/* + * Worker for do_restart(): calls smp_send_stop(), then crash_kexec(NULL) + * when CONFIG_CRASH_DUMP is set, then the action selected for the restart + * trigger. stop_run() is the fallback if all of those return. + */ +static void __do_restart(void *ignore) +{ + smp_send_stop(); +#ifdef CONFIG_CRASH_DUMP + crash_kexec(NULL); +#endif + on_restart_trigger.action->fn(&on_restart_trigger); + stop_run(&on_restart_trigger); +} +/* + * Restart entry point: calls tracing_off(), debug_locks_off() and + * lgr_info_log(), then hands __do_restart() to smp_call_online_cpu(). + */ +void do_restart(void) +{ + tracing_off(); + debug_locks_off(); + lgr_info_log(); + smp_call_online_cpu(__do_restart, NULL); +} + +/* on halt */ + +static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; + +static ssize_t on_halt_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_halt_trigger.action->name); +} + +static ssize_t on_halt_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_halt_trigger, len); +} + +static struct kobj_attribute on_halt_attr = + __ATTR(on_halt, 0644, on_halt_show, on_halt_store); + + +static void
do_machine_halt(void) +{ + smp_send_stop(); + on_halt_trigger.action->fn(&on_halt_trigger); + stop_run(&on_halt_trigger); /* fallback if the action function returns */ +} +void (*_machine_halt)(void) = do_machine_halt; + +/* on power off */ + +static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action}; + +static ssize_t on_poff_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_poff_trigger.action->name); +} + +static ssize_t on_poff_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_poff_trigger, len); +} + +static struct kobj_attribute on_poff_attr = + __ATTR(on_poff, 0644, on_poff_show, on_poff_store); + +/* + * Default _machine_power_off implementation: same sequence as + * do_machine_halt(), using the power off trigger. + */ +static void do_machine_power_off(void) +{ + smp_send_stop(); + on_poff_trigger.action->fn(&on_poff_trigger); + stop_run(&on_poff_trigger); +} +void (*_machine_power_off)(void) = do_machine_power_off; +/* + * Create the "shutdown_actions" kset below firmware_kobj and add one + * attribute file per trigger (on_reboot, on_panic, on_halt, on_poff, + * on_restart). Any failure ends in panic(). + */ +static void __init shutdown_triggers_init(void) +{ + shutdown_actions_kset = kset_create_and_add("shutdown_actions", NULL, + firmware_kobj); + if (!shutdown_actions_kset) + goto fail; + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_reboot_attr.attr)) + goto fail; + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_panic_attr.attr)) + goto fail; + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_halt_attr.attr)) + goto fail; + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_poff_attr.attr)) + goto fail; + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_restart_attr.attr)) + goto fail; + return; +fail: + panic("shutdown_triggers_init failed\n"); +} +/* + * Call the optional init hook of every entry in shutdown_actions_list and + * store its return value in init_rc; set_trigger() refuses to select an + * action whose init_rc is non-zero. + */ +static void __init shutdown_actions_init(void) +{ + int i; + + for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) { + if (!shutdown_actions_list[i]->init) + continue; + shutdown_actions_list[i]->init_rc = + shutdown_actions_list[i]->init(); + } +} + +static int __init s390_ipl_init(void) +{ + sclp_get_ipl_info(&sclp_ipl_info); + shutdown_actions_init(); +
shutdown_triggers_init(); + return 0; +} + +__initcall(s390_ipl_init); + +/* + * Copy @src to @dst, dropping every double quote character. + * + * @dst: destination buffer with room for at least @n bytes + * @src: NUL terminated source string + * @n: maximum number of bytes stored to @dst + * + * At most @n bytes are stored; nothing is stored when @n <= 0. If fewer + * than @n characters were copied the result is NUL terminated, so a + * second, shorter value (e.g. a kernel parameter given twice) does not + * keep the tail of the previous one. When the copy fills all @n bytes no + * terminator is written, exactly as before; the callers below terminate + * the buffer themselves. + */ +static void __init strncpy_skip_quote(char *dst, char *src, int n) +{ + int sx, dx; + + dx = 0; + for (sx = 0; src[sx] != 0; sx++) { + if (src[sx] == '"') + continue; + /* check the limit before storing so n <= 0 writes nothing */ + if (dx >= n) + break; + dst[dx++] = src[sx]; + } + if (dx < n) + dst[dx] = 0; +} + +/* + * Handlers for the vmreboot=, vmpanic=, vmhalt= and vmpoff= kernel + * parameters. Each one does nothing unless MACHINE_IS_VM is true; + * otherwise it stores the parameter value (quotes removed, at most 127 + * characters) and makes vmcmd_action the action of the matching trigger. + * All of them return 1. + */ +static int __init vmcmd_on_reboot_setup(char *str) +{ + if (!MACHINE_IS_VM) + return 1; + strncpy_skip_quote(vmcmd_on_reboot, str, 127); + vmcmd_on_reboot[127] = 0; + on_reboot_trigger.action = &vmcmd_action; + return 1; +} +__setup("vmreboot=", vmcmd_on_reboot_setup); + +static int __init vmcmd_on_panic_setup(char *str) +{ + if (!MACHINE_IS_VM) + return 1; + strncpy_skip_quote(vmcmd_on_panic, str, 127); + vmcmd_on_panic[127] = 0; + on_panic_trigger.action = &vmcmd_action; + return 1; +} +__setup("vmpanic=", vmcmd_on_panic_setup); + +static int __init vmcmd_on_halt_setup(char *str) +{ + if (!MACHINE_IS_VM) + return 1; + strncpy_skip_quote(vmcmd_on_halt, str, 127); + vmcmd_on_halt[127] = 0; + on_halt_trigger.action = &vmcmd_action; + return 1; +} +__setup("vmhalt=", vmcmd_on_halt_setup); + +static int __init vmcmd_on_poff_setup(char *str) +{ + if (!MACHINE_IS_VM) + return 1; + strncpy_skip_quote(vmcmd_on_poff, str, 127); + vmcmd_on_poff[127] = 0; + on_poff_trigger.action = &vmcmd_action; + return 1; +} +__setup("vmpoff=", vmcmd_on_poff_setup); + +/* Panic notifier callback: runs do_panic(); registered in setup_ipl(). */ +static int on_panic_notify(struct notifier_block *self, + unsigned long event, void *data) +{ + do_panic(); + return NOTIFY_OK; +} + +/* NOTE(review): INT_MIN priority presumably makes this the last notifier to run - confirm */ +static struct notifier_block on_panic_nb = { + .notifier_call = on_panic_notify, + .priority = INT_MIN, +}; + +/* + * Fill ipl_info according to the type returned by get_ipl_type() and + * register the panic notifier. + */ +void __init setup_ipl(void) +{ + ipl_info.type = get_ipl_type(); + switch (ipl_info.type) { + case IPL_TYPE_CCW: + ipl_info.data.ccw.dev_id.devno = ipl_devno; + ipl_info.data.ccw.dev_id.ssid = 0; + break; + case IPL_TYPE_FCP: + case IPL_TYPE_FCP_DUMP: + ipl_info.data.fcp.dev_id.devno = + IPL_PARMBLOCK_START->ipl_info.fcp.devno; + ipl_info.data.fcp.dev_id.ssid = 0; + ipl_info.data.fcp.wwpn =
IPL_PARMBLOCK_START->ipl_info.fcp.wwpn; + ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun; + break; + case IPL_TYPE_NSS: + strncpy(ipl_info.data.nss.name, kernel_nss_name, + sizeof(ipl_info.data.nss.name)); /* NOTE(review): strncpy() leaves the name unterminated if the source fills the buffer - confirm kernel_nss_name is always shorter */ + break; + case IPL_TYPE_UNKNOWN: + /* We have no info to copy */ + break; + } + atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb); +} +/* + * Issue diag308(DIAG308_STORE) on ipl_block. A return code of + * DIAG308_RC_OK or DIAG308_RC_NOCONFIG sets diag308_set_works, which + * do_reset_calls() checks later. + */ +void __init ipl_update_parameters(void) +{ + int rc; + + rc = diag308(DIAG308_STORE, &ipl_block); + if ((rc == DIAG308_RC_OK) || (rc == DIAG308_RC_NOCONFIG)) + diag308_set_works = 1; +} +/* + * Record the IPL device number reported by cio_get_iplinfo() and mark it + * valid in ipl_flags. For a QDIO IPL device, additionally move one page + * from the lowcore's ipl_parmblock_ptr to IPL_PARMBLOCK_ORIGIN, repoint + * the lowcore at the new location and mark the parameter block valid. + * Does nothing if cio_get_iplinfo() fails. + */ +void __init ipl_save_parameters(void) +{ + struct cio_iplinfo iplinfo; + void *src, *dst; + + if (cio_get_iplinfo(&iplinfo)) + return; + + ipl_devno = iplinfo.devno; + ipl_flags |= IPL_DEVNO_VALID; + if (!iplinfo.is_qdio) + return; + ipl_flags |= IPL_PARMBLOCK_VALID; + src = (void *)(unsigned long)S390_lowcore.ipl_parmblock_ptr; + dst = (void *)IPL_PARMBLOCK_ORIGIN; + memmove(dst, src, PAGE_SIZE); + S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN; +} + +static LIST_HEAD(rcall); +static DEFINE_MUTEX(rcall_mutex); +/* Add @reset to the list of callbacks run by do_reset_calls(). */ +void register_reset_call(struct reset_call *reset) +{ + mutex_lock(&rcall_mutex); + list_add(&reset->list, &rcall); + mutex_unlock(&rcall_mutex); +} +EXPORT_SYMBOL_GPL(register_reset_call); +/* Remove @reset from the list of reset callbacks. */ +void unregister_reset_call(struct reset_call *reset) +{ + mutex_lock(&rcall_mutex); + list_del(&reset->list); + mutex_unlock(&rcall_mutex); +} +EXPORT_SYMBOL_GPL(unregister_reset_call); +/* + * On CONFIG_64BIT with diag308_set_works set, call diag308_reset() and + * return. Otherwise call the fn hook of every registered reset_call. + * The list is walked without taking rcall_mutex. + */ +static void do_reset_calls(void) +{ + struct reset_call *reset; + +#ifdef CONFIG_64BIT + if (diag308_set_works) { + diag308_reset(); + return; + } +#endif + list_for_each_entry(reset, &rcall, list) + reset->fn(); +} + +u32 dump_prefix_page; + +void s390_reset_system(void (*func)(void *), void *data) +{ + struct _lowcore *lc; + + lc = (struct _lowcore *)(unsigned long) store_prefix(); + + /* Stack for interrupt/machine check handler */ + lc->panic_stack = S390_lowcore.panic_stack; + + /* Save prefix page address for dump
case */ + dump_prefix_page = (u32)(unsigned long) lc; + + /* Disable prefixing */ + set_prefix(0); + + /* Disable lowcore protection */ + __ctl_clear_bit(0,28); + + /* Set new machine check handler */ + S390_lowcore.mcck_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; + S390_lowcore.mcck_new_psw.addr = + PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler; + + /* Set new program check handler */ + S390_lowcore.program_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; + S390_lowcore.program_new_psw.addr = + PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + + /* Store status at absolute zero */ + store_status(); + + do_reset_calls(); /* diag308 reset or the registered reset callbacks */ + if (func) /* optional continuation supplied by the caller */ + func(data); +} diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c new file mode 100644 index 00000000..8a22c272 --- /dev/null +++ b/arch/s390/kernel/irq.c @@ -0,0 +1,283 @@ +/* + * Copyright IBM Corp. 2004,2011 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Thomas Spatzier <tspat@de.ibm.com>, + * + * This file contains interrupt related functions.
+ */ + +#include <linux/kernel_stat.h> +#include <linux/interrupt.h> +#include <linux/seq_file.h> +#include <linux/proc_fs.h> +#include <linux/profile.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/ftrace.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <asm/irq_regs.h> +#include <asm/cputime.h> +#include <asm/lowcore.h> +#include <asm/irq.h> +#include "entry.h" +/* Labels for one row of the /proc/interrupts output, see show_interrupts(). */ +struct irq_class { + char *name; /* short label printed at the start of the row */ + char *desc; /* optional description printed at the end of the row, may be NULL */ +}; +/* + * Row labels, indexed by the row number that show_interrupts() receives. + * The first two entries carry no description. + */ +static const struct irq_class intrclass_names[] = { + {.name = "EXT" }, + {.name = "I/O" }, + {.name = "CLK", .desc = "[EXT] Clock Comparator" }, + {.name = "EXC", .desc = "[EXT] External Call" }, + {.name = "EMS", .desc = "[EXT] Emergency Signal" }, + {.name = "TMR", .desc = "[EXT] CPU Timer" }, + {.name = "TAL", .desc = "[EXT] Timing Alert" }, + {.name = "PFL", .desc = "[EXT] Pseudo Page Fault" }, + {.name = "DSD", .desc = "[EXT] DASD Diag" }, + {.name = "VRT", .desc = "[EXT] Virtio" }, + {.name = "SCP", .desc = "[EXT] Service Call" }, + {.name = "IUC", .desc = "[EXT] IUCV" }, + {.name = "CPM", .desc = "[EXT] CPU Measurement" }, + {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt" }, + {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, + {.name = "DAS", .desc = "[I/O] DASD" }, + {.name = "C15", .desc = "[I/O] 3215" }, + {.name = "C70", .desc = "[I/O] 3270" }, + {.name = "TAP", .desc = "[I/O] Tape" }, + {.name = "VMR", .desc = "[I/O] Unit Record Devices" }, + {.name = "LCS", .desc = "[I/O] LCS" }, + {.name = "CLW", .desc = "[I/O] CLAW" }, + {.name = "CTC", .desc = "[I/O] CTC" }, + {.name = "APB", .desc = "[I/O] AP Bus" }, + {.name = "CSC", .desc = "[I/O] CHSC Subchannel" }, + {.name = "NMI", .desc = "[NMI] Machine Check" }, +}; + +/* + * show_interrupts is needed by /proc/interrupts.
+ */ +int show_interrupts(struct seq_file *p, void *v) +{ + int i = *(loff_t *) v, j; /* i: row to print, j: CPU iterator */ + + get_online_cpus(); + if (i == 0) { /* first row: print the per-CPU column header first */ + seq_puts(p, " "); + for_each_online_cpu(j) + seq_printf(p, "CPU%d ",j); + seq_putc(p, '\n'); + } + + if (i < NR_IRQS) { /* NOTE(review): assumes intrclass_names has at least NR_IRQS entries - confirm */ + seq_printf(p, "%s: ", intrclass_names[i].name); +#ifndef CONFIG_SMP + seq_printf(p, "%10u ", kstat_irqs(i)); +#else + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); +#endif + if (intrclass_names[i].desc) + seq_printf(p, " %s", intrclass_names[i].desc); + seq_putc(p, '\n'); + } + put_online_cpus(); + return 0; +} + +/* + * Switch to the asynchronous interrupt stack for softirq execution. + * + * Returns immediately when called from interrupt context. Otherwise, + * with interrupts disabled via local_irq_save(), pending softirqs are + * run through __do_softirq(), either directly or after switching the + * stack pointer (register 15) to the async stack. + */ +asmlinkage void do_softirq(void) +{ + unsigned long flags, old, new; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + if (local_softirq_pending()) { + /* Get current stack pointer. */ + asm volatile("la %0,0(15)" : "=a" (old)); + /* Check against async. stack address range. */ + new = S390_lowcore.async_stack; + if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) { + /* Need to switch to the async. stack. */ + new -= STACK_FRAME_OVERHEAD; + ((struct stack_frame *) new)->back_chain = old; + + asm volatile(" la 15,0(%0)\n" + " basr 14,%2\n" + " la 15,0(%1)\n" + : : "a" (new), "a" (old), + "a" (__do_softirq) + : "0", "1", "2", "3", "4", "5", "14", + "cc", "memory" ); + } else { + /* We are already on the async stack. */ + __do_softirq(); + } + } + + local_irq_restore(flags); +} + +#ifdef CONFIG_PROC_FS +void init_irq_proc(void) /* creates the "irq" proc directory and passes it to create_prof_cpu_mask() */ +{ + struct proc_dir_entry *root_irq_dir; + + root_irq_dir = proc_mkdir("irq", NULL); + create_prof_cpu_mask(root_irq_dir); +} +#endif + +/* + * ext_int_hash[index] is the list head for all external interrupts that hash + * to this index.
+ */ +static struct list_head ext_int_hash[256]; +/* One registered handler for an external interrupt code. */ +struct ext_int_info { + ext_int_handler_t handler; /* called by do_extint() when the code matches */ + u16 code; /* external interrupt code this entry was registered for */ + struct list_head entry; /* link in ext_int_hash[ext_hash(code)] */ + struct rcu_head rcu; /* used by kfree_rcu() when the entry is unregistered */ +}; + +/* ext_int_hash_lock protects the handler lists for external interrupts */ +DEFINE_SPINLOCK(ext_int_hash_lock); + +static void __init init_external_interrupts(void) +{ + int idx; + + for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) + INIT_LIST_HEAD(&ext_int_hash[idx]); +} +/* Map a 16 bit external interrupt code to an ext_int_hash index (0..255). */ +static inline int ext_hash(u16 code) +{ + return (code + (code >> 9)) & 0xff; +} +/* + * Register @handler for external interrupt @code. The entry is allocated + * with GFP_ATOMIC and added to the hash list under ext_int_hash_lock. + * Returns 0 on success or -ENOMEM if the allocation fails. + */ +int register_external_interrupt(u16 code, ext_int_handler_t handler) +{ + struct ext_int_info *p; + unsigned long flags; + int index; + + p = kmalloc(sizeof(*p), GFP_ATOMIC); + if (!p) + return -ENOMEM; + p->code = code; + p->handler = handler; + index = ext_hash(code); + + spin_lock_irqsave(&ext_int_hash_lock, flags); + list_add_rcu(&p->entry, &ext_int_hash[index]); + spin_unlock_irqrestore(&ext_int_hash_lock, flags); + return 0; +} +EXPORT_SYMBOL(register_external_interrupt); +/* + * Remove every entry that matches both @code and @handler; the memory is + * released through kfree_rcu(). Always returns 0, also when nothing + * matched. + */ +int unregister_external_interrupt(u16 code, ext_int_handler_t handler) +{ + struct ext_int_info *p; + unsigned long flags; + int index = ext_hash(code); + + spin_lock_irqsave(&ext_int_hash_lock, flags); + list_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + if (p->code == code && p->handler == handler) { + list_del_rcu(&p->entry); + kfree_rcu(p, rcu); + } + } + spin_unlock_irqrestore(&ext_int_hash_lock, flags); + return 0; +} +EXPORT_SYMBOL(unregister_external_interrupt); +/* + * External interrupt entry: dispatches to every handler registered for + * ext_code.code, walking the hash list under rcu_read_lock(). + */ +void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct pt_regs *old_regs; + struct ext_int_info *p; + int index; + + old_regs = set_irq_regs(regs); + irq_enter(); + if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) { + /* Serve timer interrupts first.
*/ + clock_comparator_work(); + } + kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; + if (ext_code.code != 0x1004) /* NOTE(review): 0x1004 looks like the clock comparator interrupt code - confirm */ + __get_cpu_var(s390_idle).nohz_delay = 1; + + index = ext_hash(ext_code.code); + rcu_read_lock(); + list_for_each_entry_rcu(p, &ext_int_hash[index], entry) + if (likely(p->code == ext_code.code)) /* several codes can share one hash bucket */ + p->handler(ext_code, param32, param64); + rcu_read_unlock(); + irq_exit(); + set_irq_regs(old_regs); +} + +void __init init_IRQ(void) +{ + init_external_interrupts(); +} + +static DEFINE_SPINLOCK(sc_irq_lock); +static int sc_irq_refcount; +/* + * Reference counted enable of bit 9 in control register 0: the bit is set + * by the first registration only. + */ +void service_subclass_irq_register(void) +{ + spin_lock(&sc_irq_lock); + if (!sc_irq_refcount) + ctl_set_bit(0, 9); + sc_irq_refcount++; + spin_unlock(&sc_irq_lock); +} +EXPORT_SYMBOL(service_subclass_irq_register); +/* + * Drop one reference; bit 9 in control register 0 is cleared when the + * count reaches zero. The count is not checked for underflow. + */ +void service_subclass_irq_unregister(void) +{ + spin_lock(&sc_irq_lock); + sc_irq_refcount--; + if (!sc_irq_refcount) + ctl_clear_bit(0, 9); + spin_unlock(&sc_irq_lock); +} +EXPORT_SYMBOL(service_subclass_irq_unregister); + +static DEFINE_SPINLOCK(ma_subclass_lock); +static int ma_subclass_refcount; +/* + * Reference counted enable of bit 5 in control register 0: the bit is set + * by the first registration only. + */ +void measurement_alert_subclass_register(void) +{ + spin_lock(&ma_subclass_lock); + if (!ma_subclass_refcount) + ctl_set_bit(0, 5); + ma_subclass_refcount++; + spin_unlock(&ma_subclass_lock); +} +EXPORT_SYMBOL(measurement_alert_subclass_register); +/* + * Drop one reference; bit 5 in control register 0 is cleared when the + * count reaches zero. The count is not checked for underflow. + */ +void measurement_alert_subclass_unregister(void) +{ + spin_lock(&ma_subclass_lock); + ma_subclass_refcount--; + if (!ma_subclass_refcount) + ctl_clear_bit(0, 5); + spin_unlock(&ma_subclass_lock); +} +EXPORT_SYMBOL(measurement_alert_subclass_unregister); diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c new file mode 100644 index 00000000..b987ab2c --- /dev/null +++ b/arch/s390/kernel/jump_label.c @@ -0,0 +1,70 @@ +/* + * Jump label s390 support + * + * Copyright IBM Corp.
2011 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/stop_machine.h> +#include <linux/jump_label.h> +#include <asm/ipl.h> + +#ifdef HAVE_JUMP_LABEL +/* + * Image of the instruction written at a jump label site: a 16 bit opcode + * field followed by a signed 32 bit offset, packed without padding. The + * offset is the byte distance to the target shifted right by one. + */ +struct insn { + u16 opcode; + s32 offset; +} __packed; +/* Argument bundle handed through stop_machine() to the patch callback. */ +struct insn_args { + struct jump_entry *entry; + enum jump_label_type type; +}; +/* + * Write the instruction for @type at entry->code: "brcl 15,offset" towards + * entry->target for JUMP_LABEL_ENABLE, "brcl 0,0" otherwise. A failing + * probe_kernel_write() only triggers WARN_ON_ONCE(). + */ +static void __jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + struct insn insn; + int rc; + + if (type == JUMP_LABEL_ENABLE) { + /* brcl 15,offset */ + insn.opcode = 0xc0f4; + insn.offset = (entry->target - entry->code) >> 1; + } else { + /* brcl 0,0 */ + insn.opcode = 0xc004; + insn.offset = 0; + } + + rc = probe_kernel_write((void *)entry->code, &insn, JUMP_LABEL_NOP_SIZE); + WARN_ON_ONCE(rc < 0); +} +/* stop_machine() callback: unpack the arguments and patch the site. */ +static int __sm_arch_jump_label_transform(void *data) +{ + struct insn_args *args = data; + + __jump_label_transform(args->entry, args->type); + return 0; +} +/* Patch one jump label site from within stop_machine(). */ +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + struct insn_args args; + + args.entry = entry; + args.type = type; + + stop_machine(__sm_arch_jump_label_transform, &args, NULL); +} +/* Same as arch_jump_label_transform() but patches directly, without stop_machine(). */ +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + __jump_label_transform(entry, type); +} + +#endif diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c new file mode 100644 index 00000000..64b761ae --- /dev/null +++ b/arch/s390/kernel/kprobes.c @@ -0,0 +1,671 @@ +/* + * Kernel Probes (KProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2006 + * + * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com> + */ + +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/preempt.h> +#include <linux/stop_machine.h> +#include <linux/kdebug.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/sections.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/hardirq.h> + +DEFINE_PER_CPU(struct kprobe *, current_kprobe); +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +struct kretprobe_blackpoint kretprobe_blacklist[] = { }; +/* + * Returns -EINVAL if the instruction at @insn must not be probed, 0 + * otherwise. The first switch matches on the upper byte of insn[0], the + * second on the whole of insn[0] (kprobe_opcode_t is presumably a 16 bit + * halfword here - confirm). + */ +static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) +{ + switch (insn[0] >> 8) { + case 0x0c: /* bassm */ + case 0x0b: /* bsm */ + case 0x83: /* diag */ + case 0x44: /* ex */ + case 0xac: /* stnsm */ + case 0xad: /* stosm */ + return -EINVAL; + } + switch (insn[0]) { + case 0x0101: /* pr */ + case 0xb25a: /* bsa */ + case 0xb240: /* bakr */ + case 0xb258: /* bsg */ + case 0xb218: /* pc */ + case 0xb228: /* pt */ + case 0xb98d: /* epsw */ + return -EINVAL; + } + return 0; +} +/* + * Classify the instruction at @insn and return the FIXUP_* flags that + * resume_execution() applies after the out-of-line copy has been single + * stepped. + */ +static int __kprobes get_fixup_type(kprobe_opcode_t *insn) +{ + /* default fixup method */ + int fixup = FIXUP_PSW_NORMAL; + + switch (insn[0] >> 8) { + case 0x05: /* balr */ + case 0x0d: /* basr */ + fixup = FIXUP_RETURN_REGISTER; + /* if r2 = 0, no branch will be taken */ + if ((insn[0] & 0x0f) == 0) + fixup |= FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x06: /* bctr */ + case 0x07: /* bcr */ + fixup =
FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x45: /* bal */ + case 0x4d: /* bas */ + fixup = FIXUP_RETURN_REGISTER; + break; + case 0x47: /* bc */ + case 0x46: /* bct */ + case 0x86: /* bxh */ + case 0x87: /* bxle */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x82: /* lpsw */ + fixup = FIXUP_NOT_REQUIRED; + break; + case 0xb2: /* lpswe */ + if ((insn[0] & 0xff) == 0xb2) + fixup = FIXUP_NOT_REQUIRED; + break; + case 0xa7: /* bras */ + if ((insn[0] & 0x0f) == 0x05) + fixup |= FIXUP_RETURN_REGISTER; + break; + case 0xc0: + if ((insn[0] & 0x0f) == 0x00 || /* larl */ + (insn[0] & 0x0f) == 0x05) /* brasl */ + fixup |= FIXUP_RETURN_REGISTER; + break; + case 0xeb: + if ((insn[2] & 0xff) == 0x44 || /* bxhg */ + (insn[2] & 0xff) == 0x45) /* bxleg */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0xe3: /* bctg */ + if ((insn[2] & 0xff) == 0x46) + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + return fixup; +} +/* + * Validate the probe address and prepare the out-of-line copy. + * + * Returns -EINVAL for an odd address or a prohibited opcode. Otherwise + * the first opcode unit is saved in p->opcode, the whole instruction is + * copied to p->ainsn.insn and 0 is returned. + */ +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + if ((unsigned long) p->addr & 0x01) + return -EINVAL; + + /* Make sure the probe isn't going on a difficult instruction */ + if (is_prohibited_opcode(p->addr)) + return -EINVAL; + + p->opcode = *p->addr; + memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2); /* 2, 4 or 6 bytes, chosen by the two top opcode bits (assuming a 16 bit kprobe_opcode_t) */ + + return 0; +} +/* Arguments for swap_instruction(): where to write and what to write. */ +struct ins_replace_args { + kprobe_opcode_t *ptr; + kprobe_opcode_t opcode; +}; +/* + * stop_machine() callback that writes args->opcode to args->ptr. While + * writing, kprobe_status is set to KPROBE_SWAP_INST so that + * kprobe_trap_handler() can tell a fault caused by this write from other + * faults; the previous status is restored afterwards. The return value of + * probe_kernel_write() is not checked. + */ +static int __kprobes swap_instruction(void *aref) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long status = kcb->kprobe_status; + struct ins_replace_args *args = aref; + + kcb->kprobe_status = KPROBE_SWAP_INST; + probe_kernel_write(args->ptr, &args->opcode, sizeof(args->opcode)); + kcb->kprobe_status = status; + return 0; +} +/* Arm the probe: replace the start of the instruction with BREAKPOINT_INSTRUCTION. */ +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + struct ins_replace_args args; + + args.ptr = p->addr; + args.opcode = BREAKPOINT_INSTRUCTION; + stop_machine(swap_instruction, &args, NULL); +} +/* Disarm the probe: write the saved original opcode back. */ +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + struct ins_replace_args args; + +
args.ptr = p->addr; + args.opcode = p->opcode; + stop_machine(swap_instruction, &args, NULL); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) /* intentionally empty */ +{ +} +/* + * Prepare @regs so that exactly the instruction at @ip is single stepped: + * the PER range is set to [ip, ip] with an instruction fetch event, PER + * is enabled in the PSW and I/O and external interrupts are masked. The + * previous control registers 9-11 and the affected PSW mask bits are + * saved in @kcb for disable_singlestep(). + */ +static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb, + struct pt_regs *regs, + unsigned long ip) +{ + struct per_regs per_kprobe; + + /* Set up the PER control registers %cr9-%cr11 */ + per_kprobe.control = PER_EVENT_IFETCH; + per_kprobe.start = ip; + per_kprobe.end = ip; + + /* Save control regs and psw mask */ + __ctl_store(kcb->kprobe_saved_ctl, 9, 11); + kcb->kprobe_saved_imask = regs->psw.mask & + (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT); + + /* Set PER control regs, turns on single step for the given address */ + __ctl_load(per_kprobe, 9, 11); + regs->psw.mask |= PSW_MASK_PER; + regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); + regs->psw.addr = ip | PSW_ADDR_AMODE; +} +/* + * Undo enable_singlestep(): reload the saved control registers, restore + * the saved PSW mask bits and continue execution at @ip. + */ +static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb, + struct pt_regs *regs, + unsigned long ip) +{ + /* Restore control regs and psw mask, set new psw address */ + __ctl_load(kcb->kprobe_saved_ctl, 9, 11); + regs->psw.mask &= ~PSW_MASK_PER; + regs->psw.mask |= kcb->kprobe_saved_imask; + regs->psw.addr = ip | PSW_ADDR_AMODE; +} + +/* + * Activate a kprobe by storing its pointer to current_kprobe. The + * previous kprobe is stored in kcb->prev_kprobe. A stack of up to + * two kprobes can be active, see KPROBE_REENTER. + */ +static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p) +{ + kcb->prev_kprobe.kp = __get_cpu_var(current_kprobe); + kcb->prev_kprobe.status = kcb->kprobe_status; + __get_cpu_var(current_kprobe) = p; +} + +/* + * Deactivate a kprobe by backing up to the previous state. If the + * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL, + * for any other state prev_kprobe.kp will be NULL.
+ */ +static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb) +{ + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; +} +/* + * Remember the return address found in register 14 in @ri and make the + * probed function return to kretprobe_trampoline instead. + */ +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14]; + + /* Replace the return addr with trampoline addr */ + regs->gprs[14] = (unsigned long) &kretprobe_trampoline; +} +/* + * Called when probe @p is hit while another kprobe is still active. If the + * active probe is running one of its handlers (KPROBE_HIT_ACTIVE or + * KPROBE_HIT_SSDONE) the hit is only counted as missed. Any other state + * is reported and ends in BUG(). + */ +static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb, + struct kprobe *p) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + default: + /* + * A kprobe on the code path to single step an instruction + * is a BUG. The code path resides in the .kprobes.text + * section and is executed with interrupts disabled. + */ + printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr); + dump_kprobe(p); + BUG(); + } +} +/* + * Breakpoint handler, called from kprobe_exceptions_notify() for DIE_BPT. + * Returns 1 if the trap belongs to kprobes (preemption then stays + * disabled) and 0 if it does not (preemption is re-enabled). + */ +static int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb; + struct kprobe *p; + + /* + * We want to disable preemption for the entire duration of kprobe + * processing. That includes the calls to the pre/post handlers + * and single stepping the kprobe instruction. + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2)); /* the PSW address is 2 bytes past the start of the trapping instruction */ + + if (p) { + if (kprobe_running()) { + /* + * We have hit a kprobe while another is still + * active. This can happen in the pre and post + * handler. Single step the instruction of the + * new probe but do not call any handler function + * of this secondary kprobe. + * push_kprobe and pop_kprobe saves and restores + * the currently active kprobe. + */ + kprobe_reenter_check(kcb, p); + push_kprobe(kcb, p); + kcb->kprobe_status = KPROBE_REENTER; + } else { + /* + * If we have no pre-handler or it returned 0, we + * continue with single stepping.
If we have a + * pre-handler and it returned non-zero, it prepped + * for calling the break_handler below on re-entry + * for jprobe processing, so get out doing nothing + * more here. + */ + push_kprobe(kcb, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (p->pre_handler && p->pre_handler(p, regs)) + return 1; /* pre-handler took over; preemption stays disabled */ + kcb->kprobe_status = KPROBE_HIT_SS; + } + enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn); /* step the out-of-line copy, not the probed address */ + return 1; + } else if (kprobe_running()) { + p = __get_cpu_var(current_kprobe); + if (p->break_handler && p->break_handler(p, regs)) { + /* + * Continuation after the jprobe completed and + * caused the jprobe_return trap. The jprobe + * break_handler "returns" to the original + * function that still has the kprobe breakpoint + * installed. We continue with single stepping. + */ + kcb->kprobe_status = KPROBE_HIT_SS; + enable_singlestep(kcb, regs, + (unsigned long) p->ainsn.insn); + return 1; + } /* else: + * No kprobe at this address and the current kprobe + * has no break handler (no jprobe!). The kernel just + * exploded, let the standard trap handler pick up the + * pieces. + */ + } /* else: + * No kprobe at this address and no active kprobe. The trap has + * not been caused by a kprobe breakpoint. The race of breakpoint + * vs. kprobe remove does not exist because on s390 as we use + * stop_machine to arm/disarm the breakpoints.
+ */ + preempt_enable_no_resched(); + return 0; +} + +/* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe + * causes the handlers to fire + */ +static void __used kretprobe_trampoline_holder(void) +{ + asm volatile(".global kretprobe_trampoline\n" + "kretprobe_trampoline: bcr 0,0\n"); +} + +/* + * Called when the probe at kretprobe trampoline is hit + */ +static int __kprobes trampoline_probe_handler(struct kprobe *p, + struct pt_regs *regs) +{ + struct kretprobe_instance *ri; + struct hlist_head *head, empty_rp; + struct hlist_node *node, *tmp; + unsigned long flags, orig_ret_address; + unsigned long trampoline_address; + kprobe_opcode_t *correct_ret_addr; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because multiple functions in the call path + * have a return probe installed on them, and/or more than one + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + ri = NULL; + orig_ret_address = 0; + correct_ret_addr = NULL; + trampoline_address = (unsigned long) &kretprobe_trampoline; + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { /* first pass: only locate the real return address */ + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long) ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address.
Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); /* NOTE(review): presumably complains if no real return address was found - confirm */ + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { /* second pass: run the handlers and recycle the instances */ + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long) ri->ret_addr; + + if (ri->rp && ri->rp->handler) { + ri->ret_addr = correct_ret_addr; /* every handler sees the real return address */ + ri->rp->handler(ri, regs); + } + + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; /* resume at the real return address */ + + pop_kprobe(get_kprobe_ctlblk()); + kretprobe_hash_unlock(current, &flags); + preempt_enable_no_resched(); + + hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { /* free what ended up on empty_rp, after the hash lock was dropped */ + hlist_del(&ri->hlist); + kfree(ri); + } + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +/* + * Called after single-stepping. p->addr is the address of the + * instruction whose first byte has been replaced by the "breakpoint" + * instruction. To avoid the SMP problems that can occur when we + * temporarily put back the original opcode to single-step, we + * single-stepped a copy of the instruction. The address of this + * copy is p->ainsn.insn.
+ */ +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long ip = regs->psw.addr & PSW_ADDR_INSN; + int fixup = get_fixup_type(p->ainsn.insn); + + if (fixup & FIXUP_PSW_NORMAL) /* translate ip from the copy back to the original location */ + ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; + + if (fixup & FIXUP_BRANCH_NOT_TAKEN) { + int ilen = ((p->ainsn.insn[0] >> 14) + 3) & -2; + if (ip - (unsigned long) p->ainsn.insn == ilen) /* ip is just past the copy: the branch fell through */ + ip = (unsigned long) p->addr + ilen; + } + + if (fixup & FIXUP_RETURN_REGISTER) { + int reg = (p->ainsn.insn[0] & 0xf0) >> 4; + regs->gprs[reg] += (unsigned long) p->addr - + (unsigned long) p->ainsn.insn; /* rebase the address stored in that register from the copy to the original */ + } + + disable_singlestep(kcb, regs, ip); +} +/* + * Single step handler, called from kprobe_exceptions_notify() for + * DIE_SSTEP. Returns 0 if no kprobe is active or if PER is still set in + * the PSW mask after the fixup, 1 if the event was fully handled here. + * The post_handler is not called for a re-entered probe. + */ +static int __kprobes post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe *p = kprobe_running(); + + if (!p) + return 0; + + if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + + resume_execution(p, regs); + pop_kprobe(kcb); + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, psw mask + * will have PER set, in which case, continue the remaining processing + * of do_single_step, as if this is not a probe hit. + */ + if (regs->psw.mask & PSW_MASK_PER) + return 0; + + return 1; +} +/* + * Handle a fault that happened while a kprobe was being processed. + * Returns 1 if the fault was dealt with here, 0 if the regular fault + * handling has to continue. + */ +static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe *p = kprobe_running(); + const struct exception_table_entry *entry; + + switch(kcb->kprobe_status) { + case KPROBE_SWAP_INST: + /* We are here because the instruction replacement failed */ + return 0; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault.
We reset the current + * kprobe and the nip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + disable_singlestep(kcb, regs, (unsigned long) p->addr); + pop_kprobe(kcb); + preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accounting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(p); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (p->fault_handler && p->fault_handler(p, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); + if (entry) { + regs->psw.addr = entry->fixup | PSW_ADDR_AMODE; + return 1; + } + + /* + * fixup_exception() could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; + } + return 0; +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + int ret; + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_disable(); + ret = kprobe_trap_handler(regs, trapnr); + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); + return ret; +} + +/* + * Wrapper routine for handling exceptions.
+ */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *) data; + struct pt_regs *regs = args->regs; + int ret = NOTIFY_DONE; + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_disable(); + + switch (val) { + case DIE_BPT: + if (kprobe_handler(regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEP: + if (post_kprobe_handler(regs)) + ret = NOTIFY_STOP; + break; + case DIE_TRAP: + if (!preemptible() && kprobe_running() && + kprobe_trap_handler(regs, args->trapnr)) + ret = NOTIFY_STOP; + break; + default: + break; + } + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); + + return ret; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long stack; + + memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); + + /* setup return addr to the jprobe handler routine */ + regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE; + regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); + + /* r15 is the stack pointer */ + stack = (unsigned long) regs->gprs[15]; + + memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack)); + return 1; +} + +void __kprobes jprobe_return(void) +{ + asm volatile(".word 0x0002"); +} + +static void __used __kprobes jprobe_return_end(void) +{ + asm volatile("bcr 0,0"); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long stack; + + stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15]; + + /* Put the regs back */ + memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + /* put the stack back */ + memcpy((void *) stack, kcb->jprobes_stack, MIN_STACK_SIZE(stack)); + preempt_enable_no_resched(); + return 1; +} + 
+static struct kprobe trampoline = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline); +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline; +} diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c new file mode 100644 index 00000000..87f080b1 --- /dev/null +++ b/arch/s390/kernel/lgr.c @@ -0,0 +1,200 @@ +/* + * Linux Guest Relocation (LGR) detection + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/slab.h> +#include <asm/facility.h> +#include <asm/sysinfo.h> +#include <asm/ebcdic.h> +#include <asm/debug.h> +#include <asm/ipl.h> + +#define LGR_TIMER_INTERVAL_SECS (30 * 60) +#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */ + +/* + * LGR info: Contains stfle and stsi data + */ +struct lgr_info { + /* Bit field with facility information: 4 DWORDs are stored */ + u64 stfle_fac_list[4]; + /* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM */ + u32 level; + /* Level 1: CEC info (stsi 1.1.1) */ + char manufacturer[16]; + char type[4]; + char sequence[16]; + char plant[4]; + char model[16]; + /* Level 2: LPAR info (stsi 2.2.2) */ + u16 lpar_number; + char name[8]; + /* Level 3: VM info (stsi 3.2.2) */ + u8 vm_count; + struct { + char name[8]; + char cpi[16]; + } vm[VM_LEVEL_MAX]; +} __packed __aligned(8); + +/* + * LGR globals + */ +static void *lgr_page; +static struct lgr_info lgr_info_last; +static struct lgr_info lgr_info_cur; +static struct debug_info *lgr_dbf; + +/* + * Return number of valid stsi levels + */ +static inline int stsi_0(void) +{ + int rc = stsi(NULL, 0, 0, 0); + + return rc == -ENOSYS ? 
rc : (((unsigned int) rc) >> 28); +} + +/* + * Copy buffer and then convert it to ASCII + */ +static void cpascii(char *dst, char *src, int size) +{ + memcpy(dst, src, size); + EBCASC(dst, size); +} + +/* + * Fill LGR info with 1.1.1 stsi data + */ +static void lgr_stsi_1_1_1(struct lgr_info *lgr_info) +{ + struct sysinfo_1_1_1 *si = lgr_page; + + if (stsi(si, 1, 1, 1) == -ENOSYS) + return; + cpascii(lgr_info->manufacturer, si->manufacturer, + sizeof(si->manufacturer)); + cpascii(lgr_info->type, si->type, sizeof(si->type)); + cpascii(lgr_info->model, si->model, sizeof(si->model)); + cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence)); + cpascii(lgr_info->plant, si->plant, sizeof(si->plant)); +} + +/* + * Fill LGR info with 2.2.2 stsi data + */ +static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_2_2_2 *si = lgr_page; + + if (stsi(si, 2, 2, 2) == -ENOSYS) + return; + cpascii(lgr_info->name, si->name, sizeof(si->name)); + memcpy(&lgr_info->lpar_number, &si->lpar_number, + sizeof(lgr_info->lpar_number)); +} + +/* + * Fill LGR info with 3.2.2 stsi data + */ +static void lgr_stsi_3_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_3_2_2 *si = lgr_page; + int i; + + if (stsi(si, 3, 2, 2) == -ENOSYS) + return; + for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) { + cpascii(lgr_info->vm[i].name, si->vm[i].name, + sizeof(si->vm[i].name)); + cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi, + sizeof(si->vm[i].cpi)); + } + lgr_info->vm_count = si->count; +} + +/* + * Fill LGR info with current data + */ +static void lgr_info_get(struct lgr_info *lgr_info) +{ + memset(lgr_info, 0, sizeof(*lgr_info)); + stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list)); + lgr_info->level = stsi_0(); + if (lgr_info->level == -ENOSYS) + return; + if (lgr_info->level >= 1) + lgr_stsi_1_1_1(lgr_info); + if (lgr_info->level >= 2) + lgr_stsi_2_2_2(lgr_info); + if (lgr_info->level >= 3) + lgr_stsi_3_2_2(lgr_info); +} + +/* + * Check if LGR info 
has changed and if yes log new LGR info to s390dbf + */ +void lgr_info_log(void) +{ + static DEFINE_SPINLOCK(lgr_info_lock); + unsigned long flags; + + if (!spin_trylock_irqsave(&lgr_info_lock, flags)) + return; + lgr_info_get(&lgr_info_cur); + if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) { + debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur)); + lgr_info_last = lgr_info_cur; + } + spin_unlock_irqrestore(&lgr_info_lock, flags); +} +EXPORT_SYMBOL_GPL(lgr_info_log); + +static void lgr_timer_set(void); + +/* + * LGR timer callback + */ +static void lgr_timer_fn(unsigned long ignored) +{ + lgr_info_log(); + lgr_timer_set(); +} + +static struct timer_list lgr_timer = + TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0); + +/* + * Setup next LGR timer + */ +static void lgr_timer_set(void) +{ + mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ); +} + +/* + * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer + */ +static int __init lgr_init(void) +{ + lgr_page = (void *) __get_free_pages(GFP_KERNEL, 0); + if (!lgr_page) + return -ENOMEM; + lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info)); + if (!lgr_dbf) { + free_page((unsigned long) lgr_page); + return -ENOMEM; + } + debug_register_view(lgr_dbf, &debug_hex_ascii_view); + lgr_info_get(&lgr_info_last); + debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last)); + lgr_timer_set(); + return 0; +} +module_init(lgr_init); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c new file mode 100644 index 00000000..bdad47d5 --- /dev/null +++ b/arch/s390/kernel/machine_kexec.c @@ -0,0 +1,234 @@ +/* + * arch/s390/kernel/machine_kexec.c + * + * Copyright IBM Corp. 
2005,2011 + * + * Author(s): Rolf Adelsberger, + * Heiko Carstens <heiko.carstens@de.ibm.com> + * Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/device.h> +#include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/ftrace.h> +#include <linux/debug_locks.h> +#include <asm/cio.h> +#include <asm/setup.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/smp.h> +#include <asm/reset.h> +#include <asm/ipl.h> +#include <asm/diag.h> +#include <asm/asm-offsets.h> + +typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); + +extern const unsigned char relocate_kernel[]; +extern const unsigned long long relocate_kernel_len; + +#ifdef CONFIG_CRASH_DUMP + +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); + +/* + * Create ELF notes for one CPU + */ +static void add_elf_notes(int cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + void *ptr; + + memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); + ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); + ptr = fill_cpu_elf_notes(ptr, sa); + memset(ptr, 0, sizeof(struct elf_note)); +} + +/* + * Initialize CPU ELF notes + */ +void setup_regs(void) +{ + unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; + int cpu, this_cpu; + + this_cpu = smp_find_processor_id(stap()); + add_elf_notes(this_cpu); + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + if (smp_store_status(cpu)) + continue; + add_elf_notes(cpu); + } + /* Copy dump CPU store status info to absolute zero */ + memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); +} + +#endif + +/* + * Start kdump: We expect here that a store status has been done on our CPU + */ +static void __do_machine_kdump(void *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; + + __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); + 
setup_regs(); + start_kdump(1); +#endif +} + +/* + * Check if kdump checksums are valid: We call purgatory with parameter "0" + */ +static int kdump_csum_valid(struct kimage *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)image->start; + int rc; + + __arch_local_irq_stnsm(0xfb); /* disable DAT */ + rc = start_kdump(0); + __arch_local_irq_stosm(0x04); /* enable DAT */ + return rc ? 0 : -EINVAL; +#else + return -EINVAL; +#endif +} + +/* + * Map or unmap crashkernel memory + */ +static void crash_map_pages(int enable) +{ + unsigned long size = resource_size(&crashk_res); + + BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || + size % KEXEC_CRASH_MEM_ALIGN); + if (enable) + vmem_add_mapping(crashk_res.start, size); + else + vmem_remove_mapping(crashk_res.start, size); +} + +/* + * Map crashkernel memory + */ +void crash_map_reserved_pages(void) +{ + crash_map_pages(1); +} + +/* + * Unmap crashkernel memory + */ +void crash_unmap_reserved_pages(void) +{ + crash_map_pages(0); +} + +/* + * Give back memory to hypervisor before new kdump is loaded + */ +static int machine_kexec_prepare_kdump(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (MACHINE_IS_VM) + diag10_range(PFN_DOWN(crashk_res.start), + PFN_DOWN(crashk_res.end - crashk_res.start + 1)); + return 0; +#else + return -EINVAL; +#endif +} + +int machine_kexec_prepare(struct kimage *image) +{ + void *reboot_code_buffer; + + /* Can't replace kernel image since it is read-only. */ + if (ipl_flags & IPL_NSS_VALID) + return -ENOSYS; + + if (image->type == KEXEC_TYPE_CRASH) + return machine_kexec_prepare_kdump(); + + /* We don't support anything but the default image type for now. 
*/ + if (image->type != KEXEC_TYPE_DEFAULT) + return -EINVAL; + + /* Get the destination where the assembler code should be copied to.*/ + reboot_code_buffer = (void *) page_to_phys(image->control_code_page); + + /* Then copy it */ + memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len); + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ +} + +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_SYMBOL(lowcore_ptr); + VMCOREINFO_SYMBOL(high_memory); + VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); +} + +void machine_shutdown(void) +{ +} + +/* + * Do normal kexec + */ +static void __do_machine_kexec(void *data) +{ + relocate_kernel_t data_mover; + struct kimage *image = data; + + data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); + + /* Call the moving routine */ + (*data_mover)(&image->head, image->start); +} + +/* + * Reset system and call either kdump or normal kexec + */ +static void __machine_kexec(void *data) +{ + struct kimage *image = data; + + pfault_fini(); + tracing_off(); + debug_locks_off(); + if (image->type == KEXEC_TYPE_CRASH) { + lgr_info_log(); + s390_reset_system(__do_machine_kdump, data); + } else { + s390_reset_system(__do_machine_kexec, data); + } + disabled_wait((unsigned long) __builtin_return_address(0)); +} + +/* + * Do either kdump or normal kexec. In case of kdump we first ask + * purgatory, if kdump checksums are valid. + */ +void machine_kexec(struct kimage *image) +{ + if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image)) + return; + tracer_disable(); + smp_send_stop(); + smp_call_ipl_cpu(__machine_kexec, image); +} diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S new file mode 100644 index 00000000..7e2c38ba --- /dev/null +++ b/arch/s390/kernel/mcount.S @@ -0,0 +1,71 @@ +/* + * Copyright IBM Corp. 
2008,2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> + + .section .kprobes.text, "ax" + +ENTRY(ftrace_stub) + br %r14 + +ENTRY(_mcount) +#ifdef CONFIG_DYNAMIC_FTRACE + br %r14 + +ENTRY(ftrace_caller) +#endif + stm %r2,%r5,16(%r15) + bras %r1,2f +0: .long ftrace_trace_function +1: .long function_trace_stop +2: l %r2,1b-0b(%r1) + icm %r2,0xf,0(%r2) + jnz 3f + st %r14,56(%r15) + lr %r0,%r15 + ahi %r15,-96 + l %r3,100(%r15) + la %r2,0(%r14) + st %r0,__SF_BACKCHAIN(%r15) + la %r3,0(%r3) + l %r14,0b-0b(%r1) + l %r14,0(%r14) + basr %r14,%r14 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + l %r2,100(%r15) + l %r3,152(%r15) +ENTRY(ftrace_graph_caller) +# The bras instruction gets runtime patched to call prepare_ftrace_return. +# See ftrace_enable_ftrace_graph_caller. The patched instruction is: +# bras %r14,prepare_ftrace_return + bras %r14,0f +0: st %r2,100(%r15) +#endif + ahi %r15,96 + l %r14,56(%r15) +3: lm %r2,%r5,16(%r15) + br %r14 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +ENTRY(return_to_handler) + stm %r2,%r5,16(%r15) + st %r14,56(%r15) + lr %r0,%r15 + ahi %r15,-96 + st %r0,__SF_BACKCHAIN(%r15) + bras %r1,0f + .long ftrace_return_to_handler +0: l %r2,0b-0b(%r1) + basr %r14,%r2 + lr %r14,%r2 + ahi %r15,96 + lm %r2,%r5,16(%r15) + br %r14 + +#endif diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S new file mode 100644 index 00000000..f70cadec --- /dev/null +++ b/arch/s390/kernel/mcount64.S @@ -0,0 +1,63 @@ +/* + * Copyright IBM Corp. 
2008,2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> + + .section .kprobes.text, "ax" + +ENTRY(ftrace_stub) + br %r14 + +ENTRY(_mcount) +#ifdef CONFIG_DYNAMIC_FTRACE + br %r14 + +ENTRY(ftrace_caller) +#endif + larl %r1,function_trace_stop + icm %r1,0xf,0(%r1) + bnzr %r14 + stmg %r2,%r5,32(%r15) + stg %r14,112(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,__SF_BACKCHAIN(%r15) + lgr %r2,%r14 + lg %r3,168(%r15) + larl %r14,ftrace_trace_function + lg %r14,0(%r14) + basr %r14,%r14 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + lg %r2,168(%r15) + lg %r3,272(%r15) +ENTRY(ftrace_graph_caller) +# The bras instruction gets runtime patched to call prepare_ftrace_return. +# See ftrace_enable_ftrace_graph_caller. The patched instruction is: +# bras %r14,prepare_ftrace_return + bras %r14,0f +0: stg %r2,168(%r15) +#endif + aghi %r15,160 + lmg %r2,%r5,32(%r15) + lg %r14,112(%r15) + br %r14 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +ENTRY(return_to_handler) + stmg %r2,%r5,32(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,__SF_BACKCHAIN(%r15) + brasl %r14,ftrace_return_to_handler + aghi %r15,160 + lgr %r14,%r2 + lmg %r2,%r5,32(%r15) + br %r14 + +#endif diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c new file mode 100644 index 00000000..22d502e8 --- /dev/null +++ b/arch/s390/kernel/mem_detect.c @@ -0,0 +1,145 @@ +/* + * Copyright IBM Corp. 
2008, 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <asm/ipl.h> +#include <asm/sclp.h> +#include <asm/setup.h> + +#define ADDR2G (1ULL << 31) + +static void find_memory_chunks(struct mem_chunk chunk[]) +{ + unsigned long long memsize, rnmax, rzm; + unsigned long addr = 0, size; + int i = 0, type; + + rzm = sclp_get_rzm(); + rnmax = sclp_get_rnmax(); + memsize = rzm * rnmax; + if (!rzm) + rzm = 1ULL << 17; + if (sizeof(long) == 4) { + rzm = min(ADDR2G, rzm); + memsize = memsize ? min(ADDR2G, memsize) : ADDR2G; + } + do { + size = 0; + type = tprot(addr); + do { + size += rzm; + if (memsize && addr + size >= memsize) + break; + } while (type == tprot(addr + size)); + if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { + chunk[i].addr = addr; + chunk[i].size = size; + chunk[i].type = type; + i++; + } + addr += size; + } while (addr < memsize && i < MEMORY_CHUNKS); +} + +void detect_memory_layout(struct mem_chunk chunk[]) +{ + unsigned long flags, cr0; + + memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk)); + /* Disable IRQs, DAT and low address protection so tprot does the + * right thing and we don't get scheduled away with low address + * protection disabled. 
+ */ + flags = __arch_local_irq_stnsm(0xf8); + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); + find_memory_chunks(chunk); + __ctl_load(cr0, 0, 0); + arch_local_irq_restore(flags); +} +EXPORT_SYMBOL(detect_memory_layout); + +/* + * Move memory chunks array from index "from" to index "to" + */ +static void mem_chunk_move(struct mem_chunk chunk[], int to, int from) +{ + int cnt = MEMORY_CHUNKS - to; + + memmove(&chunk[to], &chunk[from], cnt * sizeof(struct mem_chunk)); +} + +/* + * Initialize memory chunk + */ +static void mem_chunk_init(struct mem_chunk *chunk, unsigned long addr, + unsigned long size, int type) +{ + chunk->type = type; + chunk->addr = addr; + chunk->size = size; +} + +/* + * Create memory hole with given address, size, and type + */ +void create_mem_hole(struct mem_chunk chunk[], unsigned long addr, + unsigned long size, int type) +{ + unsigned long lh_start, lh_end, lh_size, ch_start, ch_end, ch_size; + int i, ch_type; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (chunk[i].size == 0) + continue; + + /* Define chunk properties */ + ch_start = chunk[i].addr; + ch_size = chunk[i].size; + ch_end = ch_start + ch_size - 1; + ch_type = chunk[i].type; + + /* Is memory chunk hit by memory hole? 
*/ + if (addr + size <= ch_start) + continue; /* No: memory hole in front of chunk */ + if (addr > ch_end) + continue; /* No: memory hole after chunk */ + + /* Yes: Define local hole properties */ + lh_start = max(addr, chunk[i].addr); + lh_end = min(addr + size - 1, ch_end); + lh_size = lh_end - lh_start + 1; + + if (lh_start == ch_start && lh_end == ch_end) { + /* Hole covers complete memory chunk */ + mem_chunk_init(&chunk[i], lh_start, lh_size, type); + } else if (lh_end == ch_end) { + /* Hole starts in memory chunk and covers chunk end */ + mem_chunk_move(chunk, i + 1, i); + mem_chunk_init(&chunk[i], ch_start, ch_size - lh_size, + ch_type); + mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); + i += 1; + } else if (lh_start == ch_start) { + /* Hole ends in memory chunk */ + mem_chunk_move(chunk, i + 1, i); + mem_chunk_init(&chunk[i], lh_start, lh_size, type); + mem_chunk_init(&chunk[i + 1], lh_end + 1, + ch_size - lh_size, ch_type); + break; + } else { + /* Hole splits memory chunk */ + mem_chunk_move(chunk, i + 2, i); + mem_chunk_init(&chunk[i], ch_start, + lh_start - ch_start, ch_type); + mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); + mem_chunk_init(&chunk[i + 2], lh_end + 1, + ch_end - lh_end, ch_type); + break; + } + } +} diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c new file mode 100644 index 00000000..dfcb3436 --- /dev/null +++ b/arch/s390/kernel/module.c @@ -0,0 +1,395 @@ +/* + * arch/s390/kernel/module.c - Kernel module help for s390. + * + * S390 version + * Copyright (C) 2002, 2003 IBM Deutschland Entwicklung GmbH, + * IBM Corporation + * Author(s): Arnd Bergmann (arndb@de.ibm.com) + * Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * based on i386 version + * Copyright (C) 2001 Rusty Russell.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/module.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/moduleloader.h> +#include <linux/bug.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt , ...) +#endif + +#ifndef CONFIG_64BIT +#define PLT_ENTRY_SIZE 12 +#else /* CONFIG_64BIT */ +#define PLT_ENTRY_SIZE 20 +#endif /* CONFIG_64BIT */ + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + if (mod) { + vfree(mod->arch.syminfo); + mod->arch.syminfo = NULL; + } + vfree(module_region); +} + +static void +check_rela(Elf_Rela *rela, struct module *me) +{ + struct mod_arch_syminfo *info; + + info = me->arch.syminfo + ELF_R_SYM (rela->r_info); + switch (ELF_R_TYPE (rela->r_info)) { + case R_390_GOT12: /* 12 bit GOT offset. */ + case R_390_GOT16: /* 16 bit GOT offset. */ + case R_390_GOT20: /* 20 bit GOT offset. */ + case R_390_GOT32: /* 32 bit GOT offset. */ + case R_390_GOT64: /* 64 bit GOT offset. */ + case R_390_GOTENT: /* 32 bit PC rel. to GOT entry shifted by 1. */ + case R_390_GOTPLT12: /* 12 bit offset to jump slot. */ + case R_390_GOTPLT16: /* 16 bit offset to jump slot. 
*/ + case R_390_GOTPLT20: /* 20 bit offset to jump slot. */ + case R_390_GOTPLT32: /* 32 bit offset to jump slot. */ + case R_390_GOTPLT64: /* 64 bit offset to jump slot. */ + case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */ + if (info->got_offset == -1UL) { + info->got_offset = me->arch.got_size; + me->arch.got_size += sizeof(void*); + } + break; + case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */ + case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */ + case R_390_PLT32: /* 32 bit PC relative PLT address. */ + case R_390_PLT64: /* 64 bit PC relative PLT address. */ + case R_390_PLTOFF16: /* 16 bit offset from GOT to PLT. */ + case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ + case R_390_PLTOFF64: /* 64 bit offset from GOT to PLT. */ + if (info->plt_offset == -1UL) { + info->plt_offset = me->arch.plt_size; + me->arch.plt_size += PLT_ENTRY_SIZE; + } + break; + case R_390_COPY: + case R_390_GLOB_DAT: + case R_390_JMP_SLOT: + case R_390_RELATIVE: + /* Only needed if we want to support loading of + modules linked with -shared. */ + break; + } +} + +/* + * Account for GOT and PLT relocations. We can't add sections for + * got and plt but we can increase the core module size. + */ +int +module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *me) +{ + Elf_Shdr *symtab; + Elf_Sym *symbols; + Elf_Rela *rela; + char *strings; + int nrela, i, j; + + /* Find symbol table and string table. */ + symtab = NULL; + for (i = 0; i < hdr->e_shnum; i++) + switch (sechdrs[i].sh_type) { + case SHT_SYMTAB: + symtab = sechdrs + i; + break; + } + if (!symtab) { + printk(KERN_ERR "module %s: no symbol table\n", me->name); + return -ENOEXEC; + } + + /* Allocate one syminfo structure per symbol.
*/ + me->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym); + me->arch.syminfo = vmalloc(me->arch.nsyms * + sizeof(struct mod_arch_syminfo)); + if (!me->arch.syminfo) + return -ENOMEM; + symbols = (void *) hdr + symtab->sh_offset; + strings = (void *) hdr + sechdrs[symtab->sh_link].sh_offset; + for (i = 0; i < me->arch.nsyms; i++) { + if (symbols[i].st_shndx == SHN_UNDEF && + strcmp(strings + symbols[i].st_name, + "_GLOBAL_OFFSET_TABLE_") == 0) + /* "Define" it as absolute. */ + symbols[i].st_shndx = SHN_ABS; + me->arch.syminfo[i].got_offset = -1UL; + me->arch.syminfo[i].plt_offset = -1UL; + me->arch.syminfo[i].got_initialized = 0; + me->arch.syminfo[i].plt_initialized = 0; + } + + /* Search for got/plt relocations. */ + me->arch.got_size = me->arch.plt_size = 0; + for (i = 0; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type != SHT_RELA) + continue; + nrela = sechdrs[i].sh_size / sizeof(Elf_Rela); + rela = (void *) hdr + sechdrs[i].sh_offset; + for (j = 0; j < nrela; j++) + check_rela(rela + j, me); + } + + /* Increase core size by size of got & plt and set start + offsets for got and plt. */ + me->core_size = ALIGN(me->core_size, 4); + me->arch.got_offset = me->core_size; + me->core_size += me->arch.got_size; + me->arch.plt_offset = me->core_size; + me->core_size += me->arch.plt_size; + return 0; +} + +static int +apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, + struct module *me) +{ + struct mod_arch_syminfo *info; + Elf_Addr loc, val; + int r_type, r_sym; + + /* This is where to make the change */ + loc = base + rela->r_offset; + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. */ + r_sym = ELF_R_SYM(rela->r_info); + r_type = ELF_R_TYPE(rela->r_info); + info = me->arch.syminfo + r_sym; + val = symtab[r_sym].st_value; + + switch (r_type) { + case R_390_8: /* Direct 8 bit. */ + case R_390_12: /* Direct 12 bit. */ + case R_390_16: /* Direct 16 bit. */ + case R_390_20: /* Direct 20 bit. 
*/ + case R_390_32: /* Direct 32 bit. */ + case R_390_64: /* Direct 64 bit. */ + val += rela->r_addend; + if (r_type == R_390_8) + *(unsigned char *) loc = val; + else if (r_type == R_390_12) + *(unsigned short *) loc = (val & 0xfff) | + (*(unsigned short *) loc & 0xf000); + else if (r_type == R_390_16) + *(unsigned short *) loc = val; + else if (r_type == R_390_20) + *(unsigned int *) loc = + (*(unsigned int *) loc & 0xf00000ff) | + (val & 0xfff) << 16 | (val & 0xff000) >> 4; + else if (r_type == R_390_32) + *(unsigned int *) loc = val; + else if (r_type == R_390_64) + *(unsigned long *) loc = val; + break; + case R_390_PC16: /* PC relative 16 bit. */ + case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ + case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */ + case R_390_PC32: /* PC relative 32 bit. */ + case R_390_PC64: /* PC relative 64 bit. */ + val += rela->r_addend - loc; + if (r_type == R_390_PC16) + *(unsigned short *) loc = val; + else if (r_type == R_390_PC16DBL) + *(unsigned short *) loc = val >> 1; + else if (r_type == R_390_PC32DBL) + *(unsigned int *) loc = val >> 1; + else if (r_type == R_390_PC32) + *(unsigned int *) loc = val; + else if (r_type == R_390_PC64) + *(unsigned long *) loc = val; + break; + case R_390_GOT12: /* 12 bit GOT offset. */ + case R_390_GOT16: /* 16 bit GOT offset. */ + case R_390_GOT20: /* 20 bit GOT offset. */ + case R_390_GOT32: /* 32 bit GOT offset. */ + case R_390_GOT64: /* 64 bit GOT offset. */ + case R_390_GOTENT: /* 32 bit PC rel. to GOT entry shifted by 1. */ + case R_390_GOTPLT12: /* 12 bit offset to jump slot. */ + case R_390_GOTPLT20: /* 20 bit offset to jump slot. */ + case R_390_GOTPLT16: /* 16 bit offset to jump slot. */ + case R_390_GOTPLT32: /* 32 bit offset to jump slot. */ + case R_390_GOTPLT64: /* 64 bit offset to jump slot. */ + case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. 
*/ + if (info->got_initialized == 0) { + Elf_Addr *gotent; + + gotent = me->module_core + me->arch.got_offset + + info->got_offset; + *gotent = val; + info->got_initialized = 1; + } + val = info->got_offset + rela->r_addend; + if (r_type == R_390_GOT12 || + r_type == R_390_GOTPLT12) + *(unsigned short *) loc = (val & 0xfff) | + (*(unsigned short *) loc & 0xf000); + else if (r_type == R_390_GOT16 || + r_type == R_390_GOTPLT16) + *(unsigned short *) loc = val; + else if (r_type == R_390_GOT20 || + r_type == R_390_GOTPLT20) + *(unsigned int *) loc = + (*(unsigned int *) loc & 0xf00000ff) | + (val & 0xfff) << 16 | (val & 0xff000) >> 4; + else if (r_type == R_390_GOT32 || + r_type == R_390_GOTPLT32) + *(unsigned int *) loc = val; + else if (r_type == R_390_GOTENT || + r_type == R_390_GOTPLTENT) + *(unsigned int *) loc = + (val + (Elf_Addr) me->module_core - loc) >> 1; + else if (r_type == R_390_GOT64 || + r_type == R_390_GOTPLT64) + *(unsigned long *) loc = val; + break; + case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */ + case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */ + case R_390_PLT32: /* 32 bit PC relative PLT address. */ + case R_390_PLT64: /* 64 bit PC relative PLT address. */ + case R_390_PLTOFF16: /* 16 bit offset from GOT to PLT. */ + case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ + case R_390_PLTOFF64: /* 64 bit offset from GOT to PLT.
*/ + if (info->plt_initialized == 0) { + unsigned int *ip; + ip = me->module_core + me->arch.plt_offset + + info->plt_offset; +#ifndef CONFIG_64BIT + ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */ + ip[1] = 0x100607f1; + ip[2] = val; +#else /* CONFIG_64BIT */ + ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ + ip[1] = 0x100a0004; + ip[2] = 0x07f10000; + ip[3] = (unsigned int) (val >> 32); + ip[4] = (unsigned int) val; +#endif /* CONFIG_64BIT */ + info->plt_initialized = 1; + } + if (r_type == R_390_PLTOFF16 || + r_type == R_390_PLTOFF32 || + r_type == R_390_PLTOFF64) + val = me->arch.plt_offset - me->arch.got_offset + + info->plt_offset + rela->r_addend; + else { + if (!((r_type == R_390_PLT16DBL && + val - loc + 0xffffUL < 0x1ffffeUL) || + (r_type == R_390_PLT32DBL && + val - loc + 0xffffffffULL < 0x1fffffffeULL))) + val = (Elf_Addr) me->module_core + + me->arch.plt_offset + + info->plt_offset; + val += rela->r_addend - loc; + } + if (r_type == R_390_PLT16DBL) + *(unsigned short *) loc = val >> 1; + else if (r_type == R_390_PLTOFF16) + *(unsigned short *) loc = val; + else if (r_type == R_390_PLT32DBL) + *(unsigned int *) loc = val >> 1; + else if (r_type == R_390_PLT32 || + r_type == R_390_PLTOFF32) + *(unsigned int *) loc = val; + else if (r_type == R_390_PLT64 || + r_type == R_390_PLTOFF64) + *(unsigned long *) loc = val; + break; + case R_390_GOTOFF16: /* 16 bit offset to GOT. */ + case R_390_GOTOFF32: /* 32 bit offset to GOT. */ + case R_390_GOTOFF64: /* 64 bit offset to GOT. */ + val = val + rela->r_addend - + ((Elf_Addr) me->module_core + me->arch.got_offset); + if (r_type == R_390_GOTOFF16) + *(unsigned short *) loc = val; + else if (r_type == R_390_GOTOFF32) + *(unsigned int *) loc = val; + else if (r_type == R_390_GOTOFF64) + *(unsigned long *) loc = val; + break; + case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ + case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. 
*/ + val = (Elf_Addr) me->module_core + me->arch.got_offset + + rela->r_addend - loc; + if (r_type == R_390_GOTPC) + *(unsigned int *) loc = val; + else if (r_type == R_390_GOTPCDBL) + *(unsigned int *) loc = val >> 1; + break; + case R_390_COPY: + case R_390_GLOB_DAT: /* Create GOT entry. */ + case R_390_JMP_SLOT: /* Create PLT entry. */ + case R_390_RELATIVE: /* Adjust by program base. */ + /* Only needed if we want to support loading of + modules linked with -shared. */ + break; + default: + printk(KERN_ERR "module %s: Unknown relocation: %u\n", + me->name, r_type); + return -ENOEXEC; + } + return 0; +} + +int +apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me) +{ + Elf_Addr base; + Elf_Sym *symtab; + Elf_Rela *rela; + unsigned long i, n; + int rc; + + DEBUGP("Applying relocate section %u to %u\n", + relsec, sechdrs[relsec].sh_info); + base = sechdrs[sechdrs[relsec].sh_info].sh_addr; + symtab = (Elf_Sym *) sechdrs[symindex].sh_addr; + rela = (Elf_Rela *) sechdrs[relsec].sh_addr; + n = sechdrs[relsec].sh_size / sizeof(Elf_Rela); + + for (i = 0; i < n; i++, rela++) { + rc = apply_rela(rela, base, symtab, me); + if (rc) + return rc; + } + return 0; +} + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + vfree(me->arch.syminfo); + me->arch.syminfo = NULL; + return 0; +} diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c new file mode 100644 index 00000000..8c372ca6 --- /dev/null +++ b/arch/s390/kernel/nmi.c @@ -0,0 +1,375 @@ +/* + * Machine check handler + * + * Copyright IBM Corp. 
2000,2009 + * Author(s): Ingo Adlung <adlung@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Cornelia Huck <cornelia.huck@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ + +#include <linux/kernel_stat.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/hardirq.h> +#include <linux/time.h> +#include <linux/module.h> +#include <asm/lowcore.h> +#include <asm/smp.h> +#include <asm/etr.h> +#include <asm/cputime.h> +#include <asm/nmi.h> +#include <asm/crw.h> + +struct mcck_struct { + int kill_task; + int channel_report; + int warning; + unsigned long long mcck_code; +}; + +static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); + +static void s390_handle_damage(char *msg) +{ + smp_send_stop(); + disabled_wait((unsigned long) __builtin_return_address(0)); + while (1); +} + +/* + * Main machine check handler function. Will be called with interrupts enabled + * or disabled and machine checks enabled or disabled. + */ +void s390_handle_mcck(void) +{ + unsigned long flags; + struct mcck_struct mcck; + + /* + * Disable machine checks and get the current state of accumulated + * machine checks. Afterwards delete the old state and enable machine + * checks again. + */ + local_irq_save(flags); + local_mcck_disable(); + mcck = __get_cpu_var(cpu_mcck); + memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct)); + clear_thread_flag(TIF_MCCK_PENDING); + local_mcck_enable(); + local_irq_restore(flags); + + if (mcck.channel_report) + crw_handle_channel_report(); + /* + * A warning may remain for a prolonged period on the bare iron. + * (actually until the machine is powered off, or the problem is gone) + * So we just stop listening for the WARNING MCH and avoid continuously + * being interrupted. One caveat is however, that we must do this per + * processor and cannot use the smp version of ctl_clear_bit(). + * On VM we only get one interrupt per virtually presented machinecheck. 
+ * Though one suffices, we may get one interrupt per (virtual) cpu. + */ + if (mcck.warning) { /* WARNING pending ? */ + static int mchchk_wng_posted = 0; + + /* Use single cpu clear, as we cannot handle smp here. */ + __ctl_clear_bit(14, 24); /* Disable WARNING MCH */ + if (xchg(&mchchk_wng_posted, 1) == 0) + kill_cad_pid(SIGPWR, 1); + } + if (mcck.kill_task) { + local_irq_enable(); + printk(KERN_EMERG "mcck: Terminating task because of machine " + "malfunction (code 0x%016llx).\n", mcck.mcck_code); + printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", + current->comm, current->pid); + do_exit(SIGSEGV); + } +} +EXPORT_SYMBOL_GPL(s390_handle_mcck); + +/* + * returns 0 if all registers could be validated + * returns 1 otherwise + */ +static int notrace s390_revalidate_registers(struct mci *mci) +{ + int kill_task; + u64 zero; + void *fpt_save_area, *fpt_creg_save_area; + + kill_task = 0; + zero = 0; + + if (!mci->gr) { + /* + * General purpose registers couldn't be restored and have + * unknown contents. Process needs to be terminated. + */ + kill_task = 1; + } + if (!mci->fp) { + /* + * Floating point registers can't be restored and + * therefore the process needs to be terminated. + */ + kill_task = 1; + } +#ifndef CONFIG_64BIT + asm volatile( + " ld 0,0(%0)\n" + " ld 2,8(%0)\n" + " ld 4,16(%0)\n" + " ld 6,24(%0)" + : : "a" (&S390_lowcore.floating_pt_save_area)); +#endif + + if (MACHINE_HAS_IEEE) { +#ifdef CONFIG_64BIT + fpt_save_area = &S390_lowcore.floating_pt_save_area; + fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area; +#else + fpt_save_area = (void *) S390_lowcore.extended_save_area_addr; + fpt_creg_save_area = fpt_save_area + 128; +#endif + if (!mci->fc) { + /* + * Floating point control register can't be restored. + * Task will be terminated. 
+ */ + asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero)); + kill_task = 1; + + } else + asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area)); + + asm volatile( + " ld 0,0(%0)\n" + " ld 1,8(%0)\n" + " ld 2,16(%0)\n" + " ld 3,24(%0)\n" + " ld 4,32(%0)\n" + " ld 5,40(%0)\n" + " ld 6,48(%0)\n" + " ld 7,56(%0)\n" + " ld 8,64(%0)\n" + " ld 9,72(%0)\n" + " ld 10,80(%0)\n" + " ld 11,88(%0)\n" + " ld 12,96(%0)\n" + " ld 13,104(%0)\n" + " ld 14,112(%0)\n" + " ld 15,120(%0)\n" + : : "a" (fpt_save_area)); + } + /* Revalidate access registers */ + asm volatile( + " lam 0,15,0(%0)" + : : "a" (&S390_lowcore.access_regs_save_area)); + if (!mci->ar) { + /* + * Access registers have unknown contents. + * Terminating task. + */ + kill_task = 1; + } + /* Revalidate control registers */ + if (!mci->cr) { + /* + * Control registers have unknown contents. + * Can't recover and therefore stopping machine. + */ + s390_handle_damage("invalid control registers."); + } else { +#ifdef CONFIG_64BIT + asm volatile( + " lctlg 0,15,0(%0)" + : : "a" (&S390_lowcore.cregs_save_area)); +#else + asm volatile( + " lctl 0,15,0(%0)" + : : "a" (&S390_lowcore.cregs_save_area)); +#endif + } + /* + * We don't even try to revalidate the TOD register, since we simply + * can't write something sensible into that register. + */ +#ifdef CONFIG_64BIT + /* + * See if we can revalidate the TOD programmable register with its + * old contents (should be zero) otherwise set it to zero. + */ + if (!mci->pr) + asm volatile( + " sr 0,0\n" + " sckpf" + : : : "0", "cc"); + else + asm volatile( + " l 0,0(%0)\n" + " sckpf" + : : "a" (&S390_lowcore.tod_progreg_save_area) + : "0", "cc"); +#endif + /* Revalidate clock comparator register */ + if (S390_lowcore.clock_comparator == -1) + set_clock_comparator(S390_lowcore.mcck_clock); + else + set_clock_comparator(S390_lowcore.clock_comparator); + /* Check if old PSW is valid */ + if (!mci->wp) + /* + * Can't tell if we come from user or kernel mode + * -> stopping machine. 
+ */ + s390_handle_damage("old psw invalid."); + + if (!mci->ms || !mci->pm || !mci->ia) + kill_task = 1; + + return kill_task; +} + +#define MAX_IPD_COUNT 29 +#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */ + +#define ED_STP_ISLAND 6 /* External damage STP island check */ +#define ED_STP_SYNC 7 /* External damage STP sync check */ +#define ED_ETR_SYNC 12 /* External damage ETR sync check */ +#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */ + +/* + * machine check handler. + */ +void notrace s390_do_machine_check(struct pt_regs *regs) +{ + static int ipd_count; + static DEFINE_SPINLOCK(ipd_lock); + static unsigned long long last_ipd; + struct mcck_struct *mcck; + unsigned long long tmp; + struct mci *mci; + int umode; + + nmi_enter(); + kstat_cpu(smp_processor_id()).irqs[NMI_NMI]++; + mci = (struct mci *) &S390_lowcore.mcck_interruption_code; + mcck = &__get_cpu_var(cpu_mcck); + umode = user_mode(regs); + + if (mci->sd) { + /* System damage -> stopping machine */ + s390_handle_damage("received system damage machine check."); + } + if (mci->pd) { + if (mci->b) { + /* Processing backup -> verify if we can survive this */ + u64 z_mcic, o_mcic, t_mcic; +#ifdef CONFIG_64BIT + z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29); + o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | + 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | + 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 | + 1ULL<<16); +#else + z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 | + 1ULL<<29); + o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | + 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | + 1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16); +#endif + t_mcic = *(u64 *)mci; + + if (((t_mcic & z_mcic) != 0) || + ((t_mcic & o_mcic) != o_mcic)) { + s390_handle_damage("processing backup machine " + "check with damage."); + } + + /* + * Nullifying exigent condition, therefore we might + * retry this instruction. 
+ */ + spin_lock(&ipd_lock); + tmp = get_clock(); + if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME) + ipd_count++; + else + ipd_count = 1; + last_ipd = tmp; + if (ipd_count == MAX_IPD_COUNT) + s390_handle_damage("too many ipd retries."); + spin_unlock(&ipd_lock); + } else { + /* Processing damage -> stopping machine */ + s390_handle_damage("received instruction processing " + "damage machine check."); + } + } + if (s390_revalidate_registers(mci)) { + if (umode) { + /* + * Couldn't restore all register contents while in + * user mode -> mark task for termination. + */ + mcck->kill_task = 1; + mcck->mcck_code = *(unsigned long long *) mci; + set_thread_flag(TIF_MCCK_PENDING); + } else { + /* + * Couldn't restore all register contents while in + * kernel mode -> stopping machine. + */ + s390_handle_damage("unable to revalidate registers."); + } + } + if (mci->cd) { + /* Timing facility damage */ + s390_handle_damage("TOD clock damaged"); + } + if (mci->ed && mci->ec) { + /* External damage */ + if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC)) + etr_sync_check(); + if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH)) + etr_switch_to_local(); + if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) + stp_sync_check(); + if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) + stp_island_check(); + } + if (mci->se) + /* Storage error uncorrected */ + s390_handle_damage("received storage error uncorrected " + "machine check."); + if (mci->ke) + /* Storage key-error uncorrected */ + s390_handle_damage("received storage key-error uncorrected " + "machine check."); + if (mci->ds && mci->fa) + /* Storage degradation */ + s390_handle_damage("received storage degradation machine " + "check."); + if (mci->cp) { + /* Channel report word pending */ + mcck->channel_report = 1; + set_thread_flag(TIF_MCCK_PENDING); + } + if (mci->w) { + /* Warning pending */ + mcck->warning = 1; + set_thread_flag(TIF_MCCK_PENDING); + } + nmi_exit(); +} + +static int 
__init machine_check_init(void) +{ + ctl_set_bit(14, 25); /* enable external damage MCH */ + ctl_set_bit(14, 27); /* enable system recovery MCH */ + ctl_set_bit(14, 24); /* enable warning MCH */ + return 0; +} +arch_initcall(machine_check_init); diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c new file mode 100644 index 00000000..e8d6c214 --- /dev/null +++ b/arch/s390/kernel/os_info.c @@ -0,0 +1,168 @@ +/* + * OS info memory interface + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#define KMSG_COMPONENT "os_info" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/crash_dump.h> +#include <linux/kernel.h> +#include <asm/checksum.h> +#include <asm/lowcore.h> +#include <asm/os_info.h> + +/* + * OS info structure has to be page aligned + */ +static struct os_info os_info __page_aligned_data; + +/* + * Compute checksum over OS info structure + */ +u32 os_info_csum(struct os_info *os_info) +{ + int size = sizeof(*os_info) - offsetof(struct os_info, version_major); + return csum_partial(&os_info->version_major, size, 0); +} + +/* + * Add crashkernel info to OS info and update checksum + */ +void os_info_crashkernel_add(unsigned long base, unsigned long size) +{ + os_info.crashkernel_addr = (u64)(unsigned long)base; + os_info.crashkernel_size = (u64)(unsigned long)size; + os_info.csum = os_info_csum(&os_info); +} + +/* + * Add OS info entry and update checksum + */ +void os_info_entry_add(int nr, void *ptr, u64 size) +{ + os_info.entry[nr].addr = (u64)(unsigned long)ptr; + os_info.entry[nr].size = size; + os_info.entry[nr].csum = csum_partial(ptr, size, 0); + os_info.csum = os_info_csum(&os_info); +} + +/* + * Initialize OS info structure and set lowcore pointer + */ +void __init os_info_init(void) +{ + void *ptr = &os_info; + + os_info.version_major = OS_INFO_VERSION_MAJOR; + os_info.version_minor = OS_INFO_VERSION_MINOR; + os_info.magic = OS_INFO_MAGIC; + os_info.csum = 
os_info_csum(&os_info); + copy_to_absolute_zero(&S390_lowcore.os_info, &ptr, sizeof(ptr)); +} + +#ifdef CONFIG_CRASH_DUMP + +static struct os_info *os_info_old; + +/* + * Allocate and copy OS info entry from oldmem + */ +static void os_info_old_alloc(int nr, int align) +{ + unsigned long addr, size = 0; + char *buf, *buf_align, *msg; + u32 csum; + + addr = os_info_old->entry[nr].addr; + if (!addr) { + msg = "not available"; + goto fail; + } + size = os_info_old->entry[nr].size; + buf = kmalloc(size + align - 1, GFP_KERNEL); + if (!buf) { + msg = "alloc failed"; + goto fail; + } + buf_align = PTR_ALIGN(buf, align); + if (copy_from_oldmem(buf_align, (void *) addr, size)) { + msg = "copy failed"; + goto fail_free; + } + csum = csum_partial(buf_align, size, 0); + if (csum != os_info_old->entry[nr].csum) { + msg = "checksum failed"; + goto fail_free; + } + os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align; + msg = "copied"; + goto out; +fail_free: + kfree(buf); +fail: + os_info_old->entry[nr].addr = 0; +out: + pr_info("entry %i: %s (addr=0x%lx size=%lu)\n", + nr, msg, addr, size); +} + +/* + * Initialize os info and os info entries from oldmem + */ +static void os_info_old_init(void) +{ + static int os_info_init; + unsigned long addr; + + if (os_info_init) + return; + if (!OLDMEM_BASE) + goto fail; + if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr))) + goto fail; + if (addr == 0 || addr % PAGE_SIZE) + goto fail; + os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); + if (!os_info_old) + goto fail; + if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old))) + goto fail_free; + if (os_info_old->magic != OS_INFO_MAGIC) + goto fail_free; + if (os_info_old->csum != os_info_csum(os_info_old)) + goto fail_free; + if (os_info_old->version_major > OS_INFO_VERSION_MAJOR) + goto fail_free; + os_info_old_alloc(OS_INFO_VMCOREINFO, 1); + os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1); + os_info_old_alloc(OS_INFO_INIT_FN, PAGE_SIZE); + 
pr_info("crashkernel: addr=0x%lx size=%lu\n", + (unsigned long) os_info_old->crashkernel_addr, + (unsigned long) os_info_old->crashkernel_size); + os_info_init = 1; + return; +fail_free: + kfree(os_info_old); +fail: + os_info_init = 1; + os_info_old = NULL; +} + +/* + * Return pointer to os info entry and its size + */ +void *os_info_old_entry(int nr, unsigned long *size) +{ + os_info_old_init(); + + if (!os_info_old) + return NULL; + if (!os_info_old->entry[nr].addr) + return NULL; + *size = (unsigned long) os_info_old->entry[nr].size; + return (void *)(unsigned long)os_info_old->entry[nr].addr; +} +#endif diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c new file mode 100644 index 00000000..cb019f42 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -0,0 +1,690 @@ +/* + * Performance event support for s390x - CPU-measurement Counter Facility + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */ +#define KMSG_COMPONENT "cpum_cf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/export.h> +#include <asm/ctl_reg.h> +#include <asm/irq.h> +#include <asm/cpu_mf.h> + +/* CPU-measurement counter facility supports these CPU counter sets: + * For CPU counter sets: + * Basic counter set: 0-31 + * Problem-state counter set: 32-63 + * Crypto-activity counter set: 64-127 + * Extended counter set: 128-159 + */ +enum cpumf_ctr_set { + /* CPU counter sets */ + CPUMF_CTR_SET_BASIC = 0, + CPUMF_CTR_SET_USER = 1, + CPUMF_CTR_SET_CRYPTO = 2, + CPUMF_CTR_SET_EXT = 3, + + /* Maximum number of counter sets */ + CPUMF_CTR_SET_MAX, +}; + +#define CPUMF_LCCTL_ENABLE_SHIFT 16 +#define CPUMF_LCCTL_ACTCTL_SHIFT 0 +static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = { + [CPUMF_CTR_SET_BASIC] = 0x02, + [CPUMF_CTR_SET_USER] = 0x04, + [CPUMF_CTR_SET_CRYPTO] = 0x08, + [CPUMF_CTR_SET_EXT] = 0x01, +}; + +static void ctr_set_enable(u64 *state, int ctr_set) +{ + *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT; +} +static void ctr_set_disable(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT); +} +static void ctr_set_start(u64 *state, int ctr_set) +{ + *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT; +} +static void ctr_set_stop(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); +} + +/* Local CPUMF event structure */ +struct cpu_hw_events { + struct cpumf_ctr_info info; + atomic_t ctr_set[CPUMF_CTR_SET_MAX]; + u64 state, tx_state; + unsigned int flags; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .ctr_set = { + [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_EXT] = 
ATOMIC_INIT(0), + }, + .state = 0, + .flags = 0, +}; + +static int get_counter_set(u64 event) +{ + int set = -1; + + if (event < 32) + set = CPUMF_CTR_SET_BASIC; + else if (event < 64) + set = CPUMF_CTR_SET_USER; + else if (event < 128) + set = CPUMF_CTR_SET_CRYPTO; + else if (event < 160) + set = CPUMF_CTR_SET_EXT; + + return set; +} + +static int validate_event(const struct hw_perf_event *hwc) +{ + switch (hwc->config_base) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + /* check for reserved counters */ + if ((hwc->config >= 6 && hwc->config <= 31) || + (hwc->config >= 38 && hwc->config <= 63) || + (hwc->config >= 80 && hwc->config <= 127)) + return -EOPNOTSUPP; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int validate_ctr_version(const struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuhw; + int err = 0; + + cpuhw = &get_cpu_var(cpu_hw_events); + + /* check required version for counter sets */ + switch (hwc->config_base) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + if (cpuhw->info.cfvn < 1) + err = -EOPNOTSUPP; + break; + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + if (cpuhw->info.csvn < 1) + err = -EOPNOTSUPP; + break; + } + + put_cpu_var(cpu_hw_events); + return err; +} + +static int validate_ctr_auth(const struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuhw; + u64 ctrs_state; + int err = 0; + + cpuhw = &get_cpu_var(cpu_hw_events); + + /* check authorization for cpu counter sets */ + ctrs_state = cpumf_state_ctl[hwc->config_base]; + if (!(ctrs_state & cpuhw->info.auth_ctl)) + err = -EPERM; + + put_cpu_var(cpu_hw_events); + return err; +} + +/* + * Change the CPUMF state to active. + * Enable and activate the CPU-counter sets according + * to the per-cpu control state. 
+ */ +static void cpumf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int err; + + if (cpuhw->flags & PMU_F_ENABLED) + return; + + err = lcctl(cpuhw->state); + if (err) { + pr_err("Enabling the performance measuring unit " + "failed with rc=%x\n", err); + return; + } + + cpuhw->flags |= PMU_F_ENABLED; +} + +/* + * Change the CPUMF state to inactive. + * Disable and enable (inactive) the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int err; + u64 inactive; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + err = lcctl(inactive); + if (err) { + pr_err("Disabling the performance measuring unit " + "failed with rc=%x\n", err); + return; + } + + cpuhw->flags &= ~PMU_F_ENABLED; +} + + +/* Number of perf events counting hardware events */ +static atomic_t num_events = ATOMIC_INIT(0); +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* CPU-measurement alerts for the counter facility */ +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_events *cpuhw; + + if (!(alert & CPU_MF_INT_CF_MASK)) + return; + + kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; + cpuhw = &__get_cpu_var(cpu_hw_events); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. 
*/ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* counter authorization change alert */ + if (alert & CPU_MF_INT_CF_CACA) + qctri(&cpuhw->info); + + /* loss of counter data alert */ + if (alert & CPU_MF_INT_CF_LCDA) + pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); +} + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +static void setup_pmc_cpu(void *flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + switch (*((int *) flags)) { + case PMC_INIT: + memset(&cpuhw->info, 0, sizeof(cpuhw->info)); + qctri(&cpuhw->info); + cpuhw->flags |= PMU_F_RESERVED; + break; + + case PMC_RELEASE: + cpuhw->flags &= ~PMU_F_RESERVED; + break; + } + + /* Disable CPU counter sets */ + lcctl(0); +} + +/* Initialize the CPU-measurement facility */ +static int reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + measurement_alert_subclass_register(); + + return 0; +} + +/* Release the CPU-measurement facility */ +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + measurement_alert_subclass_unregister(); +} + +/* Release the PMU if event is the last perf event */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ +static const int cpumf_generic_events_basic[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0, + [PERF_COUNT_HW_INSTRUCTIONS] = 1, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; +/* CPUMF <-> perf event mappings for userspace (problem-state set) */ +static const int cpumf_generic_events_user[] = { + 
[PERF_COUNT_HW_CPU_CYCLES] = 32, + [PERF_COUNT_HW_INSTRUCTIONS] = 33, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int err; + u64 ev; + + switch (attr->type) { + case PERF_TYPE_RAW: + /* Raw events are used to access counters directly, + * hence do not permit excludes */ + if (attr->exclude_kernel || attr->exclude_user || + attr->exclude_hv) + return -EOPNOTSUPP; + ev = attr->config; + break; + + case PERF_TYPE_HARDWARE: + ev = attr->config; + /* Count user space (problem-state) only */ + if (!attr->exclude_user && attr->exclude_kernel) { + if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_user[ev]; + + /* No support for kernel space counters only */ + } else if (!attr->exclude_kernel && attr->exclude_user) { + return -EOPNOTSUPP; + + /* Count user and kernel space */ + } else { + if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_basic[ev]; + } + break; + + default: + return -ENOENT; + } + + if (ev == -1) + return -ENOENT; + + if (ev >= PERF_CPUM_CF_MAX_CTR) + return -EINVAL; + + /* The CPU measurement counter facility does not have any interrupts + * to do sampling. Sampling must be provided by external means, + * for example, by timers. + */ + if (hwc->sample_period) + return -EINVAL; + + /* Use the hardware perf event structure to store the counter number + * in 'config' member and the counter set to which the counter belongs + * in the 'config_base'. The counter set (config_base) is then used + * to enable/disable the counters. + */ + hwc->config = ev; + hwc->config_base = get_counter_set(ev); + + /* Validate the counter that is assigned to this event. 
+ * Because the counter facility can use numerous counters at the + * same time without constraints, it is not necessary to explicitly + * validate event groups (event->group_leader != event). + */ + err = validate_event(hwc); + if (err) + return err; + + /* Initialize for using the CPU-measurement counter facility */ + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + /* Finally, validate version and authorization of the counter set */ + err = validate_ctr_auth(hwc); + if (!err) + err = validate_ctr_version(hwc); + + return err; +} + +static int cpumf_pmu_event_init(struct perf_event *event) +{ + int err; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_RAW: + err = __hw_perf_event_init(event); + break; + default: + return -ENOENT; + } + + if (unlikely(err) && event->destroy) + event->destroy(event); + + return err; +} + +static int hw_perf_event_reset(struct perf_event *event) +{ + u64 prev, new; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) { + if (err != 3) + break; + /* The counter is not (yet) available. This + * might happen if the counter set to which + * this counter belongs is in the disabled + * state. + */ + new = 0; + } + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + return err; +} + +static int hw_perf_event_update(struct perf_event *event) +{ + u64 prev, new, delta; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) + goto out; + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + delta = (prev <= new) ? 
new - prev + : (-1ULL - prev) + new + 1; /* overflow */ + local64_add(delta, &event->count); +out: + return err; +} + +static void cpumf_pmu_read(struct perf_event *event) +{ + if (event->hw.state & PERF_HES_STOPPED) + return; + + hw_perf_event_update(event); +} + +static void cpumf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(hwc->config == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* (Re-)enable and activate the counter set */ + ctr_set_enable(&cpuhw->state, hwc->config_base); + ctr_set_start(&cpuhw->state, hwc->config_base); + + /* The counter set to which this counter belongs can be already active. + * Because all counters in a set are active, the event->hw.prev_count + * needs to be synchronized. At this point, the counter set can be in + * the inactive or disabled state. + */ + hw_perf_event_reset(event); + + /* increment refcount for this counter set */ + atomic_inc(&cpuhw->ctr_set[hwc->config_base]); +} + +static void cpumf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + /* Decrement reference count for this counter set and if this + * is the last used counter in the set, clear activation + * control and set the counter set state to inactive. 
+ */ + if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base])) + ctr_set_stop(&cpuhw->state, hwc->config_base); + event->hw.state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + hw_perf_event_update(event); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static int cpumf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + /* Check authorization for the counter set to which this + * counter belongs. + * For group events transaction, the authorization check is + * done in cpumf_pmu_commit_txn(). + */ + if (!(cpuhw->flags & PERF_EVENT_TXN)) + if (validate_ctr_auth(&event->hw)) + return -EPERM; + + ctr_set_enable(&cpuhw->state, event->hw.config_base); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + cpumf_pmu_start(event, PERF_EF_RELOAD); + + perf_event_update_userpage(event); + + return 0; +} + +static void cpumf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + cpumf_pmu_stop(event, PERF_EF_UPDATE); + + /* Check if any counter in the counter set is still used. If not used, + * change the counter set to the disabled state. This also clears the + * content of all counters in the set. + * + * When a new perf event has been added but not yet started, this can + * clear enable control and resets all counters in a set. Therefore, + * cpumf_pmu_start() always has to reenable a counter set. + */ + if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base])) + ctr_set_disable(&cpuhw->state, event->hw.config_base); + + perf_event_update_userpage(event); +} + +/* + * Start group events scheduling transaction. + * Set flags to perform a single test at commit time. 
+ */ +static void cpumf_pmu_start_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + perf_pmu_disable(pmu); + cpuhw->flags |= PERF_EVENT_TXN; + cpuhw->tx_state = cpuhw->state; +} + +/* + * Stop and cancel a group events scheduling transaction. + * Assumes cpumf_pmu_del() is called for each successfully added + * cpumf_pmu_add() during the transaction. + */ +static void cpumf_pmu_cancel_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + WARN_ON(cpuhw->tx_state != cpuhw->state); + + cpuhw->flags &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); +} + +/* + * Commit the group events scheduling transaction. On success, the + * transaction is closed. On error, the transaction is kept open + * until cpumf_pmu_cancel_txn() is called. + */ +static int cpumf_pmu_commit_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + u64 state; + + /* check if the updated state can be scheduled */ + state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + state >>= CPUMF_LCCTL_ENABLE_SHIFT; + if ((state & cpuhw->info.auth_ctl) != state) + return -EPERM; + + cpuhw->flags &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); + return 0; +} + +/* Performance monitoring unit for s390x */ +static struct pmu cpumf_pmu = { + .pmu_enable = cpumf_pmu_enable, + .pmu_disable = cpumf_pmu_disable, + .event_init = cpumf_pmu_event_init, + .add = cpumf_pmu_add, + .del = cpumf_pmu_del, + .start = cpumf_pmu_start, + .stop = cpumf_pmu_stop, + .read = cpumf_pmu_read, + .start_txn = cpumf_pmu_start_txn, + .commit_txn = cpumf_pmu_commit_txn, + .cancel_txn = cpumf_pmu_cancel_txn, +}; + +static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long) hcpu; + int flags; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + flags = PMC_INIT; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + case 
CPU_DOWN_PREPARE: + flags = PMC_RELEASE; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int __init cpumf_pmu_init(void) +{ + int rc; + + if (!cpum_cf_avail()) + return -ENODEV; + + /* clear bit 15 of cr0 to unauthorize problem-state to + * extract measurement counters */ + ctl_clear_bit(0, 48); + + /* register handler for measurement-alert interruptions */ + rc = register_external_interrupt(0x1407, cpumf_measurement_alert); + if (rc) { + pr_err("Registering for CPU-measurement alerts " + "failed with rc=%i\n", rc); + goto out; + } + + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); + if (rc) { + pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); + unregister_external_interrupt(0x1407, cpumf_measurement_alert); + goto out; + } + perf_cpu_notifier(cpumf_pmu_notifier); +out: + return rc; +} +early_initcall(cpumf_pmu_init); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c new file mode 100644 index 00000000..f58f37f6 --- /dev/null +++ b/arch/s390/kernel/perf_event.c @@ -0,0 +1,124 @@ +/* + * Performance event support for s390x + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */ +#define KMSG_COMPONENT "perf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/export.h> +#include <asm/irq.h> +#include <asm/cpu_mf.h> +#include <asm/lowcore.h> +#include <asm/processor.h> + +const char *perf_pmu_name(void) +{ + if (cpum_cf_avail() || cpum_sf_avail()) + return "CPU-measurement facilities (CPUMF)"; + return "pmu"; +} +EXPORT_SYMBOL(perf_pmu_name); + +int perf_num_counters(void) +{ + int num = 0; + + if (cpum_cf_avail()) + num += PERF_CPUM_CF_MAX_CTR; + + return num; +} +EXPORT_SYMBOL(perf_num_counters); + +void perf_event_print_debug(void) +{ + struct cpumf_ctr_info cf_info; + unsigned long flags; + int cpu; + + if (!cpum_cf_avail()) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + memset(&cf_info, 0, sizeof(cf_info)); + if (!qctri(&cf_info)) { + pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n", + cpu, cf_info.cfvn, cf_info.csvn, + cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl); + print_hex_dump_bytes("CPUMF Query: ", DUMP_PREFIX_OFFSET, + &cf_info, sizeof(cf_info)); + } + + local_irq_restore(flags); +} + +/* See also arch/s390/kernel/traps.c */ +static unsigned long __store_trace(struct perf_callchain_entry *entry, + unsigned long sp, + unsigned long low, unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + + while (1) { + sp = sp & PSW_ADDR_INSN; + if (sp < low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); + /* Follow the backchain. */ + while (1) { + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + perf_callchain_store(entry, + sf->gprs[8] & PSW_ADDR_INSN); + } + /* Zero backchain detected, check for interrupt frame. 
*/ + sp = (unsigned long) (sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *) sp; + perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); + low = sp; + sp = regs->gprs[15]; + } +} + +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + unsigned long head; + struct stack_frame *head_sf; + + if (user_mode(regs)) + return; + + head = regs->gprs[15]; + head_sf = (struct stack_frame *) head; + + if (!head_sf || !head_sf->back_chain) + return; + + head = head_sf->back_chain; + head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE, + S390_lowcore.async_stack); + + __store_trace(entry, head, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); +} diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c new file mode 100644 index 00000000..60055cef --- /dev/null +++ b/arch/s390/kernel/process.c @@ -0,0 +1,354 @@ +/* + * This file handles the architecture dependent parts of process handling. + * + * Copyright IBM Corp. 1999,2009 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Hartmut Penner <hp@de.ibm.com>, + * Denis Joseph Barrow, + */ + +#include <linux/compiler.h> +#include <linux/cpu.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/elfcore.h> +#include <linux/smp.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/tick.h> +#include <linux/personality.h> +#include <linux/syscalls.h> +#include <linux/compat.h> +#include <linux/kprobes.h> +#include <linux/random.h> +#include <linux/module.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/irq.h> +#include <asm/timer.h> +#include <asm/nmi.h> +#include <asm/smp.h> +#include <asm/switch_to.h> +#include "entry.h" + +asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); + +/* + * Return saved PC of a blocked thread. used in kernel/sched. 
+ * resume in entry.S does not create a new stack frame, it + * just stores the registers %r6-%r15 to the frame given by + * schedule. We want to return the address of the caller of + * schedule, so we have to walk the backchain one time to + * find the frame schedule() store its return address. + */ +unsigned long thread_saved_pc(struct task_struct *tsk) +{ + struct stack_frame *sf, *low, *high; + + if (!tsk || !task_stack_page(tsk)) + return 0; + low = task_stack_page(tsk); + high = (struct stack_frame *) task_pt_regs(tsk); + sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN); + if (sf <= low || sf > high) + return 0; + sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN); + if (sf <= low || sf > high) + return 0; + return sf->gprs[8]; +} + +/* + * The idle loop on a S390... + */ +static void default_idle(void) +{ + if (cpu_is_offline(smp_processor_id())) + cpu_die(); + local_irq_disable(); + if (need_resched()) { + local_irq_enable(); + return; + } + local_mcck_disable(); + if (test_thread_flag(TIF_MCCK_PENDING)) { + local_mcck_enable(); + local_irq_enable(); + return; + } + /* Halt the cpu and keep track of cpu time accounting. 
*/ + vtime_stop_cpu(); +} + +void cpu_idle(void) +{ + for (;;) { + tick_nohz_idle_enter(); + rcu_idle_enter(); + while (!need_resched() && !test_thread_flag(TIF_MCCK_PENDING)) + default_idle(); + rcu_idle_exit(); + tick_nohz_idle_exit(); + if (test_thread_flag(TIF_MCCK_PENDING)) + s390_handle_mcck(); + schedule_preempt_disabled(); + } +} + +extern void __kprobes kernel_thread_starter(void); + +asm( + ".section .kprobes.text, \"ax\"\n" + ".global kernel_thread_starter\n" + "kernel_thread_starter:\n" + " la 2,0(10)\n" + " basr 14,9\n" + " la 2,0\n" + " br 11\n" + ".previous\n"); + +int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + struct pt_regs regs; + + memset(&regs, 0, sizeof(regs)); + regs.psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE; + regs.gprs[9] = (unsigned long) fn; + regs.gprs[10] = (unsigned long) arg; + regs.gprs[11] = (unsigned long) do_exit; + regs.orig_gpr2 = -1; + + /* Ok, create the new process.. */ + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, + 0, &regs, 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); + +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ +} + +void flush_thread(void) +{ +} + +void release_thread(struct task_struct *dead_task) +{ +} + +int copy_thread(unsigned long clone_flags, unsigned long new_stackp, + unsigned long unused, + struct task_struct *p, struct pt_regs *regs) +{ + struct thread_info *ti; + struct fake_frame + { + struct stack_frame sf; + struct pt_regs childregs; + } *frame; + + frame = container_of(task_pt_regs(p), struct fake_frame, childregs); + p->thread.ksp = (unsigned long) frame; + /* Store access registers to kernel stack of new process. */ + frame->childregs = *regs; + frame->childregs.gprs[2] = 0; /* child returns 0 on fork. 
*/ + frame->childregs.gprs[15] = new_stackp; + frame->sf.back_chain = 0; + + /* new return point is ret_from_fork */ + frame->sf.gprs[8] = (unsigned long) ret_from_fork; + + /* fake return stack for resume(), don't go back to schedule */ + frame->sf.gprs[9] = (unsigned long) frame; + + /* Save access registers to new thread structure. */ + save_access_regs(&p->thread.acrs[0]); + +#ifndef CONFIG_64BIT + /* + * save fprs to current->thread.fp_regs to merge them with + * the emulated registers and then copy the result to the child. + */ + save_fp_regs(&current->thread.fp_regs); + memcpy(&p->thread.fp_regs, &current->thread.fp_regs, + sizeof(s390_fp_regs)); + /* Set a new TLS ? */ + if (clone_flags & CLONE_SETTLS) + p->thread.acrs[0] = regs->gprs[6]; +#else /* CONFIG_64BIT */ + /* Save the fpu registers to new thread structure. */ + save_fp_regs(&p->thread.fp_regs); + /* Set a new TLS ? */ + if (clone_flags & CLONE_SETTLS) { + if (is_compat_task()) { + p->thread.acrs[0] = (unsigned int) regs->gprs[6]; + } else { + p->thread.acrs[0] = (unsigned int)(regs->gprs[6] >> 32); + p->thread.acrs[1] = (unsigned int) regs->gprs[6]; + } + } +#endif /* CONFIG_64BIT */ + /* start new process with ar4 pointing to the correct address space */ + p->thread.mm_segment = get_fs(); + /* Don't copy debug registers */ + memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); + memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); + clear_tsk_thread_flag(p, TIF_SINGLE_STEP); + clear_tsk_thread_flag(p, TIF_PER_TRAP); + /* Initialize per thread user and system timer values */ + ti = task_thread_info(p); + ti->user_timer = 0; + ti->system_timer = 0; + return 0; +} + +SYSCALL_DEFINE0(fork) +{ + struct pt_regs *regs = task_pt_regs(current); + return do_fork(SIGCHLD, regs->gprs[15], regs, 0, NULL, NULL); +} + +SYSCALL_DEFINE4(clone, unsigned long, newsp, unsigned long, clone_flags, + int __user *, parent_tidptr, int __user *, child_tidptr) +{ + struct pt_regs *regs = task_pt_regs(current); + + if 
(!newsp) + newsp = regs->gprs[15]; + return do_fork(clone_flags, newsp, regs, 0, + parent_tidptr, child_tidptr); +} + +/* + * This is trivial, and on the face of it looks like it + * could equally well be done in user mode. + * + * Not so, for quite unobvious reasons - register pressure. + * In user mode vfork() cannot have a stack frame, and if + * done by calling the "clone()" system call directly, you + * do not have enough call-clobbered registers to hold all + * the information you need. + */ +SYSCALL_DEFINE0(vfork) +{ + struct pt_regs *regs = task_pt_regs(current); + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, + regs->gprs[15], regs, 0, NULL, NULL); +} + +asmlinkage void execve_tail(void) +{ + current->thread.fp_regs.fpc = 0; + if (MACHINE_HAS_IEEE) + asm volatile("sfpc %0,%0" : : "d" (0)); +} + +/* + * sys_execve() executes a new program. + */ +SYSCALL_DEFINE3(execve, const char __user *, name, + const char __user *const __user *, argv, + const char __user *const __user *, envp) +{ + struct pt_regs *regs = task_pt_regs(current); + char *filename; + long rc; + + filename = getname(name); + rc = PTR_ERR(filename); + if (IS_ERR(filename)) + return rc; + rc = do_execve(filename, argv, envp, regs); + if (rc) + goto out; + execve_tail(); + rc = regs->gprs[2]; +out: + putname(filename); + return rc; +} + +/* + * fill in the FPU structure for a core dump. + */ +int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) +{ +#ifndef CONFIG_64BIT + /* + * save fprs to current->thread.fp_regs to merge them with + * the emulated registers and then copy the result to the dump. 
+ */ + save_fp_regs(&current->thread.fp_regs); + memcpy(fpregs, &current->thread.fp_regs, sizeof(s390_fp_regs)); +#else /* CONFIG_64BIT */ + save_fp_regs(fpregs); +#endif /* CONFIG_64BIT */ + return 1; +} +EXPORT_SYMBOL(dump_fpu); + +unsigned long get_wchan(struct task_struct *p) +{ + struct stack_frame *sf, *low, *high; + unsigned long return_address; + int count; + + if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p)) + return 0; + low = task_stack_page(p); + high = (struct stack_frame *) task_pt_regs(p); + sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN); + if (sf <= low || sf > high) + return 0; + for (count = 0; count < 16; count++) { + sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN); + if (sf <= low || sf > high) + return 0; + return_address = sf->gprs[8] & PSW_ADDR_INSN; + if (!in_sched_functions(return_address)) + return return_address; + } + return 0; +} + +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() & ~PAGE_MASK; + return sp & ~0xf; +} + +static inline unsigned long brk_rnd(void) +{ + /* 8MB for 32bit, 1GB for 64bit */ + if (is_32bit_task()) + return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; + else + return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd()); + + if (ret < mm->brk) + return mm->brk; + return ret; +} + +unsigned long randomize_et_dyn(unsigned long base) +{ + unsigned long ret = PAGE_ALIGN(base + brk_rnd()); + + if (!(current->flags & PF_RANDOMIZE)) + return base; + if (ret < base) + return base; + return ret; +} diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c new file mode 100644 index 00000000..6e0073e4 --- /dev/null +++ b/arch/s390/kernel/processor.c @@ -0,0 +1,96 @@ +/* + * arch/s390/kernel/processor.c + * + * Copyright IBM Corp. 
2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/delay.h> +#include <linux/cpu.h> +#include <asm/elf.h> +#include <asm/lowcore.h> +#include <asm/param.h> + +static DEFINE_PER_CPU(struct cpuid, cpu_id); + +/* + * cpu_init - initializes state that is per-CPU. + */ +void __cpuinit cpu_init(void) +{ + struct cpuid *id = &per_cpu(cpu_id, smp_processor_id()); + + get_cpu_id(id); + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + BUG_ON(current->mm); + enter_lazy_tlb(&init_mm, current); +} + +/* + * show_cpuinfo - Get information on one CPU for use by procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + static const char *hwcap_str[10] = { + "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", + "edat", "etf3eh", "highgprs" + }; + unsigned long n = (unsigned long) v - 1; + int i; + + if (!n) { + s390_adjust_jiffies(); + seq_printf(m, "vendor_id : IBM/S390\n" + "# processors : %i\n" + "bogomips per cpu: %lu.%02lu\n", + num_online_cpus(), loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ))%100); + seq_puts(m, "features\t: "); + for (i = 0; i < 10; i++) + if (hwcap_str[i] && (elf_hwcap & (1UL << i))) + seq_printf(m, "%s ", hwcap_str[i]); + seq_puts(m, "\n"); + } + get_online_cpus(); + if (cpu_online(n)) { + struct cpuid *id = &per_cpu(cpu_id, n); + seq_printf(m, "processor %li: " + "version = %02X, " + "identification = %06X, " + "machine = %04X\n", + n, id->version, id->ident, id->machine); + } + put_online_cpus(); + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < nr_cpu_ids ? 
(void *)((unsigned long) *pos + 1) : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c new file mode 100644 index 00000000..02f300fb --- /dev/null +++ b/arch/s390/kernel/ptrace.c @@ -0,0 +1,1237 @@ +/* + * Ptrace user space interface. + * + * Copyright IBM Corp. 1999,2010 + * Author(s): Denis Joseph Barrow + * Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/security.h> +#include <linux/audit.h> +#include <linux/signal.h> +#include <linux/elf.h> +#include <linux/regset.h> +#include <linux/tracehook.h> +#include <linux/seccomp.h> +#include <linux/compat.h> +#include <trace/syscall.h> +#include <asm/segment.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> +#include <asm/switch_to.h> +#include "entry.h" + +#ifdef CONFIG_COMPAT +#include "compat_ptrace.h" +#endif + +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + +enum s390_regset { + REGSET_GENERAL, + REGSET_FP, + REGSET_LAST_BREAK, + REGSET_SYSTEM_CALL, + REGSET_GENERAL_EXTENDED, +}; + +void update_per_regs(struct task_struct *task) +{ + struct pt_regs *regs = task_pt_regs(task); + struct thread_struct *thread = &task->thread; + struct per_regs old, new; + + /* Copy user specified PER registers */ + new.control = thread->per_user.control; + new.start = thread->per_user.start; + new.end = thread->per_user.end; + + /* merge TIF_SINGLE_STEP into user specified PER registers. 
*/ + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { + new.control |= PER_EVENT_IFETCH; + new.start = 0; + new.end = PSW_ADDR_INSN; + } + + /* Take care of the PER enablement bit in the PSW. */ + if (!(new.control & PER_EVENT_MASK)) { + regs->psw.mask &= ~PSW_MASK_PER; + return; + } + regs->psw.mask |= PSW_MASK_PER; + __ctl_store(old, 9, 11); + if (memcmp(&new, &old, sizeof(struct per_regs)) != 0) + __ctl_load(new, 9, 11); +} + +void user_enable_single_step(struct task_struct *task) +{ + set_tsk_thread_flag(task, TIF_SINGLE_STEP); + if (task == current) + update_per_regs(task); +} + +void user_disable_single_step(struct task_struct *task) +{ + clear_tsk_thread_flag(task, TIF_SINGLE_STEP); + if (task == current) + update_per_regs(task); +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Clear all debugging related fields. + */ +void ptrace_disable(struct task_struct *task) +{ + memset(&task->thread.per_user, 0, sizeof(task->thread.per_user)); + memset(&task->thread.per_event, 0, sizeof(task->thread.per_event)); + clear_tsk_thread_flag(task, TIF_SINGLE_STEP); + clear_tsk_thread_flag(task, TIF_PER_TRAP); +} + +#ifndef CONFIG_64BIT +# define __ADDR_MASK 3 +#else +# define __ADDR_MASK 7 +#endif + +static inline unsigned long __peek_user_per(struct task_struct *child, + addr_t addr) +{ + struct per_struct_kernel *dummy = NULL; + + if (addr == (addr_t) &dummy->cr9) + /* Control bits of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PER_EVENT_IFETCH : child->thread.per_user.control; + else if (addr == (addr_t) &dummy->cr10) + /* Start address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + 0 : child->thread.per_user.start; + else if (addr == (addr_t) &dummy->cr11) + /* End address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PSW_ADDR_INSN : child->thread.per_user.end; + else if (addr == (addr_t) &dummy->bits) + /* Single-step bit. */ + return test_thread_flag(TIF_SINGLE_STEP) ? 
+ (1UL << (BITS_PER_LONG - 1)) : 0; + else if (addr == (addr_t) &dummy->starting_addr) + /* Start address of the user specified per set. */ + return child->thread.per_user.start; + else if (addr == (addr_t) &dummy->ending_addr) + /* End address of the user specified per set. */ + return child->thread.per_user.end; + else if (addr == (addr_t) &dummy->perc_atmid) + /* PER code, ATMID and AI of the last PER trap */ + return (unsigned long) + child->thread.per_event.cause << (BITS_PER_LONG - 16); + else if (addr == (addr_t) &dummy->address) + /* Address of the last PER trap */ + return child->thread.per_event.address; + else if (addr == (addr_t) &dummy->access_id) + /* Access id of the last PER trap */ + return (unsigned long) + child->thread.per_event.paid << (BITS_PER_LONG - 8); + return 0; +} + +/* + * Read the word at offset addr from the user area of a process. The + * trouble here is that the information is littered over different + * locations. The process registers are found on the kernel stack, + * the floating point stuff and the trace settings are stored in + * the task structure. In addition the different structures in + * struct user contain pad bytes that should be read as zeroes. + * Lovely... + */ +static unsigned long __peek_user(struct task_struct *child, addr_t addr) +{ + struct user *dummy = NULL; + addr_t offset, tmp; + + if (addr < (addr_t) &dummy->regs.acrs) { + /* + * psw and gprs are stored on the stack + */ + tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); + if (addr == (addr_t) &dummy->regs.psw.mask) + /* Return a clean psw mask. */ + tmp = psw_user_bits | (tmp & PSW_MASK_USER); + + } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { + /* + * access registers are stored in the thread structure + */ + offset = addr - (addr_t) &dummy->regs.acrs; +#ifdef CONFIG_64BIT + /* + * Very special case: old & broken 64 bit gdb reading + * from acrs[15]. Result is a 64 bit value. Read the + * 32 bit acrs[15] value and shift it by 32. 
Sick... + */ + if (addr == (addr_t) &dummy->regs.acrs[15]) + tmp = ((unsigned long) child->thread.acrs[15]) << 32; + else +#endif + tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset); + + } else if (addr == (addr_t) &dummy->regs.orig_gpr2) { + /* + * orig_gpr2 is stored on the kernel stack + */ + tmp = (addr_t) task_pt_regs(child)->orig_gpr2; + + } else if (addr < (addr_t) &dummy->regs.fp_regs) { + /* + * prevent reads of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + tmp = 0; + + } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { + /* + * floating point regs. are stored in the thread structure + */ + offset = addr - (addr_t) &dummy->regs.fp_regs; + tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset); + if (addr == (addr_t) &dummy->regs.fp_regs.fpc) + tmp &= (unsigned long) FPC_VALID_MASK + << (BITS_PER_LONG - 32); + + } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { + /* + * Handle access to the per_info structure. + */ + addr -= (addr_t) &dummy->regs.per_info; + tmp = __peek_user_per(child, addr); + + } else + tmp = 0; + + return tmp; +} + +static int +peek_user(struct task_struct *child, addr_t addr, addr_t data) +{ + addr_t tmp, mask; + + /* + * Stupid gdb peeks/pokes the access registers in 64 bit with + * an alignment of 4. Programmers from hell... + */ + mask = __ADDR_MASK; +#ifdef CONFIG_64BIT + if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && + addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) + mask = 3; +#endif + if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) + return -EIO; + + tmp = __peek_user(child, addr); + return put_user(tmp, (addr_t __user *) data); +} + +static inline void __poke_user_per(struct task_struct *child, + addr_t addr, addr_t data) +{ + struct per_struct_kernel *dummy = NULL; + + /* + * There are only three fields in the per_info struct that the + * debugger user can write to. 
+ * 1) cr9: the debugger wants to set a new PER event mask + * 2) starting_addr: the debugger wants to set a new starting + * address to use with the PER event mask. + * 3) ending_addr: the debugger wants to set a new ending + * address to use with the PER event mask. + * The user specified PER event mask and the start and end + * addresses are used only if single stepping is not in effect. + * Writes to any other field in per_info are ignored. + */ + if (addr == (addr_t) &dummy->cr9) + /* PER event mask of the user specified per set. */ + child->thread.per_user.control = + data & (PER_EVENT_MASK | PER_CONTROL_MASK); + else if (addr == (addr_t) &dummy->starting_addr) + /* Starting address of the user specified per set. */ + child->thread.per_user.start = data; + else if (addr == (addr_t) &dummy->ending_addr) + /* Ending address of the user specified per set. */ + child->thread.per_user.end = data; +} + +/* + * Write a word to the user area of a process at location addr. This + * operation does have an additional problem compared to peek_user. + * Stores to the program status word and on the floating point + * control register needs to get checked for validity. + */ +static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) +{ + struct user *dummy = NULL; + addr_t offset; + + if (addr < (addr_t) &dummy->regs.acrs) { + /* + * psw and gprs are stored on the stack + */ + if (addr == (addr_t) &dummy->regs.psw.mask && + ((data & ~PSW_MASK_USER) != psw_user_bits || + ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) + /* Invalid psw mask. */ + return -EINVAL; + *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; + + } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { + /* + * access registers are stored in the thread structure + */ + offset = addr - (addr_t) &dummy->regs.acrs; +#ifdef CONFIG_64BIT + /* + * Very special case: old & broken 64 bit gdb writing + * to acrs[15] with a 64 bit value. 
Ignore the lower + * half of the value and write the upper 32 bit to + * acrs[15]. Sick... + */ + if (addr == (addr_t) &dummy->regs.acrs[15]) + child->thread.acrs[15] = (unsigned int) (data >> 32); + else +#endif + *(addr_t *)((addr_t) &child->thread.acrs + offset) = data; + + } else if (addr == (addr_t) &dummy->regs.orig_gpr2) { + /* + * orig_gpr2 is stored on the kernel stack + */ + task_pt_regs(child)->orig_gpr2 = data; + + } else if (addr < (addr_t) &dummy->regs.fp_regs) { + /* + * prevent writes of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + return 0; + + } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { + /* + * floating point regs. are stored in the thread structure + */ + if (addr == (addr_t) &dummy->regs.fp_regs.fpc && + (data & ~((unsigned long) FPC_VALID_MASK + << (BITS_PER_LONG - 32))) != 0) + return -EINVAL; + offset = addr - (addr_t) &dummy->regs.fp_regs; + *(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data; + + } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { + /* + * Handle access to the per_info structure. + */ + addr -= (addr_t) &dummy->regs.per_info; + __poke_user_per(child, addr, data); + + } + + return 0; +} + +static int poke_user(struct task_struct *child, addr_t addr, addr_t data) +{ + addr_t mask; + + /* + * Stupid gdb peeks/pokes the access registers in 64 bit with + * an alignment of 4. Programmers from hell indeed... + */ + mask = __ADDR_MASK; +#ifdef CONFIG_64BIT + if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && + addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) + mask = 3; +#endif + if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) + return -EIO; + + return __poke_user(child, addr, data); +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + ptrace_area parea; + int copied, ret; + + switch (request) { + case PTRACE_PEEKUSR: + /* read the word at location addr in the USER area. 
*/ + return peek_user(child, addr, data); + + case PTRACE_POKEUSR: + /* write the word at location addr in the USER area */ + return poke_user(child, addr, data); + + case PTRACE_PEEKUSR_AREA: + case PTRACE_POKEUSR_AREA: + if (copy_from_user(&parea, (void __force __user *) addr, + sizeof(parea))) + return -EFAULT; + addr = parea.kernel_addr; + data = parea.process_addr; + copied = 0; + while (copied < parea.len) { + if (request == PTRACE_PEEKUSR_AREA) + ret = peek_user(child, addr, data); + else { + addr_t utmp; + if (get_user(utmp, + (addr_t __force __user *) data)) + return -EFAULT; + ret = poke_user(child, addr, utmp); + } + if (ret) + return ret; + addr += sizeof(unsigned long); + data += sizeof(unsigned long); + copied += sizeof(unsigned long); + } + return 0; + case PTRACE_GET_LAST_BREAK: + put_user(task_thread_info(child)->last_break, + (unsigned long __user *) data); + return 0; + default: + /* Removing high order bit from addr (only for 31 bit). */ + addr &= PSW_ADDR_INSN; + return ptrace_request(child, request, addr, data); + } +} + +#ifdef CONFIG_COMPAT +/* + * Now the fun part starts... a 31 bit program running in the + * 31 bit emulation tracing another program. PTRACE_PEEKTEXT, + * PTRACE_PEEKDATA, PTRACE_POKETEXT and PTRACE_POKEDATA are easy + * to handle, the difference to the 64 bit versions of the requests + * is that the access is done in multiples of 4 byte instead of + * 8 bytes (sizeof(unsigned long) on 31/64 bit). + * The ugly part are PTRACE_PEEKUSR, PTRACE_PEEKUSR_AREA, + * PTRACE_POKEUSR and PTRACE_POKEUSR_AREA. If the traced program + * is a 31 bit program too, the content of struct user can be + * emulated. A 31 bit program peeking into the struct user of + * a 64 bit program is a no-no. + */ + +/* + * Same as peek_user_per but for a 31 bit program. 
+ */ +static inline __u32 __peek_user_per_compat(struct task_struct *child, + addr_t addr) +{ + struct compat_per_struct_kernel *dummy32 = NULL; + + if (addr == (addr_t) &dummy32->cr9) + /* Control bits of the active per set. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + PER_EVENT_IFETCH : child->thread.per_user.control; + else if (addr == (addr_t) &dummy32->cr10) + /* Start address of the active per set. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + 0 : child->thread.per_user.start; + else if (addr == (addr_t) &dummy32->cr11) + /* End address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PSW32_ADDR_INSN : child->thread.per_user.end; + else if (addr == (addr_t) &dummy32->bits) + /* Single-step bit. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + 0x80000000 : 0; + else if (addr == (addr_t) &dummy32->starting_addr) + /* Start address of the user specified per set. */ + return (__u32) child->thread.per_user.start; + else if (addr == (addr_t) &dummy32->ending_addr) + /* End address of the user specified per set. */ + return (__u32) child->thread.per_user.end; + else if (addr == (addr_t) &dummy32->perc_atmid) + /* PER code, ATMID and AI of the last PER trap */ + return (__u32) child->thread.per_event.cause << 16; + else if (addr == (addr_t) &dummy32->address) + /* Address of the last PER trap */ + return (__u32) child->thread.per_event.address; + else if (addr == (addr_t) &dummy32->access_id) + /* Access id of the last PER trap */ + return (__u32) child->thread.per_event.paid << 24; + return 0; +} + +/* + * Same as peek_user but for a 31 bit program. 
+ */ +static u32 __peek_user_compat(struct task_struct *child, addr_t addr) +{ + struct compat_user *dummy32 = NULL; + addr_t offset; + __u32 tmp; + + if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); + /* + * psw and gprs are stored on the stack + */ + if (addr == (addr_t) &dummy32->regs.psw.mask) { + /* Fake a 31 bit psw mask. */ + tmp = (__u32)(regs->psw.mask >> 32); + tmp = psw32_user_bits | (tmp & PSW32_MASK_USER); + } else if (addr == (addr_t) &dummy32->regs.psw.addr) { + /* Fake a 31 bit psw address. */ + tmp = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); + } else { + /* gpr 0-15 */ + tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4); + } + } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { + /* + * access registers are stored in the thread structure + */ + offset = addr - (addr_t) &dummy32->regs.acrs; + tmp = *(__u32*)((addr_t) &child->thread.acrs + offset); + + } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) { + /* + * orig_gpr2 is stored on the kernel stack + */ + tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4); + + } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + /* + * prevent reads of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + tmp = 0; + + } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { + /* + * floating point regs. are stored in the thread structure + */ + offset = addr - (addr_t) &dummy32->regs.fp_regs; + tmp = *(__u32 *)((addr_t) &child->thread.fp_regs + offset); + + } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { + /* + * Handle access to the per_info structure. 
+ */ + addr -= (addr_t) &dummy32->regs.per_info; + tmp = __peek_user_per_compat(child, addr); + + } else + tmp = 0; + + return tmp; +} + +static int peek_user_compat(struct task_struct *child, + addr_t addr, addr_t data) +{ + __u32 tmp; + + if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3) + return -EIO; + + tmp = __peek_user_compat(child, addr); + return put_user(tmp, (__u32 __user *) data); +} + +/* + * Same as poke_user_per but for a 31 bit program. + */ +static inline void __poke_user_per_compat(struct task_struct *child, + addr_t addr, __u32 data) +{ + struct compat_per_struct_kernel *dummy32 = NULL; + + if (addr == (addr_t) &dummy32->cr9) + /* PER event mask of the user specified per set. */ + child->thread.per_user.control = + data & (PER_EVENT_MASK | PER_CONTROL_MASK); + else if (addr == (addr_t) &dummy32->starting_addr) + /* Starting address of the user specified per set. */ + child->thread.per_user.start = data; + else if (addr == (addr_t) &dummy32->ending_addr) + /* Ending address of the user specified per set. */ + child->thread.per_user.end = data; +} + +/* + * Same as poke_user but for a 31 bit program. + */ +static int __poke_user_compat(struct task_struct *child, + addr_t addr, addr_t data) +{ + struct compat_user *dummy32 = NULL; + __u32 tmp = (__u32) data; + addr_t offset; + + if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); + /* + * psw, gprs, acrs and orig_gpr2 are stored on the stack + */ + if (addr == (addr_t) &dummy32->regs.psw.mask) { + /* Build a 64 bit psw mask from 31 bit mask. */ + if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits) + /* Invalid psw mask. */ + return -EINVAL; + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (regs->psw.mask & PSW_MASK_BA) | + (__u64)(tmp & PSW32_MASK_USER) << 32; + } else if (addr == (addr_t) &dummy32->regs.psw.addr) { + /* Build a 64 bit psw address from 31 bit address. 
*/ + regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* Transfer 31 bit amode bit to psw mask. */ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | + (__u64)(tmp & PSW32_ADDR_AMODE); + } else { + /* gpr 0-15 */ + *(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp; + } + } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { + /* + * access registers are stored in the thread structure + */ + offset = addr - (addr_t) &dummy32->regs.acrs; + *(__u32*)((addr_t) &child->thread.acrs + offset) = tmp; + + } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) { + /* + * orig_gpr2 is stored on the kernel stack + */ + *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp; + + } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + /* + * prevent writes of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + return 0; + + } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { + /* + * floating point regs. are stored in the thread structure + */ + if (addr == (addr_t) &dummy32->regs.fp_regs.fpc && + (tmp & ~FPC_VALID_MASK) != 0) + /* Invalid floating point control. */ + return -EINVAL; + offset = addr - (addr_t) &dummy32->regs.fp_regs; + *(__u32 *)((addr_t) &child->thread.fp_regs + offset) = tmp; + + } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { + /* + * Handle access to the per_info structure.
+ */ + addr -= (addr_t) &dummy32->regs.per_info; + __poke_user_per_compat(child, addr, data); + } + + return 0; +} + +static int poke_user_compat(struct task_struct *child, + addr_t addr, addr_t data) +{ + if (!is_compat_task() || (addr & 3) || + addr > sizeof(struct compat_user) - 3) + return -EIO; + + return __poke_user_compat(child, addr, data); +} + +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + compat_ptrace_area parea; + int copied, ret; + + switch (request) { + case PTRACE_PEEKUSR: + /* read the word at location addr in the USER area. */ + return peek_user_compat(child, addr, data); + + case PTRACE_POKEUSR: + /* write the word at location addr in the USER area */ + return poke_user_compat(child, addr, data); + + case PTRACE_PEEKUSR_AREA: + case PTRACE_POKEUSR_AREA: + if (copy_from_user(&parea, (void __force __user *) addr, + sizeof(parea))) + return -EFAULT; + addr = parea.kernel_addr; + data = parea.process_addr; + copied = 0; + while (copied < parea.len) { + if (request == PTRACE_PEEKUSR_AREA) + ret = peek_user_compat(child, addr, data); + else { + __u32 utmp; + if (get_user(utmp, + (__u32 __force __user *) data)) + return -EFAULT; + ret = poke_user_compat(child, addr, utmp); + } + if (ret) + return ret; + addr += sizeof(unsigned int); + data += sizeof(unsigned int); + copied += sizeof(unsigned int); + } + return 0; + case PTRACE_GET_LAST_BREAK: + put_user(task_thread_info(child)->last_break, + (unsigned int __user *) data); + return 0; + } + return compat_ptrace_request(child, request, addr, data); +} +#endif + +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) +{ + long ret = 0; + + /* Do the secure computing check first. */ + secure_computing(regs->gprs[2]); + + /* + * The sysc_tracesys code in entry.S stored the system + * call number to gprs[2]. 
+ */ + if (test_thread_flag(TIF_SYSCALL_TRACE) && + (tracehook_report_syscall_entry(regs) || + regs->gprs[2] >= NR_syscalls)) { + /* + * Tracing decided this syscall should not happen or the + * debugger stored an invalid system call number. Skip + * the system call and the system call restart handling. + */ + clear_thread_flag(TIF_SYSCALL); + ret = -1; + } + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->gprs[2]); + + audit_syscall_entry(is_compat_task() ? + AUDIT_ARCH_S390 : AUDIT_ARCH_S390X, + regs->gprs[2], regs->orig_gpr2, + regs->gprs[3], regs->gprs[4], + regs->gprs[5]); + return ret ?: regs->gprs[2]; +} + +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) +{ + audit_syscall_exit(regs); + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->gprs[2]); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, 0); +} + +/* + * user_regset definitions. + */ + +static int s390_regs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (target == current) + save_access_regs(target->thread.acrs); + + if (kbuf) { + unsigned long *k = kbuf; + while (count > 0) { + *k++ = __peek_user(target, pos); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + unsigned long __user *u = ubuf; + while (count > 0) { + if (__put_user(__peek_user(target, pos), u++)) + return -EFAULT; + count -= sizeof(*u); + pos += sizeof(*u); + } + } + return 0; +} + +static int s390_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int rc = 0; + + if (target == current) + save_access_regs(target->thread.acrs); + + if (kbuf) { + const unsigned long *k = kbuf; + while (count > 0 && !rc) { + rc = __poke_user(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } 
else { + const unsigned long __user *u = ubuf; + while (count > 0 && !rc) { + unsigned long word; + rc = __get_user(word, u++); + if (rc) + break; + rc = __poke_user(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } + } + + if (rc == 0 && target == current) + restore_access_regs(target->thread.acrs); + + return rc; +} + +static int s390_fpregs_get(struct task_struct *target, + const struct user_regset *regset, unsigned int pos, + unsigned int count, void *kbuf, void __user *ubuf) +{ + if (target == current) + save_fp_regs(&target->thread.fp_regs); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fp_regs, 0, -1); +} + +static int s390_fpregs_set(struct task_struct *target, + const struct user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + int rc = 0; + + if (target == current) + save_fp_regs(&target->thread.fp_regs); + + /* If setting FPC, must validate it first. */ + if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { + u32 fpc[2] = { target->thread.fp_regs.fpc, 0 }; + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpc, + 0, offsetof(s390_fp_regs, fprs)); + if (rc) + return rc; + if ((fpc[0] & ~FPC_VALID_MASK) != 0 || fpc[1] != 0) + return -EINVAL; + target->thread.fp_regs.fpc = fpc[0]; + } + + if (rc == 0 && count > 0) + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.fp_regs.fprs, + offsetof(s390_fp_regs, fprs), -1); + + if (rc == 0 && target == current) + restore_fp_regs(&target->thread.fp_regs); + + return rc; +} + +#ifdef CONFIG_64BIT + +static int s390_last_break_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (count > 0) { + if (kbuf) { + unsigned long *k = kbuf; + *k = task_thread_info(target)->last_break; + } else { + unsigned long __user *u = ubuf; + if (__put_user(task_thread_info(target)->last_break, u)) + return -EFAULT; 
+ } + } + return 0; +} + +static int s390_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +#endif + +static int s390_system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static int s390_system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static const struct user_regset s390_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(s390_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .get = s390_regs_get, + .set = s390_regs_set, + }, + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(s390_fp_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .get = s390_fpregs_get, + .set = s390_fpregs_set, + }, +#ifdef CONFIG_64BIT + [REGSET_LAST_BREAK] = { + .core_note_type = NT_S390_LAST_BREAK, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .get = s390_last_break_get, + .set = s390_last_break_set, + }, +#endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, +}; + +static const struct user_regset_view user_s390_view = { + .name = UTS_MACHINE, + .e_machine = EM_S390, + .regsets = s390_regsets, + .n = ARRAY_SIZE(s390_regsets) +}; + +#ifdef CONFIG_COMPAT +static int 
s390_compat_regs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (target == current) + save_access_regs(target->thread.acrs); + + if (kbuf) { + compat_ulong_t *k = kbuf; + while (count > 0) { + *k++ = __peek_user_compat(target, pos); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + compat_ulong_t __user *u = ubuf; + while (count > 0) { + if (__put_user(__peek_user_compat(target, pos), u++)) + return -EFAULT; + count -= sizeof(*u); + pos += sizeof(*u); + } + } + return 0; +} + +static int s390_compat_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int rc = 0; + + if (target == current) + save_access_regs(target->thread.acrs); + + if (kbuf) { + const compat_ulong_t *k = kbuf; + while (count > 0 && !rc) { + rc = __poke_user_compat(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + const compat_ulong_t __user *u = ubuf; + while (count > 0 && !rc) { + compat_ulong_t word; + rc = __get_user(word, u++); + if (rc) + break; + rc = __poke_user_compat(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } + } + + if (rc == 0 && target == current) + restore_access_regs(target->thread.acrs); + + return rc; +} + +static int s390_compat_regs_high_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + compat_ulong_t *gprs_high; + + gprs_high = (compat_ulong_t *) + &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; + if (kbuf) { + compat_ulong_t *k = kbuf; + while (count > 0) { + *k++ = *gprs_high; + gprs_high += 2; + count -= sizeof(*k); + } + } else { + compat_ulong_t __user *u = ubuf; + while (count > 0) { + if (__put_user(*gprs_high, u++)) + return -EFAULT; + gprs_high += 2; + count -= sizeof(*u); + } + } + return 
0; +} + +static int s390_compat_regs_high_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + compat_ulong_t *gprs_high; + int rc = 0; + + gprs_high = (compat_ulong_t *) + &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; + if (kbuf) { + const compat_ulong_t *k = kbuf; + while (count > 0) { + *gprs_high = *k++; + *gprs_high += 2; + count -= sizeof(*k); + } + } else { + const compat_ulong_t __user *u = ubuf; + while (count > 0 && !rc) { + unsigned long word; + rc = __get_user(word, u++); + if (rc) + break; + *gprs_high = word; + *gprs_high += 2; + count -= sizeof(*u); + } + } + + return rc; +} + +static int s390_compat_last_break_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + compat_ulong_t last_break; + + if (count > 0) { + last_break = task_thread_info(target)->last_break; + if (kbuf) { + unsigned long *k = kbuf; + *k = last_break; + } else { + unsigned long __user *u = ubuf; + if (__put_user(last_break, u)) + return -EFAULT; + } + } + return 0; +} + +static int s390_compat_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +static const struct user_regset s390_compat_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), + .size = sizeof(compat_long_t), + .align = sizeof(compat_long_t), + .get = s390_compat_regs_get, + .set = s390_compat_regs_set, + }, + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), + .size = sizeof(compat_long_t), + .align = sizeof(compat_long_t), + .get = s390_fpregs_get, + .set = s390_fpregs_set, + }, + [REGSET_LAST_BREAK] = { + .core_note_type = NT_S390_LAST_BREAK, + .n = 1, + 
.size = sizeof(long), + .align = sizeof(long), + .get = s390_compat_last_break_get, + .set = s390_compat_last_break_set, + }, + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, + [REGSET_GENERAL_EXTENDED] = { + .core_note_type = NT_S390_HIGH_GPRS, + .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), + .size = sizeof(compat_long_t), + .align = sizeof(compat_long_t), + .get = s390_compat_regs_high_get, + .set = s390_compat_regs_high_set, + }, +}; + +static const struct user_regset_view user_s390_compat_view = { + .name = "s390", + .e_machine = EM_S390, + .regsets = s390_compat_regsets, + .n = ARRAY_SIZE(s390_compat_regsets) +}; +#endif + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) + return &user_s390_compat_view; +#endif + return &user_s390_view; +} + +static const char *gpr_names[NUM_GPRS] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", +}; + +unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + if (offset >= NUM_GPRS) + return 0; + return regs->gprs[offset]; +} + +int regs_query_register_offset(const char *name) +{ + unsigned long offset; + + if (!name || *name != 'r') + return -EINVAL; + if (strict_strtoul(name + 1, 10, &offset)) + return -EINVAL; + if (offset >= NUM_GPRS) + return -EINVAL; + return offset; +} + +const char *regs_query_register_name(unsigned int offset) +{ + if (offset >= NUM_GPRS) + return NULL; + return gpr_names[offset]; +} + +static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + unsigned long ksp = kernel_stack_pointer(regs); + + return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of 
the stack + * @regs:pt_regs which contains kernel stack pointer. + * @n:stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long addr; + + addr = kernel_stack_pointer(regs) + n * sizeof(long); + if (!regs_within_kernel_stack(regs, addr)) + return 0; + return *(unsigned long *)addr; +} diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S new file mode 100644 index 00000000..ad67c214 --- /dev/null +++ b/arch/s390/kernel/reipl.S @@ -0,0 +1,93 @@ +/* + * arch/s390/kernel/reipl.S + * + * S390 version + * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com) + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> + +# +# store_status: Empty implementation until kdump is supported on 31 bit +# +ENTRY(store_status) + br %r14 + +# +# do_reipl_asm +# Parameter: r2 = schid of reipl device +# +ENTRY(do_reipl_asm) + basr %r13,0 +.Lpg0: lpsw .Lnewpsw-.Lpg0(%r13) +.Lpg1: # do store status of all registers + + stm %r0,%r15,__LC_GPREGS_SAVE_AREA + stctl %c0,%c15,__LC_CREGS_SAVE_AREA + stam %a0,%a15,__LC_AREGS_SAVE_AREA + l %r10,.Ldump_pfx-.Lpg0(%r13) + mvc __LC_PREFIX_SAVE_AREA(4),0(%r10) + stckc .Lclkcmp-.Lpg0(%r13) + mvc __LC_CLOCK_COMP_SAVE_AREA(8),.Lclkcmp-.Lpg0(%r13) + stpt __LC_CPU_TIMER_SAVE_AREA + st %r13, __LC_PSW_SAVE_AREA+4 + lctl %c6,%c6,.Lall-.Lpg0(%r13) + lr %r1,%r2 + mvc __LC_PGM_NEW_PSW(8),.Lpcnew-.Lpg0(%r13) + stsch .Lschib-.Lpg0(%r13) + oi .Lschib+5-.Lpg0(%r13),0x84 +.Lecs: xi .Lschib+27-.Lpg0(%r13),0x01 + msch .Lschib-.Lpg0(%r13) + lhi %r0,5 +.Lssch: ssch .Liplorb-.Lpg0(%r13) + jz .L001 + brct %r0,.Lssch + bas %r14,.Ldisab-.Lpg0(%r13) +.L001: mvc __LC_IO_NEW_PSW(8),.Lionew-.Lpg0(%r13) +.Ltpi: lpsw .Lwaitpsw-.Lpg0(%r13) +.Lcont: 
c %r1,__LC_SUBCHANNEL_ID + jnz .Ltpi + clc __LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13) + jnz .Ltpi + tsch .Liplirb-.Lpg0(%r13) + tm .Liplirb+9-.Lpg0(%r13),0xbf + jz .L002 + bas %r14,.Ldisab-.Lpg0(%r13) +.L002: tm .Liplirb+8-.Lpg0(%r13),0xf3 + jz .L003 + bas %r14,.Ldisab-.Lpg0(%r13) +.L003: st %r1,__LC_SUBCHANNEL_ID + lpsw 0 + sigp 0,0,0(6) +.Ldisab: st %r14,.Ldispsw+4-.Lpg0(%r13) + lpsw .Ldispsw-.Lpg0(%r13) + .align 8 +.Lclkcmp: .quad 0x0000000000000000 +.Lall: .long 0xff000000 +.Ldump_pfx: .long dump_prefix_page + .align 8 +.Lnewpsw: .long 0x00080000,0x80000000+.Lpg1 +.Lpcnew: .long 0x00080000,0x80000000+.Lecs +.Lionew: .long 0x00080000,0x80000000+.Lcont +.Lwaitpsw: .long 0x020a0000,0x00000000+.Ltpi +.Ldispsw: .long 0x000a0000,0x00000000 +.Liplccws: .long 0x02000000,0x60000018 + .long 0x08000008,0x20000001 +.Liplorb: .long 0x0049504c,0x0040ff80 + .long 0x00000000+.Liplccws +.Lschib: .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 +.Liplirb: .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S new file mode 100644 index 00000000..36b32658 --- /dev/null +++ b/arch/s390/kernel/reipl64.S @@ -0,0 +1,154 @@ +/* + * Copyright IBM Corp 2000,2011 + * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Denis Joseph Barrow, + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> + +# +# store_status +# +# Prerequisites to run this function: +# - Prefix register is set to zero +# - Original prefix register is stored in "dump_prefix_page" +# - Lowcore protection is off +# +ENTRY(store_status) + /* Save register one and load save area base */ + stg %r1,__LC_SAVE_AREA_RESTART + lghi 
%r1,SAVE_AREA_BASE + /* General purpose registers */ + stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + lg %r2,__LC_SAVE_AREA_RESTART + stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) + /* Control registers */ + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Access registers */ + stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Floating point registers */ + std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Floating point control register */ + stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* CPU timer */ + stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Saved prefix register */ + larl %r2,dump_prefix_page + mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2) + /* Clock comparator - seven bytes */ + larl %r2,.Lclkcmp + stckc 0(%r2) + mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2) + /* Program status word */ + epsw %r2,%r3 + st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1) + st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1) + larl %r2,store_status + stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) + br 
%r14 + + .section .bss + .align 8 +.Lclkcmp: .quad 0x0000000000000000 + .previous + +# +# do_reipl_asm +# Parameter: r2 = schid of reipl device +# + +ENTRY(do_reipl_asm) + basr %r13,0 +.Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) +.Lpg1: brasl %r14,store_status + + lctlg %c6,%c6,.Lall-.Lpg0(%r13) + lgr %r1,%r2 + mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13) + stsch .Lschib-.Lpg0(%r13) + oi .Lschib+5-.Lpg0(%r13),0x84 +.Lecs: xi .Lschib+27-.Lpg0(%r13),0x01 + msch .Lschib-.Lpg0(%r13) + lghi %r0,5 +.Lssch: ssch .Liplorb-.Lpg0(%r13) + jz .L001 + brct %r0,.Lssch + bas %r14,.Ldisab-.Lpg0(%r13) +.L001: mvc __LC_IO_NEW_PSW(16),.Lionew-.Lpg0(%r13) +.Ltpi: lpswe .Lwaitpsw-.Lpg0(%r13) +.Lcont: c %r1,__LC_SUBCHANNEL_ID + jnz .Ltpi + clc __LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13) + jnz .Ltpi + tsch .Liplirb-.Lpg0(%r13) + tm .Liplirb+9-.Lpg0(%r13),0xbf + jz .L002 + bas %r14,.Ldisab-.Lpg0(%r13) +.L002: tm .Liplirb+8-.Lpg0(%r13),0xf3 + jz .L003 + bas %r14,.Ldisab-.Lpg0(%r13) +.L003: st %r1,__LC_SUBCHANNEL_ID + lhi %r1,0 # mode 0 = esa + slr %r0,%r0 # set cpuid to zero + sigp %r1,%r0,0x12 # switch to esa mode + lpsw 0 +.Ldisab: sll %r14,1 + srl %r14,1 # need to kill hi bit to avoid specification exceptions. + st %r14,.Ldispsw+12-.Lpg0(%r13) + lpswe .Ldispsw-.Lpg0(%r13) + .align 8 +.Lall: .quad 0x00000000ff000000 + .align 16 +/* + * These addresses have to be 31 bit otherwise + * the sigp will throw a specifcation exception + * when switching to ESA mode as bit 31 be set + * in the ESA psw. + * Bit 31 of the addresses has to be 0 for the + * 31bit lpswe instruction a fact they appear to have + * omitted from the pop. 
+ */ +.Lnewpsw: .quad 0x0000000080000000 + .quad .Lpg1 +.Lpcnew: .quad 0x0000000080000000 + .quad .Lecs +.Lionew: .quad 0x0000000080000000 + .quad .Lcont +.Lwaitpsw: .quad 0x0202000080000000 + .quad .Ltpi +.Ldispsw: .quad 0x0002000080000000 + .quad 0x0000000000000000 +.Liplccws: .long 0x02000000,0x60000018 + .long 0x08000008,0x20000001 +.Liplorb: .long 0x0049504c,0x0040ff80 + .long 0x00000000+.Liplccws +.Lschib: .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 +.Liplirb: .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S new file mode 100644 index 00000000..c91d70ae --- /dev/null +++ b/arch/s390/kernel/relocate_kernel.S @@ -0,0 +1,119 @@ +/* + * arch/s390/kernel/relocate_kernel.S + * + * (C) Copyright IBM Corp. 2005 + * + * Author(s): Rolf Adelsberger, + * Heiko Carstens <heiko.carstens@de.ibm.com> + * + */ + +#include <linux/linkage.h> + +/* + * moves the new kernel to its destination... + * %r2 = pointer to first kimage_entry_t + * %r3 = start address - where to jump to after the job is done... + * + * %r5 will be used as temp. 
storage + * %r6 holds the destination address + * %r7 = PAGE_SIZE + * %r8 holds the source address + * %r9 = PAGE_SIZE + * %r10 is a page mask + */ + + .text +ENTRY(relocate_kernel) + basr %r13,0 # base address + .base: + stnsm sys_msk-.base(%r13),0xfb # disable DAT + stctl %c0,%c15,ctlregs-.base(%r13) + stm %r0,%r15,gprregs-.base(%r13) + la %r1,load_psw-.base(%r13) + mvc 0(8,%r0),0(%r1) + la %r0,.back-.base(%r13) + st %r0,4(%r0) + oi 4(%r0),0x80 + mvc 0x68(8,%r0),0(%r1) + la %r0,.back_pgm-.base(%r13) + st %r0,0x6c(%r0) + oi 0x6c(%r0),0x80 + lhi %r0,0 + diag %r0,%r0,0x308 + .back: + basr %r13,0 + .back_base: + oi have_diag308-.back_base(%r13),0x01 + lctl %c0,%c15,ctlregs-.back_base(%r13) + lm %r0,%r15,gprregs-.back_base(%r13) + j .start_reloc + .back_pgm: + lm %r0,%r15,gprregs-.base(%r13) + .start_reloc: + lhi %r10,-1 # preparing the mask + sll %r10,12 # shift it such that it becomes 0xf000 + .top: + lhi %r7,4096 # load PAGE_SIZE in r7 + lhi %r9,4096 # load PAGE_SIZE in r9 + l %r5,0(%r2) # read another word for indirection page + ahi %r2,4 # increment pointer + tml %r5,0x1 # is it a destination page? + je .indir_check # NO, goto "indir_check" + lr %r6,%r5 # r6 = r5 + nr %r6,%r10 # mask it out and... + j .top # ...next iteration + .indir_check: + tml %r5,0x2 # is it a indirection page? + je .done_test # NO, goto "done_test" + nr %r5,%r10 # YES, mask out, + lr %r2,%r5 # move it into the right register, + j .top # and read next... + .done_test: + tml %r5,0x4 # is it the done indicator? + je .source_test # NO! Well, then it should be the source indicator... + j .done # ok, lets finish it here... + .source_test: + tml %r5,0x8 # it should be a source indicator... + je .top # NO, ignore it... 
+ lr %r8,%r5 # r8 = r5 + nr %r8,%r10 # masking + 0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0 + jo 0b + j .top + .done: + sr %r0,%r0 # clear register r0 + la %r4,load_psw-.base(%r13) # load psw-address into the register + o %r3,4(%r4) # or load address into psw + st %r3,4(%r4) + mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 + tm have_diag308-.base(%r13),0x01 + jno .no_diag308 + diag %r0,%r0,0x308 + .no_diag308: + sr %r1,%r1 # clear %r1 + sr %r2,%r2 # clear %r2 + sigp %r1,%r2,0x12 # set cpuid to zero + lpsw 0 # hopefully start new kernel... + + .align 8 + load_psw: + .long 0x00080000,0x80000000 + sys_msk: + .quad 0 + ctlregs: + .rept 16 + .long 0 + .endr + gprregs: + .rept 16 + .long 0 + .endr + have_diag308: + .byte 0 + .align 8 + relocate_kernel_end: + .align 8 + .globl relocate_kernel_len + relocate_kernel_len: + .quad relocate_kernel_end - relocate_kernel diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S new file mode 100644 index 00000000..7c3ce589 --- /dev/null +++ b/arch/s390/kernel/relocate_kernel64.S @@ -0,0 +1,122 @@ +/* + * arch/s390/kernel/relocate_kernel64.S + * + * (C) Copyright IBM Corp. 2005 + * + * Author(s): Rolf Adelsberger, + * Heiko Carstens <heiko.carstens@de.ibm.com> + * + */ + +#include <linux/linkage.h> + +/* + * moves the new kernel to its destination... + * %r2 = pointer to first kimage_entry_t + * %r3 = start address - where to jump to after the job is done... + * + * %r5 will be used as temp. 
storage + * %r6 holds the destination address + * %r7 = PAGE_SIZE + * %r8 holds the source address + * %r9 = PAGE_SIZE + * + * 0xf000 is a page_mask + */ + + .text +ENTRY(relocate_kernel) + basr %r13,0 # base address + .base: + stnsm sys_msk-.base(%r13),0xfb # disable DAT + stctg %c0,%c15,ctlregs-.base(%r13) + stmg %r0,%r15,gprregs-.base(%r13) + lghi %r0,3 + sllg %r0,%r0,31 + stg %r0,0x1d0(%r0) + la %r0,.back_pgm-.base(%r13) + stg %r0,0x1d8(%r0) + la %r1,load_psw-.base(%r13) + mvc 0(8,%r0),0(%r1) + la %r0,.back-.base(%r13) + st %r0,4(%r0) + oi 4(%r0),0x80 + lghi %r0,0 + diag %r0,%r0,0x308 + .back: + lhi %r1,1 # mode 1 = esame + sigp %r1,%r0,0x12 # switch to esame mode + sam64 # switch to 64 bit addressing mode + basr %r13,0 + .back_base: + oi have_diag308-.back_base(%r13),0x01 + lctlg %c0,%c15,ctlregs-.back_base(%r13) + lmg %r0,%r15,gprregs-.back_base(%r13) + j .top + .back_pgm: + lmg %r0,%r15,gprregs-.base(%r13) + .top: + lghi %r7,4096 # load PAGE_SIZE in r7 + lghi %r9,4096 # load PAGE_SIZE in r9 + lg %r5,0(%r2) # read another word for indirection page + aghi %r2,8 # increment pointer + tml %r5,0x1 # is it a destination page? + je .indir_check # NO, goto "indir_check" + lgr %r6,%r5 # r6 = r5 + nill %r6,0xf000 # mask it out and... + j .top # ...next iteration + .indir_check: + tml %r5,0x2 # is it a indirection page? + je .done_test # NO, goto "done_test" + nill %r5,0xf000 # YES, mask out, + lgr %r2,%r5 # move it into the right register, + j .top # and read next... + .done_test: + tml %r5,0x4 # is it the done indicator? + je .source_test # NO! Well, then it should be the source indicator... + j .done # ok, lets finish it here... + .source_test: + tml %r5,0x8 # it should be a source indicator... + je .top # NO, ignore it... 
+ lgr %r8,%r5 # r8 = r5 + nill %r8,0xf000 # masking + 0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0 + jo 0b + j .top + .done: + sgr %r0,%r0 # clear register r0 + la %r4,load_psw-.base(%r13) # load psw-address into the register + o %r3,4(%r4) # or load address into psw + st %r3,4(%r4) + mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 + tm have_diag308-.base(%r13),0x01 + jno .no_diag308 + diag %r0,%r0,0x308 + .no_diag308: + sam31 # 31 bit mode + sr %r1,%r1 # erase register r1 + sr %r2,%r2 # erase register r2 + sigp %r1,%r2,0x12 # set cpuid to zero + lpsw 0 # hopefully start new kernel... + + .align 8 + load_psw: + .long 0x00080000,0x80000000 + sys_msk: + .quad 0 + ctlregs: + .rept 16 + .quad 0 + .endr + gprregs: + .rept 16 + .quad 0 + .endr + have_diag308: + .byte 0 + .align 8 + relocate_kernel_end: + .align 8 + .globl relocate_kernel_len + relocate_kernel_len: + .quad relocate_kernel_end - relocate_kernel diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c new file mode 100644 index 00000000..57b53664 --- /dev/null +++ b/arch/s390/kernel/s390_ksyms.c @@ -0,0 +1,10 @@ +#include <linux/module.h> +#include <linux/kvm_host.h> +#include <asm/ftrace.h> + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +EXPORT_SYMBOL(sie64a); +#endif diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S new file mode 100644 index 00000000..95792d84 --- /dev/null +++ b/arch/s390/kernel/sclp.S @@ -0,0 +1,353 @@ +/* + * Mini SCLP driver. + * + * Copyright IBM Corp. 
2004,2009
 *
 * Author(s): Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>,
 *	      Heiko Carstens <heiko.carstens@de.ibm.com>,
 *
 */

#include <linux/linkage.h>

LC_EXT_NEW_PSW		= 0x58			# addr of ext int handler
LC_EXT_NEW_PSW_64	= 0x1b0			# addr of ext int handler 64 bit
LC_EXT_INT_PARAM	= 0x80			# addr of ext int parameter
LC_EXT_INT_CODE		= 0x86			# addr of ext int code
LC_AR_MODE_ID		= 0xa3

#
# Subroutine which waits synchronously until either an external interruption
# or a timeout occurs.
#
# Parameters:
#   R2 = 0 for no timeout, non-zero for timeout in (approximated) seconds
#
# Returns:
#   R2 = 0 on interrupt, 2 on timeout
#   R3 = external interruption parameter if R2=0
#
# The external new PSW is temporarily replaced by one pointing at .LwaitS1
# and control register 0 is modified; both are restored before returning.
#

_sclp_wait_int:
	stm	%r6,%r15,24(%r15)		# save registers
	basr	%r13,0				# get base register
.LbaseS1:
	ahi	%r15,-96			# create stack frame
	la	%r8,LC_EXT_NEW_PSW		# register int handler
	la	%r9,.LextpswS1-.LbaseS1(%r13)
#ifdef CONFIG_64BIT
	tm	LC_AR_MODE_ID,1
	jno	.Lesa1
	la	%r8,LC_EXT_NEW_PSW_64		# register int handler 64 bit
	la	%r9,.LextpswS1_64-.LbaseS1(%r13)
.Lesa1:
#endif
	mvc	.LoldpswS1-.LbaseS1(16,%r13),0(%r8)
	mvc	0(16,%r8),0(%r9)
	lhi	%r6,0x0200			# cr mask for ext int (cr0.54)
	ltr	%r2,%r2
	jz	.LsetctS1
	ahi	%r6,0x0800			# cr mask for clock int (cr0.52)
	stck	.LtimeS1-.LbaseS1(%r13)		# initiate timeout
	al	%r2,.LtimeS1-.LbaseS1(%r13)
	st	%r2,.LtimeS1-.LbaseS1(%r13)
	sckc	.LtimeS1-.LbaseS1(%r13)

.LsetctS1:
	stctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# enable required interrupts
	l	%r0,.LctlS1-.LbaseS1(%r13)
	lhi	%r1,~(0x200 | 0x800)		# clear old values
	nr	%r1,%r0
	or	%r1,%r6				# set new value
	st	%r1,.LctlS1-.LbaseS1(%r13)
	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)
	st	%r0,.LctlS1-.LbaseS1(%r13)	# keep old cr0 for the restore below
	lhi	%r2,2				# return code for timeout
.LloopS1:
	lpsw	.LwaitpswS1-.LbaseS1(%r13)	# wait until interrupt
.LwaitS1:
	lh	%r7,LC_EXT_INT_CODE
	chi	%r7,0x1004			# timeout?
	je	.LtimeoutS1
	chi	%r7,0x2401			# service int?
	jne	.LloopS1
	sr	%r2,%r2
	l	%r3,LC_EXT_INT_PARAM
.LtimeoutS1:
	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# restore interrupt setting
	# restore old handler
	mvc	0(16,%r8),.LoldpswS1-.LbaseS1(%r13)
	lm	%r6,%r15,120(%r15)		# restore registers
	br	%r14				# return to caller

	.align	8
.LoldpswS1:
	.long	0, 0, 0, 0			# old ext int PSW
.LextpswS1:
	.long	0x00080000, 0x80000000+.LwaitS1	# PSW to handle ext int
#ifdef CONFIG_64BIT
.LextpswS1_64:
	.quad	0x0000000180000000, .LwaitS1	# PSW to handle ext int, 64 bit
#endif
.LwaitpswS1:
	.long	0x010a0000, 0x00000000+.LloopS1	# PSW to wait for ext int
.LtimeS1:
	.quad	0				# current time
.LctlS1:
	.long	0				# CT0 contents

#
# Subroutine to synchronously issue a service call.
#
# Parameters:
#   R2 = command word
#   R3 = sccb address
#
# Returns:
#   R2 = 0 on success, 1 on failure
#   R3 = sccb response code if R2 = 0
#
# The call is retried after waiting for an interrupt as long as the
# servc instruction reports busy.
#

_sclp_servc:
	stm	%r6,%r15,24(%r15)		# save registers
	ahi	%r15,-96			# create stack frame
	lr	%r6,%r2				# save command word
	lr	%r7,%r3				# save sccb address
.LretryS2:
	lhi	%r2,1				# error return code
	.insn	rre,0xb2200000,%r6,%r7		# servc
	brc	1,.LendS2			# exit if not operational
	brc	8,.LnotbusyS2			# go on if not busy
	sr	%r2,%r2				# wait until no longer busy
	bras	%r14,_sclp_wait_int
	j	.LretryS2			# retry
.LnotbusyS2:
	sr	%r2,%r2				# wait until result
	bras	%r14,_sclp_wait_int
	sr	%r2,%r2
	lh	%r3,6(%r7)			# halfword at sccb offset 6
.LendS2:
	lm	%r6,%r15,120(%r15)		# restore registers
	br	%r14

#
# Subroutine to set up the SCLP interface.
#
# Parameters:
#   R2 = 0 to activate, non-zero to deactivate
#
# Returns:
#   R2 = 0 on success, non-zero on failure
#
# The init mask sccb template is copied into the work area; for
# deactivation the first two mask words are cleared before the call.
#

_sclp_setup:
	stm	%r6,%r15,24(%r15)		# save registers
	ahi	%r15,-96			# create stack frame
	basr	%r13,0				# get base register
.LbaseS3:
	l	%r6,.LsccbS0-.LbaseS3(%r13)	# prepare init mask sccb
	mvc	0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13)
	ltr	%r2,%r2				# initialization?
	jz	.LdoinitS3			# go ahead
	# clear masks
	xc	.LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6)
.LdoinitS3:
	l	%r2,.LwritemaskS3-.LbaseS3(%r13)# get command word
	lr	%r3,%r6				# get sccb address
	bras	%r14,_sclp_servc		# issue service call
	ltr	%r2,%r2				# servc successful?
	jnz	.LerrorS3
	chi	%r3,0x20			# write mask successful?
	jne	.LerrorS3
	# check masks
	la	%r2,.LinitmaskS3-.LinitsccbS3(%r6)
	l	%r1,0(%r2)			# receive mask ok?
	n	%r1,12(%r2)
	cl	%r1,0(%r2)
	jne	.LerrorS3
	l	%r1,4(%r2)			# send mask ok?
	n	%r1,8(%r2)
	cl	%r1,4(%r2)
	# NOTE(review): sr also sets the condition code, so the je below may
	# no longer reflect the result of the cl above - confirm.
	sr	%r2,%r2
	je	.LendS3
.LerrorS3:
	lhi	%r2,1				# error return code
.LendS3:
	lm	%r6,%r15,120(%r15)		# restore registers
	br	%r14
.LwritemaskS3:
	.long	0x00780005			# SCLP command for write mask
.LinitsccbS3:
	.word	.LinitendS3-.LinitsccbS3
	.byte	0,0,0,0
	.word	0
	.word	0
	.word	4
.LinitmaskS3:
	.long	0x80000000
	.long	0x40000000
	.long	0
	.long	0
.LinitendS3:

#
# Subroutine which prints a given text to the SCLP console.
#
# Parameters:
#   R2 = address of nil-terminated ASCII text
#
# Returns:
#   R2 = 0 on success, 1 on failure
#
# One mto is appended to the write data sccb per input line; every
# character is translated through the _ascebc table after it is stored.
#

_sclp_print:
	stm	%r6,%r15,24(%r15)		# save registers
	ahi	%r15,-96			# create stack frame
	basr	%r13,0				# get base register
.LbaseS4:
	l	%r8,.LsccbS0-.LbaseS4(%r13)	# prepare write data sccb
	mvc	0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13)
	la	%r7,.LmtoS4-.LwritesccbS4(%r8)	# current mto addr
	sr	%r0,%r0
	l	%r10,.Lascebc-.LbaseS4(%r13)	# address of translation table
.LinitmtoS4:
	# initialize mto
	mvc	0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13)
	lhi	%r6,.LmtoendS4-.LmtoS4		# current mto length
.LloopS4:
	ic	%r0,0(%r2)			# get character
	ahi	%r2,1
	ltr	%r0,%r0				# end of string?
	jz	.LfinalizemtoS4
	# NOTE(review): the character is still untranslated here (tr runs
	# below) and the input is documented as ASCII, where newline is
	# 0x0a rather than 0x15 - confirm the intended value.
	chi	%r0,0x15			# end of line (NL)?
	jz	.LfinalizemtoS4
	stc	%r0,0(%r6,%r7)			# copy to mto
	la	%r11,0(%r6,%r7)
	tr	0(1,%r11),0(%r10)		# translate to EBCDIC
	ahi	%r6,1
	j	.LloopS4
.LfinalizemtoS4:
	sth	%r6,0(%r7)			# update mto length
	lh	%r9,.LmdbS4-.LwritesccbS4(%r8)	# update mdb length
	ar	%r9,%r6
	sth	%r9,.LmdbS4-.LwritesccbS4(%r8)
	lh	%r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length
	ar	%r9,%r6
	sth	%r9,.LevbufS4-.LwritesccbS4(%r8)
	lh	%r9,0(%r8)			# update sccb length
	ar	%r9,%r6
	sth	%r9,0(%r8)
	ar	%r7,%r6				# update current mto address
	ltr	%r0,%r0				# more characters?
	jnz	.LinitmtoS4
	l	%r2,.LwritedataS4-.LbaseS4(%r13)# write data
	lr	%r3,%r8
	bras	%r14,_sclp_servc
	ltr	%r2,%r2				# servc successful?
	jnz	.LendS4
	chi	%r3,0x20			# write data successful?
	je	.LendS4
	lhi	%r2,1				# error return code
.LendS4:
	lm	%r6,%r15,120(%r15)		# restore registers
	br	%r14

#
# Function which prints a given text to the SCLP console.
#
# Parameters:
#   R2 = address of nil-terminated ASCII text
#
# Returns:
#   R2 = 0 on success, 1 on failure
#
# Calls _sclp_setup to activate, _sclp_print to print and _sclp_setup
# again to deactivate; stops at the first step that returns non-zero.
#

ENTRY(_sclp_print_early)
	stm	%r6,%r15,24(%r15)		# save registers
	ahi	%r15,-96			# create stack frame
#ifdef CONFIG_64BIT
	tm	LC_AR_MODE_ID,1
	jno	.Lesa2
	ahi	%r15,-80
	stmh	%r6,%r15,96(%r15)		# store upper register halves
.Lesa2:
#endif
	lr	%r10,%r2			# save string pointer
	lhi	%r2,0
	bras	%r14,_sclp_setup		# enable console
	ltr	%r2,%r2
	jnz	.LendS5
	lr	%r2,%r10
	bras	%r14,_sclp_print		# print string
	ltr	%r2,%r2
	jnz	.LendS5
	lhi	%r2,1
	bras	%r14,_sclp_setup		# disable console
.LendS5:
#ifdef CONFIG_64BIT
	tm	LC_AR_MODE_ID,1
	jno	.Lesa3
	lmh	%r6,%r15,96(%r15)		# restore upper register halves
	ahi	%r15,80
.Lesa3:
#endif
	lm	%r6,%r15,120(%r15)		# restore registers
	br	%r14

.LwritedataS4:
	.long	0x00760005			# SCLP command for write data
.LwritesccbS4:
	# sccb
	.word	.LmtoS4-.LwritesccbS4
	.byte	0
	.byte	0,0,0
	.word	0

	# evbuf
.LevbufS4:
	.word	.LmtoS4-.LevbufS4
	.byte	0x02
	.byte	0
	.word	0

.LmdbS4:
	# mdb
	.word	.LmtoS4-.LmdbS4
	.word	1
	.long	0xd4c4c240
	.long	1

	# go
.LgoS4:
	.word	.LmtoS4-.LgoS4
	.word	1
	.long	0
	.byte	0,0,0,0,0,0,0,0
	.byte	0,0,0
	.byte	0
	.byte	0,0,0,0,0,0,0
	.byte	0
	.word	0
	.byte	0,0,0,0,0,0,0,0,0,0
	.byte	0,0,0,0,0,0,0,0
	.byte	0,0,0,0,0,0,0,0

	# mto template, copied once per output line by _sclp_print
.LmtoS4:
	.word	.LmtoendS4-.LmtoS4
	.word	4
	.word	0x1000
	.byte	0
	.byte	0,0,0
.LmtoendS4:

	# Global constants
.LsccbS0:
	.long	_sclp_work_area
.Lascebc:
	.long	_ascebc

	# page sized, page aligned buffer used as sccb by the routines above
.section .data,"aw",@progbits
	.balign 4096
_sclp_work_area:
	.fill 4096
.previous
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
new file mode 100644
index 00000000..06264ae8
--- /dev/null
+++ b/arch/s390/kernel/setup.c
@@ -0,0 +1,1092 @@
/*
 *  arch/s390/kernel/setup.c
 *
 *  S390 version
 *    Copyright (C) IBM Corp. 1999,2012
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

/* prefix used by the pr_*() messages in this file */
#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
#include <linux/ftrace.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/compat.h>

#include <asm/ipl.h>
#include <asm/uaccess.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/kvm_virtio.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include "entry.h"

/*
 * Default PSW mask bits for kernel and user mode. The address space
 * control part of both is rewritten by set_amode_primary().
 */
long psw_kernel_bits	= PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY |
			  PSW_MASK_EA | PSW_MASK_BA;
long psw_user_bits	= PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT |
			  PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK |
			  PSW_MASK_PSTATE | PSW_ASC_HOME;

/*
 * User copy operations.
 */
struct uaccess_ops uaccess;
EXPORT_SYMBOL(uaccess);

/*
 * Machine setup..
 */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

/* -1 means no console device number is known (see setup_zfcpdump()) */
unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

/* reset to -1 whenever condev= supplies a device number */
unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);

/* filled in by setup_hwcaps() */
unsigned long elf_hwcap = 0;
char elf_platform[ELF_PLATFORM_SIZE];

struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];

/* memory_end_set is set once memory_end has been given explicitly */
int __initdata memory_end_set;
unsigned long __initdata memory_end;

unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

/*
 * Holds the requested vmalloc size after "vmalloc=" parsing until
 * setup_memory_end() replaces it with the end address of the area.
 */
unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

/* start of the struct page array, computed in setup_memory_end() */
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);

/* An array with a pointer to the lowcore of every CPU. */
struct _lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

/*
 * This is set up by the setup-routine at boot-time
 * for S390 need to find out, what we have to setup
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameter.
+ */ + +static int __init condev_setup(char *str) +{ + int vdev; + + vdev = simple_strtoul(str, &str, 0); + if (vdev >= 0 && vdev < 65536) { + console_devno = vdev; + console_irq = -1; + } + return 1; +} + +__setup("condev=", condev_setup); + +static void __init set_preferred_console(void) +{ + if (MACHINE_IS_KVM) + add_preferred_console("hvc", 0, NULL); + else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + add_preferred_console("ttyS", 0, NULL); + else if (CONSOLE_IS_3270) + add_preferred_console("tty3270", 0, NULL); +} + +static int __init conmode_setup(char *str) +{ +#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) + if (strncmp(str, "hwc", 4) == 0 || strncmp(str, "sclp", 5) == 0) + SET_CONSOLE_SCLP; +#endif +#if defined(CONFIG_TN3215_CONSOLE) + if (strncmp(str, "3215", 5) == 0) + SET_CONSOLE_3215; +#endif +#if defined(CONFIG_TN3270_CONSOLE) + if (strncmp(str, "3270", 5) == 0) + SET_CONSOLE_3270; +#endif + set_preferred_console(); + return 1; +} + +__setup("conmode=", conmode_setup); + +static void __init conmode_default(void) +{ + char query_buffer[1024]; + char *ptr; + + if (MACHINE_IS_VM) { + cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); + console_devno = simple_strtoul(query_buffer + 5, NULL, 16); + ptr = strstr(query_buffer, "SUBCHANNEL ="); + console_irq = simple_strtoul(ptr + 13, NULL, 16); + cpcmd("QUERY TERM", query_buffer, 1024, NULL); + ptr = strstr(query_buffer, "CONMODE"); + /* + * Set the conmode to 3215 so that the device recognition + * will set the cu_type of the console to 3215. If the + * conmode is 3270 and we don't set it back then both + * 3215 and the 3270 driver will try to access the console + * device (3215 as console and 3270 as normal tty). 
+ */ + cpcmd("TERM CONMODE 3215", NULL, 0, NULL); + if (ptr == NULL) { +#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) + SET_CONSOLE_SCLP; +#endif + return; + } + if (strncmp(ptr + 8, "3270", 4) == 0) { +#if defined(CONFIG_TN3270_CONSOLE) + SET_CONSOLE_3270; +#elif defined(CONFIG_TN3215_CONSOLE) + SET_CONSOLE_3215; +#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) + SET_CONSOLE_SCLP; +#endif + } else if (strncmp(ptr + 8, "3215", 4) == 0) { +#if defined(CONFIG_TN3215_CONSOLE) + SET_CONSOLE_3215; +#elif defined(CONFIG_TN3270_CONSOLE) + SET_CONSOLE_3270; +#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) + SET_CONSOLE_SCLP; +#endif + } + } else { +#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) + SET_CONSOLE_SCLP; +#endif + } +} + +#ifdef CONFIG_ZFCPDUMP +static void __init setup_zfcpdump(unsigned int console_devno) +{ + static char str[41]; + + if (ipl_info.type != IPL_TYPE_FCP_DUMP) + return; + if (OLDMEM_BASE) + return; + if (console_devno != -1) + sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x", + ipl_info.data.fcp.dev_id.devno, console_devno); + else + sprintf(str, " cio_ignore=all,!0.0.%04x", + ipl_info.data.fcp.dev_id.devno); + strcat(boot_command_line, str); + console_loglevel = 2; +} +#else +static inline void setup_zfcpdump(unsigned int console_devno) {} +#endif /* CONFIG_ZFCPDUMP */ + + /* + * Reboot, halt and power_off stubs. They just call _machine_restart, + * _machine_halt or _machine_power_off. + */ + +void machine_restart(char *command) +{ + if ((!in_interrupt() && !in_atomic()) || oops_in_progress) + /* + * Only unblank the console if we are called in enabled + * context or a bust_spinlocks cleared the way for us. 
+ */ + console_unblank(); + _machine_restart(command); +} + +void machine_halt(void) +{ + if (!in_interrupt() || oops_in_progress) + /* + * Only unblank the console if we are called in enabled + * context or a bust_spinlocks cleared the way for us. + */ + console_unblank(); + _machine_halt(); +} + +void machine_power_off(void) +{ + if (!in_interrupt() || oops_in_progress) + /* + * Only unblank the console if we are called in enabled + * context or a bust_spinlocks cleared the way for us. + */ + console_unblank(); + _machine_power_off(); +} + +/* + * Dummy power off function. + */ +void (*pm_power_off)(void) = machine_power_off; + +static int __init early_parse_mem(char *p) +{ + memory_end = memparse(p, &p); + memory_end_set = 1; + return 0; +} +early_param("mem", early_parse_mem); + +static int __init parse_vmalloc(char *arg) +{ + if (!arg) + return -EINVAL; + VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK; + return 0; +} +early_param("vmalloc", parse_vmalloc); + +unsigned int user_mode = HOME_SPACE_MODE; +EXPORT_SYMBOL_GPL(user_mode); + +static int set_amode_primary(void) +{ + psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME; + psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY; +#ifdef CONFIG_COMPAT + psw32_user_bits = + (psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY; +#endif + + if (MACHINE_HAS_MVCOS) { + memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess)); + return 1; + } else { + memcpy(&uaccess, &uaccess_pt, sizeof(uaccess)); + return 0; + } +} + +/* + * Switch kernel/user addressing modes? 
+ */ +static int __init early_parse_switch_amode(char *p) +{ + user_mode = PRIMARY_SPACE_MODE; + return 0; +} +early_param("switch_amode", early_parse_switch_amode); + +static int __init early_parse_user_mode(char *p) +{ + if (p && strcmp(p, "primary") == 0) + user_mode = PRIMARY_SPACE_MODE; + else if (!p || strcmp(p, "home") == 0) + user_mode = HOME_SPACE_MODE; + else + return 1; + return 0; +} +early_param("user_mode", early_parse_user_mode); + +static void setup_addressing_mode(void) +{ + if (user_mode == PRIMARY_SPACE_MODE) { + if (set_amode_primary()) + pr_info("Address spaces switched, " + "mvcos available\n"); + else + pr_info("Address spaces switched, " + "mvcos not available\n"); + } +} + +void *restart_stack __attribute__((__section__(".data"))); + +static void __init setup_lowcore(void) +{ + struct _lowcore *lc; + + /* + * Setup lowcore for boot cpu + */ + BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096); + lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); + lc->restart_psw.mask = psw_kernel_bits; + lc->restart_psw.addr = + PSW_ADDR_AMODE | (unsigned long) restart_int_handler; + lc->external_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; + lc->external_new_psw.addr = + PSW_ADDR_AMODE | (unsigned long) ext_int_handler; + lc->svc_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; + lc->program_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; + lc->program_new_psw.addr = + PSW_ADDR_AMODE | (unsigned long) pgm_check_handler; + lc->mcck_new_psw.mask = psw_kernel_bits; + lc->mcck_new_psw.addr = + PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; + lc->io_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; + lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; + lc->clock_comparator = -1ULL; + lc->kernel_stack = ((unsigned long) 
&init_thread_union) + THREAD_SIZE; + lc->async_stack = (unsigned long) + __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE; + lc->panic_stack = (unsigned long) + __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE; + lc->current_task = (unsigned long) init_thread_union.thread_info.task; + lc->thread_info = (unsigned long) &init_thread_union; + lc->machine_flags = S390_lowcore.machine_flags; + lc->stfl_fac_list = S390_lowcore.stfl_fac_list; + memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + lc->extended_save_area_addr = (__u32) + __alloc_bootmem_low(PAGE_SIZE, PAGE_SIZE, 0); + /* enable extended save area */ + __ctl_set_bit(14, 29); + } +#else + lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; +#endif + lc->sync_enter_timer = S390_lowcore.sync_enter_timer; + lc->async_enter_timer = S390_lowcore.async_enter_timer; + lc->exit_timer = S390_lowcore.exit_timer; + lc->user_timer = S390_lowcore.user_timer; + lc->system_timer = S390_lowcore.system_timer; + lc->steal_timer = S390_lowcore.steal_timer; + lc->last_update_timer = S390_lowcore.last_update_timer; + lc->last_update_clock = S390_lowcore.last_update_clock; + lc->ftrace_func = S390_lowcore.ftrace_func; + + restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); + restart_stack += ASYNC_SIZE; + + /* + * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant + * restart data to the absolute zero lowcore. This is necesary if + * PSW restart is done on an offline CPU that has lowcore zero. 
+ */ + lc->restart_stack = (unsigned long) restart_stack; + lc->restart_fn = (unsigned long) do_restart; + lc->restart_data = 0; + lc->restart_source = -1UL; + memcpy(&S390_lowcore.restart_stack, &lc->restart_stack, + 4*sizeof(unsigned long)); + copy_to_absolute_zero(&S390_lowcore.restart_psw, + &lc->restart_psw, sizeof(psw_t)); + + set_prefix((u32)(unsigned long) lc); + lowcore_ptr[0] = lc; +} + +static struct resource code_resource = { + .name = "Kernel code", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource data_resource = { + .name = "Kernel data", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource __initdata *standard_resources[] = { + &code_resource, + &data_resource, + &bss_resource, +}; + +static void __init setup_resources(void) +{ + struct resource *res, *std_res, *sub_res; + int i, j; + + code_resource.start = (unsigned long) &_text; + code_resource.end = (unsigned long) &_etext - 1; + data_resource.start = (unsigned long) &_etext; + data_resource.end = (unsigned long) &_edata - 1; + bss_resource.start = (unsigned long) &__bss_start; + bss_resource.end = (unsigned long) &__bss_stop - 1; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (!memory_chunk[i].size) + continue; + if (memory_chunk[i].type == CHUNK_OLDMEM || + memory_chunk[i].type == CHUNK_CRASHK) + continue; + res = alloc_bootmem_low(sizeof(*res)); + res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + switch (memory_chunk[i].type) { + case CHUNK_READ_WRITE: + case CHUNK_CRASHK: + res->name = "System RAM"; + break; + case CHUNK_READ_ONLY: + res->name = "System ROM"; + res->flags |= IORESOURCE_READONLY; + break; + default: + res->name = "reserved"; + } + res->start = memory_chunk[i].addr; + res->end = res->start + memory_chunk[i].size - 1; + request_resource(&iomem_resource, res); + + for (j = 0; j < ARRAY_SIZE(standard_resources); j++) { + 
std_res = standard_resources[j]; + if (std_res->start < res->start || + std_res->start > res->end) + continue; + if (std_res->end > res->end) { + sub_res = alloc_bootmem_low(sizeof(*sub_res)); + *sub_res = *std_res; + sub_res->end = res->end; + std_res->start = res->end + 1; + request_resource(res, sub_res); + } else { + request_resource(res, std_res); + } + } + } +} + +unsigned long real_memory_size; +EXPORT_SYMBOL_GPL(real_memory_size); + +static void __init setup_memory_end(void) +{ + unsigned long vmax, vmalloc_size, tmp; + int i; + + +#ifdef CONFIG_ZFCPDUMP + if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) { + memory_end = ZFCPDUMP_HSA_SIZE; + memory_end_set = 1; + } +#endif + real_memory_size = 0; + memory_end &= PAGE_MASK; + + /* + * Make sure all chunks are MAX_ORDER aligned so we don't need the + * extra checks that HOLES_IN_ZONE would require. + */ + for (i = 0; i < MEMORY_CHUNKS; i++) { + unsigned long start, end; + struct mem_chunk *chunk; + unsigned long align; + + chunk = &memory_chunk[i]; + align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1); + start = (chunk->addr + align - 1) & ~(align - 1); + end = (chunk->addr + chunk->size) & ~(align - 1); + if (start >= end) + memset(chunk, 0, sizeof(*chunk)); + else { + chunk->addr = start; + chunk->size = end - start; + } + real_memory_size = max(real_memory_size, + chunk->addr + chunk->size); + } + + /* Choose kernel address space layout: 2, 3, or 4 levels. */ +#ifdef CONFIG_64BIT + vmalloc_size = VMALLOC_END ?: 128UL << 30; + tmp = (memory_end ?: real_memory_size) / PAGE_SIZE; + tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size; + if (tmp <= (1UL << 42)) + vmax = 1UL << 42; /* 3-level kernel page table */ + else + vmax = 1UL << 53; /* 4-level kernel page table */ +#else + vmalloc_size = VMALLOC_END ?: 96UL << 20; + vmax = 1UL << 31; /* 2-level kernel page table */ +#endif + /* vmalloc area is at the end of the kernel address space. 
*/ + VMALLOC_END = vmax; + VMALLOC_START = vmax - vmalloc_size; + + /* Split remaining virtual space between 1:1 mapping & vmemmap array */ + tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); + tmp = VMALLOC_START - tmp * sizeof(struct page); + tmp &= ~((vmax >> 11) - 1); /* align to page table level */ + tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); + vmemmap = (struct page *) tmp; + + /* Take care that memory_end is set and <= vmemmap */ + memory_end = min(memory_end ?: real_memory_size, tmp); + + /* Fixup memory chunk array to fit into 0..memory_end */ + for (i = 0; i < MEMORY_CHUNKS; i++) { + struct mem_chunk *chunk = &memory_chunk[i]; + + if (chunk->addr >= memory_end) { + memset(chunk, 0, sizeof(*chunk)); + continue; + } + if (chunk->addr + chunk->size > memory_end) + chunk->size = memory_end - chunk->addr; + } +} + +static void __init setup_vmcoreinfo(void) +{ +#ifdef CONFIG_KEXEC + unsigned long ptr = paddr_vmcoreinfo_note(); + + copy_to_absolute_zero(&S390_lowcore.vmcore_info, &ptr, sizeof(ptr)); +#endif +} + +#ifdef CONFIG_CRASH_DUMP + +/* + * Find suitable location for crashkernel memory + */ +static unsigned long __init find_crash_base(unsigned long crash_size, + char **msg) +{ + unsigned long crash_base; + struct mem_chunk *chunk; + int i; + + if (memory_chunk[0].size < crash_size) { + *msg = "first memory chunk must be at least crashkernel size"; + return 0; + } + if (OLDMEM_BASE && crash_size == OLDMEM_SIZE) + return OLDMEM_BASE; + + for (i = MEMORY_CHUNKS - 1; i >= 0; i--) { + chunk = &memory_chunk[i]; + if (chunk->size == 0) + continue; + if (chunk->type != CHUNK_READ_WRITE) + continue; + if (chunk->size < crash_size) + continue; + crash_base = (chunk->addr + chunk->size) - crash_size; + if (crash_base < crash_size) + continue; + if (crash_base < ZFCPDUMP_HSA_SIZE_MAX) + continue; + if (crash_base < (unsigned long) INITRD_START + INITRD_SIZE) + continue; + return crash_base; + } + *msg = "no suitable area found"; + return 0; +} + +/* + * Check if 
crash_base and crash_size is valid + */ +static int __init verify_crash_base(unsigned long crash_base, + unsigned long crash_size, + char **msg) +{ + struct mem_chunk *chunk; + int i; + + /* + * Because we do the swap to zero, we must have at least 'crash_size' + * bytes free space before crash_base + */ + if (crash_size > crash_base) { + *msg = "crashkernel offset must be greater than size"; + return -EINVAL; + } + + /* First memory chunk must be at least crash_size */ + if (memory_chunk[0].size < crash_size) { + *msg = "first memory chunk must be at least crashkernel size"; + return -EINVAL; + } + /* Check if we fit into the respective memory chunk */ + for (i = 0; i < MEMORY_CHUNKS; i++) { + chunk = &memory_chunk[i]; + if (chunk->size == 0) + continue; + if (crash_base < chunk->addr) + continue; + if (crash_base >= chunk->addr + chunk->size) + continue; + /* we have found the memory chunk */ + if (crash_base + crash_size > chunk->addr + chunk->size) { + *msg = "selected memory chunk is too small for " + "crashkernel memory"; + return -EINVAL; + } + return 0; + } + *msg = "invalid memory range specified"; + return -EINVAL; +} + +/* + * Reserve kdump memory by creating a memory hole in the mem_chunk array + */ +static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size, + int type) +{ + create_mem_hole(memory_chunk, addr, size, type); +} + +/* + * When kdump is enabled, we have to ensure that no memory from + * the area [0 - crashkernel memory size] and + * [crashk_res.start - crashk_res.end] is set offline. 
+ */ +static int kdump_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct memory_notify *arg = data; + + if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) + return NOTIFY_BAD; + if (arg->start_pfn > PFN_DOWN(crashk_res.end)) + return NOTIFY_OK; + if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) + return NOTIFY_OK; + return NOTIFY_BAD; +} + +static struct notifier_block kdump_mem_nb = { + .notifier_call = kdump_mem_notifier, +}; + +#endif + +/* + * Make sure that oldmem, where the dump is stored, is protected + */ +static void reserve_oldmem(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (!OLDMEM_BASE) + return; + + reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM); + reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE, + CHUNK_OLDMEM); + if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size) + saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; + else + saved_max_pfn = PFN_DOWN(real_memory_size) - 1; +#endif +} + +/* + * Reserve memory for kdump kernel to be loaded with kexec + */ +static void __init reserve_crashkernel(void) +{ +#ifdef CONFIG_CRASH_DUMP + unsigned long long crash_base, crash_size; + char *msg = NULL; + int rc; + + rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + &crash_base); + if (rc || crash_size == 0) + return; + crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); + crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); + if (register_memory_notifier(&kdump_mem_nb)) + return; + if (!crash_base) + crash_base = find_crash_base(crash_size, &msg); + if (!crash_base) { + pr_info("crashkernel reservation failed: %s\n", msg); + unregister_memory_notifier(&kdump_mem_nb); + return; + } + if (verify_crash_base(crash_base, crash_size, &msg)) { + pr_info("crashkernel reservation failed: %s\n", msg); + unregister_memory_notifier(&kdump_mem_nb); + return; + } + if (!OLDMEM_BASE && MACHINE_IS_VM) + diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); + 
crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); + reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK); + pr_info("Reserving %lluMB of memory at %lluMB " + "for crashkernel (System RAM: %luMB)\n", + crash_size >> 20, crash_base >> 20, memory_end >> 20); + os_info_crashkernel_add(crash_base, crash_size); +#endif +} + +static void __init setup_memory(void) +{ + unsigned long bootmap_size; + unsigned long start_pfn, end_pfn; + int i; + + /* + * partially used pages are not usable - thus + * we are rounding upwards: + */ + start_pfn = PFN_UP(__pa(&_end)); + end_pfn = max_pfn = PFN_DOWN(memory_end); + +#ifdef CONFIG_BLK_DEV_INITRD + /* + * Move the initrd in case the bitmap of the bootmem allocater + * would overwrite it. + */ + + if (INITRD_START && INITRD_SIZE) { + unsigned long bmap_size; + unsigned long start; + + bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1); + bmap_size = PFN_PHYS(bmap_size); + + if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) { + start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE; + +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) { + /* Move initrd behind kdump oldmem */ + if (start + INITRD_SIZE > OLDMEM_BASE && + start < OLDMEM_BASE + OLDMEM_SIZE) + start = OLDMEM_BASE + OLDMEM_SIZE; + } +#endif + if (start + INITRD_SIZE > memory_end) { + pr_err("initrd extends beyond end of " + "memory (0x%08lx > 0x%08lx) " + "disabling initrd\n", + start + INITRD_SIZE, memory_end); + INITRD_START = INITRD_SIZE = 0; + } else { + pr_info("Moving initrd (0x%08lx -> " + "0x%08lx, size: %ld)\n", + INITRD_START, start, INITRD_SIZE); + memmove((void *) start, (void *) INITRD_START, + INITRD_SIZE); + INITRD_START = start; + } + } + } +#endif + + /* + * Initialize the boot-time allocator + */ + bootmap_size = init_bootmem(start_pfn, end_pfn); + + /* + * Register RAM areas with the bootmem allocator. 
+ */ + + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + unsigned long start_chunk, end_chunk, pfn; + + if (memory_chunk[i].type != CHUNK_READ_WRITE && + memory_chunk[i].type != CHUNK_CRASHK) + continue; + start_chunk = PFN_DOWN(memory_chunk[i].addr); + end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); + end_chunk = min(end_chunk, end_pfn); + if (start_chunk >= end_chunk) + continue; + memblock_add_node(PFN_PHYS(start_chunk), + PFN_PHYS(end_chunk - start_chunk), 0); + pfn = max(start_chunk, start_pfn); + for (; pfn < end_chunk; pfn++) + page_set_storage_key(PFN_PHYS(pfn), + PAGE_DEFAULT_KEY, 0); + } + + psw_set_key(PAGE_DEFAULT_KEY); + + free_bootmem_with_active_regions(0, max_pfn); + + /* + * Reserve memory used for lowcore/command line/kernel image. + */ + reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT); + reserve_bootmem((unsigned long)_stext, + PFN_PHYS(start_pfn) - (unsigned long)_stext, + BOOTMEM_DEFAULT); + /* + * Reserve the bootmem bitmap itself as well. We do this in two + * steps (first step was init_bootmem()) because this catches + * the (very unlikely) case of us accidentally initializing the + * bootmem allocator with an invalid RAM area. 
+ */ + reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size, + BOOTMEM_DEFAULT); + +#ifdef CONFIG_CRASH_DUMP + if (crashk_res.start) + reserve_bootmem(crashk_res.start, + crashk_res.end - crashk_res.start + 1, + BOOTMEM_DEFAULT); + if (is_kdump_kernel()) + reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE, + PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT); +#endif +#ifdef CONFIG_BLK_DEV_INITRD + if (INITRD_START && INITRD_SIZE) { + if (INITRD_START + INITRD_SIZE <= memory_end) { + reserve_bootmem(INITRD_START, INITRD_SIZE, + BOOTMEM_DEFAULT); + initrd_start = INITRD_START; + initrd_end = initrd_start + INITRD_SIZE; + } else { + /* + * initrd_start has not been assigned on this path; report + * the end address derived from INITRD_START, which is the + * value that was compared against memory_end above. + */ + pr_err("initrd extends beyond end of " + "memory (0x%08lx > 0x%08lx) " + "disabling initrd\n", + INITRD_START + INITRD_SIZE, memory_end); + initrd_start = initrd_end = 0; + } + } +#endif +} + +/* + * Setup hardware capabilities. + */ +static void __init setup_hwcaps(void) +{ + static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; + struct cpuid cpu_id; + int i; + + /* + * The store facility list bits numbers as found in the principles + * of operation are numbered with bit 1UL<<31 as number 0 to + * bit 1UL<<0 as number 31. + * Bit 0: instructions named N3, "backported" to esa-mode + * Bit 2: z/Architecture mode is active + * Bit 7: the store-facility-list-extended facility is installed + * Bit 17: the message-security assist is installed + * Bit 19: the long-displacement facility is installed + * Bit 21: the extended-immediate facility is installed + * Bit 22: extended-translation facility 3 is installed + * Bit 30: extended-translation facility 3 enhancement facility + * These get translated to: + * HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1, + * HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3, + * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and + * HWCAP_S390_ETF3EH bit 8 (22 && 30). 
+ */ + for (i = 0; i < 6; i++) + if (test_facility(stfl_bits[i])) + elf_hwcap |= 1UL << i; + + if (test_facility(22) && test_facility(30)) + elf_hwcap |= HWCAP_S390_ETF3EH; + + /* + * Check for additional facilities with store-facility-list-extended. + * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0 + * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information + * as stored by stfl, bits 32-xxx contain additional facilities. + * How many facility words are stored depends on the number of + * doublewords passed to the instruction. The additional facilities + * are: + * Bit 42: decimal floating point facility is installed + * Bit 44: perform floating point operation facility is installed + * translated to: + * HWCAP_S390_DFP bit 6 (42 && 44). + */ + if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44)) + elf_hwcap |= HWCAP_S390_DFP; + + /* + * Huge page support HWCAP_S390_HPAGE is bit 7. + */ + if (MACHINE_HAS_HPAGE) + elf_hwcap |= HWCAP_S390_HPAGE; + + /* + * 64-bit register support for 31-bit processes + * HWCAP_S390_HIGH_GPRS is bit 9. + */ + elf_hwcap |= HWCAP_S390_HIGH_GPRS; + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x9672: +#if !defined(CONFIG_64BIT) + default: /* Use "g5" as default for 31 bit kernels. */ +#endif + strcpy(elf_platform, "g5"); + break; + case 0x2064: + case 0x2066: +#if defined(CONFIG_64BIT) + default: /* Use "z900" as default for 64 bit kernels. */ +#endif + strcpy(elf_platform, "z900"); + break; + case 0x2084: + case 0x2086: + strcpy(elf_platform, "z990"); + break; + case 0x2094: + case 0x2096: + strcpy(elf_platform, "z9-109"); + break; + case 0x2097: + case 0x2098: + strcpy(elf_platform, "z10"); + break; + case 0x2817: + case 0x2818: + strcpy(elf_platform, "z196"); + break; + } +} + +/* + * Setup function called from init/main.c just after the banner + * was printed. 
+ */ + +void __init setup_arch(char **cmdline_p) +{ + /* + * print what head.S has found out about the machine + */ +#ifndef CONFIG_64BIT + if (MACHINE_IS_VM) + pr_info("Linux is running as a z/VM " + "guest operating system in 31-bit mode\n"); + else if (MACHINE_IS_LPAR) + pr_info("Linux is running natively in 31-bit mode\n"); + if (MACHINE_HAS_IEEE) + pr_info("The hardware system has IEEE compatible " + "floating point units\n"); + else + pr_info("The hardware system has no IEEE compatible " + "floating point units\n"); +#else /* CONFIG_64BIT */ + if (MACHINE_IS_VM) + pr_info("Linux is running as a z/VM " + "guest operating system in 64-bit mode\n"); + else if (MACHINE_IS_KVM) + pr_info("Linux is running under KVM in 64-bit mode\n"); + else if (MACHINE_IS_LPAR) + pr_info("Linux is running natively in 64-bit mode\n"); +#endif /* CONFIG_64BIT */ + + /* Have one command line that is parsed and saved in /proc/cmdline */ + /* boot_command_line has been already set up in early.c */ + *cmdline_p = boot_command_line; + + ROOT_DEV = Root_RAM0; + + init_mm.start_code = PAGE_OFFSET; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + if (MACHINE_HAS_MVCOS) + memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess)); + else + memcpy(&uaccess, &uaccess_std, sizeof(uaccess)); + + parse_early_param(); + + os_info_init(); + setup_ipl(); + setup_memory_end(); + setup_addressing_mode(); + reserve_oldmem(); + reserve_crashkernel(); + setup_memory(); + setup_resources(); + setup_vmcoreinfo(); + setup_lowcore(); + + cpu_init(); + s390_init_cpu_topology(); + + /* + * Setup capabilities (ELF_HWCAP & ELF_PLATFORM). + */ + setup_hwcaps(); + + /* + * Create kernel page tables and switch to virtual addressing. 
+ */ + paging_init(); + + /* Setup default console */ + conmode_default(); + set_preferred_console(); + + /* Setup zfcpdump support */ + setup_zfcpdump(console_devno); +} diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c new file mode 100644 index 00000000..f7582b27 --- /dev/null +++ b/arch/s390/kernel/signal.c @@ -0,0 +1,522 @@ +/* + * arch/s390/kernel/signal.c + * + * Copyright (C) IBM Corp. 1999,2006 + * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + * + * Based on Intel version + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/tty.h> +#include <linux/personality.h> +#include <linux/binfmts.h> +#include <linux/tracehook.h> +#include <linux/syscalls.h> +#include <linux/compat.h> +#include <asm/ucontext.h> +#include <asm/uaccess.h> +#include <asm/lowcore.h> +#include <asm/switch_to.h> +#include "entry.h" + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + + +typedef struct +{ + __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; + struct sigcontext sc; + _sigregs sregs; + int signo; + __u8 retcode[S390_SYSCALL_SIZE]; +} sigframe; + +typedef struct +{ + __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; + __u8 retcode[S390_SYSCALL_SIZE]; + struct siginfo info; + struct ucontext uc; +} rt_sigframe; + +/* + * Atomically swap in the new signal mask, and wait for a signal. 
+ */ +SYSCALL_DEFINE3(sigsuspend, int, history0, int, history1, old_sigset_t, mask) +{ + sigset_t blocked; + + current->saved_sigmask = current->blocked; + mask &= _BLOCKABLE; + siginitset(&blocked, mask); + set_current_blocked(&blocked); + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + set_restore_sigmask(); + return -ERESTARTNOHAND; +} + +SYSCALL_DEFINE3(sigaction, int, sig, const struct old_sigaction __user *, act, + struct old_sigaction __user *, oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) + return -EFAULT; + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) + return -EFAULT; + } + + return ret; +} + +SYSCALL_DEFINE2(sigaltstack, const stack_t __user *, uss, + stack_t __user *, uoss) +{ + struct pt_regs *regs = task_pt_regs(current); + return do_sigaltstack(uss, uoss, regs->gprs[15]); +} + +/* Returns non-zero on fault. */ +static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) +{ + _sigregs user_sregs; + + save_access_regs(current->thread.acrs); + + /* Copy a 'clean' PSW mask to the user to avoid leaking + information about whether PER is currently on. 
*/ + user_sregs.regs.psw.mask = psw_user_bits | + (regs->psw.mask & PSW_MASK_USER); + user_sregs.regs.psw.addr = regs->psw.addr; + memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs)); + memcpy(&user_sregs.regs.acrs, current->thread.acrs, + sizeof(sregs->regs.acrs)); + /* + * We have to store the fp registers to current->thread.fp_regs + * to merge them with the emulated registers. + */ + save_fp_regs(&current->thread.fp_regs); + memcpy(&user_sregs.fpregs, &current->thread.fp_regs, + sizeof(s390_fp_regs)); + return __copy_to_user(sregs, &user_sregs, sizeof(_sigregs)); +} + +/* Returns positive number on error */ +static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) +{ + int err; + _sigregs user_sregs; + + /* Always make any pending restarted system call return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs)); + if (err) + return err; + /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. 
*/ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (user_sregs.regs.psw.mask & PSW_MASK_USER); + /* Check for invalid amode */ + if (regs->psw.mask & PSW_MASK_EA) + regs->psw.mask |= PSW_MASK_BA; + regs->psw.addr = user_sregs.regs.psw.addr; + memcpy(&regs->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); + memcpy(&current->thread.acrs, &user_sregs.regs.acrs, + sizeof(sregs->regs.acrs)); + restore_access_regs(current->thread.acrs); + + memcpy(&current->thread.fp_regs, &user_sregs.fpregs, + sizeof(s390_fp_regs)); + current->thread.fp_regs.fpc &= FPC_VALID_MASK; + + restore_fp_regs(&current->thread.fp_regs); + clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ + return 0; +} + +SYSCALL_DEFINE0(sigreturn) +{ + struct pt_regs *regs = task_pt_regs(current); + sigframe __user *frame = (sigframe __user *)regs->gprs[15]; + sigset_t set; + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) + goto badframe; + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + if (restore_sigregs(regs, &frame->sregs)) + goto badframe; + return regs->gprs[2]; +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +SYSCALL_DEFINE0(rt_sigreturn) +{ + struct pt_regs *regs = task_pt_regs(current); + rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15]; + sigset_t set; + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + if (restore_sigregs(regs, &frame->uc.uc_mcontext)) + goto badframe; + if (do_sigaltstack(&frame->uc.uc_stack, NULL, + regs->gprs[15]) == -EFAULT) + goto badframe; + return regs->gprs[2]; +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +/* + * Set up a signal frame. + */ + + +/* + * Determine which stack to use.. 
+ */ +static inline void __user * +get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +{ + unsigned long sp; + + /* Default to using normal stack */ + sp = regs->gprs[15]; + + /* Overflow on alternate signal stack gives SIGSEGV. */ + if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL)) + return (void __user *) -1UL; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (! sas_ss_flags(sp)) + sp = current->sas_ss_sp + current->sas_ss_size; + } + + /* This is the legacy signal stack switching. */ + else if (!user_mode(regs) && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) { + sp = (unsigned long) ka->sa.sa_restorer; + } + + return (void __user *)((sp - frame_size) & -8ul); +} + +static inline int map_signal(int sig) +{ + if (current_thread_info()->exec_domain + && current_thread_info()->exec_domain->signal_invmap + && sig < 32) + return current_thread_info()->exec_domain->signal_invmap[sig]; + else + return sig; +} + +static int setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) +{ + sigframe __user *frame; + + frame = get_sigframe(ka, regs, sizeof(sigframe)); + if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe))) + goto give_sigsegv; + + if (frame == (void __user *) -1UL) + goto give_sigsegv; + + if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE)) + goto give_sigsegv; + + if (save_sigregs(regs, &frame->sregs)) + goto give_sigsegv; + if (__put_user(&frame->sregs, &frame->sc.sregs)) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. 
*/ + if (ka->sa.sa_flags & SA_RESTORER) { + regs->gprs[14] = (unsigned long) + ka->sa.sa_restorer | PSW_ADDR_AMODE; + } else { + regs->gprs[14] = (unsigned long) + frame->retcode | PSW_ADDR_AMODE; + if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, + (u16 __user *)(frame->retcode))) + goto give_sigsegv; + } + + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (addr_t __user *) frame)) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->gprs[15] = (unsigned long) frame; + regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ + regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; + + regs->gprs[2] = map_signal(sig); + regs->gprs[3] = (unsigned long) &frame->sc; + + /* We forgot to include these in the sigcontext. + To avoid breaking binary compatibility, they are passed as args. */ + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = task_thread_info(current)->last_break; + } + + /* Place signal number on stack to allow backtrace from handler. */ + if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + goto give_sigsegv; + return 0; + +give_sigsegv: + force_sigsegv(sig, current); + return -EFAULT; +} + +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs * regs) +{ + int err = 0; + rt_sigframe __user *frame; + + frame = get_sigframe(ka, regs, sizeof(rt_sigframe)); + if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe))) + goto give_sigsegv; + + if (frame == (void __user *) -1UL) + goto give_sigsegv; + + if (copy_siginfo_to_user(&frame->info, info)) + goto give_sigsegv; + + /* Create the ucontext. 
*/ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(NULL, &frame->uc.uc_link); + err |= __put_user((void __user *)current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->gprs[15]), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= save_sigregs(regs, &frame->uc.uc_mcontext); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->sa.sa_flags & SA_RESTORER) { + regs->gprs[14] = (unsigned long) + ka->sa.sa_restorer | PSW_ADDR_AMODE; + } else { + regs->gprs[14] = (unsigned long) + frame->retcode | PSW_ADDR_AMODE; + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, + (u16 __user *)(frame->retcode))) + goto give_sigsegv; + } + + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (addr_t __user *) frame)) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->gprs[15] = (unsigned long) frame; + regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ + regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; + + regs->gprs[2] = map_signal(sig); + regs->gprs[3] = (unsigned long) &frame->info; + regs->gprs[4] = (unsigned long) &frame->uc; + regs->gprs[5] = task_thread_info(current)->last_break; + return 0; + +give_sigsegv: + force_sigsegv(sig, current); + return -EFAULT; +} + +static int handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs) +{ + int ret; + + /* Set up the stack frame */ + if (ka->sa.sa_flags & SA_SIGINFO) + ret = setup_rt_frame(sig, ka, info, oldset, regs); + else + ret = setup_frame(sig, ka, oldset, regs); + if (ret) + return ret; + block_sigmask(ka, sig); + return 0; +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. 
Thus you cannot kill init even with a SIGKILL even by + * mistake. + * + * Note that we go through the signals twice: once to check the signals that + * the kernel can handle, and then we build all the user-level signal handling + * stack-frames in one go after that. + */ +void do_signal(struct pt_regs *regs) +{ + siginfo_t info; + int signr; + struct k_sigaction ka; + sigset_t *oldset; + + /* + * We want the common case to go fast, which + * is why we may in certain cases get here from + * kernel mode. Just return without doing anything + * if so. + */ + if (!user_mode(regs)) + return; + + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = &current->saved_sigmask; + else + oldset = &current->blocked; + + /* + * Get signal to deliver. When running under ptrace, at this point + * the debugger may change all our registers, including the system + * call information. + */ + current_thread_info()->system_call = + test_thread_flag(TIF_SYSCALL) ? regs->int_code : 0; + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + + if (signr > 0) { + /* Whee! Actually deliver the signal. */ + if (current_thread_info()->system_call) { + regs->int_code = current_thread_info()->system_call; + /* Check for system call restarting. */ + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->gprs[2] = -EINTR; + break; + case -ERESTARTSYS: + if (!(ka.sa.sa_flags & SA_RESTART)) { + regs->gprs[2] = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->gprs[2] = regs->orig_gpr2; + regs->psw.addr = + __rewind_psw(regs->psw, + regs->int_code >> 16); + break; + } + } + /* No longer in a system call */ + clear_thread_flag(TIF_SYSCALL); + + if ((is_compat_task() ? 
+ handle_signal32(signr, &ka, &info, oldset, regs) : + handle_signal(signr, &ka, &info, oldset, regs)) == 0) { + /* + * A signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TIF_RESTORE_SIGMASK flag. + */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + clear_thread_flag(TIF_RESTORE_SIGMASK); + + /* + * Let tracing know that we've done the handler setup. + */ + tracehook_signal_handler(signr, &info, &ka, regs, + test_thread_flag(TIF_SINGLE_STEP)); + } + return; + } + + /* No handlers present - check for system call restart */ + clear_thread_flag(TIF_SYSCALL); + if (current_thread_info()->system_call) { + regs->int_code = current_thread_info()->system_call; + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + /* Restart with sys_restart_syscall */ + regs->int_code = __NR_restart_syscall; + /* fallthrough */ + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* Restart system call with magic TIF bit. */ + regs->gprs[2] = regs->orig_gpr2; + set_thread_flag(TIF_SYSCALL); + break; + } + } + + /* + * If there's no signal to deliver, we just put the saved sigmask back. + */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); + } +} + +void do_notify_resume(struct pt_regs *regs) +{ + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); +} diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c new file mode 100644 index 00000000..1f772276 --- /dev/null +++ b/arch/s390/kernel/smp.c @@ -0,0 +1,1146 @@ +/* + * SMP related functions + * + * Copyright IBM Corp. 
1999,2012 + * Author(s): Denis Joseph Barrow, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + * + * based on other smp stuff by + * (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net> + * (c) 1998 Ingo Molnar + * + * The code outside of smp.c uses logical cpu numbers, only smp.c does + * the translation of logical to physical cpu ids. All new code that + * operates on physical cpu numbers needs to go into smp.c. + */ + +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/workqueue.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/err.h> +#include <linux/spinlock.h> +#include <linux/kernel_stat.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/irqflags.h> +#include <linux/cpu.h> +#include <linux/slab.h> +#include <linux/crash_dump.h> +#include <asm/asm-offsets.h> +#include <asm/switch_to.h> +#include <asm/facility.h> +#include <asm/ipl.h> +#include <asm/setup.h> +#include <asm/irq.h> +#include <asm/tlbflush.h> +#include <asm/timer.h> +#include <asm/lowcore.h> +#include <asm/sclp.h> +#include <asm/vdso.h> +#include <asm/debug.h> +#include <asm/os_info.h> +#include "entry.h" + +enum { + sigp_sense = 1, + sigp_external_call = 2, + sigp_emergency_signal = 3, + sigp_start = 4, + sigp_stop = 5, + sigp_restart = 6, + sigp_stop_and_store_status = 9, + sigp_initial_cpu_reset = 11, + sigp_cpu_reset = 12, + sigp_set_prefix = 13, + sigp_store_status_at_address = 14, + sigp_store_extended_status_at_address = 15, + sigp_set_architecture = 18, + sigp_conditional_emergency_signal = 19, + sigp_sense_running = 21, +}; + +enum { + sigp_order_code_accepted = 0, + sigp_status_stored = 1, + sigp_busy = 2, + sigp_not_operational = 3, +}; + +enum { + ec_schedule = 0, + ec_call_function, + ec_call_function_single, + ec_stop_cpu, +}; + +enum { + CPU_STATE_STANDBY, + CPU_STATE_CONFIGURED, +}; + +struct pcpu { + struct cpu cpu; + struct 
task_struct *idle; /* idle process for the cpu */ + struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ + unsigned long async_stack; /* async stack for the cpu */ + unsigned long panic_stack; /* panic stack for the cpu */ + unsigned long ec_mask; /* bit mask for ec_xxx functions */ + int state; /* physical cpu state */ + u32 status; /* last status received via sigp */ + u16 address; /* physical cpu address */ +}; + +static u8 boot_cpu_type; +static u16 boot_cpu_address; +static struct pcpu pcpu_devices[NR_CPUS]; + +DEFINE_MUTEX(smp_cpu_state_mutex); + +/* + * Signal processor helper functions. + */ +static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) +{ + register unsigned int reg1 asm ("1") = parm; + int cc; + + asm volatile( + " sigp %1,%2,0(%3)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); + if (status && cc == 1) + *status = reg1; + return cc; +} + +static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) +{ + int cc; + + while (1) { + cc = __pcpu_sigp(addr, order, parm, status); + if (cc != sigp_busy) + return cc; + cpu_relax(); + } +} + +static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm) +{ + int cc, retry; + + for (retry = 0; ; retry++) { + cc = __pcpu_sigp(pcpu->address, order, parm, &pcpu->status); + if (cc != sigp_busy) + break; + if (retry >= 3) + udelay(10); + } + return cc; +} + +static inline int pcpu_stopped(struct pcpu *pcpu) +{ + if (__pcpu_sigp(pcpu->address, sigp_sense, + 0, &pcpu->status) != sigp_status_stored) + return 0; + /* Check for stopped and check stop state */ + return !!(pcpu->status & 0x50); +} + +static inline int pcpu_running(struct pcpu *pcpu) +{ + if (__pcpu_sigp(pcpu->address, sigp_sense_running, + 0, &pcpu->status) != sigp_status_stored) + return 1; + /* Check for running status */ + return !(pcpu->status & 0x400); +} + +/* + * Find struct pcpu by cpu address. 
+ */ +static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address) +{ + int cpu; + + for_each_cpu(cpu, mask) + if (pcpu_devices[cpu].address == address) + return pcpu_devices + cpu; + return NULL; +} + +static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) +{ + int order; + + set_bit(ec_bit, &pcpu->ec_mask); + order = pcpu_running(pcpu) ? + sigp_external_call : sigp_emergency_signal; + pcpu_sigp_retry(pcpu, order, 0); +} + +static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) +{ + struct _lowcore *lc; + + if (pcpu != &pcpu_devices[0]) { + pcpu->lowcore = (struct _lowcore *) + __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); + pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + pcpu->panic_stack = __get_free_page(GFP_KERNEL); + if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack) + goto out; + } + lc = pcpu->lowcore; + memcpy(lc, &S390_lowcore, 512); + memset((char *) lc + 512, 0, sizeof(*lc) - 512); + lc->async_stack = pcpu->async_stack + ASYNC_SIZE; + lc->panic_stack = pcpu->panic_stack + PAGE_SIZE; + lc->cpu_nr = cpu; +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL); + if (!lc->extended_save_area_addr) + goto out; + } +#else + if (vdso_alloc_per_cpu(lc)) + goto out; +#endif + lowcore_ptr[cpu] = lc; + pcpu_sigp_retry(pcpu, sigp_set_prefix, (u32)(unsigned long) lc); + return 0; +out: + if (pcpu != &pcpu_devices[0]) { + free_page(pcpu->panic_stack); + free_pages(pcpu->async_stack, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); + } + return -ENOMEM; +} + +static void pcpu_free_lowcore(struct pcpu *pcpu) +{ + pcpu_sigp_retry(pcpu, sigp_set_prefix, 0); + lowcore_ptr[pcpu - pcpu_devices] = NULL; +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + struct _lowcore *lc = pcpu->lowcore; + + free_page((unsigned long) lc->extended_save_area_addr); + lc->extended_save_area_addr = 0; + } +#else + vdso_free_per_cpu(pcpu->lowcore); +#endif + 
if (pcpu != &pcpu_devices[0]) { + free_page(pcpu->panic_stack); + free_pages(pcpu->async_stack, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); + } +} + +static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) +{ + struct _lowcore *lc = pcpu->lowcore; + + atomic_inc(&init_mm.context.attach_count); + lc->cpu_nr = cpu; + lc->percpu_offset = __per_cpu_offset[cpu]; + lc->kernel_asce = S390_lowcore.kernel_asce; + lc->machine_flags = S390_lowcore.machine_flags; + lc->ftrace_func = S390_lowcore.ftrace_func; + lc->user_timer = lc->system_timer = lc->steal_timer = 0; + __ctl_store(lc->cregs_save_area, 0, 15); + save_access_regs((unsigned int *) lc->access_regs_save_area); + memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); +} + +static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) +{ + struct _lowcore *lc = pcpu->lowcore; + struct thread_info *ti = task_thread_info(tsk); + + lc->kernel_stack = (unsigned long) task_stack_page(tsk) + THREAD_SIZE; + lc->thread_info = (unsigned long) task_thread_info(tsk); + lc->current_task = (unsigned long) tsk; + lc->user_timer = ti->user_timer; + lc->system_timer = ti->system_timer; + lc->steal_timer = 0; +} + +static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) +{ + struct _lowcore *lc = pcpu->lowcore; + + lc->restart_stack = lc->kernel_stack; + lc->restart_fn = (unsigned long) func; + lc->restart_data = (unsigned long) data; + lc->restart_source = -1UL; + pcpu_sigp_retry(pcpu, sigp_restart, 0); +} + +/* + * Call function via PSW restart on pcpu and stop the current cpu. + */ +static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), + void *data, unsigned long stack) +{ + struct _lowcore *lc = pcpu->lowcore; + unsigned short this_cpu; + + __load_psw_mask(psw_kernel_bits); + this_cpu = stap(); + if (pcpu->address == this_cpu) + func(data); /* should not return */ + /* Stop target cpu (if func returns this stops the current cpu). 
*/ + pcpu_sigp_retry(pcpu, sigp_stop, 0); + /* Restart func on the target cpu and stop the current cpu. */ + lc->restart_stack = stack; + lc->restart_fn = (unsigned long) func; + lc->restart_data = (unsigned long) data; + lc->restart_source = (unsigned long) this_cpu; + asm volatile( + "0: sigp 0,%0,6 # sigp restart to target cpu\n" + " brc 2,0b # busy, try again\n" + "1: sigp 0,%1,5 # sigp stop to current cpu\n" + " brc 2,1b # busy, try again\n" + : : "d" (pcpu->address), "d" (this_cpu) : "0", "1", "cc"); + for (;;) ; +} + +/* + * Call function on an online CPU. + */ +void smp_call_online_cpu(void (*func)(void *), void *data) +{ + struct pcpu *pcpu; + + /* Use the current cpu if it is online. */ + pcpu = pcpu_find_address(cpu_online_mask, stap()); + if (!pcpu) + /* Use the first online cpu. */ + pcpu = pcpu_devices + cpumask_first(cpu_online_mask); + pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); +} + +/* + * Call function on the ipl CPU. + */ +void smp_call_ipl_cpu(void (*func)(void *), void *data) +{ + pcpu_delegate(&pcpu_devices[0], func, data, + pcpu_devices->panic_stack + PAGE_SIZE); +} + +int smp_find_processor_id(u16 address) +{ + int cpu; + + for_each_present_cpu(cpu) + if (pcpu_devices[cpu].address == address) + return cpu; + return -1; +} + +int smp_vcpu_scheduled(int cpu) +{ + return pcpu_running(pcpu_devices + cpu); +} + +void smp_yield(void) +{ + if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); +} + +void smp_yield_cpu(int cpu) +{ + if (MACHINE_HAS_DIAG9C) + asm volatile("diag %0,0,0x9c" + : : "d" (pcpu_devices[cpu].address)); + else if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); +} + +/* + * Send cpus emergency shutdown signal. This gives the cpus the + * opportunity to complete outstanding interrupts. 
+ */ +void smp_emergency_stop(cpumask_t *cpumask) +{ + u64 end; + int cpu; + + end = get_clock() + (1000000UL << 12); + for_each_cpu(cpu, cpumask) { + struct pcpu *pcpu = pcpu_devices + cpu; + set_bit(ec_stop_cpu, &pcpu->ec_mask); + while (__pcpu_sigp(pcpu->address, sigp_emergency_signal, + 0, NULL) == sigp_busy && + get_clock() < end) + cpu_relax(); + } + while (get_clock() < end) { + for_each_cpu(cpu, cpumask) + if (pcpu_stopped(pcpu_devices + cpu)) + cpumask_clear_cpu(cpu, cpumask); + if (cpumask_empty(cpumask)) + break; + cpu_relax(); + } +} + +/* + * Stop all cpus but the current one. + */ +void smp_send_stop(void) +{ + cpumask_t cpumask; + int cpu; + + /* Disable all interrupts/machine checks */ + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); + trace_hardirqs_off(); + + debug_set_critical(); + cpumask_copy(&cpumask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &cpumask); + + if (oops_in_progress) + smp_emergency_stop(&cpumask); + + /* stop all processors */ + for_each_cpu(cpu, &cpumask) { + struct pcpu *pcpu = pcpu_devices + cpu; + pcpu_sigp_retry(pcpu, sigp_stop, 0); + while (!pcpu_stopped(pcpu)) + cpu_relax(); + } +} + +/* + * Stop the current cpu. + */ +void smp_stop_cpu(void) +{ + pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0); + for (;;) ; +} + +/* + * This is the main routine where commands issued by other + * cpus are handled. 
+ */
+static void do_ext_call_interrupt(struct ext_code ext_code,
+				  unsigned int param32, unsigned long param64)
+{
+	unsigned long bits;
+	int cpu;
+
+	cpu = smp_processor_id();
+	/*
+	 * Interrupt statistics: code 0x1202 is counted as EXTINT_EXC, any
+	 * other code handled here as EXTINT_EMS.
+	 */
+	if (ext_code.code == 0x1202)
+		kstat_cpu(cpu).irqs[EXTINT_EXC]++;
+	else
+		kstat_cpu(cpu).irqs[EXTINT_EMS]++;
+	/*
+	 * handle bit signal external calls
+	 *
+	 * xchg() fetches this cpu's pending request bits and resets the
+	 * mask to 0 in one step; the bits are then served from the local
+	 * copy.
+	 */
+	bits = xchg(&pcpu_devices[cpu].ec_mask, 0);
+
+	/* smp_stop_cpu() spins forever, so nothing below runs after it. */
+	if (test_bit(ec_stop_cpu, &bits))
+		smp_stop_cpu();
+
+	if (test_bit(ec_schedule, &bits))
+		scheduler_ipi();
+
+	if (test_bit(ec_call_function, &bits))
+		generic_smp_call_function_interrupt();
+
+	if (test_bit(ec_call_function_single, &bits))
+		generic_smp_call_function_single_interrupt();
+
+}
+
+/*
+ * Issue an ec_call_function request, via pcpu_ec_call(), to every cpu
+ * set in @mask.
+ */
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	int cpu;
+
+	for_each_cpu(cpu, mask)
+		pcpu_ec_call(pcpu_devices + cpu, ec_call_function);
+}
+
+/*
+ * Issue an ec_call_function_single request, via pcpu_ec_call(), to @cpu.
+ */
+void arch_send_call_function_single_ipi(int cpu)
+{
+	pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+}
+
+#ifndef CONFIG_64BIT
+/*
+ * Callback used by smp_ptlb_all(): flushes the TLB of the cpu it is
+ * executed on. @info is unused.
+ */
+static void smp_ptlb_callback(void *info)
+{
+	__tlb_flush_local();
+}
+
+/*
+ * Run smp_ptlb_callback() through on_each_cpu().
+ * NOTE(review): the final argument 1 is presumably the "wait" flag of
+ * on_each_cpu() - confirm.
+ */
+void smp_ptlb_all(void)
+{
+	on_each_cpu(smp_ptlb_callback, NULL, 1);
+}
+EXPORT_SYMBOL(smp_ptlb_all);
+#endif /* ! CONFIG_64BIT */
+
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+void smp_send_reschedule(int cpu)
+{
+	pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
+}
+
+/*
+ * parameter area for the set/clear control bit callbacks
+ */
+struct ec_creg_mask_parms {
+	unsigned long orval;	/* bits OR-ed into the control register */
+	unsigned long andval;	/* mask AND-ed with the old register value */
+	int cr;			/* index of the control register to modify */
+};
+
+/*
+ * callback for setting/clearing control bits
+ *
+ * @info points to a struct ec_creg_mask_parms. The control registers are
+ * stored into a local 16 entry array (range 0..15), entry pp->cr is
+ * replaced by (old & andval) | orval, and the whole array is loaded back.
+ */
+static void smp_ctl_bit_callback(void *info)
+{
+	struct ec_creg_mask_parms *pp = info;
+	unsigned long cregs[16];
+
+	__ctl_store(cregs, 0, 15);
+	cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
+	__ctl_load(cregs, 0, 15);
+}
+
+/*
+ * Set a bit in a control register of all cpus
+ *
+ * The parameter block (orval = 1UL << bit, andval = all ones) lives on
+ * this stack frame and is handed to the callback by pointer.
+ * NOTE(review): that is only safe if the final argument 1 makes
+ * on_each_cpu() wait for all callbacks - confirm.
+ */
+void smp_ctl_set_bit(int cr, int bit)
+{
+	struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
+
+	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+}
+EXPORT_SYMBOL(smp_ctl_set_bit);
+
+/*
+ * Clear a bit in a control register of all cpus
+ *
+ * Same scheme as smp_ctl_set_bit() with orval = 0 and
+ * andval = ~(1UL << bit).
+ */
+void smp_ctl_clear_bit(int cr, int bit)
+{
+	struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
+
+	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+}
+EXPORT_SYMBOL(smp_ctl_clear_bit);
+
+#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP)
+
+struct save_area *zfcpdump_save_areas[NR_CPUS + 1];
+EXPORT_SYMBOL_GPL(zfcpdump_save_areas);
+
+static void __init smp_get_save_area(int cpu, u16 address)
+{
+	void *lc = pcpu_devices[0].lowcore;
+	struct save_area *save_area;
+
+	if (is_kdump_kernel())
+		return;
+	if (!OLDMEM_BASE && (address == boot_cpu_address ||
+			     ipl_info.type != IPL_TYPE_FCP_DUMP))
+		return;
+	if (cpu >= NR_CPUS) {
+		pr_warning("CPU %i exceeds the maximum %i and is excluded "
+			   "from the dump\n", cpu, NR_CPUS - 1);
+		return;
+	}
+	save_area = kmalloc(sizeof(struct save_area), GFP_KERNEL);
+	if (!save_area)
+		panic("could not allocate memory for save area\n");
+	zfcpdump_save_areas[cpu] = save_area;
+#ifdef CONFIG_CRASH_DUMP
+	if (address == boot_cpu_address) {
+		/* Copy the registers of the boot cpu.
*/ + copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + SAVE_AREA_BASE - PAGE_SIZE, 0); + return; + } +#endif + /* Get the registers of a non-boot cpu. */ + __pcpu_sigp_relax(address, sigp_stop_and_store_status, 0, NULL); + memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area)); +} + +int smp_store_status(int cpu) +{ + struct pcpu *pcpu; + + pcpu = pcpu_devices + cpu; + if (__pcpu_sigp_relax(pcpu->address, sigp_stop_and_store_status, + 0, NULL) != sigp_order_code_accepted) + return -EIO; + return 0; +} + +#else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ + +static inline void smp_get_save_area(int cpu, u16 address) { } + +#endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ + +static struct sclp_cpu_info *smp_get_cpu_info(void) +{ + static int use_sigp_detection; + struct sclp_cpu_info *info; + int address; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info && (use_sigp_detection || sclp_get_cpu_info(info))) { + use_sigp_detection = 1; + for (address = 0; address <= MAX_CPU_ADDRESS; address++) { + if (__pcpu_sigp_relax(address, sigp_sense, 0, NULL) == + sigp_not_operational) + continue; + info->cpu[info->configured].address = address; + info->configured++; + } + info->combined = info->configured; + } + return info; +} + +static int __devinit smp_add_present_cpu(int cpu); + +static int __devinit __smp_rescan_cpus(struct sclp_cpu_info *info, + int sysfs_add) +{ + struct pcpu *pcpu; + cpumask_t avail; + int cpu, nr, i; + + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + cpu = cpumask_first(&avail); + for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { + if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) + continue; + if (pcpu_find_address(cpu_present_mask, info->cpu[i].address)) + continue; + pcpu = pcpu_devices + cpu; + pcpu->address = info->cpu[i].address; + pcpu->state = (cpu >= info->configured) ? 
+ CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (sysfs_add && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpu = cpumask_next(cpu, &avail); + } + return nr; +} + +static void __init smp_detect_cpus(void) +{ + unsigned int cpu, c_cpus, s_cpus; + struct sclp_cpu_info *info; + + info = smp_get_cpu_info(); + if (!info) + panic("smp_detect_cpus failed to allocate memory\n"); + if (info->has_cpu_type) { + for (cpu = 0; cpu < info->combined; cpu++) { + if (info->cpu[cpu].address != boot_cpu_address) + continue; + /* The boot cpu dictates the cpu type. */ + boot_cpu_type = info->cpu[cpu].type; + break; + } + } + c_cpus = s_cpus = 0; + for (cpu = 0; cpu < info->combined; cpu++) { + if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type) + continue; + if (cpu < info->configured) { + smp_get_save_area(c_cpus, info->cpu[cpu].address); + c_cpus++; + } else + s_cpus++; + } + pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); + get_online_cpus(); + __smp_rescan_cpus(info, 0); + put_online_cpus(); + kfree(info); +} + +/* + * Activate a secondary processor. 
+ */ +static void __cpuinit smp_start_secondary(void *cpuvoid) +{ + S390_lowcore.last_update_clock = get_clock(); + S390_lowcore.restart_stack = (unsigned long) restart_stack; + S390_lowcore.restart_fn = (unsigned long) do_restart; + S390_lowcore.restart_data = 0; + S390_lowcore.restart_source = -1UL; + restore_access_regs(S390_lowcore.access_regs_save_area); + __ctl_load(S390_lowcore.cregs_save_area, 0, 15); + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); + cpu_init(); + preempt_disable(); + init_cpu_timer(); + init_cpu_vtimer(); + pfault_init(); + notify_cpu_starting(smp_processor_id()); + ipi_call_lock(); + set_cpu_online(smp_processor_id(), true); + ipi_call_unlock(); + local_irq_enable(); + /* cpu_idle will call schedule for us */ + cpu_idle(); +} + +struct create_idle { + struct work_struct work; + struct task_struct *idle; + struct completion done; + int cpu; +}; + +static void __cpuinit smp_fork_idle(struct work_struct *work) +{ + struct create_idle *c_idle; + + c_idle = container_of(work, struct create_idle, work); + c_idle->idle = fork_idle(c_idle->cpu); + complete(&c_idle->done); +} + +/* Upping and downing of CPUs */ +int __cpuinit __cpu_up(unsigned int cpu) +{ + struct create_idle c_idle; + struct pcpu *pcpu; + int rc; + + pcpu = pcpu_devices + cpu; + if (pcpu->state != CPU_STATE_CONFIGURED) + return -EIO; + if (pcpu_sigp_retry(pcpu, sigp_initial_cpu_reset, 0) != + sigp_order_code_accepted) + return -EIO; + if (!pcpu->idle) { + c_idle.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done); + INIT_WORK_ONSTACK(&c_idle.work, smp_fork_idle); + c_idle.cpu = cpu; + schedule_work(&c_idle.work); + wait_for_completion(&c_idle.done); + if (IS_ERR(c_idle.idle)) + return PTR_ERR(c_idle.idle); + pcpu->idle = c_idle.idle; + } + init_idle(pcpu->idle, cpu); + rc = pcpu_alloc_lowcore(pcpu, cpu); + if (rc) + return rc; + pcpu_prepare_secondary(pcpu, cpu); + pcpu_attach_task(pcpu, pcpu->idle); + pcpu_start_fn(pcpu, smp_start_secondary, NULL); + while (!cpu_online(cpu)) + 
cpu_relax(); + return 0; +} + +static int __init setup_possible_cpus(char *s) +{ + int max, cpu; + + if (kstrtoint(s, 0, &max) < 0) + return 0; + init_cpu_possible(cpumask_of(0)); + for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++) + set_cpu_possible(cpu, true); + return 0; +} +early_param("possible_cpus", setup_possible_cpus); + +#ifdef CONFIG_HOTPLUG_CPU + +int __cpu_disable(void) +{ + unsigned long cregs[16]; + + set_cpu_online(smp_processor_id(), false); + /* Disable pseudo page faults on this cpu. */ + pfault_fini(); + /* Disable interrupt sources via control register. */ + __ctl_store(cregs, 0, 15); + cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */ + cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ + cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ + __ctl_load(cregs, 0, 15); + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + struct pcpu *pcpu; + + /* Wait until target cpu is down */ + pcpu = pcpu_devices + cpu; + while (!pcpu_stopped(pcpu)) + cpu_relax(); + pcpu_free_lowcore(pcpu); + atomic_dec(&init_mm.context.attach_count); +} + +void __noreturn cpu_die(void) +{ + idle_task_exit(); + pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0); + for (;;) ; +} + +#endif /* CONFIG_HOTPLUG_CPU */ + +static void smp_call_os_info_init_fn(void) +{ + int (*init_fn)(void); + unsigned long size; + + init_fn = os_info_old_entry(OS_INFO_INIT_FN, &size); + if (!init_fn) + return; + init_fn(); +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + /* request the 0x1201 emergency signal external interrupt */ + if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) + panic("Couldn't request external interrupt 0x1201"); + /* request the 0x1202 external call external interrupt */ + if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0) + panic("Couldn't request external interrupt 0x1202"); + smp_call_os_info_init_fn(); + smp_detect_cpus(); +} + +void __init smp_prepare_boot_cpu(void) 
+{ + struct pcpu *pcpu = pcpu_devices; + + boot_cpu_address = stap(); + pcpu->idle = current; + pcpu->state = CPU_STATE_CONFIGURED; + pcpu->address = boot_cpu_address; + pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); + pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE; + pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE; + S390_lowcore.percpu_offset = __per_cpu_offset[0]; + cpu_set_polarization(0, POLARIZATION_UNKNOWN); + set_cpu_present(0, true); + set_cpu_online(0, true); +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ +} + +void __init smp_setup_processor_id(void) +{ + S390_lowcore.cpu_nr = 0; +} + +/* + * the frequency of the profiling timer can be changed + * by writing a multiplier value into /proc/profile. + * + * usually you want to run this on all CPUs ;) + */ +int setup_profiling_timer(unsigned int multiplier) +{ + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +static ssize_t cpu_configure_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} + +static ssize_t cpu_configure_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pcpu *pcpu; + int cpu, val, rc; + char delim; + + if (sscanf(buf, "%d %c", &val, &delim) != 1) + return -EINVAL; + if (val != 0 && val != 1) + return -EINVAL; + get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); + rc = -EBUSY; + /* disallow configuration changes of online cpus and cpu 0 */ + cpu = dev->id; + if (cpu_online(cpu) || cpu == 0) + goto out; + pcpu = pcpu_devices + cpu; + rc = 0; + switch (val) { + case 0: + if (pcpu->state != CPU_STATE_CONFIGURED) + break; + rc = sclp_cpu_deconfigure(pcpu->address); + if (rc) + break; + pcpu->state = CPU_STATE_STANDBY; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); + break; + 
case 1: + if (pcpu->state != CPU_STATE_STANDBY) + break; + rc = sclp_cpu_configure(pcpu->address); + if (rc) + break; + pcpu->state = CPU_STATE_CONFIGURED; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); + break; + default: + break; + } +out: + mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); + return rc ? rc : count; +} +static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); +#endif /* CONFIG_HOTPLUG_CPU */ + +static ssize_t show_cpu_address(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); +} +static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); + +static struct attribute *cpu_common_attrs[] = { +#ifdef CONFIG_HOTPLUG_CPU + &dev_attr_configure.attr, +#endif + &dev_attr_address.attr, + NULL, +}; + +static struct attribute_group cpu_common_attr_group = { + .attrs = cpu_common_attrs, +}; + +static ssize_t show_capability(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned int capability; + int rc; + + rc = get_cpu_capability(&capability); + if (rc) + return rc; + return sprintf(buf, "%u\n", capability); +} +static DEVICE_ATTR(capability, 0444, show_capability, NULL); + +static ssize_t show_idle_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long idle_count; + unsigned int sequence; + + do { + sequence = ACCESS_ONCE(idle->sequence); + idle_count = ACCESS_ONCE(idle->idle_count); + if (ACCESS_ONCE(idle->idle_enter)) + idle_count++; + } while ((sequence & 1) || (idle->sequence != sequence)); + return sprintf(buf, "%llu\n", idle_count); +} +static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); + +static ssize_t show_idle_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long now, idle_time, 
idle_enter, idle_exit; + unsigned int sequence; + + do { + now = get_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_time = ACCESS_ONCE(idle->idle_time); + idle_enter = ACCESS_ONCE(idle->idle_enter); + idle_exit = ACCESS_ONCE(idle->idle_exit); + } while ((sequence & 1) || (idle->sequence != sequence)); + idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; + return sprintf(buf, "%llu\n", idle_time >> 12); +} +static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); + +static struct attribute *cpu_online_attrs[] = { + &dev_attr_capability.attr, + &dev_attr_idle_count.attr, + &dev_attr_idle_time_us.attr, + NULL, +}; + +static struct attribute_group cpu_online_attr_group = { + .attrs = cpu_online_attrs, +}; + +static int __cpuinit smp_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned int)(long)hcpu; + struct cpu *c = &pcpu_devices[cpu].cpu; + struct device *s = &c->dev; + struct s390_idle_data *idle; + int err = 0; + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + idle = &per_cpu(s390_idle, cpu); + memset(idle, 0, sizeof(struct s390_idle_data)); + err = sysfs_create_group(&s->kobj, &cpu_online_attr_group); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + sysfs_remove_group(&s->kobj, &cpu_online_attr_group); + break; + } + return notifier_from_errno(err); +} + +static struct notifier_block __cpuinitdata smp_cpu_nb = { + .notifier_call = smp_cpu_notify, +}; + +static int __devinit smp_add_present_cpu(int cpu) +{ + struct cpu *c = &pcpu_devices[cpu].cpu; + struct device *s = &c->dev; + int rc; + + c->hotpluggable = 1; + rc = register_cpu(c, cpu); + if (rc) + goto out; + rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group); + if (rc) + goto out_cpu; + if (cpu_online(cpu)) { + rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group); + if (rc) + goto out_online; + } + rc = topology_cpu_init(c); + if (rc) + goto out_topology; + return 0; + +out_topology: + if 
(cpu_online(cpu)) + sysfs_remove_group(&s->kobj, &cpu_online_attr_group); +out_online: + sysfs_remove_group(&s->kobj, &cpu_common_attr_group); +out_cpu: +#ifdef CONFIG_HOTPLUG_CPU + unregister_cpu(c); +#endif +out: + return rc; +} + +#ifdef CONFIG_HOTPLUG_CPU + +int __ref smp_rescan_cpus(void) +{ + struct sclp_cpu_info *info; + int nr; + + info = smp_get_cpu_info(); + if (!info) + return -ENOMEM; + get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); + nr = __smp_rescan_cpus(info, 1); + mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); + kfree(info); + if (nr) + topology_schedule_update(); + return 0; +} + +static ssize_t __ref rescan_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int rc; + + rc = smp_rescan_cpus(); + return rc ? rc : count; +} +static DEVICE_ATTR(rescan, 0200, NULL, rescan_store); +#endif /* CONFIG_HOTPLUG_CPU */ + +static int __init s390_smp_init(void) +{ + int cpu, rc; + + register_cpu_notifier(&smp_cpu_nb); +#ifdef CONFIG_HOTPLUG_CPU + rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); + if (rc) + return rc; +#endif + for_each_present_cpu(cpu) { + rc = smp_add_present_cpu(cpu); + if (rc) + return rc; + } + return 0; +} +subsys_initcall(s390_smp_init); diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c new file mode 100644 index 00000000..8841919e --- /dev/null +++ b/arch/s390/kernel/stacktrace.c @@ -0,0 +1,98 @@ +/* + * arch/s390/kernel/stacktrace.c + * + * Stack trace management functions + * + * Copyright (C) IBM Corp. 
2006 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/sched.h> +#include <linux/stacktrace.h> +#include <linux/kallsyms.h> +#include <linux/module.h> + +static unsigned long save_context_stack(struct stack_trace *trace, + unsigned long sp, + unsigned long low, + unsigned long high, + int savesched) +{ + struct stack_frame *sf; + struct pt_regs *regs; + unsigned long addr; + + while(1) { + sp &= PSW_ADDR_INSN; + if (sp < low || sp > high) + return sp; + sf = (struct stack_frame *)sp; + while(1) { + addr = sf->gprs[8] & PSW_ADDR_INSN; + if (!trace->skip) + trace->entries[trace->nr_entries++] = addr; + else + trace->skip--; + if (trace->nr_entries >= trace->max_entries) + return sp; + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *)sp; + } + /* Zero backchain detected, check for interrupt frame. */ + sp = (unsigned long)(sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *)sp; + addr = regs->psw.addr & PSW_ADDR_INSN; + if (savesched || !in_sched_functions(addr)) { + if (!trace->skip) + trace->entries[trace->nr_entries++] = addr; + else + trace->skip--; + } + if (trace->nr_entries >= trace->max_entries) + return sp; + low = sp; + sp = regs->gprs[15]; + } +} + +void save_stack_trace(struct stack_trace *trace) +{ + register unsigned long sp asm ("15"); + unsigned long orig_sp, new_sp; + + orig_sp = sp & PSW_ADDR_INSN; + new_sp = save_context_stack(trace, orig_sp, + S390_lowcore.panic_stack - PAGE_SIZE, + S390_lowcore.panic_stack, 1); + if (new_sp != orig_sp) + return; + new_sp = save_context_stack(trace, new_sp, + S390_lowcore.async_stack - ASYNC_SIZE, + S390_lowcore.async_stack, 1); + if (new_sp != orig_sp) + return; + save_context_stack(trace, new_sp, + S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE, 1); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + +void 
save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + unsigned long sp, low, high; + + sp = tsk->thread.ksp & PSW_ADDR_INSN; + low = (unsigned long) task_stack_page(tsk); + high = (unsigned long) task_pt_regs(tsk); + save_context_stack(trace, sp, low, high, 0); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c new file mode 100644 index 00000000..aa1494d0 --- /dev/null +++ b/arch/s390/kernel/suspend.c @@ -0,0 +1,182 @@ +/* + * Suspend support specific for s390. + * + * Copyright IBM Corp. 2009 + * + * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> + */ + +#include <linux/pfn.h> +#include <linux/suspend.h> +#include <linux/mm.h> +#include <asm/ctl_reg.h> + +/* + * References to section boundaries + */ +extern const void __nosave_begin, __nosave_end; + +/* + * The restore of the saved pages in an hibernation image will set + * the change and referenced bits in the storage key for each page. + * Overindication of the referenced bits after an hibernation cycle + * does not cause any harm but the overindication of the change bits + * would cause trouble. + * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each + * page to the most significant byte of the associated page frame + * number in the hibernation image. + */ + +/* + * Key storage is allocated as a linked list of pages. 
+ * The size of the keys array is (PAGE_SIZE - sizeof(long))
+ */
+struct page_key_data {
+	struct page_key_data *next;
+	unsigned char data[];
+};
+
+#define PAGE_KEY_DATA_SIZE	(PAGE_SIZE - sizeof(struct page_key_data *))
+
+/* Head of the list of key pages. */
+static struct page_key_data *page_key_data;
+/* Key page currently being read from / written to. */
+static struct page_key_data *page_key_rp;
+static struct page_key_data *page_key_wp;
+/* Read / write index inside the current key page. */
+static unsigned long page_key_rx;
+static unsigned long page_key_wx;
+
+/*
+ * Number of extra pages needed to hold one key byte per image page.
+ *
+ * The key travels in the most significant byte of each page frame
+ * number in the hibernation image, so suspend needs no extra memory.
+ * On resume the keys have to be kept until the page data is back in
+ * place; only then can the storage keys be set to their old state.
+ */
+unsigned long page_key_additional_pages(unsigned long pages)
+{
+	unsigned long nr_key_pages = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
+
+	return nr_key_pages;
+}
+
+/*
+ * Release every page on the page_key_data list; the list head is
+ * advanced before each page is freed and ends up NULL.
+ */
+void page_key_free(void)
+{
+	struct page_key_data *victim;
+
+	for (victim = page_key_data; victim; victim = page_key_data) {
+		page_key_data = victim->next;
+		free_page((unsigned long) victim);
+	}
+}
+
+/*
+ * Build the page_key_data list with room for one key byte per page of
+ * the hibernation image and reset the read/write cursors to its head.
+ * Returns 0 on success; if a page cannot be allocated everything
+ * allocated so far is freed and -ENOMEM is returned.
+ */
+int page_key_alloc(unsigned long pages)
+{
+	unsigned long remaining = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
+	struct page_key_data *node;
+
+	for (; remaining; remaining--) {
+		node = (struct page_key_data *) get_zeroed_page(GFP_KERNEL);
+		if (node) {
+			/* Push the fresh page onto the front of the list. */
+			node->next = page_key_data;
+			page_key_data = node;
+			continue;
+		}
+		page_key_free();
+		return -ENOMEM;
+	}
+	page_key_wp = page_key_data;
+	page_key_rp = page_key_data;
+	page_key_wx = 0;
+	page_key_rx = 0;
+	return 0;
+}
+
+/*
+ * Save the storage key into the upper 8 bits of the page frame number.
+ */
+void page_key_read(unsigned long *pfn)
+{
+	unsigned long addr;
+
+	addr = (unsigned long) page_address(pfn_to_page(*pfn));
+	/*
+	 * Overwrite the first byte of *pfn with the key. s390 is big-endian,
+	 * so that byte is the most significant one ("upper 8 bits").
+	 */
+	*(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr);
+}
+
+/*
+ * Extract the storage key from the upper 8 bits of the page frame number
+ * and store it in the page_key_data list of arrays.
+ *
+ * The key byte in *pfn is cleared afterwards. The write cursor moves to
+ * the next array only once the current one is full.
+ */
+void page_key_memorize(unsigned long *pfn)
+{
+	page_key_wp->data[page_key_wx] = *(unsigned char *) pfn;
+	*(unsigned char *) pfn = 0;
+	if (++page_key_wx < PAGE_KEY_DATA_SIZE)
+		return;
+	page_key_wp = page_key_wp->next;
+	page_key_wx = 0;
+}
+
+/*
+ * Get the next key from the page_key_data list of arrays and set the
+ * storage key of the page referred by @address. If @address refers to
+ * a "safe" page the swsusp_arch_resume code will transfer the storage
+ * key from the buffer page to the original page.
+ *
+ * Keys are consumed in exactly the order page_key_memorize() stored
+ * them, so the read cursor must advance the same way as the write
+ * cursor: stay in the current array until its last slot has been used.
+ */
+void page_key_write(void *address)
+{
+	page_set_storage_key((unsigned long) address,
+			     page_key_rp->data[page_key_rx], 0);
+	/* More keys left in this array: keep reading from it. */
+	if (++page_key_rx < PAGE_KEY_DATA_SIZE)
+		return;
+	/* Array exhausted: continue with the first slot of the next one. */
+	page_key_rp = page_key_rp->next;
+	page_key_rx = 0;
+}
+
+/*
+ * Tell the hibernation core whether page frame @pfn must be left out of
+ * the image. Returns 1 for "do not save", 0 for "save".
+ */
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
+	unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
+
+	/* Always save lowcore pages (LC protection might be enabled). */
+	if (pfn <= LC_PAGES)
+		return 0;
+	/* Pages of the nosave section: [__nosave_begin, __nosave_end). */
+	if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
+		return 1;
+	/* Skip memory holes and read-only pages (NSS, DCSS, ...). */
+	if (tprot(PFN_PHYS(pfn)))
+		return 1;
+	return 0;
+}
+
+void save_processor_state(void)
+{
+	/* swsusp_arch_suspend() actually saves all cpu register contents.
+	 * Machine checks must be disabled since swsusp_arch_suspend() stores
+	 * register contents to their lowcore save areas. That's the same
+	 * place where register contents on machine checks would be saved.
+	 * To avoid register corruption disable machine checks.
+	 * We must also disable machine checks in the new psw mask for
+	 * program checks, since swsusp_arch_suspend() may generate program
+	 * checks. Disabling machine checks for all other new psw masks is
+	 * just paranoia.
+	 */
+	local_mcck_disable();
+	/* Disable lowcore protection */
+	__ctl_clear_bit(0,28);
+	S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
+	S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
+	S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
+	S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
+}
+
+/*
+ * Undo save_processor_state() in reverse order: set PSW_MASK_MCHECK again
+ * in the external, svc, io and program new PSW masks, set bit 28 of
+ * control register 0 again, and finally call local_mcck_enable().
+ */
+void restore_processor_state(void)
+{
+	S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
+	S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
+	S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
+	S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
+	/* Enable lowcore protection */
+	__ctl_set_bit(0,28);
+	local_mcck_enable();
+}
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
new file mode 100644
index 00000000..dd70ef04
--- /dev/null
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -0,0 +1,285 @@
+/*
+ * S390 64-bit swsusp implementation
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+
+/*
+ * Save register context in absolute 0 lowcore and call swsusp_save() to
+ * create in-memory kernel image. The context is saved in the designated
+ * "store status" memory locations (see POP).
+ * We return from this function twice. The first time during the suspend to
+ * disk process. The second time via the swsusp_arch_resume() function
+ * (see below) in the resume process.
+ * This function runs with disabled interrupts.
+ */ + .section .text +ENTRY(swsusp_arch_suspend) + stmg %r6,%r15,__SF_GPRS(%r15) + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r1,__SF_BACKCHAIN(%r15) + + /* Deactivate DAT */ + stnsm __SF_EMPTY(%r15),0xfb + + /* Store prefix register on stack */ + stpx __SF_EMPTY(%r15) + + /* Save prefix register contents for lowcore */ + llgf %r4,__SF_EMPTY(%r15) + + /* Get pointer to save area */ + lghi %r1,0x1000 + + /* Save CPU address */ + stap __LC_EXT_CPU_ADDR(%r0) + + /* Store registers */ + mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ + stfpc 0x31c(%r1) /* store fpu control */ + std 0,0x200(%r1) /* store f0 */ + std 1,0x208(%r1) /* store f1 */ + std 2,0x210(%r1) /* store f2 */ + std 3,0x218(%r1) /* store f3 */ + std 4,0x220(%r1) /* store f4 */ + std 5,0x228(%r1) /* store f5 */ + std 6,0x230(%r1) /* store f6 */ + std 7,0x238(%r1) /* store f7 */ + std 8,0x240(%r1) /* store f8 */ + std 9,0x248(%r1) /* store f9 */ + std 10,0x250(%r1) /* store f10 */ + std 11,0x258(%r1) /* store f11 */ + std 12,0x260(%r1) /* store f12 */ + std 13,0x268(%r1) /* store f13 */ + std 14,0x270(%r1) /* store f14 */ + std 15,0x278(%r1) /* store f15 */ + stam %a0,%a15,0x340(%r1) /* store access registers */ + stctg %c0,%c15,0x380(%r1) /* store control registers */ + stmg %r0,%r15,0x280(%r1) /* store general registers */ + + stpt 0x328(%r1) /* store timer */ + stck __SF_EMPTY(%r15) /* store clock */ + stckc 0x330(%r1) /* store clock comparator */ + + /* Update cputime accounting before going to sleep */ + lg %r0,__LC_LAST_UPDATE_TIMER + slg %r0,0x328(%r1) + alg %r0,__LC_SYSTEM_TIMER + stg %r0,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),0x328(%r1) + lg %r0,__LC_LAST_UPDATE_CLOCK + slg %r0,__SF_EMPTY(%r15) + alg %r0,__LC_STEAL_TIMER + stg %r0,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15) + + /* Activate DAT */ + stosm __SF_EMPTY(%r15),0x04 + + /* Set prefix page to zero */ + xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) + spx __SF_EMPTY(%r15) + + lghi %r2,0 
+	lghi	%r3,2*PAGE_SIZE
+	lghi	%r5,2*PAGE_SIZE
+1:	mvcle	%r2,%r4,0
+	jo	1b
+
+	/* Save image */
+	brasl	%r14,swsusp_save
+
+	/* Restore prefix register and return */
+	lghi	%r1,0x1000
+	spx	0x318(%r1)
+	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
+	lghi	%r2,0
+	br	%r14
+
+/*
+ * Restore saved memory image to correct place and restore register context.
+ * Then we return to the function that called swsusp_arch_suspend().
+ * swsusp_arch_resume() runs with disabled interrupts.
+ *
+ * Sequence as visible below: mark free pages stable, switch DAT off and
+ * set the prefix to zero, copy every page on restore_pblist back to its
+ * destination (storage key included), reset via diag 0x308, make sure the
+ * code continues on the CPU that did the suspend, reload the register
+ * state from the save area at 0x1000 and finally return 0 in %r2.
+ */
+ENTRY(swsusp_arch_resume)
+	stmg	%r6,%r15,__SF_GPRS(%r15)
+	lgr	%r1,%r15
+	aghi	%r15,-STACK_FRAME_OVERHEAD
+	stg	%r1,__SF_BACKCHAIN(%r15)
+
+	/* Make all free pages stable */
+	lghi	%r2,1
+	brasl	%r14,arch_set_page_states
+
+	/* Deactivate DAT */
+	stnsm	__SF_EMPTY(%r15),0xfb
+
+	/* Set prefix page to zero */
+	xc	__SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
+	spx	__SF_EMPTY(%r15)
+
+	/* Restore saved image */
+	larl	%r1,restore_pblist
+	lg	%r1,0(%r1)
+	ltgr	%r1,%r1
+	jz	2f			/* empty list -> nothing to copy */
+0:
+	lg	%r2,8(%r1)		/* copy destination */
+	lg	%r4,0(%r1)		/* copy source */
+	iske	%r0,%r4			/* fetch storage key of source page */
+	lghi	%r3,PAGE_SIZE
+	lghi	%r5,PAGE_SIZE
+1:
+	mvcle	%r2,%r4,0
+	jo	1b			/* mvcle interrupted, continue copy */
+	lg	%r2,8(%r1)		/* reload destination, mvcle advanced %r2 */
+	sske	%r0,%r2			/* apply the fetched key to the destination */
+	lg	%r1,16(%r1)		/* next list entry */
+	ltgr	%r1,%r1
+	jnz	0b
+2:
+	ptlb				/* flush tlb */
+
+	/* Reset System */
+	larl	%r1,restart_entry
+	larl	%r2,.Lrestart_diag308_psw
+	og	%r1,0(%r2)
+	stg	%r1,0(%r0)
+	larl	%r1,.Lnew_pgm_check_psw
+	epsw	%r2,%r3
+	stm	%r2,%r3,0(%r1)
+	mvc	__LC_PGM_NEW_PSW(16,%r0),0(%r1)
+	lghi	%r0,0
+	diag	%r0,%r0,0x308
+restart_entry:
+	lhi	%r1,1
+	sigp	%r1,%r0,0x12
+	sam64
+	larl	%r1,.Lnew_pgm_check_psw
+	lpswe	0(%r1)
+pgm_check_entry:
+
+	/* Switch to original suspend CPU */
+	larl	%r1,.Lresume_cpu	/* Resume CPU address: r2 */
+	stap	0(%r1)
+	llgh	%r2,0(%r1)
+	llgh	%r1,__LC_EXT_CPU_ADDR(%r0)	/* Suspend CPU address: r1 */
+	cgr	%r1,%r2
+	je	restore_registers	/* r1 = r2 -> nothing to do */
+	larl	%r4,.Lrestart_suspend_psw	/* Set new restart PSW */
+	mvc	__LC_RST_NEW_PSW(16,%r0),0(%r4)
+3:
+	sigp	%r9,%r1,11		/* sigp initial cpu reset */
+	brc	8,4f			/* accepted */
+	brc	2,3b			/* busy, try again */
+
+	/* Suspend CPU not available -> panic */
+	larl	%r15,init_thread_union
+	ahi	%r15,1<<(PAGE_SHIFT+THREAD_ORDER)
+	larl	%r2,.Lpanic_string
+	larl	%r3,_sclp_print_early
+	lghi	%r1,0
+	sam31
+	sigp	%r1,%r0,0x12
+	basr	%r14,%r3
+	larl	%r3,.Ldisabled_wait_31
+	lpsw	0(%r3)
+4:
+	/* Switch to suspend CPU */
+	sigp	%r9,%r1,6		/* sigp restart to suspend CPU */
+	brc	2,4b			/* busy, try again */
+5:
+	sigp	%r9,%r2,5		/* sigp stop to current resume CPU */
+	brc	2,5b			/* busy, try again */
+6:	j	6b			/* loop here until the stop takes effect */
+
+restart_suspend:
+	larl	%r1,.Lresume_cpu
+	llgh	%r2,0(%r1)
+7:
+	sigp	%r9,%r2,1		/* sigp sense, wait for resume CPU */
+	brc	8,7b			/* accepted, status 0, still running */
+	brc	2,7b			/* busy, try again */
+	tmll	%r9,0x40		/* Test if resume CPU is stopped */
+	jz	7b
+
+restore_registers:
+	/* Restore registers */
+	lghi	%r13,0x1000		/* %r13 = pointer to save area */
+
+	/* Ignore time spent in suspended state. */
+	llgf	%r1,0x318(%r13)
+	stck	__LC_LAST_UPDATE_CLOCK(%r1)
+	spt	0x328(%r13)		/* reprogram timer */
+	//sckc	0x330(%r13)		/* set clock comparator */
+
+	lctlg	%c0,%c15,0x380(%r13)	/* load control registers */
+	lam	%a0,%a15,0x340(%r13)	/* load access registers */
+
+	lfpc	0x31c(%r13)		/* load fpu control */
+	ld	0,0x200(%r13)		/* load f0 */
+	ld	1,0x208(%r13)		/* load f1 */
+	ld	2,0x210(%r13)		/* load f2 */
+	ld	3,0x218(%r13)		/* load f3 */
+	ld	4,0x220(%r13)		/* load f4 */
+	ld	5,0x228(%r13)		/* load f5 */
+	ld	6,0x230(%r13)		/* load f6 */
+	ld	7,0x238(%r13)		/* load f7 */
+	ld	8,0x240(%r13)		/* load f8 */
+	ld	9,0x248(%r13)		/* load f9 */
+	ld	10,0x250(%r13)		/* load f10 */
+	ld	11,0x258(%r13)		/* load f11 */
+	ld	12,0x260(%r13)		/* load f12 */
+	ld	13,0x268(%r13)		/* load f13 */
+	ld	14,0x270(%r13)		/* load f14 */
+	ld	15,0x278(%r13)		/* load f15 */
+
+	/* Load old stack */
+	lg	%r15,0x2f8(%r13)
+
+	/* Restore prefix register */
+	spx	0x318(%r13)
+
+	/* Activate DAT */
+	stosm	__SF_EMPTY(%r15),0x04
+
+	/* Make all free pages unstable */
+	lghi	%r2,0
+	brasl	%r14,arch_set_page_states
+
+	/* Log potential guest relocation */
+	brasl	%r14,lgr_info_log
+
+	/* Reinitialize the channel subsystem */
+	brasl	%r14,channel_subsystem_reinit
+
+	/* Return 0 */
+	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
+	lghi	%r2,0
+	br	%r14
+
+	.section .data..nosave,"aw",@progbits
+	.align	8
+.Ldisabled_wait_31:
+	.long	0x000a0000,0x00000000
+.Lpanic_string:
+	.asciz	"Resume not possible because suspend CPU is no longer available"
+	.align	8
+.Lrestart_diag308_psw:
+	.long	0x00080000,0x80000000
+.Lrestart_suspend_psw:
+	.quad	0x0000000180000000,restart_suspend
+.Lnew_pgm_check_psw:
+	.quad	0,pgm_check_entry
+.Lresume_cpu:
+	.byte	0,0
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
new file mode 100644
index 00000000..78ea1948
--- /dev/null
+++ b/arch/s390/kernel/sys_s390.c
@@ -0,0 +1,151 @@
+/*
+ * arch/s390/kernel/sys_s390.c
+ *
+ * S390 version
+ * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Thomas Spatzier (tspat@de.ibm.com)
+ *
+ * Derived from "arch/i386/kernel/sys_i386.c"
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/s390
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <linux/personality.h>
+#include <linux/unistd.h>
+#include <linux/ipc.h>
+#include <asm/uaccess.h>
+#include "entry.h"
+
+/*
+ * Perform the mmap() system call. Linux for S/390 isn't able to handle more
+ * than 5 system call parameters, so this system call uses a memory block
+ * for parameter passing.
+ */
+
+/*
+ * Argument block for the mmap2 system call. User space fills in one of
+ * these and passes its address as the single system call argument.
+ */
+struct s390_mmap_arg_struct {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long prot;
+	unsigned long flags;
+	unsigned long fd;
+	unsigned long offset;
+};
+
+/*
+ * mmap2: copy the argument block from user space and hand its six fields
+ * to sys_mmap_pgoff().
+ *
+ * Returns -EFAULT if the block cannot be copied, otherwise the result of
+ * sys_mmap_pgoff(). The result is passed through as a long; keeping it in
+ * an int would cut off the upper half of a mapping address on
+ * CONFIG_64BIT kernels, where this entry is part of the 64 bit table too.
+ */
+SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
+{
+	struct s390_mmap_arg_struct a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
+}
+
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls.
+ *
+ * Returns -EINVAL if any of the upper 16 bits of "call" is set, otherwise
+ * the result of the generic sys_ipc().
+ */
+SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
+		unsigned long, third, void __user *, ptr)
+{
+	if (call >> 16)
+		return -EINVAL;
+	/* The s390 sys_ipc variant has only five parameters instead of six
+	 * like the generic variant. The only difference is the handling of
+	 * the SEMTIMEDOP subcall where on s390 the third parameter is used
+	 * as a pointer to a struct timespec where the generic variant uses
+	 * the fifth parameter.
+	 * Therefore we can call the generic variant by simply passing the
+	 * third parameter also as fifth parameter.
+	 */
+	return sys_ipc(call, first, second, third, ptr, third);
+}
+
+#ifdef CONFIG_64BIT
+/*
+ * personality wrapper for 64 bit kernels: a task whose personality is
+ * PER_LINUX32 and that asks for PER_LINUX keeps PER_LINUX32, and a
+ * PER_LINUX32 result from sys_personality() is reported as PER_LINUX.
+ */
+SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
+{
+	unsigned int ret;
+
+	if (current->personality == PER_LINUX32 && personality == PER_LINUX)
+		personality = PER_LINUX32;
+	ret = sys_personality(personality);
+	if (ret == PER_LINUX32)
+		ret = PER_LINUX;
+
+	return ret;
+}
+#endif /* CONFIG_64BIT */
+
+/*
+ * Wrapper function for sys_fadvise64/fadvise64_64
+ */
+#ifndef CONFIG_64BIT
+
+/*
+ * The 64 bit file offset arrives as two 32 bit halves and is put back
+ * together before calling sys_fadvise64().
+ */
+SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, offset_high, u32, offset_low,
+		size_t, len, int, advice)
+{
+	return sys_fadvise64(fd, ((u64) offset_high << 32) | offset_low,
+			     len, advice);
+}
+
+/* Argument block for s390_fadvise64_64, copied in from user space. */
+struct fadvise64_64_args {
+	int fd;
+	long long offset;
+	long long len;
+	int advice;
+};
+
+/*
+ * Copy the argument block from user space and forward it to
+ * sys_fadvise64_64(). Returns -EFAULT if the copy fails.
+ */
+SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
+{
+	struct fadvise64_64_args a;
+
+	if (copy_from_user(&a, args, sizeof(a)))
+		return -EFAULT;
+	return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
+}
+
+/*
+ * This is a wrapper to call sys_fallocate(). For 31 bit s390 the last
+ * 64 bit argument "len" is split into the upper and lower 32 bits. The
+ * system call wrapper in the user space loads the value to %r6/%r7.
+ * The code in entry.S keeps the values in %r2 - %r6 where they are and
+ * stores %r7 to 96(%r15). But the standard C linkage requires that
+ * the whole 64 bit value for len is stored on the stack and doesn't
+ * use %r6 at all.
+ * So s390_fallocate has to convert the arguments from
+ * %r2: fd, %r3: mode, %r4/%r5: offset, %r6/96(%r15)-99(%r15): len
+ * to
+ * %r2: fd, %r3: mode, %r4/%r5: offset, 96(%r15)-103(%r15): len
+ *
+ * The two 32 bit halves are combined as (len_high << 32) | len_low.
+ */
+SYSCALL_DEFINE(s390_fallocate)(int fd, int mode, loff_t offset,
+			       u32 len_high, u32 len_low)
+{
+	return sys_fallocate(fd, mode, offset, ((u64)len_high << 32) | len_low);
+}
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+/*
+ * Entry point taking long arguments; it narrows fd/mode to int and the
+ * len halves to u32 before calling the SYSC_ body defined above.
+ */
+asmlinkage long SyS_s390_fallocate(long fd, long mode, loff_t offset,
+				   long len_high, long len_low)
+{
+	return SYSC_s390_fallocate((int) fd, (int) mode, offset,
+				   (u32) len_high, (u32) len_low);
+}
+SYSCALL_ALIAS(sys_s390_fallocate, SyS_s390_fallocate);
+#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */
+
+#endif /* !CONFIG_64BIT */
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
new file mode 100644
index 00000000..bcab2f04
--- /dev/null
+++ b/arch/s390/kernel/syscalls.S
@@ -0,0 +1,352 @@
+/*
+ * definitions for sys_call_table, each line represents an
+ * entry in the table in the form
+ * SYSCALL(31 bit syscall, 64 bit syscall, 31 bit emulated syscall)
+ *
+ * this file is meant to be included from entry.S and entry64.S
+ */
+
+/* Table slot without an implementation in any of the three columns. */
+#define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall,sys_ni_syscall)
+
+NI_SYSCALL							/* 0 */
+SYSCALL(sys_exit,sys_exit,sys32_exit_wrapper)
+SYSCALL(sys_fork,sys_fork,sys_fork)
+SYSCALL(sys_read,sys_read,sys32_read_wrapper)
+SYSCALL(sys_write,sys_write,sys32_write_wrapper)
+SYSCALL(sys_open,sys_open,sys32_open_wrapper)			/* 5 */
+SYSCALL(sys_close,sys_close,sys32_close_wrapper)
+SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall)
+SYSCALL(sys_creat,sys_creat,sys32_creat_wrapper)
+SYSCALL(sys_link,sys_link,sys32_link_wrapper)
+SYSCALL(sys_unlink,sys_unlink,sys32_unlink_wrapper)		/* 10 */
+SYSCALL(sys_execve,sys_execve,sys32_execve_wrapper)
+SYSCALL(sys_chdir,sys_chdir,sys32_chdir_wrapper)
+SYSCALL(sys_time,sys_ni_syscall,sys32_time_wrapper)		/* old time syscall */
+SYSCALL(sys_mknod,sys_mknod,sys32_mknod_wrapper)
+SYSCALL(sys_chmod,sys_chmod,sys32_chmod_wrapper) /* 15 */ +SYSCALL(sys_lchown16,sys_ni_syscall,sys32_lchown16_wrapper) /* old lchown16 syscall*/ +NI_SYSCALL /* old break syscall holder */ +NI_SYSCALL /* old stat syscall holder */ +SYSCALL(sys_lseek,sys_lseek,sys32_lseek_wrapper) +SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */ +SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper) +SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper) +SYSCALL(sys_setuid16,sys_ni_syscall,sys32_setuid16_wrapper) /* old setuid16 syscall*/ +SYSCALL(sys_getuid16,sys_ni_syscall,sys32_getuid16) /* old getuid16 syscall*/ +SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall */ +SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper) +SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper) +NI_SYSCALL /* old fstat syscall */ +SYSCALL(sys_pause,sys_pause,sys_pause) +SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */ +NI_SYSCALL /* old stty syscall */ +NI_SYSCALL /* old gtty syscall */ +SYSCALL(sys_access,sys_access,sys32_access_wrapper) +SYSCALL(sys_nice,sys_nice,sys32_nice_wrapper) +NI_SYSCALL /* 35 old ftime syscall */ +SYSCALL(sys_sync,sys_sync,sys_sync) +SYSCALL(sys_kill,sys_kill,sys32_kill_wrapper) +SYSCALL(sys_rename,sys_rename,sys32_rename_wrapper) +SYSCALL(sys_mkdir,sys_mkdir,sys32_mkdir_wrapper) +SYSCALL(sys_rmdir,sys_rmdir,sys32_rmdir_wrapper) /* 40 */ +SYSCALL(sys_dup,sys_dup,sys32_dup_wrapper) +SYSCALL(sys_pipe,sys_pipe,sys32_pipe_wrapper) +SYSCALL(sys_times,sys_times,compat_sys_times_wrapper) +NI_SYSCALL /* old prof syscall */ +SYSCALL(sys_brk,sys_brk,sys32_brk_wrapper) /* 45 */ +SYSCALL(sys_setgid16,sys_ni_syscall,sys32_setgid16_wrapper) /* old setgid16 syscall*/ +SYSCALL(sys_getgid16,sys_ni_syscall,sys32_getgid16) /* old getgid16 syscall*/ +SYSCALL(sys_signal,sys_signal,sys32_signal_wrapper) +SYSCALL(sys_geteuid16,sys_ni_syscall,sys32_geteuid16) /* old geteuid16 syscall */ +SYSCALL(sys_getegid16,sys_ni_syscall,sys32_getegid16) /* 50 old 
getegid16 syscall */ +SYSCALL(sys_acct,sys_acct,sys32_acct_wrapper) +SYSCALL(sys_umount,sys_umount,sys32_umount_wrapper) +NI_SYSCALL /* old lock syscall */ +SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl_wrapper) +SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl_wrapper) /* 55 */ +NI_SYSCALL /* intel mpx syscall */ +SYSCALL(sys_setpgid,sys_setpgid,sys32_setpgid_wrapper) +NI_SYSCALL /* old ulimit syscall */ +NI_SYSCALL /* old uname syscall */ +SYSCALL(sys_umask,sys_umask,sys32_umask_wrapper) /* 60 */ +SYSCALL(sys_chroot,sys_chroot,sys32_chroot_wrapper) +SYSCALL(sys_ustat,sys_ustat,sys32_ustat_wrapper) +SYSCALL(sys_dup2,sys_dup2,sys32_dup2_wrapper) +SYSCALL(sys_getppid,sys_getppid,sys_getppid) +SYSCALL(sys_getpgrp,sys_getpgrp,sys_getpgrp) /* 65 */ +SYSCALL(sys_setsid,sys_setsid,sys_setsid) +SYSCALL(sys_sigaction,sys_sigaction,sys32_sigaction_wrapper) +NI_SYSCALL /* old sgetmask syscall*/ +NI_SYSCALL /* old ssetmask syscall*/ +SYSCALL(sys_setreuid16,sys_ni_syscall,sys32_setreuid16_wrapper) /* old setreuid16 syscall */ +SYSCALL(sys_setregid16,sys_ni_syscall,sys32_setregid16_wrapper) /* old setregid16 syscall */ +SYSCALL(sys_sigsuspend,sys_sigsuspend,sys_sigsuspend_wrapper) +SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper) +SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper) +SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */ +SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper) +SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage_wrapper) +SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday_wrapper) +SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday_wrapper) +SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper) /* 80 old getgroups16 syscall */ +SYSCALL(sys_setgroups16,sys_ni_syscall,sys32_setgroups16_wrapper) /* old setgroups16 syscall */ +NI_SYSCALL /* old select syscall */ +SYSCALL(sys_symlink,sys_symlink,sys32_symlink_wrapper) +NI_SYSCALL /* old 
lstat syscall */ +SYSCALL(sys_readlink,sys_readlink,sys32_readlink_wrapper) /* 85 */ +SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper) +SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper) +SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper) +SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */ +SYSCALL(sys_old_mmap,sys_old_mmap,old32_mmap_wrapper) /* 90 */ +SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper) +SYSCALL(sys_truncate,sys_truncate,sys32_truncate_wrapper) +SYSCALL(sys_ftruncate,sys_ftruncate,sys32_ftruncate_wrapper) +SYSCALL(sys_fchmod,sys_fchmod,sys32_fchmod_wrapper) +SYSCALL(sys_fchown16,sys_ni_syscall,sys32_fchown16_wrapper) /* 95 old fchown16 syscall*/ +SYSCALL(sys_getpriority,sys_getpriority,sys32_getpriority_wrapper) +SYSCALL(sys_setpriority,sys_setpriority,sys32_setpriority_wrapper) +NI_SYSCALL /* old profil syscall */ +SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs_wrapper) +SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs_wrapper) /* 100 */ +NI_SYSCALL /* ioperm for i386 */ +SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall_wrapper) +SYSCALL(sys_syslog,sys_syslog,sys32_syslog_wrapper) +SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer_wrapper) +SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer_wrapper) /* 105 */ +SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat_wrapper) +SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat_wrapper) +SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat_wrapper) +NI_SYSCALL /* old uname syscall */ +SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,sys32_lookup_dcookie_wrapper) /* 110 */ +SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup) +NI_SYSCALL /* old "idle" system call */ +NI_SYSCALL /* vm86old for i386 */ +SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4_wrapper) +SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper) /* 115 */ +SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper) +SYSCALL(sys_s390_ipc,sys_s390_ipc,sys32_ipc_wrapper) 
+SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper) +SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn) +SYSCALL(sys_clone,sys_clone,sys_clone_wrapper) /* 120 */ +SYSCALL(sys_setdomainname,sys_setdomainname,sys32_setdomainname_wrapper) +SYSCALL(sys_newuname,sys_newuname,sys32_newuname_wrapper) +NI_SYSCALL /* modify_ldt for i386 */ +SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex_wrapper) +SYSCALL(sys_mprotect,sys_mprotect,sys32_mprotect_wrapper) /* 125 */ +SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask_wrapper) +NI_SYSCALL /* old "create module" */ +SYSCALL(sys_init_module,sys_init_module,sys_init_module_wrapper) +SYSCALL(sys_delete_module,sys_delete_module,sys_delete_module_wrapper) +NI_SYSCALL /* 130: old get_kernel_syms */ +SYSCALL(sys_quotactl,sys_quotactl,sys32_quotactl_wrapper) +SYSCALL(sys_getpgid,sys_getpgid,sys32_getpgid_wrapper) +SYSCALL(sys_fchdir,sys_fchdir,sys32_fchdir_wrapper) +SYSCALL(sys_bdflush,sys_bdflush,sys32_bdflush_wrapper) +SYSCALL(sys_sysfs,sys_sysfs,sys32_sysfs_wrapper) /* 135 */ +SYSCALL(sys_personality,sys_s390_personality,sys32_personality_wrapper) +NI_SYSCALL /* for afs_syscall */ +SYSCALL(sys_setfsuid16,sys_ni_syscall,sys32_setfsuid16_wrapper) /* old setfsuid16 syscall */ +SYSCALL(sys_setfsgid16,sys_ni_syscall,sys32_setfsgid16_wrapper) /* old setfsgid16 syscall */ +SYSCALL(sys_llseek,sys_llseek,sys32_llseek_wrapper) /* 140 */ +SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper) +SYSCALL(sys_select,sys_select,compat_sys_select_wrapper) +SYSCALL(sys_flock,sys_flock,sys32_flock_wrapper) +SYSCALL(sys_msync,sys_msync,sys32_msync_wrapper) +SYSCALL(sys_readv,sys_readv,compat_sys_readv_wrapper) /* 145 */ +SYSCALL(sys_writev,sys_writev,compat_sys_writev_wrapper) +SYSCALL(sys_getsid,sys_getsid,sys32_getsid_wrapper) +SYSCALL(sys_fdatasync,sys_fdatasync,sys32_fdatasync_wrapper) +SYSCALL(sys_sysctl,sys_sysctl,sys32_sysctl_wrapper) +SYSCALL(sys_mlock,sys_mlock,sys32_mlock_wrapper) /* 150 */ 
+SYSCALL(sys_munlock,sys_munlock,sys32_munlock_wrapper) +SYSCALL(sys_mlockall,sys_mlockall,sys32_mlockall_wrapper) +SYSCALL(sys_munlockall,sys_munlockall,sys_munlockall) +SYSCALL(sys_sched_setparam,sys_sched_setparam,sys32_sched_setparam_wrapper) +SYSCALL(sys_sched_getparam,sys_sched_getparam,sys32_sched_getparam_wrapper) /* 155 */ +SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,sys32_sched_setscheduler_wrapper) +SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,sys32_sched_getscheduler_wrapper) +SYSCALL(sys_sched_yield,sys_sched_yield,sys_sched_yield) +SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,sys32_sched_get_priority_max_wrapper) +SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,sys32_sched_get_priority_min_wrapper) /* 160 */ +SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,sys32_sched_rr_get_interval_wrapper) +SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep_wrapper) +SYSCALL(sys_mremap,sys_mremap,sys32_mremap_wrapper) +SYSCALL(sys_setresuid16,sys_ni_syscall,sys32_setresuid16_wrapper) /* old setresuid16 syscall */ +SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper) /* 165 old getresuid16 syscall */ +NI_SYSCALL /* for vm86 */ +NI_SYSCALL /* old sys_query_module */ +SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper) +NI_SYSCALL /* old nfsservctl */ +SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper) /* 170 old setresgid16 syscall */ +SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper) /* old getresgid16 syscall */ +SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper) +SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,sys32_rt_sigreturn) +SYSCALL(sys_rt_sigaction,sys_rt_sigaction,sys32_rt_sigaction_wrapper) +SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,sys32_rt_sigprocmask_wrapper) /* 175 */ +SYSCALL(sys_rt_sigpending,sys_rt_sigpending,sys32_rt_sigpending_wrapper) +SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait_wrapper) 
+SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,sys32_rt_sigqueueinfo_wrapper) +SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend_wrapper) +SYSCALL(sys_pread64,sys_pread64,sys32_pread64_wrapper) /* 180 */ +SYSCALL(sys_pwrite64,sys_pwrite64,sys32_pwrite64_wrapper) +SYSCALL(sys_chown16,sys_ni_syscall,sys32_chown16_wrapper) /* old chown16 syscall */ +SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper) +SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper) +SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */ +SYSCALL(sys_sigaltstack,sys_sigaltstack,sys32_sigaltstack_wrapper) +SYSCALL(sys_sendfile,sys_sendfile64,sys32_sendfile_wrapper) +NI_SYSCALL /* streams1 */ +NI_SYSCALL /* streams2 */ +SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */ +SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit_wrapper) +SYSCALL(sys_mmap2,sys_mmap2,sys32_mmap2_wrapper) +SYSCALL(sys_truncate64,sys_ni_syscall,sys32_truncate64_wrapper) +SYSCALL(sys_ftruncate64,sys_ni_syscall,sys32_ftruncate64_wrapper) +SYSCALL(sys_stat64,sys_ni_syscall,sys32_stat64_wrapper) /* 195 */ +SYSCALL(sys_lstat64,sys_ni_syscall,sys32_lstat64_wrapper) +SYSCALL(sys_fstat64,sys_ni_syscall,sys32_fstat64_wrapper) +SYSCALL(sys_lchown,sys_lchown,sys32_lchown_wrapper) +SYSCALL(sys_getuid,sys_getuid,sys_getuid) +SYSCALL(sys_getgid,sys_getgid,sys_getgid) /* 200 */ +SYSCALL(sys_geteuid,sys_geteuid,sys_geteuid) +SYSCALL(sys_getegid,sys_getegid,sys_getegid) +SYSCALL(sys_setreuid,sys_setreuid,sys32_setreuid_wrapper) +SYSCALL(sys_setregid,sys_setregid,sys32_setregid_wrapper) +SYSCALL(sys_getgroups,sys_getgroups,sys32_getgroups_wrapper) /* 205 */ +SYSCALL(sys_setgroups,sys_setgroups,sys32_setgroups_wrapper) +SYSCALL(sys_fchown,sys_fchown,sys32_fchown_wrapper) +SYSCALL(sys_setresuid,sys_setresuid,sys32_setresuid_wrapper) +SYSCALL(sys_getresuid,sys_getresuid,sys32_getresuid_wrapper) +SYSCALL(sys_setresgid,sys_setresgid,sys32_setresgid_wrapper) /* 210 */ 
+SYSCALL(sys_getresgid,sys_getresgid,sys32_getresgid_wrapper) +SYSCALL(sys_chown,sys_chown,sys32_chown_wrapper) +SYSCALL(sys_setuid,sys_setuid,sys32_setuid_wrapper) +SYSCALL(sys_setgid,sys_setgid,sys32_setgid_wrapper) +SYSCALL(sys_setfsuid,sys_setfsuid,sys32_setfsuid_wrapper) /* 215 */ +SYSCALL(sys_setfsgid,sys_setfsgid,sys32_setfsgid_wrapper) +SYSCALL(sys_pivot_root,sys_pivot_root,sys32_pivot_root_wrapper) +SYSCALL(sys_mincore,sys_mincore,sys32_mincore_wrapper) +SYSCALL(sys_madvise,sys_madvise,sys32_madvise_wrapper) +SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */ +SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper) +SYSCALL(sys_readahead,sys_readahead,sys32_readahead_wrapper) +SYSCALL(sys_sendfile64,sys_ni_syscall,sys32_sendfile64_wrapper) +SYSCALL(sys_setxattr,sys_setxattr,sys32_setxattr_wrapper) +SYSCALL(sys_lsetxattr,sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ +SYSCALL(sys_fsetxattr,sys_fsetxattr,sys32_fsetxattr_wrapper) +SYSCALL(sys_getxattr,sys_getxattr,sys32_getxattr_wrapper) +SYSCALL(sys_lgetxattr,sys_lgetxattr,sys32_lgetxattr_wrapper) +SYSCALL(sys_fgetxattr,sys_fgetxattr,sys32_fgetxattr_wrapper) +SYSCALL(sys_listxattr,sys_listxattr,sys32_listxattr_wrapper) /* 230 */ +SYSCALL(sys_llistxattr,sys_llistxattr,sys32_llistxattr_wrapper) +SYSCALL(sys_flistxattr,sys_flistxattr,sys32_flistxattr_wrapper) +SYSCALL(sys_removexattr,sys_removexattr,sys32_removexattr_wrapper) +SYSCALL(sys_lremovexattr,sys_lremovexattr,sys32_lremovexattr_wrapper) +SYSCALL(sys_fremovexattr,sys_fremovexattr,sys32_fremovexattr_wrapper) /* 235 */ +SYSCALL(sys_gettid,sys_gettid,sys_gettid) +SYSCALL(sys_tkill,sys_tkill,sys_tkill_wrapper) +SYSCALL(sys_futex,sys_futex,compat_sys_futex_wrapper) +SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,sys32_sched_setaffinity_wrapper) +SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrapper) /* 240 */ +SYSCALL(sys_tgkill,sys_tgkill,sys_tgkill_wrapper) +NI_SYSCALL /* reserved for TUX 
*/ +SYSCALL(sys_io_setup,sys_io_setup,sys32_io_setup_wrapper) +SYSCALL(sys_io_destroy,sys_io_destroy,sys32_io_destroy_wrapper) +SYSCALL(sys_io_getevents,sys_io_getevents,sys32_io_getevents_wrapper) /* 245 */ +SYSCALL(sys_io_submit,sys_io_submit,sys32_io_submit_wrapper) +SYSCALL(sys_io_cancel,sys_io_cancel,sys32_io_cancel_wrapper) +SYSCALL(sys_exit_group,sys_exit_group,sys32_exit_group_wrapper) +SYSCALL(sys_epoll_create,sys_epoll_create,sys_epoll_create_wrapper) +SYSCALL(sys_epoll_ctl,sys_epoll_ctl,sys_epoll_ctl_wrapper) /* 250 */ +SYSCALL(sys_epoll_wait,sys_epoll_wait,sys_epoll_wait_wrapper) +SYSCALL(sys_set_tid_address,sys_set_tid_address,sys32_set_tid_address_wrapper) +SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,sys32_fadvise64_wrapper) +SYSCALL(sys_timer_create,sys_timer_create,sys32_timer_create_wrapper) +SYSCALL(sys_timer_settime,sys_timer_settime,sys32_timer_settime_wrapper) /* 255 */ +SYSCALL(sys_timer_gettime,sys_timer_gettime,sys32_timer_gettime_wrapper) +SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,sys32_timer_getoverrun_wrapper) +SYSCALL(sys_timer_delete,sys_timer_delete,sys32_timer_delete_wrapper) +SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper) +SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ +SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) +SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) +NI_SYSCALL /* reserved for vserver */ +SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) +SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) +SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) +SYSCALL(sys_remap_file_pages,sys_remap_file_pages,sys32_remap_file_pages_wrapper) +NI_SYSCALL /* 268 sys_mbind */ +NI_SYSCALL /* 269 sys_get_mempolicy */ +NI_SYSCALL /* 270 sys_set_mempolicy */ +SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open_wrapper) 
+SYSCALL(sys_mq_unlink,sys_mq_unlink,sys32_mq_unlink_wrapper) +SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend_wrapper) +SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper) +SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */ +SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper) +SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load_wrapper) +SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key_wrapper) +SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key_wrapper) +SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl_wrapper) /* 280 */ +SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid_wrapper) +SYSCALL(sys_ioprio_set,sys_ioprio_set,sys_ioprio_set_wrapper) +SYSCALL(sys_ioprio_get,sys_ioprio_get,sys_ioprio_get_wrapper) +SYSCALL(sys_inotify_init,sys_inotify_init,sys_inotify_init) +SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,sys_inotify_add_watch_wrapper) /* 285 */ +SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,sys_inotify_rm_watch_wrapper) +NI_SYSCALL /* 287 sys_migrate_pages */ +SYSCALL(sys_openat,sys_openat,compat_sys_openat_wrapper) +SYSCALL(sys_mkdirat,sys_mkdirat,sys_mkdirat_wrapper) +SYSCALL(sys_mknodat,sys_mknodat,sys_mknodat_wrapper) /* 290 */ +SYSCALL(sys_fchownat,sys_fchownat,sys_fchownat_wrapper) +SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper) +SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat64_wrapper) +SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper) +SYSCALL(sys_renameat,sys_renameat,sys_renameat_wrapper) /* 295 */ +SYSCALL(sys_linkat,sys_linkat,sys_linkat_wrapper) +SYSCALL(sys_symlinkat,sys_symlinkat,sys_symlinkat_wrapper) +SYSCALL(sys_readlinkat,sys_readlinkat,sys_readlinkat_wrapper) +SYSCALL(sys_fchmodat,sys_fchmodat,sys_fchmodat_wrapper) +SYSCALL(sys_faccessat,sys_faccessat,sys_faccessat_wrapper) /* 300 */ +SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper) 
+SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper) +SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper) +SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list_wrapper) +SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list_wrapper) +SYSCALL(sys_splice,sys_splice,sys_splice_wrapper) +SYSCALL(sys_sync_file_range,sys_sync_file_range,sys_sync_file_range_wrapper) +SYSCALL(sys_tee,sys_tee,sys_tee_wrapper) +SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice_wrapper) +NI_SYSCALL /* 310 sys_move_pages */ +SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper) +SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait_wrapper) +SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper) +SYSCALL(sys_s390_fallocate,sys_fallocate,sys_fallocate_wrapper) +SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */ +SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd_wrapper) +NI_SYSCALL /* 317 old sys_timer_fd */ +SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper) +SYSCALL(sys_timerfd_create,sys_timerfd_create,sys_timerfd_create_wrapper) +SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime_wrapper) /* 320 */ +SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime_wrapper) +SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4_wrapper) +SYSCALL(sys_eventfd2,sys_eventfd2,sys_eventfd2_wrapper) +SYSCALL(sys_inotify_init1,sys_inotify_init1,sys_inotify_init1_wrapper) +SYSCALL(sys_pipe2,sys_pipe2,sys_pipe2_wrapper) /* 325 */ +SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper) +SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper) +SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper) +SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper) +SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo_wrapper) /* 330 */ +SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper) 
+SYSCALL(sys_fanotify_init,sys_fanotify_init,sys_fanotify_init_wrapper)
+SYSCALL(sys_fanotify_mark,sys_fanotify_mark,sys_fanotify_mark_wrapper)
+SYSCALL(sys_prlimit64,sys_prlimit64,sys_prlimit64_wrapper)
+SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrapper) /* 335 */
+SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at_wrapper)
+SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper)
+SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper)
+SYSCALL(sys_setns,sys_setns,sys_setns_wrapper)
+SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */
+SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
new file mode 100644
index 00000000..2a94b774
--- /dev/null
+++ b/arch/s390/kernel/sysinfo.c
@@ -0,0 +1,470 @@
+/*
+ * Copyright IBM Corp. 2001, 2009
+ * Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include <asm/cpcmd.h>
+#include <asm/topology.h>
+
+/* Sigh, math-emu. Don't ask. */
+#include <asm/sfp-util.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+
+/*
+ * Issue stsi(NULL, 0, 0, 0). Returns -ENOSYS unchanged, otherwise the top
+ * four bits of the result; proc_read_sysinfo() uses that value as the
+ * highest level it may query.
+ */
+static inline int stsi_0(void)
+{
+	int rc = stsi(NULL, 0, 0, 0);
+	return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28);
+}
+
+/*
+ * Query stsi 1.1.1 into @info and append the machine identification
+ * lines to @page starting at offset @len.
+ *
+ * Returns the new length, or @len unchanged if stsi reports -ENOSYS.
+ * The text fields are run through EBCASC() before printing.
+ *
+ * NOTE(review): runs of blanks inside the format strings may have been
+ * squeezed in this copy of the file - confirm the column alignment
+ * against the original source before relying on exact spacing.
+ */
+static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len)
+{
+	if (stsi(info, 1, 1, 1) == -ENOSYS)
+		return len;
+
+	EBCASC(info->manufacturer, sizeof(info->manufacturer));
+	EBCASC(info->type, sizeof(info->type));
+	EBCASC(info->model, sizeof(info->model));
+	EBCASC(info->sequence, sizeof(info->sequence));
+	EBCASC(info->plant, sizeof(info->plant));
+	EBCASC(info->model_capacity, sizeof(info->model_capacity));
+	EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
+	EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
+	len += sprintf(page + len, "Manufacturer: %-16.16s\n",
+		       info->manufacturer);
+	len += sprintf(page + len, "Type: %-4.4s\n",
+		       info->type);
+	if (info->model[0] != '\0')
+		/*
+		 * Sigh: the model field has been renamed with System z9
+		 * to model_capacity and a new model field has been added
+		 * after the plant field. To avoid confusing older programs
+		 * the "Model:" prints "model_capacity model" or just
+		 * "model_capacity" if the model string is empty.
+		 */
+		len += sprintf(page + len,
+			       "Model: %-16.16s %-16.16s\n",
+			       info->model_capacity, info->model);
+	else
+		len += sprintf(page + len, "Model: %-16.16s\n",
+			       info->model_capacity);
+	len += sprintf(page + len, "Sequence Code: %-16.16s\n",
+		       info->sequence);
+	len += sprintf(page + len, "Plant: %-4.4s\n",
+		       info->plant);
+	len += sprintf(page + len, "Model Capacity: %-16.16s %08u\n",
+		       info->model_capacity, *(u32 *) info->model_cap_rating);
+	/* The permanent/temporary capacity lines are only printed when set. */
+	if (info->model_perm_cap[0] != '\0')
+		len += sprintf(page + len,
+			       "Model Perm. Capacity: %-16.16s %08u\n",
+			       info->model_perm_cap,
+			       *(u32 *) info->model_perm_cap_rating);
+	if (info->model_temp_cap[0] != '\0')
+		len += sprintf(page + len,
+			       "Model Temp. Capacity: %-16.16s %08u\n",
+			       info->model_temp_cap,
+			       *(u32 *) info->model_temp_cap_rating);
+	if (info->cai) {
+		len += sprintf(page + len,
+			       "Capacity Adj. Ind.: %d\n",
+			       info->cai);
+		len += sprintf(page + len, "Capacity Ch. Reason: %d\n",
+			       info->ccr);
+	}
+	return len;
+}
+
+/*
+ * Append the CPU topology lines to @page at offset @len and return the
+ * new length. Only a newline is added when !MACHINE_HAS_TOPOLOGY.
+ *
+ * The first call probes stsi 15.1.x from x = 6 downwards and stops at the
+ * first x that does not return -ENOSYS; the result stays in the static
+ * max_mnest and is reused by later calls.
+ */
+static int stsi_15_1_x(struct sysinfo_15_1_x *info, char *page, int len)
+{
+	static int max_mnest;
+	int i, rc;
+
+	len += sprintf(page + len, "\n");
+	if (!MACHINE_HAS_TOPOLOGY)
+		return len;
+	if (max_mnest) {
+		stsi(info, 15, 1, max_mnest);
+	} else {
+		for (max_mnest = 6; max_mnest > 1; max_mnest--) {
+			rc = stsi(info, 15, 1, max_mnest);
+			if (rc != -ENOSYS)
+				break;
+		}
+	}
+	len += sprintf(page + len, "CPU Topology HW: ");
+	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+		len += sprintf(page + len, " %d", info->mag[i]);
+	len += sprintf(page + len, "\n");
+#ifdef CONFIG_SCHED_MC
+	/* Second line: same fields after store_topology() refilled @info. */
+	store_topology(info);
+	len += sprintf(page + len, "CPU Topology SW: ");
+	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+		len += sprintf(page + len, " %d", info->mag[i]);
+	len += sprintf(page + len, "\n");
+#endif
+	return len;
+}
+
+/*
+ * Query stsi 1.2.2 into @info and append the CPU count, capability and
+ * adjustment lines to @page at offset @len.
+ *
+ * Returns the new length, or @len unchanged if stsi reports -ENOSYS.
+ */
+static int stsi_1_2_2(struct sysinfo_1_2_2 *info, char *page, int len)
+{
+	struct sysinfo_1_2_2_extension *ext;
+	int i;
+
+	if (stsi(info, 1, 2, 2) == -ENOSYS)
+		return len;
+	/* The extension block sits acc_offset bytes after the start of info. */
+	ext = (struct sysinfo_1_2_2_extension *)
+		((unsigned long) info + info->acc_offset);
+
+	len += sprintf(page + len, "CPUs Total: %d\n",
+		       info->cpus_total);
+	len += sprintf(page + len, "CPUs Configured: %d\n",
+		       info->cpus_configured);
+	len += sprintf(page + len, "CPUs Standby: %d\n",
+		       info->cpus_standby);
+	len += sprintf(page + len, "CPUs Reserved: %d\n",
+		       info->cpus_reserved);
+
+	if (info->format == 1) {
+		/*
+		 * Sigh 2. According to the specification the alternate
+		 * capability field is a 32 bit floating point number
+		 * if the higher order 8 bits are not zero. Printing
+		 * a floating point number in the kernel is a no-no,
+		 * always print the number as 32 bit unsigned integer.
+		 * The user-space needs to know about the strange
+		 * encoding of the alternate cpu capability.
+		 */
+		len += sprintf(page + len, "Capability: %u %u\n",
+			       info->capability, ext->alt_capability);
+		for (i = 2; i <= info->cpus_total; i++)
+			len += sprintf(page + len,
+				       "Adjustment %02d-way: %u %u\n",
+				       i, info->adjustment[i-2],
+				       ext->alt_adjustment[i-2]);
+
+	} else {
+		len += sprintf(page + len, "Capability: %u\n",
+			       info->capability);
+		for (i = 2; i <= info->cpus_total; i++)
+			len += sprintf(page + len,
+				       "Adjustment %02d-way: %u\n",
+				       i, info->adjustment[i-2]);
+	}
+
+	if (info->secondary_capability != 0)
+		len += sprintf(page + len, "Secondary Capability: %d\n",
+			       info->secondary_capability);
+	return len;
+}
+
+/*
+ * Query stsi 2.2.2 into @info and append the LPAR lines to @page at
+ * offset @len.
+ *
+ * Returns the new length, or @len unchanged if stsi reports -ENOSYS.
+ */
+static int stsi_2_2_2(struct sysinfo_2_2_2 *info, char *page, int len)
+{
+	if (stsi(info, 2, 2, 2) == -ENOSYS)
+		return len;
+
+	EBCASC(info->name, sizeof(info->name));
+
+	len += sprintf(page + len, "\n");
+	len += sprintf(page + len, "LPAR Number: %d\n",
+		       info->lpar_number);
+
+	/* One word per characteristics flag that is set, all on one line. */
+	len += sprintf(page + len, "LPAR Characteristics: ");
+	if (info->characteristics & LPAR_CHAR_DEDICATED)
+		len += sprintf(page + len, "Dedicated ");
+	if (info->characteristics & LPAR_CHAR_SHARED)
+		len += sprintf(page + len, "Shared ");
+	if (info->characteristics & LPAR_CHAR_LIMITED)
+		len += sprintf(page + len, "Limited ");
+	len += sprintf(page + len, "\n");
+
+	len += sprintf(page + len, "LPAR Name: %-8.8s\n",
+		       info->name);
+
+	len += sprintf(page + len, "LPAR Adjustment: %d\n",
+		       info->caf);
+
+	len += sprintf(page + len, "LPAR CPUs Total: %d\n",
+		       info->cpus_total);
+	len += sprintf(page + len, "LPAR CPUs Configured: %d\n",
+		       info->cpus_configured);
+	len += sprintf(page + len, "LPAR CPUs Standby: %d\n",
+		       info->cpus_standby);
+	len += sprintf(page + len, "LPAR CPUs Reserved: %d\n",
+		       info->cpus_reserved);
+	len += sprintf(page + len, "LPAR CPUs Dedicated: %d\n",
+		       info->cpus_dedicated);
+	len += sprintf(page + len, "LPAR CPUs Shared: %d\n",
+		       info->cpus_shared);
+	return len;
+}
+
+/*
+ * Query stsi 3.2.2 into @info and append one group of lines per entry of
+ * info->vm[] (info->count entries) to @page at offset @len.
+ *
+ * Returns the new length, or @len unchanged if stsi reports -ENOSYS.
+ */
+static int stsi_3_2_2(struct sysinfo_3_2_2 *info, char *page, int len)
+{
+	int i;
+
+	if (stsi(info, 3, 2, 2) == -ENOSYS)
+		return len;
+	for (i = 0; i < info->count; i++) {
+		EBCASC(info->vm[i].name, sizeof(info->vm[i].name));
+		EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi));
+		len += sprintf(page + len, "\n");
+		len += sprintf(page + len, "VM%02d Name: %-8.8s\n",
+			       i, info->vm[i].name);
+		len += sprintf(page + len, "VM%02d Control Program: %-16.16s\n",
+			       i, info->vm[i].cpi);
+
+		len += sprintf(page + len, "VM%02d Adjustment: %d\n",
+			       i, info->vm[i].caf);
+
+		len += sprintf(page + len, "VM%02d CPUs Total: %d\n",
+			       i, info->vm[i].cpus_total);
+		len += sprintf(page + len, "VM%02d CPUs Configured: %d\n",
+			       i, info->vm[i].cpus_configured);
+		len += sprintf(page + len, "VM%02d CPUs Standby: %d\n",
+			       i, info->vm[i].cpus_standby);
+		len += sprintf(page + len, "VM%02d CPUs Reserved: %d\n",
+			       i, info->vm[i].cpus_reserved);
+	}
+	return len;
+}
+
+/*
+ * Read handler for the "sysinfo" proc entry: fills @page with the output
+ * of the stsi_* helpers permitted by the level from stsi_0() and returns
+ * the number of bytes written.
+ *
+ * One zeroed scratch page is allocated and reused as the stsi buffer for
+ * every helper; 0 is returned if that allocation fails. @start, @off,
+ * @count, @eof and @data are not used here.
+ */
+static int proc_read_sysinfo(char *page, char **start,
+			     off_t off, int count,
+			     int *eof, void *data)
+{
+	unsigned long info = get_zeroed_page(GFP_KERNEL);
+	int level, len;
+
+	if (!info)
+		return 0;
+
+	len = 0;
+	level = stsi_0();
+	if (level >= 1)
+		len = stsi_1_1_1((struct sysinfo_1_1_1 *) info, page, len);
+
+	if (level >= 1)
+		len = stsi_15_1_x((struct sysinfo_15_1_x *) info, page, len);
+
+	if (level >= 1)
+		len = stsi_1_2_2((struct sysinfo_1_2_2 *) info, page, len);
+
+	if (level >= 2)
+		len = stsi_2_2_2((struct sysinfo_2_2_2 *) info, page, len);
+
+	if (level >= 3)
+		len = stsi_3_2_2((struct sysinfo_3_2_2 *) info, page, len);
+
+	free_page(info);
+	return len;
+}
+
+/* Register the read-only (0444) "sysinfo" proc entry at device_initcall time. */
+static __init int create_proc_sysinfo(void)
+{
+	create_proc_read_entry("sysinfo", 0444, NULL,
+			       proc_read_sysinfo, NULL);
+	return 0;
+}
+device_initcall(create_proc_sysinfo);
+
+/*
+ * Service levels interface.
+ */ + +static DECLARE_RWSEM(service_level_sem); +static LIST_HEAD(service_level_list); + +int register_service_level(struct service_level *slr) +{ + struct service_level *ptr; + + down_write(&service_level_sem); + list_for_each_entry(ptr, &service_level_list, list) + if (ptr == slr) { + up_write(&service_level_sem); + return -EEXIST; + } + list_add_tail(&slr->list, &service_level_list); + up_write(&service_level_sem); + return 0; +} +EXPORT_SYMBOL(register_service_level); + +int unregister_service_level(struct service_level *slr) +{ + struct service_level *ptr, *next; + int rc = -ENOENT; + + down_write(&service_level_sem); + list_for_each_entry_safe(ptr, next, &service_level_list, list) { + if (ptr != slr) + continue; + list_del(&ptr->list); + rc = 0; + break; + } + up_write(&service_level_sem); + return rc; +} +EXPORT_SYMBOL(unregister_service_level); + +static void *service_level_start(struct seq_file *m, loff_t *pos) +{ + down_read(&service_level_sem); + return seq_list_start(&service_level_list, *pos); +} + +static void *service_level_next(struct seq_file *m, void *p, loff_t *pos) +{ + return seq_list_next(p, &service_level_list, pos); +} + +static void service_level_stop(struct seq_file *m, void *p) +{ + up_read(&service_level_sem); +} + +static int service_level_show(struct seq_file *m, void *p) +{ + struct service_level *slr; + + slr = list_entry(p, struct service_level, list); + slr->seq_print(m, slr); + return 0; +} + +static const struct seq_operations service_level_seq_ops = { + .start = service_level_start, + .next = service_level_next, + .stop = service_level_stop, + .show = service_level_show +}; + +static int service_level_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &service_level_seq_ops); +} + +static const struct file_operations service_level_ops = { + .open = service_level_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static void service_level_vm_print(struct seq_file *m, + struct 
service_level *slr) +{ + char *query_buffer, *str; + + query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA); + if (!query_buffer) + return; + cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL); + str = strchr(query_buffer, '\n'); + if (str) + *str = 0; + seq_printf(m, "VM: %s\n", query_buffer); + kfree(query_buffer); +} + +static struct service_level service_level_vm = { + .seq_print = service_level_vm_print +}; + +static __init int create_proc_service_level(void) +{ + proc_create("service_levels", 0, NULL, &service_level_ops); + if (MACHINE_IS_VM) + register_service_level(&service_level_vm); + return 0; +} +subsys_initcall(create_proc_service_level); + +/* + * Bogomips calculation based on cpu capability. + */ +int get_cpu_capability(unsigned int *capability) +{ + struct sysinfo_1_2_2 *info; + int rc; + + info = (void *) get_zeroed_page(GFP_KERNEL); + if (!info) + return -ENOMEM; + rc = stsi(info, 1, 2, 2); + if (rc == -ENOSYS) + goto out; + rc = 0; + *capability = info->capability; +out: + free_page((unsigned long) info); + return rc; +} + +/* + * CPU capability might have changed. Therefore recalculate loops_per_jiffy. + */ +void s390_adjust_jiffies(void) +{ + struct sysinfo_1_2_2 *info; + const unsigned int fmil = 0x4b189680; /* 1e7 as 32-bit float. */ + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_EX; + unsigned int capability; + + info = (void *) get_zeroed_page(GFP_KERNEL); + if (!info) + return; + + if (stsi(info, 1, 2, 2) != -ENOSYS) { + /* + * Major sigh. The cpu capability encoding is "special". + * If the first 9 bits of info->capability are 0 then it + * is a 32 bit unsigned integer in the range 0 .. 2^23. + * If the first 9 bits are != 0 then it is a 32 bit float. + * In addition a lower value indicates a proportionally + * higher cpu capacity. Bogomips are the other way round. + * To get to a halfway suitable number we divide 1e7 + * by the cpu capability number. Yes, that means a floating + * point division .. 
math-emu here we come :-) + */ + FP_UNPACK_SP(SA, &fmil); + if ((info->capability >> 23) == 0) + FP_FROM_INT_S(SB, (long) info->capability, 64, long); + else + FP_UNPACK_SP(SB, &info->capability); + FP_DIV_S(SR, SA, SB); + FP_TO_INT_S(capability, SR, 32, 0); + } else + /* + * Really old machine without stsi block for basic + * cpu information. Report 42.0 bogomips. + */ + capability = 42; + loops_per_jiffy = capability * (500000/HZ); + free_page((unsigned long) info); +} + +/* + * calibrate the delay loop + */ +void __cpuinit calibrate_delay(void) +{ + s390_adjust_jiffies(); + /* Print the good old Bogomips line .. */ + printk(KERN_DEBUG "Calibrating delay loop (skipped)... " + "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); +} diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c new file mode 100644 index 00000000..d4e1cb1d --- /dev/null +++ b/arch/s390/kernel/time.c @@ -0,0 +1,1789 @@ +/* + * arch/s390/kernel/time.c + * Time of day based timer functions. + * + * S390 version + * Copyright IBM Corp. 
1999, 2008 + * Author(s): Hartmut Penner (hp@de.ibm.com), + * Martin Schwidefsky (schwidefsky@de.ibm.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + * + * Derived from "arch/i386/kernel/time.c" + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + */ + +#define KMSG_COMPONENT "time" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel_stat.h> +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/cpu.h> +#include <linux/stop_machine.h> +#include <linux/time.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/types.h> +#include <linux/profile.h> +#include <linux/timex.h> +#include <linux/notifier.h> +#include <linux/clocksource.h> +#include <linux/clockchips.h> +#include <linux/gfp.h> +#include <linux/kprobes.h> +#include <asm/uaccess.h> +#include <asm/delay.h> +#include <asm/div64.h> +#include <asm/vdso.h> +#include <asm/irq.h> +#include <asm/irq_regs.h> +#include <asm/timer.h> +#include <asm/etr.h> +#include <asm/cio.h> +#include "entry.h" + +/* change this if you have some constant time drift */ +#define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) +#define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12) + +u64 sched_clock_base_cc = -1; /* Force to data section. */ +EXPORT_SYMBOL_GPL(sched_clock_base_cc); + +static DEFINE_PER_CPU(struct clock_event_device, comparators); + +/* + * Scheduler clock - returns current time in nanosec units. 
+ */ +unsigned long long notrace __kprobes sched_clock(void) +{ + return (get_clock_monotonic() * 125) >> 9; +} + +/* + * Monotonic_clock - returns # of nanoseconds passed since time_init() + */ +unsigned long long monotonic_clock(void) +{ + return sched_clock(); +} +EXPORT_SYMBOL(monotonic_clock); + +void tod_to_timeval(__u64 todval, struct timespec *xt) +{ + unsigned long long sec; + + sec = todval >> 12; + do_div(sec, 1000000); + xt->tv_sec = sec; + todval -= (sec * 1000000) << 12; + xt->tv_nsec = ((todval * 1000) >> 12); +} +EXPORT_SYMBOL(tod_to_timeval); + +void clock_comparator_work(void) +{ + struct clock_event_device *cd; + + S390_lowcore.clock_comparator = -1ULL; + set_clock_comparator(S390_lowcore.clock_comparator); + cd = &__get_cpu_var(comparators); + cd->event_handler(cd); +} + +/* + * Fixup the clock comparator. + */ +static void fixup_clock_comparator(unsigned long long delta) +{ + /* If nobody is waiting there's nothing to fix. */ + if (S390_lowcore.clock_comparator == -1ULL) + return; + S390_lowcore.clock_comparator += delta; + set_clock_comparator(S390_lowcore.clock_comparator); +} + +static int s390_next_ktime(ktime_t expires, + struct clock_event_device *evt) +{ + struct timespec ts; + u64 nsecs; + + ts.tv_sec = ts.tv_nsec = 0; + monotonic_to_bootbased(&ts); + nsecs = ktime_to_ns(ktime_add(timespec_to_ktime(ts), expires)); + do_div(nsecs, 125); + S390_lowcore.clock_comparator = sched_clock_base_cc + (nsecs << 9); + set_clock_comparator(S390_lowcore.clock_comparator); + return 0; +} + +static void s390_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ +} + +/* + * Set up lowcore and control register of the current cpu to + * enable TOD clock and clock comparator interrupts. 
+ */ +void init_cpu_timer(void) +{ + struct clock_event_device *cd; + int cpu; + + S390_lowcore.clock_comparator = -1ULL; + set_clock_comparator(S390_lowcore.clock_comparator); + + cpu = smp_processor_id(); + cd = &per_cpu(comparators, cpu); + cd->name = "comparator"; + cd->features = CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_KTIME; + cd->mult = 16777; + cd->shift = 12; + cd->min_delta_ns = 1; + cd->max_delta_ns = LONG_MAX; + cd->rating = 400; + cd->cpumask = cpumask_of(cpu); + cd->set_next_ktime = s390_next_ktime; + cd->set_mode = s390_set_mode; + + clockevents_register_device(cd); + + /* Enable clock comparator timer interrupt. */ + __ctl_set_bit(0,11); + + /* Always allow the timing alert external interrupt. */ + __ctl_set_bit(0, 4); +} + +static void clock_comparator_interrupt(struct ext_code ext_code, + unsigned int param32, + unsigned long param64) +{ + kstat_cpu(smp_processor_id()).irqs[EXTINT_CLK]++; + if (S390_lowcore.clock_comparator == -1ULL) + set_clock_comparator(S390_lowcore.clock_comparator); +} + +static void etr_timing_alert(struct etr_irq_parm *); +static void stp_timing_alert(struct stp_irq_parm *); + +static void timing_alert_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + kstat_cpu(smp_processor_id()).irqs[EXTINT_TLA]++; + if (param32 & 0x00c40000) + etr_timing_alert((struct etr_irq_parm *) ¶m32); + if (param32 & 0x00038000) + stp_timing_alert((struct stp_irq_parm *) ¶m32); +} + +static void etr_reset(void); +static void stp_reset(void); + +void read_persistent_clock(struct timespec *ts) +{ + tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); +} + +void read_boot_clock(struct timespec *ts) +{ + tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); +} + +static cycle_t read_tod_clock(struct clocksource *cs) +{ + return get_clock(); +} + +static struct clocksource clocksource_tod = { + .name = "tod", + .rating = 400, + .read = read_tod_clock, + .mask = -1ULL, + .mult = 1000, + .shift = 12, + .flags = 
/*
 * Publish the current timekeeping state in the vdso data page.
 *
 * @wall_time: current wall clock time
 * @wtm:       wall-to-monotonic offset
 * @clock:     the clocksource in use
 * @mult:      multiplier stored as ntp_mult
 *
 * Nothing is published unless @clock is the TOD clocksource.
 */
void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
			struct clocksource *clock, u32 mult)
{
	if (clock != &clocksource_tod)
		return;

	/* Make userspace gettimeofday spin until we're done. */
	++vdso_data->tb_update_count;
	/* Order the counter increment before the data stores below. */
	smp_wmb();
	vdso_data->xtime_tod_stamp = clock->cycle_last;
	vdso_data->xtime_clock_sec = wall_time->tv_sec;
	vdso_data->xtime_clock_nsec = wall_time->tv_nsec;
	vdso_data->wtom_clock_sec = wtm->tv_sec;
	vdso_data->wtom_clock_nsec = wtm->tv_nsec;
	vdso_data->ntp_mult = mult;
	/* Order the data stores before the second counter increment. */
	smp_wmb();
	/*
	 * Second increment marks the update as finished.
	 * NOTE(review): presumably readers retry while the counter is odd or
	 * has changed - confirm against the vdso gettimeofday code.
	 */
	++vdso_data->tb_update_count;
}
/*
 * The synchronous get_clock function. It always writes the current clock
 * value to the clock pointer. The return value tells whether that value
 * can be trusted to be in sync with the external time source:
 *   0        the clock is in sync,
 *   -ENOSYS  neither an ETR nor an STP interface is available
 *            (neither CLOCK_SYNC_HAS_ETR nor CLOCK_SYNC_HAS_STP is set),
 *   -EACCES  an interface is available but neither CLOCK_SYNC_ETR nor
 *            CLOCK_SYNC_STP is set,
 *   -EAGAIN  otherwise, i.e. the clock is not in sync with the external
 *            reference right now.
 */
int get_sync_clock(unsigned long long *clock)
{
	atomic_t *sw_ptr;
	unsigned int sw0, sw1;

	/*
	 * Sample the per-cpu sync word before and after reading the clock.
	 * The value is only reported as in sync if the word did not change
	 * in between and has the in-sync bit 2^31 set.
	 */
	sw_ptr = &get_cpu_var(clock_sync_word);
	sw0 = atomic_read(sw_ptr);
	*clock = get_clock();
	sw1 = atomic_read(sw_ptr);
	put_cpu_var(clock_sync_word);
	if (sw0 == sw1 && (sw0 & 0x80000000U))
		/* Success: time is in sync. */
		return 0;
	/* Not in sync: work out which error describes the reason. */
	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) &&
	    !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
		return -ENOSYS;
	if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) &&
	    !test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
		return -EACCES;
	return -EAGAIN;
}
EXPORT_SYMBOL(get_sync_clock);
+ */ +static int etr_port0_online; +static int etr_port1_online; +static int etr_steai_available; + +static int __init early_parse_etr(char *p) +{ + if (strncmp(p, "off", 3) == 0) + etr_port0_online = etr_port1_online = 0; + else if (strncmp(p, "port0", 5) == 0) + etr_port0_online = 1; + else if (strncmp(p, "port1", 5) == 0) + etr_port1_online = 1; + else if (strncmp(p, "on", 2) == 0) + etr_port0_online = etr_port1_online = 1; + return 0; +} +early_param("etr", early_parse_etr); + +enum etr_event { + ETR_EVENT_PORT0_CHANGE, + ETR_EVENT_PORT1_CHANGE, + ETR_EVENT_PORT_ALERT, + ETR_EVENT_SYNC_CHECK, + ETR_EVENT_SWITCH_LOCAL, + ETR_EVENT_UPDATE, +}; + +/* + * Valid bit combinations of the eacr register are (x = don't care): + * e0 e1 dp p0 p1 ea es sl + * 0 0 x 0 0 0 0 0 initial, disabled state + * 0 0 x 0 1 1 0 0 port 1 online + * 0 0 x 1 0 1 0 0 port 0 online + * 0 0 x 1 1 1 0 0 both ports online + * 0 1 x 0 1 1 0 0 port 1 online and usable, ETR or PPS mode + * 0 1 x 0 1 1 0 1 port 1 online, usable and ETR mode + * 0 1 x 0 1 1 1 0 port 1 online, usable, PPS mode, in-sync + * 0 1 x 0 1 1 1 1 port 1 online, usable, ETR mode, in-sync + * 0 1 x 1 1 1 0 0 both ports online, port 1 usable + * 0 1 x 1 1 1 1 0 both ports online, port 1 usable, PPS mode, in-sync + * 0 1 x 1 1 1 1 1 both ports online, port 1 usable, ETR mode, in-sync + * 1 0 x 1 0 1 0 0 port 0 online and usable, ETR or PPS mode + * 1 0 x 1 0 1 0 1 port 0 online, usable and ETR mode + * 1 0 x 1 0 1 1 0 port 0 online, usable, PPS mode, in-sync + * 1 0 x 1 0 1 1 1 port 0 online, usable, ETR mode, in-sync + * 1 0 x 1 1 1 0 0 both ports online, port 0 usable + * 1 0 x 1 1 1 1 0 both ports online, port 0 usable, PPS mode, in-sync + * 1 0 x 1 1 1 1 1 both ports online, port 0 usable, ETR mode, in-sync + * 1 1 x 1 1 1 1 0 both ports online & usable, ETR, in-sync + * 1 1 x 1 1 1 1 1 both ports online & usable, ETR, in-sync + */ +static struct etr_eacr etr_eacr; +static u64 etr_tolec; /* time of last eacr update */ 
+static struct etr_aib etr_port0; +static int etr_port0_uptodate; +static struct etr_aib etr_port1; +static int etr_port1_uptodate; +static unsigned long etr_events; +static struct timer_list etr_timer; + +static void etr_timeout(unsigned long dummy); +static void etr_work_fn(struct work_struct *work); +static DEFINE_MUTEX(etr_work_mutex); +static DECLARE_WORK(etr_work, etr_work_fn); + +/* + * Reset ETR attachment. + */ +static void etr_reset(void) +{ + etr_eacr = (struct etr_eacr) { + .e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0, + .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0, + .es = 0, .sl = 0 }; + if (etr_setr(&etr_eacr) == 0) { + etr_tolec = get_clock(); + set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags); + if (etr_port0_online && etr_port1_online) + set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + } else if (etr_port0_online || etr_port1_online) { + pr_warning("The real or virtual hardware system does " + "not provide an ETR interface\n"); + etr_port0_online = etr_port1_online = 0; + } +} + +static int __init etr_init(void) +{ + struct etr_aib aib; + + if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) + return 0; + time_init_wq(); + /* Check if this machine has the steai instruction. */ + if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0) + etr_steai_available = 1; + setup_timer(&etr_timer, etr_timeout, 0UL); + if (etr_port0_online) { + set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); + queue_work(time_sync_wq, &etr_work); + } + if (etr_port1_online) { + set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); + queue_work(time_sync_wq, &etr_work); + } + return 0; +} + +arch_initcall(etr_init); + +/* + * Two sorts of ETR machine checks. The architecture reads: + * "When a machine-check niterruption occurs and if a switch-to-local or + * ETR-sync-check interrupt request is pending but disabled, this pending + * disabled interruption request is indicated and is cleared". 
+ * Which means that we can get etr_switch_to_local events from the machine + * check handler although the interruption condition is disabled. Lovely.. + */ + +/* + * Switch to local machine check. This is called when the last usable + * ETR port goes inactive. After switch to local the clock is not in sync. + */ +void etr_switch_to_local(void) +{ + if (!etr_eacr.sl) + return; + disable_sync_clock(NULL); + if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) { + etr_eacr.es = etr_eacr.sl = 0; + etr_setr(&etr_eacr); + queue_work(time_sync_wq, &etr_work); + } +} + +/* + * ETR sync check machine check. This is called when the ETR OTE and the + * local clock OTE are farther apart than the ETR sync check tolerance. + * After a ETR sync check the clock is not in sync. The machine check + * is broadcasted to all cpus at the same time. + */ +void etr_sync_check(void) +{ + if (!etr_eacr.es) + return; + disable_sync_clock(NULL); + if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) { + etr_eacr.es = 0; + etr_setr(&etr_eacr); + queue_work(time_sync_wq, &etr_work); + } +} + +/* + * ETR timing alert. There are two causes: + * 1) port state change, check the usability of the port + * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the + * sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3) + * or ETR-data word 4 (edf4) has changed. + */ +static void etr_timing_alert(struct etr_irq_parm *intparm) +{ + if (intparm->pc0) + /* ETR port 0 state change. */ + set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); + if (intparm->pc1) + /* ETR port 1 state change. */ + set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); + if (intparm->eai) + /* + * ETR port alert on either port 0, 1 or both. + * Both ports are not up-to-date now. 
+ */ + set_bit(ETR_EVENT_PORT_ALERT, &etr_events); + queue_work(time_sync_wq, &etr_work); +} + +static void etr_timeout(unsigned long dummy) +{ + set_bit(ETR_EVENT_UPDATE, &etr_events); + queue_work(time_sync_wq, &etr_work); +} + +/* + * Check if the etr mode is pss. + */ +static inline int etr_mode_is_pps(struct etr_eacr eacr) +{ + return eacr.es && !eacr.sl; +} + +/* + * Check if the etr mode is etr. + */ +static inline int etr_mode_is_etr(struct etr_eacr eacr) +{ + return eacr.es && eacr.sl; +} + +/* + * Check if the port can be used for TOD synchronization. + * For PPS mode the port has to receive OTEs. For ETR mode + * the port has to receive OTEs, the ETR stepping bit has to + * be zero and the validity bits for data frame 1, 2, and 3 + * have to be 1. + */ +static int etr_port_valid(struct etr_aib *aib, int port) +{ + unsigned int psc; + + /* Check that this port is receiving OTEs. */ + if (aib->tsp == 0) + return 0; + + psc = port ? aib->esw.psc1 : aib->esw.psc0; + if (psc == etr_lpsc_pps_mode) + return 1; + if (psc == etr_lpsc_operational_step) + return !aib->esw.y && aib->slsw.v1 && + aib->slsw.v2 && aib->slsw.v3; + return 0; +} + +/* + * Check if two ports are on the same network. + */ +static int etr_compare_network(struct etr_aib *aib1, struct etr_aib *aib2) +{ + // FIXME: any other fields we have to compare? + return aib1->edf1.net_id == aib2->edf1.net_id; +} + +/* + * Wrapper for etr_stei that converts physical port states + * to logical port states to be consistent with the output + * of stetr (see etr_psc vs. etr_lpsc). + */ +static void etr_steai_cv(struct etr_aib *aib, unsigned int func) +{ + BUG_ON(etr_steai(aib, func) != 0); + /* Convert port state to logical port state. 
*/ + if (aib->esw.psc0 == 1) + aib->esw.psc0 = 2; + else if (aib->esw.psc0 == 0 && aib->esw.p == 0) + aib->esw.psc0 = 1; + if (aib->esw.psc1 == 1) + aib->esw.psc1 = 2; + else if (aib->esw.psc1 == 0 && aib->esw.p == 1) + aib->esw.psc1 = 1; +} + +/* + * Check if the aib a2 is still connected to the same attachment as + * aib a1, the etv values differ by one and a2 is valid. + */ +static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p) +{ + int state_a1, state_a2; + + /* Paranoia check: e0/e1 should better be the same. */ + if (a1->esw.eacr.e0 != a2->esw.eacr.e0 || + a1->esw.eacr.e1 != a2->esw.eacr.e1) + return 0; + + /* Still connected to the same etr ? */ + state_a1 = p ? a1->esw.psc1 : a1->esw.psc0; + state_a2 = p ? a2->esw.psc1 : a2->esw.psc0; + if (state_a1 == etr_lpsc_operational_step) { + if (state_a2 != etr_lpsc_operational_step || + a1->edf1.net_id != a2->edf1.net_id || + a1->edf1.etr_id != a2->edf1.etr_id || + a1->edf1.etr_pn != a2->edf1.etr_pn) + return 0; + } else if (state_a2 != etr_lpsc_pps_mode) + return 0; + + /* The ETV value of a2 needs to be ETV of a1 + 1. */ + if (a1->edf2.etv + 1 != a2->edf2.etv) + return 0; + + if (!etr_port_valid(a2, p)) + return 0; + + return 1; +} + +struct clock_sync_data { + atomic_t cpus; + int in_sync; + unsigned long long fixup_cc; + int etr_port; + struct etr_aib *etr_aib; +}; + +static void clock_sync_cpu(struct clock_sync_data *sync) +{ + atomic_dec(&sync->cpus); + enable_sync_clock(); + /* + * This looks like a busy wait loop but it isn't. etr_sync_cpus + * is called on all other cpus while the TOD clocks is stopped. + * __udelay will stop the cpu on an enabled wait psw until the + * TOD is running again. + */ + while (sync->in_sync == 0) { + __udelay(1); + /* + * A different cpu changes *in_sync. Therefore use + * barrier() to force memory access. + */ + barrier(); + } + if (sync->in_sync != 1) + /* Didn't work. Clear per-cpu in sync bit again. 
/*
 * Sync the TOD clock using the port referred to by aibp. This port
 * has to be enabled and the other port has to be disabled. The
 * last eacr update has to be more than 1.6 seconds in the past.
 *
 * This is the stop_machine() callback used by etr_sync_clock_stop() and
 * therefore runs on every online cpu. The first cpu to get here becomes
 * the master and sets the clock; all other cpus act as slaves and wait in
 * clock_sync_cpu() for the master to publish the result.
 *
 * @data: the struct clock_sync_data shared by all cpus
 *
 * Returns 0 on the slaves. On the master it returns 0 if the clock was
 * set and verified, -EAGAIN otherwise.
 */
static int etr_sync_clock(void *data)
{
	/* 0 while no master has been elected for the current round. */
	static int first;
	unsigned long long clock, old_clock, delay, delta;
	struct clock_sync_data *etr_sync;
	struct etr_aib *sync_port, *aib;
	int port;
	int rc;

	etr_sync = data;

	/* Whoever swaps the 0 out of "first" is the master. */
	if (xchg(&first, 1) == 1) {
		/* Slave */
		clock_sync_cpu(etr_sync);
		return 0;
	}

	/*
	 * Wait until all other cpus entered the sync function. Each slave
	 * decrements the counter in clock_sync_cpu().
	 */
	while (atomic_read(&etr_sync->cpus) != 0)
		cpu_relax();

	port = etr_sync->etr_port;
	aib = etr_sync->etr_aib;
	sync_port = (port == 0) ? &etr_port0 : &etr_port1;
	enable_sync_clock();

	/* Set clock to next OTE. */
	__ctl_set_bit(14, 21);
	__ctl_set_bit(0, 29);
	/* The new clock value carries etv + 1 in its upper 32 bits. */
	clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
	old_clock = get_clock();
	if (set_clock(clock) == 0) {
		__udelay(1);	/* Wait for the clock to start. */
		__ctl_clear_bit(0, 29);
		__ctl_clear_bit(14, 21);
		/* Re-read the aib so it can be verified below. */
		etr_stetr(aib);
		/* Adjust Linux timing variables. */
		delay = (unsigned long long)
			(aib->edf2.etv - sync_port->edf2.etv) << 32;
		delta = adjust_time(old_clock, clock, delay);
		/* The slaves apply the same comparator fixup later. */
		etr_sync->fixup_cc = delta;
		fixup_clock_comparator(delta);
		/* Verify that the clock is properly set. */
		if (!etr_aib_follows(sync_port, aib, port)) {
			/* Didn't work. */
			disable_sync_clock(NULL);
			etr_sync->in_sync = -EAGAIN;
			rc = -EAGAIN;
		} else {
			/* Writing in_sync releases the waiting slaves. */
			etr_sync->in_sync = 1;
			rc = 0;
		}
	} else {
		/* Could not set the clock ?!? */
		__ctl_clear_bit(0, 29);
		__ctl_clear_bit(14, 21);
		disable_sync_clock(NULL);
		etr_sync->in_sync = -EAGAIN;
		rc = -EAGAIN;
	}
	/* Re-arm the master election for the next round. */
	xchg(&first, 0);
	return rc;
}
+ */ + etr_tolec = get_clock(); + eacr.p1 = etr_port1_online; + if (!eacr.p1) + eacr.e1 = 0; + etr_port1_uptodate = 0; + } + clear_bit(ETR_EVENT_UPDATE, &etr_events); + return eacr; +} + +/* + * Set up a timer that expires after the etr_tolec + 1.6 seconds if + * one of the ports needs an update. + */ +static void etr_set_tolec_timeout(unsigned long long now) +{ + unsigned long micros; + + if ((!etr_eacr.p0 || etr_port0_uptodate) && + (!etr_eacr.p1 || etr_port1_uptodate)) + return; + micros = (now > etr_tolec) ? ((now - etr_tolec) >> 12) : 0; + micros = (micros > 1600000) ? 0 : 1600000 - micros; + mod_timer(&etr_timer, jiffies + (micros * HZ) / 1000000 + 1); +} + +/* + * Set up a time that expires after 1/2 second. + */ +static void etr_set_sync_timeout(void) +{ + mod_timer(&etr_timer, jiffies + HZ/2); +} + +/* + * Update the aib information for one or both ports. + */ +static struct etr_eacr etr_handle_update(struct etr_aib *aib, + struct etr_eacr eacr) +{ + /* With both ports disabled the aib information is useless. */ + if (!eacr.e0 && !eacr.e1) + return eacr; + + /* Update port0 or port1 with aib stored in etr_work_fn. */ + if (aib->esw.q == 0) { + /* Information for port 0 stored. */ + if (eacr.p0 && !etr_port0_uptodate) { + etr_port0 = *aib; + if (etr_port0_online) + etr_port0_uptodate = 1; + } + } else { + /* Information for port 1 stored. */ + if (eacr.p1 && !etr_port1_uptodate) { + etr_port1 = *aib; + if (etr_port0_online) + etr_port1_uptodate = 1; + } + } + + /* + * Do not try to get the alternate port aib if the clock + * is not in sync yet. + */ + if (!eacr.es || !check_sync_clock()) + return eacr; + + /* + * If steai is available we can get the information about + * the other port immediately. If only stetr is available the + * data-port bit toggle has to be used. 
+ */ + if (etr_steai_available) { + if (eacr.p0 && !etr_port0_uptodate) { + etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0); + etr_port0_uptodate = 1; + } + if (eacr.p1 && !etr_port1_uptodate) { + etr_steai_cv(&etr_port1, ETR_STEAI_PORT_1); + etr_port1_uptodate = 1; + } + } else { + /* + * One port was updated above, if the other + * port is not uptodate toggle dp bit. + */ + if ((eacr.p0 && !etr_port0_uptodate) || + (eacr.p1 && !etr_port1_uptodate)) + eacr.dp ^= 1; + else + eacr.dp = 0; + } + return eacr; +} + +/* + * Write new etr control register if it differs from the current one. + * etr_tolec is also refreshed when a port enable bit is cleared or the + * data port bit changes. Nothing is returned. + */ +static void etr_update_eacr(struct etr_eacr eacr) +{ + int dp_changed; + + if (memcmp(&etr_eacr, &eacr, sizeof(eacr)) == 0) + /* No change, return. */ + return; + /* + * Disabling an active port or changing the data port + * bit can/will cause a change in the data port. + */ + dp_changed = etr_eacr.e0 > eacr.e0 || etr_eacr.e1 > eacr.e1 || + (etr_eacr.dp ^ eacr.dp) != 0; + etr_eacr = eacr; + etr_setr(&etr_eacr); + if (dp_changed) + etr_tolec = get_clock(); +} + +/* + * ETR work. In this function you'll find the main logic. In + * particular this is the only function that calls etr_update_eacr(), + * it "controls" the etr control register. + */ +static void etr_work_fn(struct work_struct *work) +{ + unsigned long long now; + struct etr_eacr eacr; + struct etr_aib aib; + int sync_port; + + /* prevent multiple execution. */ + mutex_lock(&etr_work_mutex); + + /* Create working copy of etr_eacr. */ + eacr = etr_eacr; + + /* Check for the different events and their immediate effects. */ + eacr = etr_handle_events(eacr); + + /* Check if ETR is supposed to be active. */ + eacr.ea = eacr.p0 || eacr.p1; + if (!eacr.ea) { + /* Both ports offline. Reset everything.
*/ + eacr.dp = eacr.es = eacr.sl = 0; + on_each_cpu(disable_sync_clock, NULL, 1); + del_timer_sync(&etr_timer); + etr_update_eacr(eacr); + goto out_unlock; + } + + /* Store aib to get the current ETR status word. */ + BUG_ON(etr_stetr(&aib) != 0); + etr_port0.esw = etr_port1.esw = aib.esw; /* Copy status word. */ + now = get_clock(); + + /* + * Update the port information if the last stepping port change + * or data port change is older than 1.6 seconds. + */ + if (now >= etr_tolec + (1600000 << 12)) + eacr = etr_handle_update(&aib, eacr); + + /* + * Select ports to enable. The preferred synchronization mode is PPS. + * If a port can be enabled depends on a number of things: + * 1) The port needs to be online and uptodate. A port is not + * disabled just because it is not uptodate, but it is only + * enabled if it is uptodate. + * 2) The port needs to have the same mode (pps / etr). + * 3) The port needs to be usable -> etr_port_valid() == 1 + * 4) To enable the second port the clock needs to be in sync. + * 5) If both ports are useable and are ETR ports, the network id + * has to be the same. + * The eacr.sl bit is used to indicate etr mode vs. pps mode. + */ + if (eacr.p0 && aib.esw.psc0 == etr_lpsc_pps_mode) { + eacr.sl = 0; + eacr.e0 = 1; + if (!etr_mode_is_pps(etr_eacr)) + eacr.es = 0; + if (!eacr.es || !eacr.p1 || aib.esw.psc1 != etr_lpsc_pps_mode) + eacr.e1 = 0; + // FIXME: uptodate checks ? + else if (etr_port0_uptodate && etr_port1_uptodate) + eacr.e1 = 1; + sync_port = (etr_port0_uptodate && + etr_port_valid(&etr_port0, 0)) ? 0 : -1; + } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) { + eacr.sl = 0; + eacr.e0 = 0; + eacr.e1 = 1; + if (!etr_mode_is_pps(etr_eacr)) + eacr.es = 0; + sync_port = (etr_port1_uptodate && + etr_port_valid(&etr_port1, 1)) ? 
1 : -1; + } else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) { + eacr.sl = 1; + eacr.e0 = 1; + if (!etr_mode_is_etr(etr_eacr)) + eacr.es = 0; + if (!eacr.es || !eacr.p1 || + aib.esw.psc1 != etr_lpsc_operational_alt) + eacr.e1 = 0; + else if (etr_port0_uptodate && etr_port1_uptodate && + etr_compare_network(&etr_port0, &etr_port1)) + eacr.e1 = 1; + sync_port = (etr_port0_uptodate && + etr_port_valid(&etr_port0, 0)) ? 0 : -1; + } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) { + eacr.sl = 1; + eacr.e0 = 0; + eacr.e1 = 1; + if (!etr_mode_is_etr(etr_eacr)) + eacr.es = 0; + sync_port = (etr_port1_uptodate && + etr_port_valid(&etr_port1, 1)) ? 1 : -1; + } else { + /* Both ports not usable. */ + eacr.es = eacr.sl = 0; + sync_port = -1; + } + + /* + * If the clock is in sync just update the eacr and return. + * If there is no valid sync port wait for a port update. + */ + if ((eacr.es && check_sync_clock()) || sync_port < 0) { + etr_update_eacr(eacr); + etr_set_tolec_timeout(now); + goto out_unlock; + } + + /* + * Prepare control register for clock syncing + * (reset data port bit, set sync check control. + */ + eacr.dp = 0; + eacr.es = 1; + + /* + * Update eacr and try to synchronize the clock. If the update + * of eacr caused a stepping port switch (or if we have to + * assume that a stepping port switch has occurred) or the + * clock syncing failed, reset the sync check control bit + * and set up a timer to try again after 0.5 seconds + */ + etr_update_eacr(eacr); + if (now < etr_tolec + (1600000 << 12) || + etr_sync_clock_stop(&aib, sync_port) != 0) { + /* Sync failed. Try again in 1/2 second. 
*/ + eacr.es = 0; + etr_update_eacr(eacr); + etr_set_sync_timeout(); + } else + etr_set_tolec_timeout(now); +out_unlock: + mutex_unlock(&etr_work_mutex); +} + +/* + * Sysfs interface functions + */ +static struct bus_type etr_subsys = { + .name = "etr", + .dev_name = "etr", +}; + +static struct device etr_port0_dev = { + .id = 0, + .bus = &etr_subsys, +}; + +static struct device etr_port1_dev = { + .id = 1, + .bus = &etr_subsys, +}; + +/* + * ETR subsys attributes + */ +static ssize_t etr_stepping_port_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%i\n", etr_port0.esw.p); +} + +static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); + +static ssize_t etr_stepping_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + char *mode_str; + + if (etr_mode_is_pps(etr_eacr)) + mode_str = "pps"; + else if (etr_mode_is_etr(etr_eacr)) + mode_str = "etr"; + else + mode_str = "local"; + return sprintf(buf, "%s\n", mode_str); +} + +static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); + +/* + * ETR port attributes + */ +static inline struct etr_aib *etr_aib_from_dev(struct device *dev) +{ + if (dev == &etr_port0_dev) + return etr_port0_online ? &etr_port0 : NULL; + else + return etr_port1_online ? &etr_port1 : NULL; +} + +static ssize_t etr_online_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned int online; + + online = (dev == &etr_port0_dev) ? 
etr_port0_online : etr_port1_online; + return sprintf(buf, "%i\n", online); +} + +static ssize_t etr_online_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int value; + + value = simple_strtoul(buf, NULL, 0); + if (value != 0 && value != 1) + return -EINVAL; + if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) + return -EOPNOTSUPP; + mutex_lock(&clock_sync_mutex); + if (dev == &etr_port0_dev) { + if (etr_port0_online == value) + goto out; /* Nothing to do. */ + etr_port0_online = value; + if (etr_port0_online && etr_port1_online) + set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); + queue_work(time_sync_wq, &etr_work); + } else { + if (etr_port1_online == value) + goto out; /* Nothing to do. */ + etr_port1_online = value; + if (etr_port0_online && etr_port1_online) + set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); + queue_work(time_sync_wq, &etr_work); + } +out: + mutex_unlock(&clock_sync_mutex); + return count; +} + +static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store); + +static ssize_t etr_stepping_control_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? + etr_eacr.e0 : etr_eacr.e1); +} + +static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); + +static ssize_t etr_mode_code_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!etr_port0_online && !etr_port1_online) + /* Status word is not uptodate if both ports are offline. */ + return -ENODATA; + return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? 
+ etr_port0.esw.psc0 : etr_port0.esw.psc1); +} + +static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL); + +static ssize_t etr_untuned_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v1) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf1.u); +} + +static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL); + +static ssize_t etr_network_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v1) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf1.net_id); +} + +static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL); + +static ssize_t etr_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v1) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf1.etr_id); +} + +static DEVICE_ATTR(id, 0400, etr_id_show, NULL); + +static ssize_t etr_port_number_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v1) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf1.etr_pn); +} + +static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL); + +static ssize_t etr_coupled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v3) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf3.c); +} + +static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL); + +static ssize_t etr_local_time_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v3) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf3.blto); +} + +static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL); + +static 
ssize_t etr_utc_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v3) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf3.buo); +} + +static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); + +static struct device_attribute *etr_port_attributes[] = { + &dev_attr_online, + &dev_attr_stepping_control, + &dev_attr_state_code, + &dev_attr_untuned, + &dev_attr_network, + &dev_attr_id, + &dev_attr_port, + &dev_attr_coupled, + &dev_attr_local_time, + &dev_attr_utc_offset, + NULL +}; + +static int __init etr_register_port(struct device *dev) +{ + struct device_attribute **attr; + int rc; + + rc = device_register(dev); + if (rc) + goto out; + for (attr = etr_port_attributes; *attr; attr++) { + rc = device_create_file(dev, *attr); + if (rc) + goto out_unreg; + } + return 0; +out_unreg: + for (; attr >= etr_port_attributes; attr--) + device_remove_file(dev, *attr); + device_unregister(dev); +out: + return rc; +} + +static void __init etr_unregister_port(struct device *dev) +{ + struct device_attribute **attr; + + for (attr = etr_port_attributes; *attr; attr++) + device_remove_file(dev, *attr); + device_unregister(dev); +} + +static int __init etr_init_sysfs(void) +{ + int rc; + + rc = subsys_system_register(&etr_subsys, NULL); + if (rc) + goto out; + rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port); + if (rc) + goto out_unreg_subsys; + rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode); + if (rc) + goto out_remove_stepping_port; + rc = etr_register_port(&etr_port0_dev); + if (rc) + goto out_remove_stepping_mode; + rc = etr_register_port(&etr_port1_dev); + if (rc) + goto out_remove_port0; + return 0; + +out_remove_port0: + etr_unregister_port(&etr_port0_dev); +out_remove_stepping_mode: + device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode); +out_remove_stepping_port: + device_remove_file(etr_subsys.dev_root, 
&dev_attr_stepping_port); +out_unreg_subsys: + bus_unregister(&etr_subsys); +out: + return rc; +} + +device_initcall(etr_init_sysfs); + +/* + * Server Time Protocol (STP) code. + */ +static int stp_online; +static struct stp_sstpi stp_info; +static void *stp_page; + +static void stp_work_fn(struct work_struct *work); +static DEFINE_MUTEX(stp_work_mutex); +static DECLARE_WORK(stp_work, stp_work_fn); +static struct timer_list stp_timer; + +static int __init early_parse_stp(char *p) +{ + if (strncmp(p, "off", 3) == 0) + stp_online = 0; + else if (strncmp(p, "on", 2) == 0) + stp_online = 1; + return 0; +} +early_param("stp", early_parse_stp); + +/* + * Reset STP attachment. + */ +static void __init stp_reset(void) +{ + int rc; + + stp_page = (void *) get_zeroed_page(GFP_ATOMIC); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + if (rc == 0) + set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); + else if (stp_online) { + pr_warning("The real or virtual hardware system does " + "not provide an STP interface\n"); + free_page((unsigned long) stp_page); + stp_page = NULL; + stp_online = 0; + } +} + +static void stp_timeout(unsigned long dummy) +{ + queue_work(time_sync_wq, &stp_work); +} + +static int __init stp_init(void) +{ + if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return 0; + setup_timer(&stp_timer, stp_timeout, 0UL); + time_init_wq(); + if (!stp_online) + return 0; + queue_work(time_sync_wq, &stp_work); + return 0; +} + +arch_initcall(stp_init); + +/* + * STP timing alert. There are three causes: + * 1) timing status change + * 2) link availability change + * 3) time control parameter change + * In all three cases we are only interested in the clock source state. + * If a STP clock source is now available use it. + */ +static void stp_timing_alert(struct stp_irq_parm *intparm) +{ + if (intparm->tsc || intparm->lac || intparm->tcpc) + queue_work(time_sync_wq, &stp_work); +} + +/* + * STP sync check machine check. 
This is called when the timing state + * changes from the synchronized state to the unsynchronized state. + * After a STP sync check the clock is not in sync. The machine check + * is broadcasted to all cpus at the same time. + */ +void stp_sync_check(void) +{ + disable_sync_clock(NULL); + queue_work(time_sync_wq, &stp_work); +} + +/* + * STP island condition machine check. This is called when an attached + * server attempts to communicate over an STP link and the servers + * have matching CTN ids and have a valid stratum-1 configuration + * but the configurations do not match. + */ +void stp_island_check(void) +{ + disable_sync_clock(NULL); + queue_work(time_sync_wq, &stp_work); +} + + +static int stp_sync_clock(void *data) +{ + static int first; + unsigned long long old_clock, delta; + struct clock_sync_data *stp_sync; + int rc; + + stp_sync = data; + + if (xchg(&first, 1) == 1) { + /* Slave */ + clock_sync_cpu(stp_sync); + return 0; + } + + /* Wait until all other cpus entered the sync function. */ + while (atomic_read(&stp_sync->cpus) != 0) + cpu_relax(); + + enable_sync_clock(); + + rc = 0; + if (stp_info.todoff[0] || stp_info.todoff[1] || + stp_info.todoff[2] || stp_info.todoff[3] || + stp_info.tmd != 2) { + old_clock = get_clock(); + rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0); + if (rc == 0) { + delta = adjust_time(old_clock, get_clock(), 0); + fixup_clock_comparator(delta); + rc = chsc_sstpi(stp_page, &stp_info, + sizeof(struct stp_sstpi)); + if (rc == 0 && stp_info.tmd != 2) + rc = -EAGAIN; + } + } + if (rc) { + disable_sync_clock(NULL); + stp_sync->in_sync = -EAGAIN; + } else + stp_sync->in_sync = 1; + xchg(&first, 0); + return 0; +} + +/* + * STP work. Check for the STP state and take over the clock + * synchronization if the STP clock source is usable. + */ +static void stp_work_fn(struct work_struct *work) +{ + struct clock_sync_data stp_sync; + int rc; + + /* prevent multiple execution. 
*/ + mutex_lock(&stp_work_mutex); + + if (!stp_online) { + chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + del_timer_sync(&stp_timer); + goto out_unlock; + } + + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0); + if (rc) + goto out_unlock; + + rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + if (rc || stp_info.c == 0) + goto out_unlock; + + /* Skip synchronization if the clock is already in sync. */ + if (check_sync_clock()) + goto out_unlock; + + memset(&stp_sync, 0, sizeof(stp_sync)); + get_online_cpus(); + atomic_set(&stp_sync.cpus, num_online_cpus() - 1); + stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask); + put_online_cpus(); + + if (!check_sync_clock()) + /* + * There is a usable clock but the synchronization failed. + * Retry after a second. + */ + mod_timer(&stp_timer, jiffies + HZ); + +out_unlock: + mutex_unlock(&stp_work_mutex); +} + +/* + * STP subsys sysfs interface functions + */ +static struct bus_type stp_subsys = { + .name = "stp", + .dev_name = "stp", +}; + +static ssize_t stp_ctn_id_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%016llx\n", + *(unsigned long long *) stp_info.ctnid); +} + +static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); + +static ssize_t stp_ctn_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%i\n", stp_info.ctn); +} + +static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); + +static ssize_t stp_dst_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online || !(stp_info.vbits & 0x2000)) + return -ENODATA; + return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); +} + +static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); + +static ssize_t stp_leap_seconds_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online || !(stp_info.vbits &
0x8000)) + return -ENODATA; + return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); +} + +static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); + +static ssize_t stp_stratum_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); +} + +static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); + +static ssize_t stp_time_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online || !(stp_info.vbits & 0x0800)) + return -ENODATA; + return sprintf(buf, "%i\n", (int) stp_info.tto); +} + +static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); + +static ssize_t stp_time_zone_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online || !(stp_info.vbits & 0x4000)) + return -ENODATA; + return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); +} + +static DEVICE_ATTR(time_zone_offset, 0400, + stp_time_zone_offset_show, NULL); + +static ssize_t stp_timing_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%i\n", stp_info.tmd); +} + +static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); + +static ssize_t stp_timing_state_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%i\n", stp_info.tst); +} + +static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); + +static ssize_t stp_online_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%i\n", stp_online); +} + +static ssize_t stp_online_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int value; + + value = simple_strtoul(buf, NULL, 0); + if (value != 0 && value != 1) + return -EINVAL; + if 
(!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return -EOPNOTSUPP; + mutex_lock(&clock_sync_mutex); + stp_online = value; + if (stp_online) + set_bit(CLOCK_SYNC_STP, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_STP, &clock_sync_flags); + queue_work(time_sync_wq, &stp_work); + mutex_unlock(&clock_sync_mutex); + return count; +} + +/* + * Can't use DEVICE_ATTR because the attribute should be named + * stp/online but dev_attr_online already exists in this file .. + */ +static struct device_attribute dev_attr_stp_online = { + .attr = { .name = "online", .mode = 0600 }, + .show = stp_online_show, + .store = stp_online_store, +}; + +static struct device_attribute *stp_attributes[] = { + &dev_attr_ctn_id, + &dev_attr_ctn_type, + &dev_attr_dst_offset, + &dev_attr_leap_seconds, + &dev_attr_stp_online, + &dev_attr_stratum, + &dev_attr_time_offset, + &dev_attr_time_zone_offset, + &dev_attr_timing_mode, + &dev_attr_timing_state, + NULL +}; + +static int __init stp_init_sysfs(void) +{ + struct device_attribute **attr; + int rc; + + rc = subsys_system_register(&stp_subsys, NULL); + if (rc) + goto out; + for (attr = stp_attributes; *attr; attr++) { + rc = device_create_file(stp_subsys.dev_root, *attr); + if (rc) + goto out_unreg; + } + return 0; +out_unreg: + for (; attr >= stp_attributes; attr--) + device_remove_file(stp_subsys.dev_root, *attr); + bus_unregister(&stp_subsys); +out: + return rc; +} + +device_initcall(stp_init_sysfs); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c new file mode 100644 index 00000000..4f8dc942 --- /dev/null +++ b/arch/s390/kernel/topology.c @@ -0,0 +1,465 @@ +/* + * Copyright IBM Corp. 
2007,2011 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/workqueue.h> +#include <linux/bootmem.h> +#include <linux/cpuset.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include <linux/mm.h> + +#define PTF_HORIZONTAL (0UL) +#define PTF_VERTICAL (1UL) +#define PTF_CHECK (2UL) + +struct mask_info { + struct mask_info *next; + unsigned char id; + cpumask_t mask; +}; + +static int topology_enabled = 1; +static void topology_work_fn(struct work_struct *work); +static struct sysinfo_15_1_x *tl_info; +static void set_topology_timer(void); +static DECLARE_WORK(topology_work, topology_work_fn); +/* topology_lock protects the core linked list */ +static DEFINE_SPINLOCK(topology_lock); + +static struct mask_info core_info; +cpumask_t cpu_core_map[NR_CPUS]; +unsigned char cpu_core_id[NR_CPUS]; + +static struct mask_info book_info; +cpumask_t cpu_book_map[NR_CPUS]; +unsigned char cpu_book_id[NR_CPUS]; + +/* smp_cpu_state_mutex must be held when accessing this array */ +int cpu_polarization[NR_CPUS]; + +static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) +{ + cpumask_t mask; + + cpumask_clear(&mask); + if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) { + cpumask_copy(&mask, cpumask_of(cpu)); + return mask; + } + while (info) { + if (cpumask_test_cpu(cpu, &info->mask)) { + mask = info->mask; + break; + } + info = info->next; + } + if (cpumask_empty(&mask)) + cpumask_copy(&mask, cpumask_of(cpu)); + return mask; +} + +static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, + struct mask_info *book, + struct mask_info *core, + int one_core_per_cpu) +{ + unsigned int cpu; + + for (cpu = find_first_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS); + cpu < TOPOLOGY_CPU_BITS; + cpu = find_next_bit(&tl_cpu->mask[0], 
TOPOLOGY_CPU_BITS, cpu + 1)) + { + unsigned int rcpu; + int lcpu; + + rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin; + lcpu = smp_find_processor_id(rcpu); + if (lcpu >= 0) { + cpumask_set_cpu(lcpu, &book->mask); + cpu_book_id[lcpu] = book->id; + cpumask_set_cpu(lcpu, &core->mask); + if (one_core_per_cpu) { + cpu_core_id[lcpu] = rcpu; + core = core->next; + } else { + cpu_core_id[lcpu] = core->id; + } + cpu_set_polarization(lcpu, tl_cpu->pp); + } + } + return core; +} + +static void clear_masks(void) +{ + struct mask_info *info; + + info = &core_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } + info = &book_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } +} + +static union topology_entry *next_tle(union topology_entry *tle) +{ + if (!tle->nl) + return (union topology_entry *)((struct topology_cpu *)tle + 1); + return (union topology_entry *)((struct topology_container *)tle + 1); +} + +static void __tl_to_cores_generic(struct sysinfo_15_1_x *info) +{ + struct mask_info *core = &core_info; + struct mask_info *book = &book_info; + union topology_entry *tle, *end; + + tle = info->tle; + end = (union topology_entry *)((unsigned long)info + info->length); + while (tle < end) { + switch (tle->nl) { + case 2: + book = book->next; + book->id = tle->container.id; + break; + case 1: + core = core->next; + core->id = tle->container.id; + break; + case 0: + add_cpus_to_mask(&tle->cpu, book, core, 0); + break; + default: + clear_masks(); + return; + } + tle = next_tle(tle); + } +} + +static void __tl_to_cores_z10(struct sysinfo_15_1_x *info) +{ + struct mask_info *core = &core_info; + struct mask_info *book = &book_info; + union topology_entry *tle, *end; + + tle = info->tle; + end = (union topology_entry *)((unsigned long)info + info->length); + while (tle < end) { + switch (tle->nl) { + case 1: + book = book->next; + book->id = tle->container.id; + break; + case 0: + core = add_cpus_to_mask(&tle->cpu, book, 
core, 1); + break; + default: + clear_masks(); + return; + } + tle = next_tle(tle); + } +} + +static void tl_to_cores(struct sysinfo_15_1_x *info) +{ + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + spin_lock_irq(&topology_lock); + clear_masks(); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + __tl_to_cores_z10(info); + break; + default: + __tl_to_cores_generic(info); + } + spin_unlock_irq(&topology_lock); +} + +static void topology_update_polarization_simple(void) +{ + int cpu; + + mutex_lock(&smp_cpu_state_mutex); + for_each_possible_cpu(cpu) + cpu_set_polarization(cpu, POLARIZATION_HRZ); + mutex_unlock(&smp_cpu_state_mutex); +} + +static int ptf(unsigned long fc) +{ + int rc; + + asm volatile( + " .insn rre,0xb9a20000,%1,%1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc) + : "d" (fc) : "cc"); + return rc; +} + +int topology_set_cpu_management(int fc) +{ + int cpu, rc; + + if (!MACHINE_HAS_TOPOLOGY) + return -EOPNOTSUPP; + if (fc) + rc = ptf(PTF_VERTICAL); + else + rc = ptf(PTF_HORIZONTAL); + if (rc) + return -EBUSY; + for_each_possible_cpu(cpu) + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + return rc; +} + +static void update_cpu_core_map(void) +{ + unsigned long flags; + int cpu; + + spin_lock_irqsave(&topology_lock, flags); + for_each_possible_cpu(cpu) { + cpu_core_map[cpu] = cpu_group_map(&core_info, cpu); + cpu_book_map[cpu] = cpu_group_map(&book_info, cpu); + } + spin_unlock_irqrestore(&topology_lock, flags); +} + +void store_topology(struct sysinfo_15_1_x *info) +{ + int rc; + + rc = stsi(info, 15, 1, 3); + if (rc != -ENOSYS) + return; + stsi(info, 15, 1, 2); +} + +int arch_update_cpu_topology(void) +{ + struct sysinfo_15_1_x *info = tl_info; + struct device *dev; + int cpu; + + if (!MACHINE_HAS_TOPOLOGY) { + update_cpu_core_map(); + topology_update_polarization_simple(); + return 0; + } + store_topology(info); + tl_to_cores(info); + update_cpu_core_map(); + for_each_online_cpu(cpu) { + dev = get_cpu_device(cpu); + 
kobject_uevent(&dev->kobj, KOBJ_CHANGE); + } + return 1; +} + +static void topology_work_fn(struct work_struct *work) +{ + rebuild_sched_domains(); +} + +void topology_schedule_update(void) +{ + schedule_work(&topology_work); +} + +static void topology_timer_fn(unsigned long ignored) +{ + if (ptf(PTF_CHECK)) + topology_schedule_update(); + set_topology_timer(); +} + +static struct timer_list topology_timer = + TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0); + +static atomic_t topology_poll = ATOMIC_INIT(0); + +static void set_topology_timer(void) +{ + if (atomic_add_unless(&topology_poll, -1, 0)) + mod_timer(&topology_timer, jiffies + HZ / 10); + else + mod_timer(&topology_timer, jiffies + HZ * 60); +} + +void topology_expect_change(void) +{ + if (!MACHINE_HAS_TOPOLOGY) + return; + /* This is racy, but it doesn't matter since it is just a heuristic. + * Worst case is that we poll in a higher frequency for a bit longer. + */ + if (atomic_read(&topology_poll) > 60) + return; + atomic_add(60, &topology_poll); + set_topology_timer(); +} + +static int __init early_parse_topology(char *p) +{ + if (strncmp(p, "off", 3)) + return 0; + topology_enabled = 0; + return 0; +} +early_param("topology", early_parse_topology); + +static void __init alloc_masks(struct sysinfo_15_1_x *info, + struct mask_info *mask, int offset) +{ + int i, nr_masks; + + nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; + for (i = 0; i < info->mnest - offset; i++) + nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; + nr_masks = max(nr_masks, 1); + for (i = 0; i < nr_masks; i++) { + mask->next = alloc_bootmem(sizeof(struct mask_info)); + mask = mask->next; + } +} + +void __init s390_init_cpu_topology(void) +{ + struct sysinfo_15_1_x *info; + int i; + + if (!MACHINE_HAS_TOPOLOGY) + return; + tl_info = alloc_bootmem_pages(PAGE_SIZE); + info = tl_info; + store_topology(info); + pr_info("The CPU configuration topology of the machine is:"); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + 
printk(KERN_CONT " %d", info->mag[i]); + printk(KERN_CONT " / %d\n", info->mnest); + alloc_masks(info, &core_info, 1); + alloc_masks(info, &book_info, 2); +} + +static int cpu_management; + +static ssize_t dispatching_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sprintf(buf, "%d\n", cpu_management); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} + +static ssize_t dispatching_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int val, rc; + char delim; + + if (sscanf(buf, "%d %c", &val, &delim) != 1) + return -EINVAL; + if (val != 0 && val != 1) + return -EINVAL; + rc = 0; + get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); + if (cpu_management == val) + goto out; + rc = topology_set_cpu_management(val); + if (rc) + goto out; + cpu_management = val; + topology_expect_change(); +out: + mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); + return rc ? 
rc : count; +} +static DEVICE_ATTR(dispatching, 0644, dispatching_show, + dispatching_store); + +static ssize_t cpu_polarization_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu = dev->id; + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + switch (cpu_read_polarization(cpu)) { + case POLARIZATION_HRZ: + count = sprintf(buf, "horizontal\n"); + break; + case POLARIZATION_VL: + count = sprintf(buf, "vertical:low\n"); + break; + case POLARIZATION_VM: + count = sprintf(buf, "vertical:medium\n"); + break; + case POLARIZATION_VH: + count = sprintf(buf, "vertical:high\n"); + break; + default: + count = sprintf(buf, "unknown\n"); + break; + } + mutex_unlock(&smp_cpu_state_mutex); + return count; +} +static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); + +static struct attribute *topology_cpu_attrs[] = { + &dev_attr_polarization.attr, + NULL, +}; + +static struct attribute_group topology_cpu_attr_group = { + .attrs = topology_cpu_attrs, +}; + +int topology_cpu_init(struct cpu *cpu) +{ + return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); +} + +static int __init topology_init(void) +{ + if (!MACHINE_HAS_TOPOLOGY) { + topology_update_polarization_simple(); + goto out; + } + set_topology_timer(); +out: + update_cpu_core_map(); + return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); +} +device_initcall(topology_init); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c new file mode 100644 index 00000000..77cdf423 --- /dev/null +++ b/arch/s390/kernel/traps.c @@ -0,0 +1,656 @@ +/* + * arch/s390/kernel/traps.c + * + * S390 version + * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * + * Derived from "arch/i386/kernel/traps.c" + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * 'Traps.c' handles hardware traps and faults 
after we have saved some + * state in 'asm.s'. + */ +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/seq_file.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/kdebug.h> +#include <linux/kallsyms.h> +#include <linux/reboot.h> +#include <linux/kprobes.h> +#include <linux/bug.h> +#include <linux/utsname.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <linux/atomic.h> +#include <asm/mathemu.h> +#include <asm/cpcmd.h> +#include <asm/lowcore.h> +#include <asm/debug.h> +#include <asm/ipl.h> +#include "entry.h" + +void (*pgm_check_table[128])(struct pt_regs *regs); + +int show_unhandled_signals = 1; + +#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) + +#ifndef CONFIG_64BIT +#define LONG "%08lx " +#define FOURLONG "%08lx %08lx %08lx %08lx\n" +static int kstack_depth_to_print = 12; +#else /* CONFIG_64BIT */ +#define LONG "%016lx " +#define FOURLONG "%016lx %016lx %016lx %016lx\n" +static int kstack_depth_to_print = 20; +#endif /* CONFIG_64BIT */ + +/* + * For show_trace we have tree different stack to consider: + * - the panic stack which is used if the kernel stack has overflown + * - the asynchronous interrupt stack (cpu related) + * - the synchronous kernel stack (process related) + * The stack trace can start at any of the three stack and can potentially + * touch all of them. The order is: panic stack, async stack, sync stack. 
+ */ +static unsigned long +__show_trace(unsigned long sp, unsigned long low, unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + + while (1) { + sp = sp & PSW_ADDR_INSN; + if (sp < low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); + print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN); + /* Follow the backchain. */ + while (1) { + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); + print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN); + } + /* Zero backchain detected, check for interrupt frame. */ + sp = (unsigned long) (sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *) sp; + printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN); + print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN); + low = sp; + sp = regs->gprs[15]; + } +} + +static void show_trace(struct task_struct *task, unsigned long *stack) +{ + register unsigned long __r15 asm ("15"); + unsigned long sp; + + sp = (unsigned long) stack; + if (!sp) + sp = task ? task->thread.ksp : __r15; + printk("Call Trace:\n"); +#ifdef CONFIG_CHECK_STACK + sp = __show_trace(sp, S390_lowcore.panic_stack - 4096, + S390_lowcore.panic_stack); +#endif + sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE, + S390_lowcore.async_stack); + if (task) + __show_trace(sp, (unsigned long) task_stack_page(task), + (unsigned long) task_stack_page(task) + THREAD_SIZE); + else + __show_trace(sp, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); + if (!task) + task = current; + debug_show_held_locks(task); +} + +void show_stack(struct task_struct *task, unsigned long *sp) +{ + register unsigned long * __r15 asm ("15"); + unsigned long *stack; + int i; + + if (!sp) + stack = task ? 
(unsigned long *) task->thread.ksp : __r15; + else + stack = sp; + + for (i = 0; i < kstack_depth_to_print; i++) { + if (((addr_t) stack & (THREAD_SIZE-1)) == 0) + break; + if ((i * sizeof(long) % 32) == 0) + printk("%s ", i == 0 ? "" : "\n"); + printk(LONG, *stack++); + } + printk("\n"); + show_trace(task, sp); +} + +static void show_last_breaking_event(struct pt_regs *regs) +{ +#ifdef CONFIG_64BIT + printk("Last Breaking-Event-Address:\n"); + printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN); + print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN); +#endif +} + +/* + * The architecture-independent dump_stack generator + */ +void dump_stack(void) +{ + printk("CPU: %d %s %s %.*s\n", + task_thread_info(current)->cpu, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + printk("Process %s (pid: %d, task: %p, ksp: %p)\n", + current->comm, current->pid, current, + (void *) current->thread.ksp); + show_stack(NULL, NULL); +} +EXPORT_SYMBOL(dump_stack); + +static inline int mask_bits(struct pt_regs *regs, unsigned long bits) +{ + return (regs->psw.mask & bits) / ((~bits + 1) & bits); +} + +void show_registers(struct pt_regs *regs) +{ + char *mode; + + mode = (regs->psw.mask & PSW_MASK_PSTATE) ? 
"User" : "Krnl"; + printk("%s PSW : %p %p", + mode, (void *) regs->psw.mask, + (void *) regs->psw.addr); + print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN); + printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " + "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), + mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), + mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), + mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), + mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), + mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); +#ifdef CONFIG_64BIT + printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); +#endif + printk("\n%s GPRS: " FOURLONG, mode, + regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); + printk(" " FOURLONG, + regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); + printk(" " FOURLONG, + regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); + printk(" " FOURLONG, + regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); + + show_code(regs); +} + +void show_regs(struct pt_regs *regs) +{ + print_modules(); + printk("CPU: %d %s %s %.*s\n", + task_thread_info(current)->cpu, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + printk("Process %s (pid: %d, task: %p, ksp: %p)\n", + current->comm, current->pid, current, + (void *) current->thread.ksp); + show_registers(regs); + /* Show stack backtrace if pt_regs is from kernel mode */ + if (!(regs->psw.mask & PSW_MASK_PSTATE)) + show_trace(NULL, (unsigned long *) regs->gprs[15]); + show_last_breaking_event(regs); +} + +static DEFINE_SPINLOCK(die_lock); + +void die(struct pt_regs *regs, const char *str) +{ + static int die_counter; + + oops_enter(); + lgr_info_log(); + debug_stop_all(); + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter); +#ifdef 
CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP "); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); +#endif + printk("\n"); + notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); + show_regs(regs); + bust_spinlocks(0); + add_taint(TAINT_DIE); + spin_unlock_irq(&die_lock); + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception: panic_on_oops"); + oops_exit(); + do_exit(SIGSEGV); +} + +static inline void report_user_fault(struct pt_regs *regs, int signr) +{ + if ((task_pid_nr(current) > 1) && !show_unhandled_signals) + return; + if (!unhandled_signal(current, signr)) + return; + if (!printk_ratelimit()) + return; + printk("User process fault: interruption code 0x%X ", regs->int_code); + print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN); + printk("\n"); + show_regs(regs); +} + +int is_valid_bugaddr(unsigned long addr) +{ + return 1; +} + +static inline void __user *get_psw_address(struct pt_regs *regs) +{ + return (void __user *) + ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN); +} + +static void __kprobes do_trap(struct pt_regs *regs, + int si_signo, int si_code, char *str) +{ + siginfo_t info; + + if (notify_die(DIE_TRAP, str, regs, 0, + regs->int_code, si_signo) == NOTIFY_STOP) + return; + + if (regs->psw.mask & PSW_MASK_PSTATE) { + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = get_psw_address(regs); + force_sig_info(si_signo, &info, current); + report_user_fault(regs, si_signo); + } else { + const struct exception_table_entry *fixup; + fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); + if (fixup) + regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; + else { + enum bug_trap_type btt; + + btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs); + if (btt == BUG_TRAP_TYPE_WARN) + return; + die(regs, str); + } + } +} + +void __kprobes do_per_trap(struct pt_regs *regs) +{ + 
siginfo_t info; + + if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) + return; + if (!current->ptrace) + return; + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = + (void __force __user *) current->thread.per_event.address; + force_sig_info(SIGTRAP, &info, current); +} + +static void default_trap_handler(struct pt_regs *regs) +{ + if (regs->psw.mask & PSW_MASK_PSTATE) { + report_user_fault(regs, SIGSEGV); + do_exit(SIGSEGV); + } else + die(regs, "Unknown program exception"); +} + +#define DO_ERROR_INFO(name, signr, sicode, str) \ +static void name(struct pt_regs *regs) \ +{ \ + do_trap(regs, signr, sicode, str); \ +} + +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, + "addressing exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, + "execute exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, + "fixpoint divide exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, + "fixpoint overflow exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, + "HFP overflow exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, + "HFP underflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, + "HFP significance exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, + "HFP divide exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, + "HFP square root exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, + "operand exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, + "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, + "special operation exception") +DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN, + "translation exception") + +static inline void do_fp_trap(struct pt_regs *regs, int fpc) +{ + int si_code = 0; + /* FPC[2] is Data Exception Code */ + if ((fpc & 0x00000300) == 0) { + /* bits 6 and 7 of DXC are 0 iff IEEE 
exception */ + if (fpc & 0x8000) /* invalid fp operation */ + si_code = FPE_FLTINV; + else if (fpc & 0x4000) /* div by 0 */ + si_code = FPE_FLTDIV; + else if (fpc & 0x2000) /* overflow */ + si_code = FPE_FLTOVF; + else if (fpc & 0x1000) /* underflow */ + si_code = FPE_FLTUND; + else if (fpc & 0x0800) /* inexact */ + si_code = FPE_FLTRES; + } + do_trap(regs, SIGFPE, si_code, "floating point exception"); +} + +static void __kprobes illegal_op(struct pt_regs *regs) +{ + siginfo_t info; + __u8 opcode[6]; + __u16 __user *location; + int signal = 0; + + location = get_psw_address(regs); + + if (regs->psw.mask & PSW_MASK_PSTATE) { + if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) + return; + if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { + if (current->ptrace) { + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info.si_addr = location; + force_sig_info(SIGTRAP, &info, current); + } else + signal = SIGILL; +#ifdef CONFIG_MATHEMU + } else if (opcode[0] == 0xb3) { + if (get_user(*((__u16 *) (opcode+2)), location+1)) + return; + signal = math_emu_b3(opcode, regs); + } else if (opcode[0] == 0xed) { + if (get_user(*((__u32 *) (opcode+2)), + (__u32 __user *)(location+1))) + return; + signal = math_emu_ed(opcode, regs); + } else if (*((__u16 *) opcode) == 0xb299) { + if (get_user(*((__u16 *) (opcode+2)), location+1)) + return; + signal = math_emu_srnm(opcode, regs); + } else if (*((__u16 *) opcode) == 0xb29c) { + if (get_user(*((__u16 *) (opcode+2)), location+1)) + return; + signal = math_emu_stfpc(opcode, regs); + } else if (*((__u16 *) opcode) == 0xb29d) { + if (get_user(*((__u16 *) (opcode+2)), location+1)) + return; + signal = math_emu_lfpc(opcode, regs); +#endif + } else + signal = SIGILL; + } else { + /* + * If we get an illegal op in kernel mode, send it through the + * kprobes notifier. 
If kprobes doesn't pick it up, SIGILL + */ + if (notify_die(DIE_BPT, "bpt", regs, 0, + 3, SIGTRAP) != NOTIFY_STOP) + signal = SIGILL; + } + +#ifdef CONFIG_MATHEMU + if (signal == SIGFPE) + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal == SIGSEGV) + do_trap(regs, signal, SEGV_MAPERR, "user address fault"); + else +#endif + if (signal) + do_trap(regs, signal, ILL_ILLOPC, "illegal operation"); +} + + +#ifdef CONFIG_MATHEMU +void specification_exception(struct pt_regs *regs) +{ + __u8 opcode[6]; + __u16 __user *location = NULL; + int signal = 0; + + location = (__u16 __user *) get_psw_address(regs); + + if (regs->psw.mask & PSW_MASK_PSTATE) { + get_user(*((__u16 *) opcode), location); + switch (opcode[0]) { + case 0x28: /* LDR Rx,Ry */ + signal = math_emu_ldr(opcode); + break; + case 0x38: /* LER Rx,Ry */ + signal = math_emu_ler(opcode); + break; + case 0x60: /* STD R,D(X,B) */ + get_user(*((__u16 *) (opcode+2)), location+1); + signal = math_emu_std(opcode, regs); + break; + case 0x68: /* LD R,D(X,B) */ + get_user(*((__u16 *) (opcode+2)), location+1); + signal = math_emu_ld(opcode, regs); + break; + case 0x70: /* STE R,D(X,B) */ + get_user(*((__u16 *) (opcode+2)), location+1); + signal = math_emu_ste(opcode, regs); + break; + case 0x78: /* LE R,D(X,B) */ + get_user(*((__u16 *) (opcode+2)), location+1); + signal = math_emu_le(opcode, regs); + break; + default: + signal = SIGILL; + break; + } + } else + signal = SIGILL; + + if (signal == SIGFPE) + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal) + do_trap(regs, signal, ILL_ILLOPN, "specification exception"); +} +#else +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, + "specification exception"); +#endif + +static void data_exception(struct pt_regs *regs) +{ + __u16 __user *location; + int signal = 0; + + location = get_psw_address(regs); + + if (MACHINE_HAS_IEEE) + asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); + +#ifdef CONFIG_MATHEMU + else if 
(regs->psw.mask & PSW_MASK_PSTATE) { + __u8 opcode[6]; + get_user(*((__u16 *) opcode), location); + switch (opcode[0]) { + case 0x28: /* LDR Rx,Ry */ + signal = math_emu_ldr(opcode); + break; + case 0x38: /* LER Rx,Ry */ + signal = math_emu_ler(opcode); + break; + case 0x60: /* STD R,D(X,B) */ + get_user(*((__u16 *) (opcode+2)), location+1); + signal = math_emu_std(opcode, regs); + break; + case 0x68: /* LD R,D(X,B) */ + get_user(*((__u16 *) (opcode+2)), location+1); + signal = math_emu_ld(opcode, regs); + break; + case 0x70: /* STE R,D(X,B) */ + get_user(*((__u16 *) (opcode+2)), location+1); + signal = math_emu_ste(opcode, regs); + break; + case 0x78: /* LE R,D(X,B) */ + get_user(*((__u16 *) (opcode+2)), location+1); + signal = math_emu_le(opcode, regs); + break; + case 0xb3: + get_user(*((__u16 *) (opcode+2)), location+1); + signal = math_emu_b3(opcode, regs); + break; + case 0xed: + get_user(*((__u32 *) (opcode+2)), + (__u32 __user *)(location+1)); + signal = math_emu_ed(opcode, regs); + break; + case 0xb2: + if (opcode[1] == 0x99) { + get_user(*((__u16 *) (opcode+2)), location+1); + signal = math_emu_srnm(opcode, regs); + } else if (opcode[1] == 0x9c) { + get_user(*((__u16 *) (opcode+2)), location+1); + signal = math_emu_stfpc(opcode, regs); + } else if (opcode[1] == 0x9d) { + get_user(*((__u16 *) (opcode+2)), location+1); + signal = math_emu_lfpc(opcode, regs); + } else + signal = SIGILL; + break; + default: + signal = SIGILL; + break; + } + } +#endif + if (current->thread.fp_regs.fpc & FPC_DXC_MASK) + signal = SIGFPE; + else + signal = SIGILL; + if (signal == SIGFPE) + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal) + do_trap(regs, signal, ILL_ILLOPN, "data exception"); +} + +static void space_switch_exception(struct pt_regs *regs) +{ + /* Set user psw back to home space mode. */ + if (regs->psw.mask & PSW_MASK_PSTATE) + regs->psw.mask |= PSW_ASC_HOME; + /* Send SIGILL. 
*/ + do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event"); +} + +void __kprobes kernel_stack_overflow(struct pt_regs * regs) +{ + bust_spinlocks(1); + printk("Kernel stack overflow.\n"); + show_regs(regs); + bust_spinlocks(0); + panic("Corrupt kernel stack, can't continue."); +} + +/* init is done in lowcore.S and head.S */ + +void __init trap_init(void) +{ + int i; + + for (i = 0; i < 128; i++) + pgm_check_table[i] = &default_trap_handler; + pgm_check_table[1] = &illegal_op; + pgm_check_table[2] = &privileged_op; + pgm_check_table[3] = &execute_exception; + pgm_check_table[4] = &do_protection_exception; + pgm_check_table[5] = &addressing_exception; + pgm_check_table[6] = &specification_exception; + pgm_check_table[7] = &data_exception; + pgm_check_table[8] = &overflow_exception; + pgm_check_table[9] = ÷_exception; + pgm_check_table[0x0A] = &overflow_exception; + pgm_check_table[0x0B] = ÷_exception; + pgm_check_table[0x0C] = &hfp_overflow_exception; + pgm_check_table[0x0D] = &hfp_underflow_exception; + pgm_check_table[0x0E] = &hfp_significance_exception; + pgm_check_table[0x0F] = &hfp_divide_exception; + pgm_check_table[0x10] = &do_dat_exception; + pgm_check_table[0x11] = &do_dat_exception; + pgm_check_table[0x12] = &translation_exception; + pgm_check_table[0x13] = &special_op_exception; +#ifdef CONFIG_64BIT + pgm_check_table[0x38] = &do_asce_exception; + pgm_check_table[0x39] = &do_dat_exception; + pgm_check_table[0x3A] = &do_dat_exception; + pgm_check_table[0x3B] = &do_dat_exception; +#endif /* CONFIG_64BIT */ + pgm_check_table[0x15] = &operand_exception; + pgm_check_table[0x1C] = &space_switch_exception; + pgm_check_table[0x1D] = &hfp_sqrt_exception; + /* Enable machine checks early. */ + local_mcck_enable(); +} diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c new file mode 100644 index 00000000..ea5590fd --- /dev/null +++ b/arch/s390/kernel/vdso.c @@ -0,0 +1,333 @@ +/* + * vdso setup for s390 + * + * Copyright IBM Corp. 
2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/elf.h> +#include <linux/security.h> +#include <linux/bootmem.h> +#include <linux/compat.h> +#include <asm/asm-offsets.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/sections.h> +#include <asm/vdso.h> +#include <asm/facility.h> + +#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) +extern char vdso32_start, vdso32_end; +static void *vdso32_kbase = &vdso32_start; +static unsigned int vdso32_pages; +static struct page **vdso32_pagelist; +#endif + +#ifdef CONFIG_64BIT +extern char vdso64_start, vdso64_end; +static void *vdso64_kbase = &vdso64_start; +static unsigned int vdso64_pages; +static struct page **vdso64_pagelist; +#endif /* CONFIG_64BIT */ + +/* + * Should the kernel map a VDSO page into processes and pass its + * address down to glibc upon exec()? + */ +unsigned int __read_mostly vdso_enabled = 1; + +static int __init vdso_setup(char *s) +{ + unsigned long val; + int rc; + + rc = 0; + if (strncmp(s, "on", 3) == 0) + vdso_enabled = 1; + else if (strncmp(s, "off", 4) == 0) + vdso_enabled = 0; + else { + rc = strict_strtoul(s, 0, &val); + vdso_enabled = rc ? 0 : !!val; + } + return !rc; +} +__setup("vdso=", vdso_setup); + +/* + * The vdso data page + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; +struct vdso_data *vdso_data = &vdso_data_store.data; + +/* + * Setup vdso data page. 
+ */ +static void vdso_init_data(struct vdso_data *vd) +{ + vd->ectg_available = user_mode != HOME_SPACE_MODE && test_facility(31); +} + +#ifdef CONFIG_64BIT +/* + * Allocate/free per cpu vdso data. + */ +#define SEGMENT_ORDER 2 + +int vdso_alloc_per_cpu(struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + int i; + + lowcore->vdso_per_cpu_data = __LC_PASTE; + + if (user_mode == HOME_SPACE_MODE || !vdso_enabled) + return 0; + + segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); + page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); + page_frame = get_zeroed_page(GFP_KERNEL); + if (!segment_table || !page_table || !page_frame) + goto out; + + clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, + PAGE_SIZE << SEGMENT_ORDER); + clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, + 256*sizeof(unsigned long)); + + *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; + *(unsigned long *) page_table = _PAGE_RO + page_frame; + + psal = (u32 *) (page_table + 256*sizeof(unsigned long)); + aste = psal + 32; + + for (i = 4; i < 32; i += 4) + psal[i] = 0x80000000; + + lowcore->paste[4] = (u32)(addr_t) psal; + psal[0] = 0x20000000; + psal[2] = (u32)(addr_t) aste; + *(unsigned long *) (aste + 2) = segment_table + + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; + aste[4] = (u32)(addr_t) psal; + lowcore->vdso_per_cpu_data = page_frame; + + return 0; + +out: + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); + return -ENOMEM; +} + +void vdso_free_per_cpu(struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + + if (user_mode == HOME_SPACE_MODE || !vdso_enabled) + return; + + psal = (u32 *)(addr_t) lowcore->paste[4]; + aste = (u32 *)(addr_t) psal[2]; + segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK; + page_table = *(unsigned long *) segment_table; + page_frame = *(unsigned 
long *) page_table; + + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); +} + +static void vdso_init_cr5(void) +{ + unsigned long cr5; + + if (user_mode == HOME_SPACE_MODE || !vdso_enabled) + return; + cr5 = offsetof(struct _lowcore, paste); + __ctl_load(cr5, 5, 5); +} +#endif /* CONFIG_64BIT */ + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + struct page **vdso_pagelist; + unsigned long vdso_pages; + unsigned long vdso_base; + int rc; + + if (!vdso_enabled) + return 0; + /* + * Only map the vdso for dynamically linked elf binaries. + */ + if (!uses_interp) + return 0; + +#ifdef CONFIG_64BIT + vdso_pagelist = vdso64_pagelist; + vdso_pages = vdso64_pages; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; + } +#endif +#else + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; +#endif + + /* + * vDSO has a problem and was disabled, just don't "enable" it for + * the process + */ + if (vdso_pages == 0) + return 0; + + current->mm->context.vdso_base = 0; + + /* + * pick a base address for the vDSO in process space. We try to put + * it at vdso_base which is the "natural" base for it, but we might + * fail and end up putting it elsewhere. + */ + down_write(&mm->mmap_sem); + vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + rc = vdso_base; + goto out_up; + } + + /* + * Put vDSO base into mm struct. We need to do this before calling + * install_special_mapping or the perf counter mmap tracking code + * will fail to recognise it as a vDSO (since arch_vma_name fails). 
+ */ + current->mm->context.vdso_base = vdso_base; + + /* + * our vma flags don't have VM_WRITE so by default, the process + * isn't allowed to write those pages. + * gdb can break that with ptrace interface, and thus trigger COW + * on those pages but it's then your responsibility to never do that + * on the "data" page of the vDSO or you'll stop getting kernel + * updates and your nice userland gettimeofday will be totally dead. + * It's fine to use that for setting breakpoints in the vDSO code + * pages though. + */ + rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_pagelist); + if (rc) + current->mm->context.vdso_base = 0; +out_up: + up_write(&mm->mmap_sem); + return rc; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) + return "[vdso]"; + return NULL; +} + +static int __init vdso_init(void) +{ + int i; + + if (!vdso_enabled) + return 0; + vdso_init_data(vdso_data); +#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) + /* Calculate the size of the 32 bit vDSO */ + vdso32_pages = ((&vdso32_end - &vdso32_start + + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; + + /* Make sure pages are in the correct state */ + vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 1), + GFP_KERNEL); + BUG_ON(vdso32_pagelist == NULL); + for (i = 0; i < vdso32_pages - 1; i++) { + struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + vdso32_pagelist[i] = pg; + } + vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data); + vdso32_pagelist[vdso32_pages] = NULL; +#endif + +#ifdef CONFIG_64BIT + /* Calculate the size of the 64 bit vDSO */ + vdso64_pages = ((&vdso64_end - &vdso64_start + + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; + + /* Make sure pages are in the correct state */ + vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 1), + GFP_KERNEL); + 
BUG_ON(vdso64_pagelist == NULL); + for (i = 0; i < vdso64_pages - 1; i++) { + struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + vdso64_pagelist[i] = pg; + } + vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); + vdso64_pagelist[vdso64_pages] = NULL; + if (vdso_alloc_per_cpu(&S390_lowcore)) + BUG(); + vdso_init_cr5(); +#endif /* CONFIG_64BIT */ + + get_page(virt_to_page(vdso_data)); + + smp_wmb(); + + return 0; +} +early_initcall(vdso_init); + +int in_gate_area_no_mm(unsigned long addr) +{ + return 0; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile new file mode 100644 index 00000000..8ad2b34a --- /dev/null +++ b/arch/s390/kernel/vdso32/Makefile @@ -0,0 +1,58 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o + +# Build rules + +targets := $(obj-vdso32) vdso32.so vdso32.so.dbg +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + +KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_31 += -m31 -s + +KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin +KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31) + +obj-y += vdso32_wrapper.o +extra-y += vdso32.lds +CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) + +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + +# Force dependency (incbin is bad) +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) + $(call 
if_changed,vdso32ld) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso32): %.o: %.S + $(call if_changed_dep,vdso32as) + +# actual build commands +quiet_cmd_vdso32ld = VDSO32L $@ + cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso32.so: $(obj)/vdso32.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso32.so diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S new file mode 100644 index 00000000..36aaa25d --- /dev/null +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -0,0 +1,39 @@ +/* + * Userland implementation of clock_getres() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_getres + .type __kernel_clock_getres,@function +__kernel_clock_getres: + .cfi_startproc + chi %r2,__CLOCK_REALTIME + je 0f + chi %r2,__CLOCK_MONOTONIC + jne 3f +0: ltr %r3,%r3 + jz 2f /* res == NULL */ + basr %r1,0 +1: l %r0,4f-1b(%r1) + xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */ + st %r0,4(%r3) /* store tp->tv_usec */ +2: lhi %r2,0 + br %r14 +3: lhi %r1,__NR_clock_getres /* fallback to svc */ + svc 0 + br %r14 +4: .long __CLOCK_REALTIME_RES + .cfi_endproc + .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S new file mode 100644 index 00000000..b2224e0b --- /dev/null +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -0,0 +1,128 @@ +/* + * Userland implementation of clock_gettime() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_gettime + .type __kernel_clock_gettime,@function +__kernel_clock_gettime: + .cfi_startproc + basr %r5,0 +0: al %r5,21f-0b(%r5) /* get &_vdso_data */ + chi %r2,__CLOCK_REALTIME + je 10f + chi %r2,__CLOCK_MONOTONIC + jne 19f + + /* CLOCK_MONOTONIC */ + ltr %r3,%r3 + jz 9f /* tp == NULL */ +1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? 
loop */ + jnz 1b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,2f + ahi %r0,-1 +2: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */ + lr %r2,%r0 + l %r0,__VDSO_NTP_MULT(%r5) + ltr %r1,%r1 + mr %r0,%r0 + jnm 3f + a %r0,__VDSO_NTP_MULT(%r5) +3: alr %r0,%r2 + srdl %r0,12 + al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,4f + ahi %r0,1 +4: l %r2,__VDSO_XTIME_SEC+4(%r5) + al %r0,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */ + al %r1,__VDSO_WTOM_NSEC+4(%r5) + brc 12,5f + ahi %r0,1 +5: al %r2,__VDSO_WTOM_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 1b + basr %r5,0 +6: ltr %r0,%r0 + jnz 7f + cl %r1,20f-6b(%r5) + jl 8f +7: ahi %r2,1 + sl %r1,20f-6b(%r5) + brc 3,6b + ahi %r0,-1 + j 6b +8: st %r2,0(%r3) /* store tp->tv_sec */ + st %r1,4(%r3) /* store tp->tv_nsec */ +9: lhi %r2,0 + br %r14 + + /* CLOCK_REALTIME */ +10: ltr %r3,%r3 /* tp == NULL */ + jz 18f +11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? 
loop */ + jnz 11b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,12f + ahi %r0,-1 +12: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */ + lr %r2,%r0 + l %r0,__VDSO_NTP_MULT(%r5) + ltr %r1,%r1 + mr %r0,%r0 + jnm 13f + a %r0,__VDSO_NTP_MULT(%r5) +13: alr %r0,%r2 + srdl %r0,12 + al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,14f + ahi %r0,1 +14: l %r2,__VDSO_XTIME_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 11b + basr %r5,0 +15: ltr %r0,%r0 + jnz 16f + cl %r1,20f-15b(%r5) + jl 17f +16: ahi %r2,1 + sl %r1,20f-15b(%r5) + brc 3,15b + ahi %r0,-1 + j 15b +17: st %r2,0(%r3) /* store tp->tv_sec */ + st %r1,4(%r3) /* store tp->tv_nsec */ +18: lhi %r2,0 + br %r14 + + /* Fallback to system call */ +19: lhi %r1,__NR_clock_gettime + svc 0 + br %r14 + +20: .long 1000000000 +21: .long _vdso_data - 0b + .cfi_endproc + .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S new file mode 100644 index 00000000..2d363317 --- /dev/null +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -0,0 +1,78 @@ +/* + * Userland implementation of gettimeofday() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_gettimeofday + .type __kernel_gettimeofday,@function +__kernel_gettimeofday: + .cfi_startproc + basr %r5,0 +0: al %r5,13f-0b(%r5) /* get &_vdso_data */ +1: ltr %r3,%r3 /* check if tz is NULL */ + je 2f + mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) +2: ltr %r2,%r2 /* check if tv is NULL */ + je 10f + l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 1b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,3f + ahi %r0,-1 +3: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */ + st %r0,24(%r15) + l %r0,__VDSO_NTP_MULT(%r5) + ltr %r1,%r1 + mr %r0,%r0 + jnm 4f + a %r0,__VDSO_NTP_MULT(%r5) +4: al %r0,24(%r15) + srdl %r0,12 + al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,5f + ahi %r0,1 +5: mvc 24(4,%r15),__VDSO_XTIME_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 1b + l %r4,24(%r15) /* get tv_sec from stack */ + basr %r5,0 +6: ltr %r0,%r0 + jnz 7f + cl %r1,11f-6b(%r5) + jl 8f +7: ahi %r4,1 + sl %r1,11f-6b(%r5) + brc 3,6b + ahi %r0,-1 + j 6b +8: st %r4,0(%r2) /* store tv->tv_sec */ + ltr %r1,%r1 + m %r0,12f-6b(%r5) + jnm 9f + al %r0,12f-6b(%r5) +9: srl %r0,6 + st %r0,4(%r2) /* store tv->tv_usec */ +10: slr %r2,%r2 + br %r14 +11: .long 1000000000 +12: .long 274877907 +13: .long _vdso_data - 0b + .cfi_endproc + .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S new file mode 100644 index 00000000..79a071e4 --- /dev/null +++ b/arch/s390/kernel/vdso32/note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. 
+ */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S new file mode 100644 index 00000000..a8c379fa --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -0,0 +1,138 @@ +/* + * This is the infamous ld script for the 32 bits vdso + * library + */ +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") +OUTPUT_ARCH(s390:31-bit) +ENTRY(_start) + +SECTIONS +{ + . = VDSO32_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. 
+ */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + . = ALIGN(4096); + PROVIDE(_vdso_data = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. 
+ */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + + local: *; + }; +} diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S new file mode 100644 index 00000000..ae42f8ce --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S @@ -0,0 +1,14 @@ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso32_start, vdso32_end + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/s390/kernel/vdso32/vdso32.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile new file mode 100644 index 00000000..2a8ddfd1 --- /dev/null +++ b/arch/s390/kernel/vdso64/Makefile @@ -0,0 +1,58 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o + +# Build rules + +targets := $(obj-vdso64) vdso64.so vdso64.so.dbg +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) + +KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_64 += -m64 -s + +KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin +KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) + +obj-y += vdso64_wrapper.o +extra-y += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + +# Force dependency (incbin is bad) +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) + $(call if_changed,vdso64ld) + +# strip rule for the .so file 
+$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso64): %.o: %.S + $(call if_changed_dep,vdso64as) + +# actual build commands +quiet_cmd_vdso64ld = VDSO64L $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso64.so: $(obj)/vdso64.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso64.so diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S new file mode 100644 index 00000000..176e1f75 --- /dev/null +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -0,0 +1,44 @@ +/* + * Userland implementation of clock_getres() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_getres + .type __kernel_clock_getres,@function +__kernel_clock_getres: + .cfi_startproc + cghi %r2,__CLOCK_REALTIME + je 0f + cghi %r2,__CLOCK_MONOTONIC + je 0f + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ + jne 2f + larl %r5,_vdso_data + icm %r0,15,__LC_ECTG_OK(%r5) /* NOTE(review): r5 is &_vdso_data but the offset is named __LC_*; clock_gettime.S uses __VDSO_ECTG_OK for this test - confirm */ + jz 2f +0: ltgr %r3,%r3 + jz 1f /* res == NULL */ + larl %r1,3f + lg %r0,0(%r1) + xc 0(8,%r3),0(%r3) /* set res->tv_sec to zero */ + stg %r0,8(%r3) /* store res->tv_nsec */ +1: lghi %r2,0 + br %r14 +2: lghi %r1,__NR_clock_getres /* fallback to svc */ + svc 0 + br %r14 +3: .quad __CLOCK_REALTIME_RES + .cfi_endproc + .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S new file mode 100644 index 00000000..d46c95ed --- /dev/null +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -0,0 +1,125 @@ +/* + * Userland implementation of clock_gettime() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_gettime + .type __kernel_clock_gettime,@function +__kernel_clock_gettime: + .cfi_startproc + larl %r5,_vdso_data + cghi %r2,__CLOCK_REALTIME + je 4f + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ + je 9f + cghi %r2,__CLOCK_MONOTONIC + jne 12f + + /* CLOCK_MONOTONIC */ + ltgr %r3,%r3 + jz 3f /* tp == NULL */ +0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? 
loop */ + jnz 0b + stck 48(%r15) /* Store TOD clock */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */ + srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + lg %r0,__VDSO_XTIME_SEC(%r5) + alg %r1,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */ + alg %r0,__VDSO_WTOM_SEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 0b + larl %r5,13f +1: clg %r1,0(%r5) /* normalize: while nsec >= 1000000000, carry into sec */ + jl 2f + slg %r1,0(%r5) + aghi %r0,1 + j 1b +2: stg %r0,0(%r3) /* store tp->tv_sec */ + stg %r1,8(%r3) /* store tp->tv_nsec */ +3: lghi %r2,0 + br %r14 + + /* CLOCK_REALTIME */ +4: ltgr %r3,%r3 /* tp == NULL; ltgr tests all 64 pointer bits (ltr only the low 32) */ + jz 8f +5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 5b + stck 48(%r15) /* Store TOD clock */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */ + srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + lg %r0,__VDSO_XTIME_SEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 5b + larl %r5,13f +6: clg %r1,0(%r5) /* normalize: while nsec >= 1000000000, carry into sec */ + jl 7f + slg %r1,0(%r5) + aghi %r0,1 + j 6b +7: stg %r0,0(%r3) /* store tp->tv_sec */ + stg %r1,8(%r3) /* store tp->tv_nsec */ +8: lghi %r2,0 + br %r14 + + /* CLOCK_THREAD_CPUTIME_ID for this thread */ +9: icm %r0,15,__VDSO_ECTG_OK(%r5) + jz 12f + ear %r2,%a4 + llilh %r4,0x0100 + sar %a4,%r4 + lghi %r4,0 + epsw %r5,0 + sacf 512 /* Magic ectg instruction */ + .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 + tml %r5,0x4000 + jo 11f + tml %r5,0x8000 + jno 10f + sacf 256 + j 11f +10: sacf 0 +11: sar %a4,%r2 + algr %r1,%r0 /* r1 = cputime as TOD value */ + mghi %r1,1000 /* convert to nanoseconds */ + srlg %r1,%r1,12 /* r1 = cputime in nanosec */ + lgr %r4,%r1 + larl %r5,13f + srlg %r1,%r1,9 /* divide by 1000000000 */ + mlg %r0,8(%r5) + srlg 
%r0,%r0,11 /* r0 = tv_sec */ + stg %r0,0(%r3) + msg %r0,0(%r5) /* calculate tv_nsec */ + slgr %r4,%r0 /* r4 = tv_nsec */ + stg %r4,8(%r3) + lghi %r2,0 + br %r14 + + /* Fallback to system call */ +12: lghi %r1,__NR_clock_gettime + svc 0 + br %r14 + +13: .quad 1000000000 +14: .quad 19342813113834067 + .cfi_endproc + .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S new file mode 100644 index 00000000..36ee6747 --- /dev/null +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -0,0 +1,56 @@ +/* + * Userland implementation of gettimeofday() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_gettimeofday + .type __kernel_gettimeofday,@function +__kernel_gettimeofday: + .cfi_startproc + larl %r5,_vdso_data +0: ltgr %r3,%r3 /* check if tz is NULL */ + je 1f + mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) +1: ltgr %r2,%r2 /* check if tv is NULL */ + je 4f + lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? 
loop */ + jnz 0b + stck 48(%r15) /* Store TOD clock */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */ + srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime.tv_nsec */ + lg %r0,__VDSO_XTIME_SEC(%r5) /* xtime.tv_sec */ + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 0b + larl %r5,5f +2: clg %r1,0(%r5) + jl 3f + slg %r1,0(%r5) + aghi %r0,1 + j 2b +3: stg %r0,0(%r2) /* store tv->tv_sec */ + slgr %r0,%r0 /* tv_nsec -> tv_usec */ + ml %r0,8(%r5) + srlg %r0,%r0,6 + stg %r0,8(%r2) /* store tv->tv_usec */ +4: lghi %r2,0 + br %r14 +5: .quad 1000000000 + .long 274877907 + .cfi_endproc + .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S new file mode 100644 index 00000000..79a071e4 --- /dev/null +++ b/arch/s390/kernel/vdso64/note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S new file mode 100644 index 00000000..9f5979d1 --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -0,0 +1,138 @@ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) +ENTRY(_start) + +SECTIONS +{ + . 
= VDSO64_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. 
+ */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + . = ALIGN(4096); + PROVIDE(_vdso_data = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. 
+ */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + + local: *; + }; +} diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S new file mode 100644 index 00000000..c245842b --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S @@ -0,0 +1,14 @@ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso64_start, vdso64_end + .balign PAGE_SIZE +vdso64_start: + .incbin "arch/s390/kernel/vdso64/vdso64.so" + .balign PAGE_SIZE +vdso64_end: + + .previous diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S new file mode 100644 index 00000000..21109c63 --- /dev/null +++ b/arch/s390/kernel/vmlinux.lds.S @@ -0,0 +1,96 @@ +/* ld script to make s390 Linux kernel + * Written by Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#include <asm/thread_info.h> +#include <asm/page.h> +#include <asm-generic/vmlinux.lds.h> + +#ifndef CONFIG_64BIT +OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") +OUTPUT_ARCH(s390) +ENTRY(startup) +jiffies = jiffies_64 + 4; +#else +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) +ENTRY(startup) +jiffies = jiffies_64; +#endif + +PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ + note PT_NOTE FLAGS(0); /* ___ */ +} + +SECTIONS +{ + . = 0x00000000; + .text : { + _text = .; /* Text and read-only data */ + HEAD_TEXT + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + *(.fixup) + *(.gnu.warning) + } :text = 0x0700 + + _etext = .; /* End of text section */ + + NOTES :text :note + + .dummy : { *(.dummy) } :data + + RODATA + +#ifdef CONFIG_SHARED_KERNEL + . = ALIGN(0x100000); /* VM shared segments are 1MB aligned */ +#endif + + . 
= ALIGN(PAGE_SIZE); + _eshared = .; /* End of shareable data */ + _sdata = .; /* Start of data section */ + + EXCEPTION_TABLE(16) :data + + RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE) + + _edata = .; /* End of data section */ + + /* will be freed after init */ + . = ALIGN(PAGE_SIZE); /* Init code and data */ + __init_begin = .; + + INIT_TEXT_SECTION(PAGE_SIZE) + + /* + * .exit.text is discarded at runtime, not link time, + * to deal with references from __bug_table + */ + .exit.text : { + EXIT_TEXT + } + + /* early.c uses stsi, which requires page aligned data. */ + . = ALIGN(PAGE_SIZE); + INIT_DATA_SECTION(0x100) + + PERCPU_SECTION(0x100) + . = ALIGN(PAGE_SIZE); + __init_end = .; /* freed after init ends here */ + + BSS_SECTION(0, 2, 0) + + _end = . ; + + /* Debugging sections. */ + STABS_DEBUG + DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS +} diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c new file mode 100644 index 00000000..39ebff50 --- /dev/null +++ b/arch/s390/kernel/vtime.c @@ -0,0 +1,504 @@ +/* + * arch/s390/kernel/vtime.c + * Virtual cpu timer based timer functions. 
+ * + * S390 version + * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Jan Glauber <jan.glauber@de.ibm.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/time.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/types.h> +#include <linux/timex.h> +#include <linux/notifier.h> +#include <linux/kernel_stat.h> +#include <linux/rcupdate.h> +#include <linux/posix-timers.h> +#include <linux/cpu.h> +#include <linux/kprobes.h> + +#include <asm/timer.h> +#include <asm/irq_regs.h> +#include <asm/cputime.h> +#include <asm/irq.h> +#include "entry.h" + +static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); + +DEFINE_PER_CPU(struct s390_idle_data, s390_idle); + +static inline __u64 get_vtimer(void) +{ + __u64 timer; + + asm volatile("STPT %0" : "=m" (timer)); /* store the current cpu timer value */ + return timer; +} + +static inline void set_vtimer(__u64 expires) +{ + __u64 timer; + + asm volatile (" STPT %0\n" /* Store current cpu timer value */ + " SPT %1" /* Set new value immediately afterwards */ + : "=m" (timer) : "m" (expires) ); + S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; /* add the timer delta since the last update to system time */ + S390_lowcore.last_update_timer = expires; /* the new value is the reference for the next update */ +} + +/* + * Update process times based on virtual cpu times stored by entry.S + * to the lowcore fields user_timer, system_timer & steal_timer. 
+ */ +static void do_account_vtime(struct task_struct *tsk, int hardirq_offset) +{ + struct thread_info *ti = task_thread_info(tsk); + __u64 timer, clock, user, system, steal; + + timer = S390_lowcore.last_update_timer; + clock = S390_lowcore.last_update_clock; + asm volatile (" STPT %0\n" /* Store current cpu timer value */ + " STCK %1" /* Store current tod clock value */ + : "=m" (S390_lowcore.last_update_timer), + "=m" (S390_lowcore.last_update_clock) ); + S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; + S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; /* add the tod clock delta; user and system deltas are subtracted below */ + + user = S390_lowcore.user_timer - ti->user_timer; + S390_lowcore.steal_timer -= user; + ti->user_timer = S390_lowcore.user_timer; + account_user_time(tsk, user, user); + + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; + account_system_time(tsk, hardirq_offset, system, system); + + steal = S390_lowcore.steal_timer; + if ((s64) steal > 0) { /* only a positive remainder is accounted as steal time */ + S390_lowcore.steal_timer = 0; + account_steal_time(steal); + } +} + +void account_vtime(struct task_struct *prev, struct task_struct *next) +{ + struct thread_info *ti; + + do_account_vtime(prev, 0); + ti = task_thread_info(prev); /* save the lowcore timers in the outgoing task */ + ti->user_timer = S390_lowcore.user_timer; + ti->system_timer = S390_lowcore.system_timer; + ti = task_thread_info(next); /* load the lowcore timers from the incoming task */ + S390_lowcore.user_timer = ti->user_timer; + S390_lowcore.system_timer = ti->system_timer; +} + +void account_process_tick(struct task_struct *tsk, int user_tick) +{ + do_account_vtime(tsk, HARDIRQ_OFFSET); /* user_tick is not used here */ +} + +/* + * Update process times based on virtual cpu times stored by entry.S + * to the lowcore fields user_timer, system_timer & steal_timer. 
+ */ +void account_system_vtime(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + __u64 timer, system; + + timer = S390_lowcore.last_update_timer; + S390_lowcore.last_update_timer = get_vtimer(); + S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; + + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; + account_system_time(tsk, 0, system, system); +} +EXPORT_SYMBOL_GPL(account_system_vtime); + +void __kprobes vtime_stop_cpu(void) +{ + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); + unsigned long long idle_time; + unsigned long psw_mask; + + trace_hardirqs_on(); + /* Don't trace preempt off for idle. */ + stop_critical_timings(); + + /* Wait for external, I/O or machine check interrupt. */ + psw_mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + idle->nohz_delay = 0; + + /* Call the assembler magic in entry.S */ + psw_idle(idle, vq, psw_mask, !list_empty(&vq->list)); + + /* Reenable preemption tracer. */ + start_critical_timings(); + + /* Account time spent with enabled wait psw loaded as idle time. */ + idle->sequence++; + smp_wmb(); + idle_time = idle->idle_exit - idle->idle_enter; + idle->idle_time += idle_time; + idle->idle_enter = idle->idle_exit = 0ULL; + idle->idle_count++; + account_idle_time(idle_time); + smp_wmb(); + idle->sequence++; +} + +cputime64_t s390_get_idle_time(int cpu) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); + unsigned long long now, idle_enter, idle_exit; + unsigned int sequence; + + do { + now = get_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_enter = ACCESS_ONCE(idle->idle_enter); + idle_exit = ACCESS_ONCE(idle->idle_exit); + } while ((sequence & 1) || (idle->sequence != sequence)); + return idle_enter ? ((idle_exit ? 
: now) - idle_enter) : 0; +} + +/* + * Sorted add to a list. List is linear searched until first bigger + * element is found. + */ +static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) +{ + struct vtimer_list *event; + + list_for_each_entry(event, head, entry) { + if (event->expires > timer->expires) { + list_add_tail(&timer->entry, &event->entry); + return; + } + } + list_add_tail(&timer->entry, head); +} + +/* + * Do the callback functions of expired vtimer events. + * Called from within the interrupt handler. + */ +static void do_callbacks(struct list_head *cb_list) +{ + struct vtimer_queue *vq; + struct vtimer_list *event, *tmp; + + if (list_empty(cb_list)) + return; + + vq = &__get_cpu_var(virt_cpu_timer); + + list_for_each_entry_safe(event, tmp, cb_list, entry) { + list_del_init(&event->entry); + (event->function)(event->data); + if (event->interval) { + /* Recharge interval timer */ + event->expires = event->interval + vq->elapsed; + spin_lock(&vq->lock); + list_add_sorted(event, &vq->list); + spin_unlock(&vq->lock); + } + } +} + +/* + * Handler for the virtual CPU timer. 
+ */ +static void do_cpu_timer_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct vtimer_queue *vq; + struct vtimer_list *event, *tmp; + struct list_head cb_list; /* the callback queue */ + __u64 elapsed, next; + + kstat_cpu(smp_processor_id()).irqs[EXTINT_TMR]++; + INIT_LIST_HEAD(&cb_list); + vq = &__get_cpu_var(virt_cpu_timer); + + /* walk timer list, fire all expired events */ + spin_lock(&vq->lock); + + elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer); + BUG_ON((s64) elapsed < 0); + vq->elapsed = 0; + list_for_each_entry_safe(event, tmp, &vq->list, entry) { + if (event->expires < elapsed) + /* move expired timer to the callback queue */ + list_move_tail(&event->entry, &cb_list); + else + event->expires -= elapsed; + } + spin_unlock(&vq->lock); + + do_callbacks(&cb_list); + + /* next event is first in list */ + next = VTIMER_MAX_SLICE; + spin_lock(&vq->lock); + if (!list_empty(&vq->list)) { + event = list_first_entry(&vq->list, struct vtimer_list, entry); + next = event->expires; + } + spin_unlock(&vq->lock); + /* + * To improve precision add the time spent by the + * interrupt handler to the elapsed time. 
+ * Note: CPU timer counts down and we got an interrupt, + * the current content is negative + */ + elapsed = S390_lowcore.async_enter_timer - get_vtimer(); + set_vtimer(next - elapsed); + vq->timer = next - elapsed; + vq->elapsed = elapsed; +} + +void init_virt_timer(struct vtimer_list *timer) +{ + timer->function = NULL; + INIT_LIST_HEAD(&timer->entry); +} +EXPORT_SYMBOL(init_virt_timer); + +static inline int vtimer_pending(struct vtimer_list *timer) +{ + return (!list_empty(&timer->entry)); +} + +/* + * this function should only run on the specified CPU + */ +static void internal_add_vtimer(struct vtimer_list *timer) +{ + struct vtimer_queue *vq; + unsigned long flags; + __u64 left, expires; + + vq = &per_cpu(virt_cpu_timer, timer->cpu); + spin_lock_irqsave(&vq->lock, flags); + + BUG_ON(timer->cpu != smp_processor_id()); + + if (list_empty(&vq->list)) { + /* First timer on this cpu, just program it. */ + list_add(&timer->entry, &vq->list); + set_vtimer(timer->expires); + vq->timer = timer->expires; + vq->elapsed = 0; + } else { + /* Check progress of old timers. */ + expires = timer->expires; + left = get_vtimer(); + if (likely((s64) expires < (s64) left)) { + /* The new timer expires before the current timer. */ + set_vtimer(expires); + vq->elapsed += vq->timer - left; + vq->timer = expires; + } else { + vq->elapsed += vq->timer - left; + vq->timer = left; + } + /* Insert new timer into per cpu list. 
*/ + timer->expires += vq->elapsed; + list_add_sorted(timer, &vq->list); + } + + spin_unlock_irqrestore(&vq->lock, flags); + /* release CPU acquired in prepare_vtimer or mod_virt_timer() */ + put_cpu(); +} + +static inline void prepare_vtimer(struct vtimer_list *timer) +{ + BUG_ON(!timer->function); + BUG_ON(!timer->expires || timer->expires > VTIMER_MAX_SLICE); + BUG_ON(vtimer_pending(timer)); + timer->cpu = get_cpu(); +} + +/* + * add_virt_timer - add an oneshot virtual CPU timer + */ +void add_virt_timer(void *new) +{ + struct vtimer_list *timer; + + timer = (struct vtimer_list *)new; + prepare_vtimer(timer); + timer->interval = 0; + internal_add_vtimer(timer); +} +EXPORT_SYMBOL(add_virt_timer); + +/* + * add_virt_timer_int - add an interval virtual CPU timer + */ +void add_virt_timer_periodic(void *new) +{ + struct vtimer_list *timer; + + timer = (struct vtimer_list *)new; + prepare_vtimer(timer); + timer->interval = timer->expires; + internal_add_vtimer(timer); +} +EXPORT_SYMBOL(add_virt_timer_periodic); + +static int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) +{ + struct vtimer_queue *vq; + unsigned long flags; + int cpu; + + BUG_ON(!timer->function); + BUG_ON(!expires || expires > VTIMER_MAX_SLICE); + + if (timer->expires == expires && vtimer_pending(timer)) + return 1; + + cpu = get_cpu(); + vq = &per_cpu(virt_cpu_timer, cpu); + + /* disable interrupts before test if timer is pending */ + spin_lock_irqsave(&vq->lock, flags); + + /* if timer isn't pending add it on the current CPU */ + if (!vtimer_pending(timer)) { + spin_unlock_irqrestore(&vq->lock, flags); + + if (periodic) + timer->interval = expires; + else + timer->interval = 0; + timer->expires = expires; + timer->cpu = cpu; + internal_add_vtimer(timer); + return 0; + } + + /* check if we run on the right CPU */ + BUG_ON(timer->cpu != cpu); + + list_del_init(&timer->entry); + timer->expires = expires; + if (periodic) + timer->interval = expires; + + /* the timer can't expire 
anymore so we can release the lock */ + spin_unlock_irqrestore(&vq->lock, flags); + internal_add_vtimer(timer); + return 1; +} + +/* + * If we change a pending timer the function must be called on the CPU + * where the timer is running on. + * + * returns whether it has modified a pending timer (1) or not (0) + */ +int mod_virt_timer(struct vtimer_list *timer, __u64 expires) +{ + return __mod_vtimer(timer, expires, 0); +} +EXPORT_SYMBOL(mod_virt_timer); + +/* + * If we change a pending timer the function must be called on the CPU + * where the timer is running on. + * + * returns whether it has modified a pending timer (1) or not (0) + */ +int mod_virt_timer_periodic(struct vtimer_list *timer, __u64 expires) +{ + return __mod_vtimer(timer, expires, 1); +} +EXPORT_SYMBOL(mod_virt_timer_periodic); + +/* + * delete a virtual timer + * + * returns whether the deleted timer was pending (1) or not (0) + */ +int del_virt_timer(struct vtimer_list *timer) +{ + unsigned long flags; + struct vtimer_queue *vq; + + /* check if timer is pending */ + if (!vtimer_pending(timer)) + return 0; + + vq = &per_cpu(virt_cpu_timer, timer->cpu); + spin_lock_irqsave(&vq->lock, flags); + + /* we don't interrupt a running timer, just let it expire! */ + list_del_init(&timer->entry); + + spin_unlock_irqrestore(&vq->lock, flags); + return 1; +} +EXPORT_SYMBOL(del_virt_timer); + +/* + * Start the virtual CPU timer on the current CPU. 
+ */ +void init_cpu_vtimer(void) +{ + struct vtimer_queue *vq; + + /* initialize per cpu vtimer structure */ + vq = &__get_cpu_var(virt_cpu_timer); + INIT_LIST_HEAD(&vq->list); + spin_lock_init(&vq->lock); + + /* enable cpu timer interrupts */ + __ctl_set_bit(0,10); + + /* set initial cpu timer */ + set_vtimer(0x7fffffffffffffffULL); +} + +static int __cpuinit s390_nohz_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + struct s390_idle_data *idle; + long cpu = (long) hcpu; + + idle = &per_cpu(s390_idle, cpu); + switch (action) { + case CPU_DYING: + case CPU_DYING_FROZEN: + idle->nohz_delay = 0; + default: + break; + } + return NOTIFY_OK; +} + +void __init vtime_init(void) +{ + /* request the cpu timer external interrupt */ + if (register_external_interrupt(0x1005, do_cpu_timer_interrupt)) + panic("Couldn't request external interrupt 0x1005"); + + /* Enable cpu timer interrupts on the boot cpu. */ + init_cpu_vtimer(); + cpu_notifier(s390_nohz_notify, 0); +} + diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig new file mode 100644 index 00000000..78eb9847 --- /dev/null +++ b/arch/s390/kvm/Kconfig @@ -0,0 +1,50 @@ +# +# KVM configuration +# +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + def_bool y + prompt "Virtualization" + ---help--- + Say Y here to get to see options for using your Linux host to run other + operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if VIRTUALIZATION + +config KVM + def_tristate y + prompt "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + ---help--- + Support hosting paravirtualized guest machines using the SIE + virtualization capability on the mainframe. This should work + on any 64bit machine. 
+ + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +config KVM_S390_UCONTROL + bool "Userspace controlled virtual machines" + depends on KVM + ---help--- + Allow CAP_SYS_ADMIN users to create KVM virtual machines that are + controlled by userspace. + + If unsure, say N. + +# OK, it's a little counter-intuitive to do this, but it puts it neatly under +# the virtualization menu. +source drivers/vhost/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile new file mode 100644 index 00000000..3975722b --- /dev/null +++ b/arch/s390/kvm/Makefile @@ -0,0 +1,14 @@ +# Makefile for kernel virtual machines on s390 +# +# Copyright IBM Corp. 2008 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License (version 2 only) +# as published by the Free Software Foundation. + +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) + +ccflags-y := -Ivirt/kvm -Iarch/s390/kvm + +kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o diag.o +obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c new file mode 100644 index 00000000..a353f0ea --- /dev/null +++ b/arch/s390/kvm/diag.c @@ -0,0 +1,97 @@ +/* + * diag.c - handling diagnose instructions + * + * Copyright IBM Corp. 2008,2011 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include "kvm-s390.h" + +static int diag_release_pages(struct kvm_vcpu *vcpu) +{ + unsigned long start, end; + unsigned long prefix = vcpu->arch.sie_block->prefix; + + start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; + + if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end + || start < 2 * PAGE_SIZE) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); + vcpu->stat.diagnose_10++; + + /* we checked for start > end above */ + if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { + gmap_discard(start, end, vcpu->arch.gmap); + } else { + if (start < prefix) + gmap_discard(start, prefix, vcpu->arch.gmap); + if (end >= prefix) + gmap_discard(prefix + 2 * PAGE_SIZE, + end, vcpu->arch.gmap); + } + return 0; +} + +static int __diag_time_slice_end(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); + vcpu->stat.diagnose_44++; + vcpu_put(vcpu); + yield(); + vcpu_load(vcpu); + return 0; +} + +static int __diag_ipl_functions(struct kvm_vcpu *vcpu) +{ + unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; + unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; + + VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); + switch (subcode) { + case 3: + vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; + break; + case 4: + vcpu->run->s390_reset_flags = 0; + break; + default: + return -EOPNOTSUPP; + } + + atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; + vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; + vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; + vcpu->run->exit_reason = KVM_EXIT_S390_RESET; + VCPU_EVENT(vcpu, 
3, "requesting userspace resets %llx", + vcpu->run->s390_reset_flags); + return -EREMOTE; +} + +int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) +{ + int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; + + switch (code) { + case 0x10: + return diag_release_pages(vcpu); + case 0x44: + return __diag_time_slice_end(vcpu); + case 0x308: + return __diag_ipl_functions(vcpu); + default: + return -EOPNOTSUPP; + } +} diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h new file mode 100644 index 00000000..c86f6ae4 --- /dev/null +++ b/arch/s390/kvm/gaccess.h @@ -0,0 +1,386 @@ +/* + * access.h - access guest memory + * + * Copyright IBM Corp. 2008,2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + */ + +#ifndef __KVM_S390_GACCESS_H +#define __KVM_S390_GACCESS_H + +#include <linux/compiler.h> +#include <linux/kvm_host.h> +#include <asm/uaccess.h> +#include "kvm-s390.h" + +static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, + unsigned long guestaddr) +{ + unsigned long prefix = vcpu->arch.sie_block->prefix; + + if (guestaddr < 2 * PAGE_SIZE) + guestaddr += prefix; + else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) + guestaddr -= prefix; + + return (void __user *) gmap_fault(guestaddr, vcpu->arch.gmap); +} + +static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, + u64 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 7); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return get_user(*result, (unsigned long __user *) uptr); +} + +static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, + u32 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 3); + + 
if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return get_user(*result, (u32 __user *) uptr); +} + +static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, + u16 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 1); + + if (IS_ERR(uptr)) + return PTR_ERR(uptr); + + return get_user(*result, (u16 __user *) uptr); +} + +static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, + u8 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return get_user(*result, (u8 __user *) uptr); +} + +static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, + u64 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 7); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u64 __user *) uptr); +} + +static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, + u32 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 3); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u32 __user *) uptr); +} + +static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, + u16 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 1); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u16 __user *) uptr); +} + +static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, + u8 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u8 __user *) uptr); +} + + +static inline int 
__copy_to_guest_slow(struct kvm_vcpu *vcpu, + unsigned long guestdest, + void *from, unsigned long n) +{ + int rc; + unsigned long i; + u8 *data = from; + + for (i = 0; i < n; i++) { + rc = put_guest_u8(vcpu, guestdest++, *(data++)); + if (rc < 0) + return rc; + } + return 0; +} + +static inline int __copy_to_guest_fast(struct kvm_vcpu *vcpu, + unsigned long guestdest, + void *from, unsigned long n) +{ + int r; + void __user *uptr; + unsigned long size; + + if (guestdest + n < guestdest) + return -EFAULT; + + /* simple case: all within one segment table entry? */ + if ((guestdest & PMD_MASK) == ((guestdest+n) & PMD_MASK)) { + uptr = (void __user *) gmap_fault(guestdest, vcpu->arch.gmap); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + r = copy_to_user(uptr, from, n); + + if (r) + r = -EFAULT; + + goto out; + } + + /* copy first segment */ + uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + size = PMD_SIZE - (guestdest & ~PMD_MASK); + + r = copy_to_user(uptr, from, size); + + if (r) { + r = -EFAULT; + goto out; + } + from += size; + n -= size; + guestdest += size; + + /* copy full segments */ + while (n >= PMD_SIZE) { + uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + r = copy_to_user(uptr, from, PMD_SIZE); + + if (r) { + r = -EFAULT; + goto out; + } + from += PMD_SIZE; + n -= PMD_SIZE; + guestdest += PMD_SIZE; + } + + /* copy the tail segment */ + if (n) { + uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + r = copy_to_user(uptr, from, n); + + if (r) + r = -EFAULT; + } +out: + return r; +} + +static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, + unsigned long guestdest, + void *from, unsigned long n) +{ + return 
__copy_to_guest_fast(vcpu, guestdest, from, n); +} + +static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest, + void *from, unsigned long n) +{ + unsigned long prefix = vcpu->arch.sie_block->prefix; + + if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) + goto slowpath; + + if ((guestdest < prefix) && (guestdest + n > prefix)) + goto slowpath; + + if ((guestdest < prefix + 2 * PAGE_SIZE) + && (guestdest + n > prefix + 2 * PAGE_SIZE)) + goto slowpath; + + if (guestdest < 2 * PAGE_SIZE) + guestdest += prefix; + else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE)) + guestdest -= prefix; + + return __copy_to_guest_fast(vcpu, guestdest, from, n); +slowpath: + return __copy_to_guest_slow(vcpu, guestdest, from, n); +} + +static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, + unsigned long guestsrc, + unsigned long n) +{ + int rc; + unsigned long i; + u8 *data = to; + + for (i = 0; i < n; i++) { + rc = get_guest_u8(vcpu, guestsrc++, data++); + if (rc < 0) + return rc; + } + return 0; +} + +static inline int __copy_from_guest_fast(struct kvm_vcpu *vcpu, void *to, + unsigned long guestsrc, + unsigned long n) +{ + int r; + void __user *uptr; + unsigned long size; + + if (guestsrc + n < guestsrc) + return -EFAULT; + + /* simple case: all within one segment table entry? 
*/ + if ((guestsrc & PMD_MASK) == ((guestsrc+n) & PMD_MASK)) { + uptr = (void __user *) gmap_fault(guestsrc, vcpu->arch.gmap); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + r = copy_from_user(to, uptr, n); + + if (r) + r = -EFAULT; + + goto out; + } + + /* copy first segment */ + uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + size = PMD_SIZE - (guestsrc & ~PMD_MASK); + + r = copy_from_user(to, uptr, size); + + if (r) { + r = -EFAULT; + goto out; + } + to += size; + n -= size; + guestsrc += size; + + /* copy full segments */ + while (n >= PMD_SIZE) { + uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + r = copy_from_user(to, uptr, PMD_SIZE); + + if (r) { + r = -EFAULT; + goto out; + } + to += PMD_SIZE; + n -= PMD_SIZE; + guestsrc += PMD_SIZE; + } + + /* copy the tail segment */ + if (n) { + uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + r = copy_from_user(to, uptr, n); + + if (r) + r = -EFAULT; + } +out: + return r; +} + +static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, + unsigned long guestsrc, + unsigned long n) +{ + return __copy_from_guest_fast(vcpu, to, guestsrc, n); +} + +static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, + unsigned long guestsrc, unsigned long n) +{ + unsigned long prefix = vcpu->arch.sie_block->prefix; + + if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) + goto slowpath; + + if ((guestsrc < prefix) && (guestsrc + n > prefix)) + goto slowpath; + + if ((guestsrc < prefix + 2 * PAGE_SIZE) + && (guestsrc + n > prefix + 2 * PAGE_SIZE)) + goto slowpath; + + if (guestsrc < 2 * PAGE_SIZE) + guestsrc += prefix; + else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * 
PAGE_SIZE)) + guestsrc -= prefix; + + return __copy_from_guest_fast(vcpu, to, guestsrc, n); +slowpath: + return __copy_from_guest_slow(vcpu, to, guestsrc, n); +} +#endif diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c new file mode 100644 index 00000000..36145657 --- /dev/null +++ b/arch/s390/kvm/intercept.c @@ -0,0 +1,265 @@ +/* + * intercept.c - in-kernel handling for sie intercepts + * + * Copyright IBM Corp. 2008,2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <linux/errno.h> +#include <linux/pagemap.h> + +#include <asm/kvm_host.h> + +#include "kvm-s390.h" +#include "gaccess.h" + +static int handle_lctlg(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + + ((vcpu->arch.sie_block->ipb & 0xff00) << 4); + u64 useraddr; + int reg, rc; + + vcpu->stat.instruction_lctlg++; + if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f) + return -EOPNOTSUPP; + + useraddr = disp2; + if (base2) + useraddr += vcpu->run->s.regs.gprs[base2]; + + if (useraddr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + reg = reg1; + + VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, + disp2); + + do { + rc = get_guest_u64(vcpu, useraddr, + &vcpu->arch.sie_block->gcr[reg]); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + break; + } + useraddr += 8; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + return 0; +} + +static int handle_lctl(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa 
& 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 useraddr; + u32 val = 0; + int reg, rc; + + vcpu->stat.instruction_lctl++; + + useraddr = disp2; + if (base2) + useraddr += vcpu->run->s.regs.gprs[base2]; + + if (useraddr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, + disp2); + + reg = reg1; + do { + rc = get_guest_u32(vcpu, useraddr, &val); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + break; + } + vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; + vcpu->arch.sie_block->gcr[reg] |= val; + useraddr += 4; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + return 0; +} + +static intercept_handler_t instruction_handlers[256] = { + [0x83] = kvm_s390_handle_diag, + [0xae] = kvm_s390_handle_sigp, + [0xb2] = kvm_s390_handle_b2, + [0xb7] = handle_lctl, + [0xe5] = kvm_s390_handle_e5, + [0xeb] = handle_lctlg, +}; + +static int handle_noop(struct kvm_vcpu *vcpu) +{ + switch (vcpu->arch.sie_block->icptcode) { + case 0x0: + vcpu->stat.exit_null++; + break; + case 0x10: + vcpu->stat.exit_external_request++; + break; + case 0x14: + vcpu->stat.exit_external_interrupt++; + break; + default: + break; /* nothing */ + } + return 0; +} + +static int handle_stop(struct kvm_vcpu *vcpu) +{ + int rc = 0; + + vcpu->stat.exit_stop_request++; + spin_lock_bh(&vcpu->arch.local_int.lock); + + if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) { + vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP; + rc = SIE_INTERCEPT_RERUNVCPU; + vcpu->run->exit_reason = KVM_EXIT_INTR; + } + + if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { + atomic_set_mask(CPUSTAT_STOPPED, + &vcpu->arch.sie_block->cpuflags); + vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; + VCPU_EVENT(vcpu, 
3, "%s", "cpu stopped"); + rc = -EOPNOTSUPP; + } + + if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { + vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; + /* store status must be called unlocked. Since local_int.lock + * only protects local_int.* and not guest memory we can give + * up the lock here */ + spin_unlock_bh(&vcpu->arch.local_int.lock); + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_NOADDR); + if (rc >= 0) + rc = -EOPNOTSUPP; + } else + spin_unlock_bh(&vcpu->arch.local_int.lock); + return rc; +} + +static int handle_validity(struct kvm_vcpu *vcpu) +{ + unsigned long vmaddr; + int viwhy = vcpu->arch.sie_block->ipb >> 16; + int rc; + + vcpu->stat.exit_validity++; + if (viwhy == 0x37) { + vmaddr = gmap_fault(vcpu->arch.sie_block->prefix, + vcpu->arch.gmap); + if (IS_ERR_VALUE(vmaddr)) { + rc = -EOPNOTSUPP; + goto out; + } + rc = fault_in_pages_writeable((char __user *) vmaddr, + PAGE_SIZE); + if (rc) { + /* user will receive sigsegv, exit to user */ + rc = -EOPNOTSUPP; + goto out; + } + vmaddr = gmap_fault(vcpu->arch.sie_block->prefix + PAGE_SIZE, + vcpu->arch.gmap); + if (IS_ERR_VALUE(vmaddr)) { + rc = -EOPNOTSUPP; + goto out; + } + rc = fault_in_pages_writeable((char __user *) vmaddr, + PAGE_SIZE); + if (rc) { + /* user will receive sigsegv, exit to user */ + rc = -EOPNOTSUPP; + goto out; + } + } else + rc = -EOPNOTSUPP; + +out: + if (rc) + VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", + viwhy); + return rc; +} + +static int handle_instruction(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + vcpu->stat.exit_instruction++; + handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; +} + +static int handle_prog(struct kvm_vcpu *vcpu) +{ + vcpu->stat.exit_program_interruption++; + return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc); +} + +static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) +{ + int 
rc, rc2; + + vcpu->stat.exit_instr_and_program++; + rc = handle_instruction(vcpu); + rc2 = handle_prog(vcpu); + + if (rc == -EOPNOTSUPP) + vcpu->arch.sie_block->icptcode = 0x04; + if (rc) + return rc; + return rc2; +} + +static const intercept_handler_t intercept_funcs[] = { + [0x00 >> 2] = handle_noop, + [0x04 >> 2] = handle_instruction, + [0x08 >> 2] = handle_prog, + [0x0C >> 2] = handle_instruction_and_prog, + [0x10 >> 2] = handle_noop, + [0x14 >> 2] = handle_noop, + [0x1C >> 2] = kvm_s390_handle_wait, + [0x20 >> 2] = handle_validity, + [0x28 >> 2] = handle_stop, +}; + +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) +{ + intercept_handler_t func; + u8 code = vcpu->arch.sie_block->icptcode; + + if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs)) + return -EOPNOTSUPP; + func = intercept_funcs[code >> 2]; + if (func) + return func(vcpu); + return -EOPNOTSUPP; +} diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c new file mode 100644 index 00000000..2d9f9a72 --- /dev/null +++ b/arch/s390/kvm/interrupt.c @@ -0,0 +1,641 @@ +/* + * interrupt.c - handling kvm guest interrupts + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Carsten Otte <cotte@de.ibm.com> + */ + +#include <linux/interrupt.h> +#include <linux/kvm_host.h> +#include <linux/hrtimer.h> +#include <linux/signal.h> +#include <linux/slab.h> +#include <asm/asm-offsets.h> +#include <asm/uaccess.h> +#include "kvm-s390.h" +#include "gaccess.h" + +static int psw_extint_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); +} + +static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) +{ + if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) || + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT)) + return 0; + return 1; +} + +/* + * Decide whether the pending interrupt inti may be presented to the guest + * right now. + * + * External call, emergency, service and virtio interrupts require external + * interruptions to be enabled in the guest PSW and the guest CR0 mask bit + * tested for that interrupt type to be set. Program, stop, set-prefix and + * restart requests are always deliverable. + * + * Returns 1 if deliverable, 0 if not; an unknown type hits BUG(). + */ +static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + switch (inti->type) { + case KVM_S390_INT_EXTERNAL_CALL: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) + return 1; + /* must not fall through into the emergency signal mask check */ + return 0; + case KVM_S390_INT_EMERGENCY: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x4000ul) + return 1; + return 0; + case KVM_S390_INT_SERVICE: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x200ul) + return 1; + return 0; + case KVM_S390_INT_VIRTIO: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x200ul) + return 1; + return 0; + case KVM_S390_PROGRAM_INT: + case KVM_S390_SIGP_STOP: + case KVM_S390_SIGP_SET_PREFIX: + case KVM_S390_RESTART: + return 1; + default: + BUG(); + } + return 0; +} + +static void __set_cpu_idle(struct kvm_vcpu *vcpu) +{ + BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); + atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); +} + +static void __unset_cpu_idle(struct kvm_vcpu *vcpu) +{ + BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); + atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + clear_bit(vcpu->vcpu_id, 
vcpu->arch.local_int.float_int->idle_mask); +} + +static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(CPUSTAT_ECALL_PEND | + CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, + &vcpu->arch.sie_block->cpuflags); + vcpu->arch.sie_block->lctl = 0x0000; +} + +static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) +{ + atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); +} + +static void __set_intercept_indicator(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + switch (inti->type) { + case KVM_S390_INT_EXTERNAL_CALL: + case KVM_S390_INT_EMERGENCY: + case KVM_S390_INT_SERVICE: + case KVM_S390_INT_VIRTIO: + if (psw_extint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_EXT_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR0; + break; + case KVM_S390_SIGP_STOP: + __set_cpuflag(vcpu, CPUSTAT_STOP_INT); + break; + default: + BUG(); + } +} + +static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + const unsigned short table[] = { 2, 4, 4, 6 }; + int rc, exception = 0; + + switch (inti->type) { + case KVM_S390_INT_EMERGENCY: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); + vcpu->stat.deliver_emergency_signal++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_INT_EXTERNAL_CALL: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); + vcpu->stat.deliver_external_call++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 
inti->extcall.code); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_INT_SERVICE: + VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", + inti->ext.ext_params); + vcpu->stat.deliver_service_signal++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_INT_VIRTIO: + VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", + inti->ext.ext_params, inti->ext.ext_params2); + vcpu->stat.deliver_virtio_interrupt++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2, + inti->ext.ext_params2); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_SIGP_STOP: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); + vcpu->stat.deliver_stop_signal++; + __set_intercept_indicator(vcpu, inti); + break; + + case 
KVM_S390_SIGP_SET_PREFIX: + VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", + inti->prefix.address); + vcpu->stat.deliver_prefix_signal++; + kvm_s390_set_prefix(vcpu, inti->prefix.address); + break; + + case KVM_S390_RESTART: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); + vcpu->stat.deliver_restart_signal++; + rc = copy_to_guest(vcpu, offsetof(struct _lowcore, + restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + break; + + case KVM_S390_PROGRAM_INT: + VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", + inti->pgm.code, + table[vcpu->arch.sie_block->ipa >> 14]); + vcpu->stat.deliver_program_int++; + rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_PGM_ILC, + table[vcpu->arch.sie_block->ipa >> 14]); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_PGM_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + + default: + BUG(); + } + if (exception) { + printk("kvm: The guest lowcore is not mapped during interrupt " + "delivery, killing userspace\n"); + do_exit(SIGKILL); + } +} + +static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) +{ + int rc, exception = 0; + + if (psw_extint_disabled(vcpu)) + return 0; + if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) + return 0; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); + if (rc == -EFAULT) + exception = 1; + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 
1; + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + if (exception) { + printk("kvm: The guest lowcore is not mapped during interrupt " + "delivery, killing userspace\n"); + do_exit(SIGKILL); + } + return 1; +} + +static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; + struct kvm_s390_interrupt_info *inti; + int rc = 0; + + if (atomic_read(&li->active)) { + spin_lock_bh(&li->lock); + list_for_each_entry(inti, &li->list, list) + if (__interrupt_is_deliverable(vcpu, inti)) { + rc = 1; + break; + } + spin_unlock_bh(&li->lock); + } + + if ((!rc) && atomic_read(&fi->active)) { + spin_lock(&fi->lock); + list_for_each_entry(inti, &fi->list, list) + if (__interrupt_is_deliverable(vcpu, inti)) { + rc = 1; + break; + } + spin_unlock(&fi->lock); + } + + if ((!rc) && (vcpu->arch.sie_block->ckc < + get_clock() + vcpu->arch.sie_block->epoch)) { + if ((!psw_extint_disabled(vcpu)) && + (vcpu->arch.sie_block->gcr[0] & 0x800ul)) + rc = 1; + } + + return rc; +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) +{ + u64 now, sltime; + DECLARE_WAITQUEUE(wait, current); + + vcpu->stat.exit_wait_state++; + if (kvm_cpu_has_interrupt(vcpu)) + return 0; + + __set_cpu_idle(vcpu); + spin_lock_bh(&vcpu->arch.local_int.lock); + vcpu->arch.local_int.timer_due = 0; + spin_unlock_bh(&vcpu->arch.local_int.lock); + + if (psw_interrupts_disabled(vcpu)) { + VCPU_EVENT(vcpu, 3, "%s", "disabled wait"); + __unset_cpu_idle(vcpu); + return -EOPNOTSUPP; /* disabled wait */ + } + + if (psw_extint_disabled(vcpu) || + (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) { + VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); + goto no_timer; + } + + now = get_clock() + vcpu->arch.sie_block->epoch; + if 
(vcpu->arch.sie_block->ckc < now) { + __unset_cpu_idle(vcpu); + return 0; + } + + sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9; + + hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); + VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); +no_timer: + spin_lock(&vcpu->arch.local_int.float_int->lock); + spin_lock_bh(&vcpu->arch.local_int.lock); + add_wait_queue(&vcpu->arch.local_int.wq, &wait); + while (list_empty(&vcpu->arch.local_int.list) && + list_empty(&vcpu->arch.local_int.float_int->list) && + (!vcpu->arch.local_int.timer_due) && + !signal_pending(current)) { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock(&vcpu->arch.local_int.float_int->lock); + vcpu_put(vcpu); + schedule(); + vcpu_load(vcpu); + spin_lock(&vcpu->arch.local_int.float_int->lock); + spin_lock_bh(&vcpu->arch.local_int.lock); + } + __unset_cpu_idle(vcpu); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&vcpu->arch.local_int.wq, &wait); + spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock(&vcpu->arch.local_int.float_int->lock); + hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); + return 0; +} + +void kvm_s390_tasklet(unsigned long parm) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm; + + spin_lock(&vcpu->arch.local_int.lock); + vcpu->arch.local_int.timer_due = 1; + if (waitqueue_active(&vcpu->arch.local_int.wq)) + wake_up_interruptible(&vcpu->arch.local_int.wq); + spin_unlock(&vcpu->arch.local_int.lock); +} + +/* + * low level hrtimer wake routine. Because this runs in hardirq context + * we schedule a tasklet to do the real work. 
+ */ +enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) +{ + struct kvm_vcpu *vcpu; + + vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); + tasklet_schedule(&vcpu->arch.tasklet); + + return HRTIMER_NORESTART; +} + +void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; + struct kvm_s390_interrupt_info *n, *inti = NULL; + int deliver; + + __reset_intercept_indicators(vcpu); + if (atomic_read(&li->active)) { + do { + deliver = 0; + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + if (__interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&li->list)) + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } + + if ((vcpu->arch.sie_block->ckc < + get_clock() + vcpu->arch.sie_block->epoch)) + __try_deliver_ckc_interrupt(vcpu); + + if (atomic_read(&fi->active)) { + do { + deliver = 0; + spin_lock(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + if (__interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } +} + +int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_PROGRAM_INT; + inti->pgm.code = code; + + VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); 
+ spin_lock_bh(&li->lock); + list_add(&inti->list, &li->list); + atomic_set(&li->active, 1); + BUG_ON(waitqueue_active(&li->wq)); + spin_unlock_bh(&li->lock); + return 0; +} + +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int) +{ + struct kvm_s390_local_interrupt *li; + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *inti; + int sigcpu; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + switch (s390int->type) { + case KVM_S390_INT_VIRTIO: + VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx", + s390int->parm, s390int->parm64); + inti->type = s390int->type; + inti->ext.ext_params = s390int->parm; + inti->ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_INT_SERVICE: + VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); + inti->type = s390int->type; + inti->ext.ext_params = s390int->parm; + break; + case KVM_S390_PROGRAM_INT: + case KVM_S390_SIGP_STOP: + case KVM_S390_INT_EXTERNAL_CALL: + case KVM_S390_INT_EMERGENCY: + default: + kfree(inti); + return -EINVAL; + } + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + list_add_tail(&inti->list, &fi->list); + atomic_set(&fi->active, 1); + sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); + if (sigcpu == KVM_MAX_VCPUS) { + do { + sigcpu = fi->next_rr_cpu++; + if (sigcpu == KVM_MAX_VCPUS) + sigcpu = fi->next_rr_cpu = 0; + } while (fi->local_int[sigcpu] == NULL); + } + li = fi->local_int[sigcpu]; + spin_lock_bh(&li->lock); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); + return 0; +} + +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int) +{ + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + switch 
(s390int->type) { + case KVM_S390_PROGRAM_INT: + if (s390int->parm & 0xffff0000) { + kfree(inti); + return -EINVAL; + } + inti->type = s390int->type; + inti->pgm.code = s390int->parm; + VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", + s390int->parm); + break; + case KVM_S390_SIGP_SET_PREFIX: + inti->prefix.address = s390int->parm; + inti->type = s390int->type; + VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", + s390int->parm); + break; + case KVM_S390_SIGP_STOP: + case KVM_S390_RESTART: + case KVM_S390_INT_EXTERNAL_CALL: + case KVM_S390_INT_EMERGENCY: + VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); + inti->type = s390int->type; + break; + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + default: + kfree(inti); + return -EINVAL; + } + + mutex_lock(&vcpu->kvm->lock); + li = &vcpu->arch.local_int; + spin_lock_bh(&li->lock); + if (inti->type == KVM_S390_PROGRAM_INT) + list_add(&inti->list, &li->list); + else + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + if (inti->type == KVM_S390_SIGP_STOP) + li->action_bits |= ACTION_STOP_ON_STOP; + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&vcpu->arch.local_int.wq); + spin_unlock_bh(&li->lock); + mutex_unlock(&vcpu->kvm->lock); + return 0; +} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c new file mode 100644 index 00000000..217ce443 --- /dev/null +++ b/arch/s390/kvm/kvm-s390.c @@ -0,0 +1,908 @@ +/* + * s390host.c -- hosting zSeries kernel virtual machines + * + * Copyright IBM Corp. 2008,2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + * Heiko Carstens <heiko.carstens@de.ibm.com> + * Christian Ehrhardt <ehrhardt@de.ibm.com> + */ + +#include <linux/compiler.h> +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/hrtimer.h> +#include <linux/init.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <asm/asm-offsets.h> +#include <asm/lowcore.h> +#include <asm/pgtable.h> +#include <asm/nmi.h> +#include <asm/switch_to.h> +#include "kvm-s390.h" +#include "gaccess.h" + +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { "userspace_handled", VCPU_STAT(exit_userspace) }, + { "exit_null", VCPU_STAT(exit_null) }, + { "exit_validity", VCPU_STAT(exit_validity) }, + { "exit_stop_request", VCPU_STAT(exit_stop_request) }, + { "exit_external_request", VCPU_STAT(exit_external_request) }, + { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, + { "exit_instruction", VCPU_STAT(exit_instruction) }, + { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, + { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, + { "instruction_lctl", VCPU_STAT(instruction_lctl) }, + { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, + { "deliver_external_call", VCPU_STAT(deliver_external_call) }, + { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, + { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, + { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, + { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, + { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, + { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, + { "exit_wait_state", VCPU_STAT(exit_wait_state) 
}, + { "instruction_stidp", VCPU_STAT(instruction_stidp) }, + { "instruction_spx", VCPU_STAT(instruction_spx) }, + { "instruction_stpx", VCPU_STAT(instruction_stpx) }, + { "instruction_stap", VCPU_STAT(instruction_stap) }, + { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, + { "instruction_stsch", VCPU_STAT(instruction_stsch) }, + { "instruction_chsc", VCPU_STAT(instruction_chsc) }, + { "instruction_stsi", VCPU_STAT(instruction_stsi) }, + { "instruction_stfl", VCPU_STAT(instruction_stfl) }, + { "instruction_tprot", VCPU_STAT(instruction_tprot) }, + { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, + { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, + { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, + { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, + { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, + { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, + { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, + { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "diagnose_10", VCPU_STAT(diagnose_10) }, + { "diagnose_44", VCPU_STAT(diagnose_44) }, + { NULL } +}; + +static unsigned long long *facilities; + +/* Section: not file related */ +int kvm_arch_hardware_enable(void *garbage) +{ + /* every s390 is virtualization enabled ;-) */ + return 0; +} + +void kvm_arch_hardware_disable(void *garbage) +{ +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +void kvm_arch_check_processor_compat(void *rtn) +{ +} + +int kvm_arch_init(void *opaque) +{ + return 0; +} + +void kvm_arch_exit(void) +{ +} + +/* Section: device related */ +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + if (ioctl == KVM_S390_ENABLE_SIE) + return s390_enable_sie(); + return -EINVAL; +} + +int kvm_dev_ioctl_check_extension(long 
ext) +{ + int r; + + switch (ext) { + case KVM_CAP_S390_PSW: + case KVM_CAP_S390_GMAP: + case KVM_CAP_SYNC_MMU: +#ifdef CONFIG_KVM_S390_UCONTROL + case KVM_CAP_S390_UCONTROL: +#endif + case KVM_CAP_SYNC_REGS: + r = 1; + break; + default: + r = 0; + } + return r; +} + +/* Section: vm related */ +/* + * Get (and clear) the dirty memory log for a memory slot. + */ +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + return 0; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + int r; + + switch (ioctl) { + case KVM_S390_INTERRUPT: { + struct kvm_s390_interrupt s390int; + + r = -EFAULT; + if (copy_from_user(&s390int, argp, sizeof(s390int))) + break; + r = kvm_s390_inject_vm(kvm, &s390int); + break; + } + default: + r = -ENOTTY; + } + + return r; +} + +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + int rc; + char debug_name[16]; + + rc = -EINVAL; +#ifdef CONFIG_KVM_S390_UCONTROL + if (type & ~KVM_VM_S390_UCONTROL) + goto out_err; + if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) + goto out_err; +#else + if (type) + goto out_err; +#endif + + rc = s390_enable_sie(); + if (rc) + goto out_err; + + rc = -ENOMEM; + + kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); + if (!kvm->arch.sca) + goto out_err; + + sprintf(debug_name, "kvm-%u", current->pid); + + kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); + if (!kvm->arch.dbf) + goto out_nodbf; + + spin_lock_init(&kvm->arch.float_int.lock); + INIT_LIST_HEAD(&kvm->arch.float_int.list); + + debug_register_view(kvm->arch.dbf, &debug_sprintf_view); + VM_EVENT(kvm, 3, "%s", "vm created"); + + if (type & KVM_VM_S390_UCONTROL) { + kvm->arch.gmap = NULL; + } else { + kvm->arch.gmap = gmap_alloc(current->mm); + if (!kvm->arch.gmap) + goto out_nogmap; + } + return 0; +out_nogmap: + debug_unregister(kvm->arch.dbf); 
+out_nodbf: + free_page((unsigned long)(kvm->arch.sca)); +out_err: + return rc; +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 3, "%s", "free cpu"); + if (!kvm_is_ucontrol(vcpu->kvm)) { + clear_bit(63 - vcpu->vcpu_id, + (unsigned long *) &vcpu->kvm->arch.sca->mcn); + if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == + (__u64) vcpu->arch.sie_block) + vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; + } + smp_mb(); + + if (kvm_is_ucontrol(vcpu->kvm)) + gmap_free(vcpu->arch.gmap); + + free_page((unsigned long)(vcpu->arch.sie_block)); + kvm_vcpu_uninit(vcpu); + kfree(vcpu); +} + +static void kvm_free_vcpus(struct kvm *kvm) +{ + unsigned int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_arch_vcpu_destroy(vcpu); + + mutex_lock(&kvm->lock); + for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) + kvm->vcpus[i] = NULL; + + atomic_set(&kvm->online_vcpus, 0); + mutex_unlock(&kvm->lock); +} + +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kvm_free_vcpus(kvm); + free_page((unsigned long)(kvm->arch.sca)); + debug_unregister(kvm->arch.dbf); + if (!kvm_is_ucontrol(kvm)) + gmap_free(kvm->arch.gmap); +} + +/* Section: vcpu related */ +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + if (kvm_is_ucontrol(vcpu->kvm)) { + vcpu->arch.gmap = gmap_alloc(current->mm); + if (!vcpu->arch.gmap) + return -ENOMEM; + return 0; + } + + vcpu->arch.gmap = vcpu->kvm->arch.gmap; + vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | + KVM_SYNC_GPRS | + KVM_SYNC_ACRS | + KVM_SYNC_CRS; + return 0; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + /* Nothing todo */ +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + save_fp_regs(&vcpu->arch.host_fpregs); + save_access_regs(vcpu->arch.host_acrs); + vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; + restore_fp_regs(&vcpu->arch.guest_fpregs); + restore_access_regs(vcpu->run->s.regs.acrs); + gmap_enable(vcpu->arch.gmap); + 
atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + gmap_disable(vcpu->arch.gmap); + save_fp_regs(&vcpu->arch.guest_fpregs); + save_access_regs(vcpu->run->s.regs.acrs); + restore_fp_regs(&vcpu->arch.host_fpregs); + restore_access_regs(vcpu->arch.host_acrs); +} + +static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) +{ + /* this equals initial cpu reset in pop, but we don't switch to ESA */ + vcpu->arch.sie_block->gpsw.mask = 0UL; + vcpu->arch.sie_block->gpsw.addr = 0UL; + kvm_s390_set_prefix(vcpu, 0); + vcpu->arch.sie_block->cputm = 0UL; + vcpu->arch.sie_block->ckc = 0UL; + vcpu->arch.sie_block->todpr = 0; + memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); + vcpu->arch.sie_block->gcr[0] = 0xE0UL; + vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; + vcpu->arch.guest_fpregs.fpc = 0; + asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); + vcpu->arch.sie_block->gbea = 1; +} + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | + CPUSTAT_SM | + CPUSTAT_STOPPED); + vcpu->arch.sie_block->ecb = 6; + vcpu->arch.sie_block->eca = 0xC1002001U; + vcpu->arch.sie_block->fac = (int) (long) facilities; + hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, + (unsigned long) vcpu); + vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; + get_cpu_id(&vcpu->arch.cpu_id); + vcpu->arch.cpu_id.version = 0xff; + return 0; +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, + unsigned int id) +{ + struct kvm_vcpu *vcpu; + int rc = -EINVAL; + + if (id >= KVM_MAX_VCPUS) + goto out; + + rc = -ENOMEM; + + vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); + if (!vcpu) + goto out; + + vcpu->arch.sie_block = (struct kvm_s390_sie_block *) + get_zeroed_page(GFP_KERNEL); + + if 
(!vcpu->arch.sie_block) + goto out_free_cpu; + + vcpu->arch.sie_block->icpua = id; + if (!kvm_is_ucontrol(kvm)) { + if (!kvm->arch.sca) { + WARN_ON_ONCE(1); + goto out_free_cpu; + } + if (!kvm->arch.sca->cpu[id].sda) + kvm->arch.sca->cpu[id].sda = + (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = + (__u32)(((__u64)kvm->arch.sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; + set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); + } + + spin_lock_init(&vcpu->arch.local_int.lock); + INIT_LIST_HEAD(&vcpu->arch.local_int.list); + vcpu->arch.local_int.float_int = &kvm->arch.float_int; + spin_lock(&kvm->arch.float_int.lock); + kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; + init_waitqueue_head(&vcpu->arch.local_int.wq); + vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; + spin_unlock(&kvm->arch.float_int.lock); + + rc = kvm_vcpu_init(vcpu, kvm, id); + if (rc) + goto out_free_sie_block; + VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, + vcpu->arch.sie_block); + + return vcpu; +out_free_sie_block: + free_page((unsigned long)(vcpu->arch.sie_block)); +out_free_cpu: + kfree(vcpu); +out: + return ERR_PTR(rc); +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + /* kvm common code refers to this, but never calls it */ + BUG(); + return 0; +} + +static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) +{ + kvm_s390_vcpu_initial_reset(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs)); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); + memcpy(&vcpu->arch.sie_block->gcr, 
&sregs->crs, sizeof(sregs->crs)); + restore_access_regs(vcpu->run->s.regs.acrs); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); + memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); + vcpu->arch.guest_fpregs.fpc = fpu->fpc & FPC_VALID_MASK; + restore_fp_regs(&vcpu->arch.guest_fpregs); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); + fpu->fpc = vcpu->arch.guest_fpregs.fpc; + return 0; +} + +static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) +{ + int rc = 0; + + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED)) + rc = -EBUSY; + else { + vcpu->run->psw_mask = psw.mask; + vcpu->run->psw_addr = psw.addr; + } + return rc; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; /* not implemented yet */ +} + +static int __vcpu_run(struct kvm_vcpu *vcpu) +{ + int rc; + + memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); + + if (need_resched()) + schedule(); + + if (test_thread_flag(TIF_MCCK_PENDING)) + s390_handle_mcck(); + + if (!kvm_is_ucontrol(vcpu->kvm)) + 
kvm_s390_deliver_pending_interrupts(vcpu); + + vcpu->arch.sie_block->icptcode = 0; + local_irq_disable(); + kvm_guest_enter(); + local_irq_enable(); + VCPU_EVENT(vcpu, 6, "entering sie flags %x", + atomic_read(&vcpu->arch.sie_block->cpuflags)); + rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); + if (rc) { + if (kvm_is_ucontrol(vcpu->kvm)) { + rc = SIE_INTERCEPT_UCONTROL; + } else { + VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = 0; + } + } + VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", + vcpu->arch.sie_block->icptcode); + local_irq_disable(); + kvm_guest_exit(); + local_irq_enable(); + + memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); + return rc; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int rc; + sigset_t sigsaved; + +rerun_vcpu: + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + + BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); + + switch (kvm_run->exit_reason) { + case KVM_EXIT_S390_SIEIC: + case KVM_EXIT_UNKNOWN: + case KVM_EXIT_INTR: + case KVM_EXIT_S390_RESET: + case KVM_EXIT_S390_UCONTROL: + break; + default: + BUG(); + } + + vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; + vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; + if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) { + kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX; + kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + } + if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { + kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS; + memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); + kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + } + + might_fault(); + + do { + rc = __vcpu_run(vcpu); + if (rc) + break; + if (kvm_is_ucontrol(vcpu->kvm)) + rc = -EOPNOTSUPP; + else + rc = kvm_handle_sie_intercept(vcpu); + } while 
(!signal_pending(current) && !rc); + + if (rc == SIE_INTERCEPT_RERUNVCPU) + goto rerun_vcpu; + + if (signal_pending(current) && !rc) { + kvm_run->exit_reason = KVM_EXIT_INTR; + rc = -EINTR; + } + +#ifdef CONFIG_KVM_S390_UCONTROL + if (rc == SIE_INTERCEPT_UCONTROL) { + kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL; + kvm_run->s390_ucontrol.trans_exc_code = + current->thread.gmap_addr; + kvm_run->s390_ucontrol.pgm_code = 0x10; + rc = 0; + } +#endif + + if (rc == -EOPNOTSUPP) { + /* intercept cannot be handled in-kernel, prepare kvm-run */ + kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; + kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; + kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; + kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; + rc = 0; + } + + if (rc == -EREMOTE) { + /* intercept was handled, but userspace support is needed + * kvm_run has been prepared by the handler */ + rc = 0; + } + + kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; + kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; + kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix; + memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + vcpu->stat.exit_userspace++; + return rc; +} + +static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from, + unsigned long n, int prefix) +{ + if (prefix) + return copy_to_guest(vcpu, guestdest, from, n); + else + return copy_to_guest_absolute(vcpu, guestdest, from, n); +} + +/* + * store status at address + * we use have two special cases: + * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit + * KVM_S390_STORE_STATUS_PREFIXED: -> prefix + */ +int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + unsigned char archmode = 1; + int prefix; + + if (addr == KVM_S390_STORE_STATUS_NOADDR) { + if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1)) + return -EFAULT; + addr = SAVE_AREA_BASE; + prefix = 0; + } else if 
(addr == KVM_S390_STORE_STATUS_PREFIXED) { + if (copy_to_guest(vcpu, 163ul, &archmode, 1)) + return -EFAULT; + addr = SAVE_AREA_BASE; + prefix = 1; + } else + prefix = 0; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), + vcpu->arch.guest_fpregs.fprs, 128, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs), + vcpu->run->s.regs.gprs, 128, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw), + &vcpu->arch.sie_block->gpsw, 16, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg), + &vcpu->arch.sie_block->prefix, 4, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, + addr + offsetof(struct save_area, fp_ctrl_reg), + &vcpu->arch.guest_fpregs.fpc, 4, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg), + &vcpu->arch.sie_block->todpr, 4, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer), + &vcpu->arch.sie_block->cputm, 8, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp), + &vcpu->arch.sie_block->ckc, 8, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs), + &vcpu->run->s.regs.acrs, 64, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, + addr + offsetof(struct save_area, ctrl_regs), + &vcpu->arch.sie_block->gcr, 128, prefix)) + return -EFAULT; + return 0; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + long r; + + switch (ioctl) { + case KVM_S390_INTERRUPT: { + struct kvm_s390_interrupt s390int; + + r = -EFAULT; + if (copy_from_user(&s390int, argp, sizeof(s390int))) + break; + r = kvm_s390_inject_vcpu(vcpu, &s390int); + break; + } + case KVM_S390_STORE_STATUS: + r = kvm_s390_vcpu_store_status(vcpu, arg); + 
break; + case KVM_S390_SET_INITIAL_PSW: { + psw_t psw; + + r = -EFAULT; + if (copy_from_user(&psw, argp, sizeof(psw))) + break; + r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); + break; + } + case KVM_S390_INITIAL_RESET: + r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); + break; +#ifdef CONFIG_KVM_S390_UCONTROL + case KVM_S390_UCAS_MAP: { + struct kvm_s390_ucas_mapping ucasmap; + + if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { + r = -EFAULT; + break; + } + + if (!kvm_is_ucontrol(vcpu->kvm)) { + r = -EINVAL; + break; + } + + r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, + ucasmap.vcpu_addr, ucasmap.length); + break; + } + case KVM_S390_UCAS_UNMAP: { + struct kvm_s390_ucas_mapping ucasmap; + + if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { + r = -EFAULT; + break; + } + + if (!kvm_is_ucontrol(vcpu->kvm)) { + r = -EINVAL; + break; + } + + r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, + ucasmap.length); + break; + } +#endif + case KVM_S390_VCPU_FAULT: { + r = gmap_fault(arg, vcpu->arch.gmap); + if (!IS_ERR_VALUE(r)) + r = 0; + break; + } + default: + r = -ENOTTY; + } + return r; +} + +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ +#ifdef CONFIG_KVM_S390_UCONTROL + if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) + && (kvm_is_ucontrol(vcpu->kvm))) { + vmf->page = virt_to_page(vcpu->arch.sie_block); + get_page(vmf->page); + return 0; + } +#endif + return VM_FAULT_SIGBUS; +} + +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + return 0; +} + +/* Section: memory related */ +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_memory_slot old, + struct kvm_userspace_memory_region *mem, + int user_alloc) +{ + /* A few sanity checks. 
We can have exactly one memory slot which has + to start at guest virtual zero and which has to be located at a + page boundary in userland and which has to end at a page boundary. + The memory in userland is ok to be fragmented into various different + vmas. It is okay to mmap() and munmap() stuff in this slot after + doing this call at any time */ + + if (mem->slot) + return -EINVAL; + + if (mem->guest_phys_addr) + return -EINVAL; + + if (mem->userspace_addr & 0xffffful) + return -EINVAL; + + if (mem->memory_size & 0xffffful) + return -EINVAL; + + if (!user_alloc) + return -EINVAL; + + return 0; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + int rc; + + + rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, + mem->guest_phys_addr, mem->memory_size); + if (rc) + printk(KERN_WARNING "kvm-s390: failed to commit memory region\n"); + return; +} + +void kvm_arch_flush_shadow(struct kvm *kvm) +{ +} + +static int __init kvm_s390_init(void) +{ + int ret; + ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + if (ret) + return ret; + + /* + * guests can ask for up to 255+1 double words, we need a full page + * to hold the maximum amount of facilities. On the other hand, we + * only set facilities that are known to work in KVM. 
+ */ + facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); + if (!facilities) { + kvm_exit(); + return -ENOMEM; + } + memcpy(facilities, S390_lowcore.stfle_fac_list, 16); + facilities[0] &= 0xff00fff3f47c0000ULL; + facilities[1] &= 0x201c000000000000ULL; + return 0; +} + +static void __exit kvm_s390_exit(void) +{ + free_page((unsigned long) facilities); + kvm_exit(); +} + +module_init(kvm_s390_init); +module_exit(kvm_s390_exit); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h new file mode 100644 index 00000000..ff28f9d1 --- /dev/null +++ b/arch/s390/kvm/kvm-s390.h @@ -0,0 +1,92 @@ +/* + * kvm_s390.h - definition for kvm on s390 + * + * Copyright IBM Corp. 2008,2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + * Christian Ehrhardt <ehrhardt@de.ibm.com> + */ + +#ifndef ARCH_S390_KVM_S390_H +#define ARCH_S390_KVM_S390_H + +#include <linux/hrtimer.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> + +/* The current code can have up to 256 pages for virtio */ +#define VIRTIODESCSPACE (256ul * 4096ul) + +typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); + +/* negativ values are error codes, positive values for internal conditions */ +#define SIE_INTERCEPT_RERUNVCPU (1<<0) +#define SIE_INTERCEPT_UCONTROL (1<<1) +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); + +#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \ + d_args); \ +} while (0) + +#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \ + "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \ + d_vcpu->arch.sie_block->gpsw.mask, 
d_vcpu->arch.sie_block->gpsw.addr,\ + d_args); \ +} while (0) + +static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) +{ + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; +} + +static inline int kvm_is_ucontrol(struct kvm *kvm) +{ +#ifdef CONFIG_KVM_S390_UCONTROL + if (kvm->arch.gmap) + return 0; + return 1; +#else + return 0; +#endif +} + +static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) +{ + vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u; + vcpu->arch.sie_block->ihcpu = 0xffff; +} + +int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); +enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); +void kvm_s390_tasklet(unsigned long parm); +void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int); +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int); +int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); +int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); + +/* implemented in priv.c */ +int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); +int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); + +/* implemented in sigp.c */ +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); + +/* implemented in kvm-s390.c */ +int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, + unsigned long addr); +/* implemented in diag.c */ +int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); + +#endif diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c new file mode 100644 index 00000000..e5a45dbd --- /dev/null +++ b/arch/s390/kvm/priv.c @@ -0,0 +1,382 @@ +/* + * priv.c - handling privileged instructions + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. 
+ * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm.h> +#include <linux/gfp.h> +#include <linux/errno.h> +#include <asm/current.h> +#include <asm/debug.h> +#include <asm/ebcdic.h> +#include <asm/sysinfo.h> +#include "gaccess.h" +#include "kvm-s390.h" + +static int handle_set_prefix(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + u32 address = 0; + u8 tmp; + + vcpu->stat.instruction_spx++; + + operand2 = disp2; + if (base2) + operand2 += vcpu->run->s.regs.gprs[base2]; + + /* must be word boundary */ + if (operand2 & 3) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + /* get the value */ + if (get_guest_u32(vcpu, operand2, &address)) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + address = address & 0x7fffe000u; + + /* make sure that the new value is valid memory */ + if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || + (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + kvm_s390_set_prefix(vcpu, address); + + VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); +out: + return 0; +} + +static int handle_store_prefix(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + u32 address; + + vcpu->stat.instruction_stpx++; + operand2 = disp2; + if (base2) + operand2 += vcpu->run->s.regs.gprs[base2]; + + /* must be word boundary */ + if (operand2 & 3) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + address = vcpu->arch.sie_block->prefix; + address = address & 0x7fffe000u; + + /* get the value */ + if (put_guest_u32(vcpu, operand2, address)) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + 
VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); +out: + return 0; +} + +static int handle_store_cpu_address(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 useraddr; + int rc; + + vcpu->stat.instruction_stap++; + useraddr = disp2; + if (base2) + useraddr += vcpu->run->s.regs.gprs[base2]; + + if (useraddr & 1) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); +out: + return 0; +} + +static int handle_skey(struct kvm_vcpu *vcpu) +{ + vcpu->stat.instruction_storage_key++; + vcpu->arch.sie_block->gpsw.addr -= 4; + VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); + return 0; +} + +static int handle_stsch(struct kvm_vcpu *vcpu) +{ + vcpu->stat.instruction_stsch++; + VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3"); + /* condition code 3 */ + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + return 0; +} + +static int handle_chsc(struct kvm_vcpu *vcpu) +{ + vcpu->stat.instruction_chsc++; + VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3"); + /* condition code 3 */ + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + return 0; +} + +static int handle_stfl(struct kvm_vcpu *vcpu) +{ + unsigned int facility_list; + int rc; + + vcpu->stat.instruction_stfl++; + /* only pass the facility bits, which we can handle */ + facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3; + + rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), + &facility_list, sizeof(facility_list)); + if (rc == -EFAULT) + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + else + VCPU_EVENT(vcpu, 5, "store facility list value %x", 
+ facility_list); + return 0; +} + +static int handle_stidp(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + int rc; + + vcpu->stat.instruction_stidp++; + operand2 = disp2; + if (base2) + operand2 += vcpu->run->s.regs.gprs[base2]; + + if (operand2 & 7) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); +out: + return 0; +} +
/*
 * Fill @mem with the STSI 3.2.2 data presented to the guest.
 *
 * The host's own 3.2.2 data is fetched first (an -ENOSYS answer is treated
 * as "no entries"). The existing vm[] entries are then moved down by one
 * slot and slot 0 is filled with a description of this KVM guest.
 */
static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
{
	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
	int cpus = 0;
	int n;

	/* count the cpu slots that have a local interrupt structure */
	spin_lock(&fi->lock);
	for (n = 0; n < KVM_MAX_VCPUS; n++)
		if (fi->local_int[n])
			cpus++;
	spin_unlock(&fi->lock);

	/* deal with other level 3 hypervisors */
	if (stsi(mem, 3, 2, 2) == -ENOSYS)
		mem->count = 0;
	/* the entry count is only grown while it is below 8 */
	if (mem->count < 8)
		mem->count++;
	/* shift from the back so that no entry is overwritten before it moved */
	for (n = mem->count - 1; n > 0 ; n--)
		memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));

	mem->vm[0].cpus_total = cpus;
	mem->vm[0].cpus_configured = cpus;
	mem->vm[0].cpus_standby = 0;
	mem->vm[0].cpus_reserved = 0;
	mem->vm[0].caf = 1000;
	/* exactly 8 bytes are copied, no terminating NUL */
	memcpy(mem->vm[0].name, "KVMguest", 8);
	ASCEBC(mem->vm[0].name, 8);
	/*
	 * The literal must hold 16 characters ("KVM/Linux" plus 7 blanks):
	 * 16 bytes are copied, so a shorter literal would be read past its
	 * end.
	 */
	memcpy(mem->vm[0].cpi, "KVM/Linux       ", 16);
	ASCEBC(mem->vm[0].cpi, 16);
}

+static int handle_stsi(struct kvm_vcpu *vcpu) +{ + int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; + int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; + int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + unsigned long mem; + + vcpu->stat.instruction_stsi++; + VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); + + operand2 = disp2; + if
(base2) + operand2 += vcpu->run->s.regs.gprs[base2]; + + if (operand2 & 0xfff && fc > 0) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (fc) { + case 0: + vcpu->run->s.regs.gprs[0] = 3 << 28; + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + return 0; + case 1: /* same handling for 1 and 2 */ + case 2: + mem = get_zeroed_page(GFP_KERNEL); + if (!mem) + goto out_fail; + if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS) + goto out_mem; + break; + case 3: + if (sel1 != 2 || sel2 != 2) + goto out_fail; + mem = get_zeroed_page(GFP_KERNEL); + if (!mem) + goto out_fail; + handle_stsi_3_2_2(vcpu, (void *) mem); + break; + default: + goto out_fail; + } + + if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out_mem; + } + free_page(mem); + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->run->s.regs.gprs[0] = 0; + return 0; +out_mem: + free_page(mem); +out_fail: + /* condition code 3 */ + vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; + return 0; +} + +static intercept_handler_t priv_handlers[256] = { + [0x02] = handle_stidp, + [0x10] = handle_set_prefix, + [0x11] = handle_store_prefix, + [0x12] = handle_store_cpu_address, + [0x29] = handle_skey, + [0x2a] = handle_skey, + [0x2b] = handle_skey, + [0x34] = handle_stsch, + [0x5f] = handle_chsc, + [0x7d] = handle_stsi, + [0xb1] = handle_stfl, +}; + +int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + /* + * a lot of B2 instructions are priviledged. We first check for + * the privileged ones, that we can handle in the kernel. If the + * kernel can handle this instruction, we check for the problem + * state bit and (a) handle the instruction or (b) send a code 2 + * program check. 
+ * Anything else goes to userspace.*/ + handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) { + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, + PGM_PRIVILEGED_OPERATION); + else + return handler(vcpu); + } + return -EOPNOTSUPP; +} + +static int handle_tprot(struct kvm_vcpu *vcpu) +{ + int base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; + int disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; + int base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12; + int disp2 = vcpu->arch.sie_block->ipb & 0x0fff; + u64 address1 = disp1 + base1 ? vcpu->run->s.regs.gprs[base1] : 0; + u64 address2 = disp2 + base2 ? vcpu->run->s.regs.gprs[base2] : 0; + struct vm_area_struct *vma; + unsigned long user_address; + + vcpu->stat.instruction_tprot++; + + /* we only handle the Linux memory detection case: + * access key == 0 + * guest DAT == off + * everything else goes to userspace. */ + if (address2 & 0xf0) + return -EOPNOTSUPP; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) + return -EOPNOTSUPP; + + + /* we must resolve the address without holding the mmap semaphore. + * This is ok since the userspace hypervisor is not supposed to change + * the mapping while the guest queries the memory. Otherwise the guest + * might crash or get wrong info anyway. */ + user_address = (unsigned long) __guestaddr_to_user(vcpu, address1); + + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, user_address); + if (!vma) { + up_read(¤t->mm->mmap_sem); + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ)) + vcpu->arch.sie_block->gpsw.mask |= (1ul << 44); + if (!(vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_READ)) + vcpu->arch.sie_block->gpsw.mask |= (2ul << 44); + + up_read(¤t->mm->mmap_sem); + return 0; +} + +int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) +{ + /* For e5xx... 
instructions we only handle TPROT */ + if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01) + return handle_tprot(vcpu); + return -EOPNOTSUPP; +} + diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c new file mode 100644 index 00000000..0ad4cf23 --- /dev/null +++ b/arch/s390/kvm/sigp.c @@ -0,0 +1,421 @@ +/* + * sigp.c - handlinge interprocessor communication + * + * Copyright IBM Corp. 2008,2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + * Christian Ehrhardt <ehrhardt@de.ibm.com> + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/slab.h> +#include "gaccess.h" +#include "kvm-s390.h" + +/* sigp order codes */ +#define SIGP_SENSE 0x01 +#define SIGP_EXTERNAL_CALL 0x02 +#define SIGP_EMERGENCY 0x03 +#define SIGP_START 0x04 +#define SIGP_STOP 0x05 +#define SIGP_RESTART 0x06 +#define SIGP_STOP_STORE_STATUS 0x09 +#define SIGP_INITIAL_CPU_RESET 0x0b +#define SIGP_CPU_RESET 0x0c +#define SIGP_SET_PREFIX 0x0d +#define SIGP_STORE_STATUS_ADDR 0x0e +#define SIGP_SET_ARCH 0x12 +#define SIGP_SENSE_RUNNING 0x15 + +/* cpu status bits */ +#define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL +#define SIGP_STAT_NOT_RUNNING 0x00000400UL +#define SIGP_STAT_INCORRECT_STATE 0x00000200UL +#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL +#define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL +#define SIGP_STAT_STOPPED 0x00000040UL +#define SIGP_STAT_OPERATOR_INTERV 0x00000020UL +#define SIGP_STAT_CHECK_STOP 0x00000010UL +#define SIGP_STAT_INOPERATIVE 0x00000004UL +#define SIGP_STAT_INVALID_ORDER 0x00000002UL +#define SIGP_STAT_RECEIVER_CHECK 0x00000001UL + + +static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, + u64 *reg) +{ + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + int rc; + + if 
(cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + spin_lock(&fi->lock); + if (fi->local_int[cpu_addr] == NULL) + rc = 3; /* not operational */ + else if (!(atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_STOPPED)) { + *reg &= 0xffffffff00000000UL; + rc = 1; /* status stored */ + } else { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STAT_STOPPED; + rc = 1; /* status stored */ + } + spin_unlock(&fi->lock); + + VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); + return rc; +} + +static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_INT_EMERGENCY; + inti->emerg.code = vcpu->vcpu_id; + + spin_lock(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + kfree(inti); + goto unlock; + } + spin_lock_bh(&li->lock); + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + rc = 0; /* order accepted */ + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); +unlock: + spin_unlock(&fi->lock); + return rc; +} + +static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_INT_EXTERNAL_CALL; + inti->extcall.code = vcpu->vcpu_id; + + spin_lock(&fi->lock); + 
li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + kfree(inti); + goto unlock; + } + spin_lock_bh(&li->lock); + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + rc = 0; /* order accepted */ + VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); +unlock: + spin_unlock(&fi->lock); + return rc; +} +
/*
 * Queue a SIGP stop request on the local interrupt list @li.
 *
 * @action is or-ed into li->action_bits. If the target cpu is already in
 * the stopped state nothing is queued and the order still counts as
 * accepted.
 *
 * Returns 0 (order accepted) or -ENOMEM if the request could not be
 * allocated.
 */
static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
{
	struct kvm_s390_interrupt_info *inti;

	/* allocated before li->lock is taken */
	inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
	if (!inti)
		return -ENOMEM;
	inti->type = KVM_S390_SIGP_STOP;

	spin_lock_bh(&li->lock);
	if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
		/* the request never reaches the list, so release it here */
		kfree(inti);
		goto out;
	}
	list_add_tail(&inti->list, &li->list);
	atomic_set(&li->active, 1);
	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
	li->action_bits |= action;
	if (waitqueue_active(&li->wq))
		wake_up_interruptible(&li->wq);
out:
	spin_unlock_bh(&li->lock);

	return 0; /* order accepted */
}

+static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) +{ + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + spin_lock(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + goto unlock; + } + + rc = __inject_sigp_stop(li, action); + +unlock: + spin_unlock(&fi->lock); + VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); + return rc; +} + +int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + return __inject_sigp_stop(li, action); +} + +static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) +{ + int rc; + + switch (parameter & 0xff) { + case 0: + rc = 3; /* not
operational */ + break; + case 1: + case 2: + rc = 0; /* order accepted */ + break; + default: + rc = -EOPNOTSUPP; + } + return rc; +} + +static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, + u64 *reg) +{ + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li = NULL; + struct kvm_s390_interrupt_info *inti; + int rc; + u8 tmp; + + /* make sure that the new value is valid memory */ + address = address & 0x7fffe000u; + if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || + copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)) { + *reg |= SIGP_STAT_INVALID_PARAMETER; + return 1; /* invalid parameter */ + } + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return 2; /* busy */ + + spin_lock(&fi->lock); + if (cpu_addr < KVM_MAX_VCPUS) + li = fi->local_int[cpu_addr]; + + if (li == NULL) { + rc = 1; /* incorrect state */ + *reg &= SIGP_STAT_INCORRECT_STATE; + kfree(inti); + goto out_fi; + } + + spin_lock_bh(&li->lock); + /* cpu must be in stopped state */ + if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { + rc = 1; /* incorrect state */ + *reg &= SIGP_STAT_INCORRECT_STATE; + kfree(inti); + goto out_li; + } + + inti->type = KVM_S390_SIGP_SET_PREFIX; + inti->prefix.address = address; + + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + rc = 0; /* order accepted */ + + VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); +out_li: + spin_unlock_bh(&li->lock); +out_fi: + spin_unlock(&fi->lock); + return rc; +} + +static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, + u64 *reg) +{ + int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + spin_lock(&fi->lock); + if (fi->local_int[cpu_addr] == NULL) + rc = 3; /* not operational */ + else { + if 
(atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_RUNNING) { + /* running */ + rc = 1; + } else { + /* not running */ + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STAT_NOT_RUNNING; + rc = 0; + } + } + spin_unlock(&fi->lock); + + VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, + rc); + + return rc; +} + +static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + int rc = 0; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + spin_lock(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + goto out; + } + + spin_lock_bh(&li->lock); + if (li->action_bits & ACTION_STOP_ON_STOP) + rc = 2; /* busy */ + else + VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace", + cpu_addr); + spin_unlock_bh(&li->lock); +out: + spin_unlock(&fi->lock); + return rc; +} + +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) +{ + int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int r3 = vcpu->arch.sie_block->ipa & 0x000f; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u32 parameter; + u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; + u8 order_code; + int rc; + + /* sigp in userspace can exit */ + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, + PGM_PRIVILEGED_OPERATION); + + order_code = disp2; + if (base2) + order_code += vcpu->run->s.regs.gprs[base2]; + + if (r1 % 2) + parameter = vcpu->run->s.regs.gprs[r1]; + else + parameter = vcpu->run->s.regs.gprs[r1 + 1]; + + switch (order_code) { + case SIGP_SENSE: + vcpu->stat.instruction_sigp_sense++; + rc = __sigp_sense(vcpu, cpu_addr, + &vcpu->run->s.regs.gprs[r1]); + break; + case SIGP_EXTERNAL_CALL: + vcpu->stat.instruction_sigp_external_call++; + rc = __sigp_external_call(vcpu, cpu_addr); + break; + case 
SIGP_EMERGENCY: + vcpu->stat.instruction_sigp_emergency++; + rc = __sigp_emergency(vcpu, cpu_addr); + break; + case SIGP_STOP: + vcpu->stat.instruction_sigp_stop++; + rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP); + break; + case SIGP_STOP_STORE_STATUS: + vcpu->stat.instruction_sigp_stop++; + rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | + ACTION_STOP_ON_STOP); + break; + case SIGP_SET_ARCH: + vcpu->stat.instruction_sigp_arch++; + rc = __sigp_set_arch(vcpu, parameter); + break; + case SIGP_SET_PREFIX: + vcpu->stat.instruction_sigp_prefix++; + rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, + &vcpu->run->s.regs.gprs[r1]); + break; + case SIGP_SENSE_RUNNING: + vcpu->stat.instruction_sigp_sense_running++; + rc = __sigp_sense_running(vcpu, cpu_addr, + &vcpu->run->s.regs.gprs[r1]); + break; + case SIGP_RESTART: + vcpu->stat.instruction_sigp_restart++; + rc = __sigp_restart(vcpu, cpu_addr); + if (rc == 2) /* busy */ + break; + /* user space must know about restart */ + default: + return -EOPNOTSUPP; + } + + if (rc < 0) + return rc; + + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44; + return 0; +} diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile new file mode 100644 index 00000000..761ab8b5 --- /dev/null +++ b/arch/s390/lib/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for s390-specific library files.. +# + +lib-y += delay.o string.o uaccess_std.o uaccess_pt.o +obj-y += usercopy.o +obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o +lib-$(CONFIG_64BIT) += uaccess_mvcos.o +lib-$(CONFIG_SMP) += spinlock.o diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c new file mode 100644 index 00000000..9f1f71e8 --- /dev/null +++ b/arch/s390/lib/delay.c @@ -0,0 +1,131 @@ +/* + * Precise Delay Loops for S390 + * + * Copyright IBM Corp. 
1999,2008 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ + +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/timex.h> +#include <linux/module.h> +#include <linux/irqflags.h> +#include <linux/interrupt.h> +#include <asm/div64.h> +#include <asm/timer.h> + +void __delay(unsigned long loops) +{ + /* + * To end the bloody studid and useless discussion about the + * BogoMips number I took the liberty to define the __delay + * function in a way that that resulting BogoMips number will + * yield the megahertz number of the cpu. The important function + * is udelay and that is done using the tod clock. -- martin. + */ + asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1)); +} + +static void __udelay_disabled(unsigned long long usecs) +{ + unsigned long cr0, cr6, new; + u64 clock_saved, end; + + end = get_clock() + (usecs << 12); + clock_saved = local_tick_disable(); + __ctl_store(cr0, 0, 0); + __ctl_store(cr6, 6, 6); + new = (cr0 & 0xffff00e0) | 0x00000800; + __ctl_load(new , 0, 0); + new = 0; + __ctl_load(new, 6, 6); + lockdep_off(); + do { + set_clock_comparator(end); + vtime_stop_cpu(); + local_irq_disable(); + } while (get_clock() < end); + lockdep_on(); + __ctl_load(cr0, 0, 0); + __ctl_load(cr6, 6, 6); + local_tick_enable(clock_saved); +} + +static void __udelay_enabled(unsigned long long usecs) +{ + u64 clock_saved, end; + + end = get_clock() + (usecs << 12); + do { + clock_saved = 0; + if (end < S390_lowcore.clock_comparator) { + clock_saved = local_tick_disable(); + set_clock_comparator(end); + } + vtime_stop_cpu(); + local_irq_disable(); + if (clock_saved) + local_tick_enable(clock_saved); + } while (get_clock() < end); +} + +/* + * Waits for 'usecs' microseconds using the TOD clock comparator. 
+ */ +void __udelay(unsigned long long usecs) +{ + unsigned long flags; + + preempt_disable(); + local_irq_save(flags); + if (in_irq()) { + __udelay_disabled(usecs); + goto out; + } + if (in_softirq()) { + if (raw_irqs_disabled_flags(flags)) + __udelay_disabled(usecs); + else + __udelay_enabled(usecs); + goto out; + } + if (raw_irqs_disabled_flags(flags)) { + local_bh_disable(); + __udelay_disabled(usecs); + _local_bh_enable(); + goto out; + } + __udelay_enabled(usecs); +out: + local_irq_restore(flags); + preempt_enable(); +} +EXPORT_SYMBOL(__udelay); + +/* + * Simple udelay variant. To be used on startup and reboot + * when the interrupt handler isn't working. + */ +void udelay_simple(unsigned long long usecs) +{ + u64 end; + + end = get_clock() + (usecs << 12); + while (get_clock() < end) + cpu_relax(); +} + +void __ndelay(unsigned long long nsecs) +{ + u64 end; + + nsecs <<= 9; + do_div(nsecs, 125); + end = get_clock() + nsecs; + if (nsecs & ~0xfffUL) + __udelay(nsecs >> 12); + while (get_clock() < end) + barrier(); +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/s390/lib/div64.c b/arch/s390/lib/div64.c new file mode 100644 index 00000000..d9e62c0b --- /dev/null +++ b/arch/s390/lib/div64.c @@ -0,0 +1,149 @@ +/* + * arch/s390/lib/div64.c + * + * __div64_32 implementation for 31 bit. + * + * Copyright (C) IBM Corp. 2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + */ + +#include <linux/types.h> +#include <linux/module.h> + +#ifdef CONFIG_MARCH_G5 + +/* + * Function to divide an unsigned 64 bit integer by an unsigned + * 31 bit integer using signed 64/32 bit division. 
+ */ +static uint32_t __div64_31(uint64_t *n, uint32_t base) +{ + register uint32_t reg2 asm("2"); + register uint32_t reg3 asm("3"); + uint32_t *words = (uint32_t *) n; + uint32_t tmp; + + /* Special case base==1, remainder = 0, quotient = n */ + if (base == 1) + return 0; + /* + * Special case base==0 will cause a fixed point divide exception + * on the dr instruction and may not happen anyway. For the + * following calculation we can assume base > 1. The first + * signed 64 / 32 bit division with an upper half of 0 will + * give the correct upper half of the 64 bit quotient. + */ + reg2 = 0UL; + reg3 = words[0]; + asm volatile( + " dr %0,%2\n" + : "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" ); + words[0] = reg3; + reg3 = words[1]; + /* + * To get the lower half of the 64 bit quotient and the 32 bit + * remainder we have to use a little trick. Since we only have + * a signed division the quotient can get too big. To avoid this + * the 64 bit dividend is halved, then the signed division will + * work. Afterwards the quotient and the remainder are doubled. + * If the last bit of the dividend has been one the remainder + * is increased by one then checked against the base. If the + * remainder has overflown subtract base and increase the + * quotient. Simple, no ? + */ + asm volatile( + " nr %2,%1\n" + " srdl %0,1\n" + " dr %0,%3\n" + " alr %0,%0\n" + " alr %1,%1\n" + " alr %0,%2\n" + " clr %0,%3\n" + " jl 0f\n" + " slr %0,%3\n" + " ahi %1,1\n" + "0:\n" + : "+d" (reg2), "+d" (reg3), "=d" (tmp) + : "d" (base), "2" (1UL) : "cc" ); + words[1] = reg3; + return reg2; +} + +/* + * Function to divide an unsigned 64 bit integer by an unsigned + * 32 bit integer using the unsigned 64/31 bit division. + */ +uint32_t __div64_32(uint64_t *n, uint32_t base) +{ + uint32_t r; + + /* + * If the most significant bit of base is set, divide n by + * (base/2). That allows to use 64/31 bit division and gives a + * good approximation of the result: n = (base/2)*q + r. 
The + * result needs to be corrected with two simple transformations. + * If base is already < 2^31-1 __div64_31 can be used directly. + */ + r = __div64_31(n, ((signed) base < 0) ? (base/2) : base); + if ((signed) base < 0) { + uint64_t q = *n; + /* + * First transformation: + * n = (base/2)*q + r + * = ((base/2)*2)*(q/2) + ((q&1) ? (base/2) : 0) + r + * Since r < (base/2), r + (base/2) < base. + * With q1 = (q/2) and r1 = r + ((q&1) ? (base/2) : 0) + * n = ((base/2)*2)*q1 + r1 with r1 < base. + */ + if (q & 1) + r += base/2; + q >>= 1; + /* + * Second transformation. ((base/2)*2) could have lost the + * last bit. + * n = ((base/2)*2)*q1 + r1 + * = base*q1 - ((base&1) ? q1 : 0) + r1 + */ + if (base & 1) { + int64_t rx = r - q; + /* + * base is >= 2^31. The worst case for the while + * loop is n=2^64-1 base=2^31+1. That gives a + * maximum for q=(2^64-1)/2^31 = 0x1ffffffff. Since + * base >= 2^31 the loop is finished after a maximum + * of three iterations. + */ + while (rx < 0) { + rx += base; + q--; + } + r = rx; + } + *n = q; + } + return r; +} + +#else /* MARCH_G5 */ + +uint32_t __div64_32(uint64_t *n, uint32_t base) +{ + register uint32_t reg2 asm("2"); + register uint32_t reg3 asm("3"); + uint32_t *words = (uint32_t *) n; + + reg2 = 0UL; + reg3 = words[0]; + asm volatile( + " dlr %0,%2\n" + : "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" ); + words[0] = reg3; + reg3 = words[1]; + asm volatile( + " dlr %0,%2\n" + : "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" ); + words[1] = reg3; + return reg2; +} + +#endif /* MARCH_G5 */ diff --git a/arch/s390/lib/qrnnd.S b/arch/s390/lib/qrnnd.S new file mode 100644 index 00000000..d3213291 --- /dev/null +++ b/arch/s390/lib/qrnnd.S @@ -0,0 +1,78 @@ +# S/390 __udiv_qrnnd + +#include <linux/linkage.h> + +# r2 : &__r +# r3 : upper half of 64 bit word n +# r4 : lower half of 64 bit word n +# r5 : divisor d +# the reminder r of the division is to be stored to &__r and +# the quotient q is to be returned + + .text 
+ENTRY(__udiv_qrnnd) + st %r2,24(%r15) # store pointer to reminder for later + lr %r0,%r3 # reload n + lr %r1,%r4 + ltr %r2,%r5 # reload and test divisor + jp 5f + # divisor >= 0x80000000 + srdl %r0,2 # n/4 + srl %r2,1 # d/2 + slr %r1,%r2 # special case if last bit of d is set + brc 3,0f # (n/4) div (n/2) can overflow by 1 + ahi %r0,-1 # trick: subtract n/2, then divide +0: dr %r0,%r2 # signed division + ahi %r1,1 # trick part 2: add 1 to the quotient + # now (n >> 2) = (d >> 1) * %r1 + %r0 + lhi %r3,1 + nr %r3,%r1 # test last bit of q + jz 1f + alr %r0,%r2 # add (d>>1) to r +1: srl %r1,1 # q >>= 1 + # now (n >> 2) = (d&-2) * %r1 + %r0 + lhi %r3,1 + nr %r3,%r5 # test last bit of d + jz 2f + slr %r0,%r1 # r -= q + brc 3,2f # borrow ? + alr %r0,%r5 # r += d + ahi %r1,-1 +2: # now (n >> 2) = d * %r1 + %r0 + alr %r1,%r1 # q <<= 1 + alr %r0,%r0 # r <<= 1 + brc 12,3f # overflow on r ? + slr %r0,%r5 # r -= d + ahi %r1,1 # q += 1 +3: lhi %r3,2 + nr %r3,%r4 # test next to last bit of n + jz 4f + ahi %r0,1 # r += 1 +4: clr %r0,%r5 # r >= d ? + jl 6f + slr %r0,%r5 # r -= d + ahi %r1,1 # q += 1 + # now (n >> 1) = d * %r1 + %r0 + j 6f +5: # divisor < 0x80000000 + srdl %r0,1 + dr %r0,%r2 # signed division + # now (n >> 1) = d * %r1 + %r0 +6: alr %r1,%r1 # q <<= 1 + alr %r0,%r0 # r <<= 1 + brc 12,7f # overflow on r ? + slr %r0,%r5 # r -= d + ahi %r1,1 # q += 1 +7: lhi %r3,1 + nr %r3,%r4 # isolate last bit of n + alr %r0,%r3 # r += (n & 1) + clr %r0,%r5 # r >= d ? + jl 8f + slr %r0,%r5 # r -= d + ahi %r1,1 # q += 1 +8: # now n = d * %r1 + %r0 + l %r2,24(%r15) + st %r0,0(%r2) + lr %r2,%r1 + br %r14 + .end __udiv_qrnnd diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c new file mode 100644 index 00000000..093eb694 --- /dev/null +++ b/arch/s390/lib/spinlock.c @@ -0,0 +1,219 @@ +/* + * arch/s390/lib/spinlock.c + * Out of line spinlock code. + * + * Copyright (C) IBM Corp. 
2004, 2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <asm/io.h> + +int spin_retry = 1000; + +/** + * spin_retry= parameter + */ +static int __init spin_retry_setup(char *str) +{ + spin_retry = simple_strtoul(str, &str, 0); + return 1; +} +__setup("spin_retry=", spin_retry_setup); + +void arch_spin_lock_wait(arch_spinlock_t *lp) +{ + int count = spin_retry; + unsigned int cpu = ~smp_processor_id(); + unsigned int owner; + + while (1) { + owner = lp->owner_cpu; + if (!owner || smp_vcpu_scheduled(~owner)) { + for (count = spin_retry; count > 0; count--) { + if (arch_spin_is_locked(lp)) + continue; + if (_raw_compare_and_swap(&lp->owner_cpu, 0, + cpu) == 0) + return; + } + if (MACHINE_IS_LPAR) + continue; + } + owner = lp->owner_cpu; + if (owner) + smp_yield_cpu(~owner); + if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) + return; + } +} +EXPORT_SYMBOL(arch_spin_lock_wait); + +void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) +{ + int count = spin_retry; + unsigned int cpu = ~smp_processor_id(); + unsigned int owner; + + local_irq_restore(flags); + while (1) { + owner = lp->owner_cpu; + if (!owner || smp_vcpu_scheduled(~owner)) { + for (count = spin_retry; count > 0; count--) { + if (arch_spin_is_locked(lp)) + continue; + local_irq_disable(); + if (_raw_compare_and_swap(&lp->owner_cpu, 0, + cpu) == 0) + return; + local_irq_restore(flags); + } + if (MACHINE_IS_LPAR) + continue; + } + owner = lp->owner_cpu; + if (owner) + smp_yield_cpu(~owner); + local_irq_disable(); + if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) + return; + local_irq_restore(flags); + } +} +EXPORT_SYMBOL(arch_spin_lock_wait_flags); + +int arch_spin_trylock_retry(arch_spinlock_t *lp) +{ + unsigned int cpu = ~smp_processor_id(); + int count; + + for (count = spin_retry; count > 0; count--) { + if 
(arch_spin_is_locked(lp)) + continue; + if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) + return 1; + } + return 0; +} +EXPORT_SYMBOL(arch_spin_trylock_retry); + +void arch_spin_relax(arch_spinlock_t *lock) +{ + unsigned int cpu = lock->owner_cpu; + if (cpu != 0) { + if (MACHINE_IS_VM || MACHINE_IS_KVM || + !smp_vcpu_scheduled(~cpu)) + smp_yield_cpu(~cpu); + } +} +EXPORT_SYMBOL(arch_spin_relax); + +void _raw_read_lock_wait(arch_rwlock_t *rw) +{ + unsigned int old; + int count = spin_retry; + + while (1) { + if (count-- <= 0) { + smp_yield(); + count = spin_retry; + } + if (!arch_read_can_lock(rw)) + continue; + old = rw->lock & 0x7fffffffU; + if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) + return; + } +} +EXPORT_SYMBOL(_raw_read_lock_wait); + +void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) +{ + unsigned int old; + int count = spin_retry; + + local_irq_restore(flags); + while (1) { + if (count-- <= 0) { + smp_yield(); + count = spin_retry; + } + if (!arch_read_can_lock(rw)) + continue; + old = rw->lock & 0x7fffffffU; + local_irq_disable(); + if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) + return; + } +} +EXPORT_SYMBOL(_raw_read_lock_wait_flags); + +int _raw_read_trylock_retry(arch_rwlock_t *rw) +{ + unsigned int old; + int count = spin_retry; + + while (count-- > 0) { + if (!arch_read_can_lock(rw)) + continue; + old = rw->lock & 0x7fffffffU; + if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) + return 1; + } + return 0; +} +EXPORT_SYMBOL(_raw_read_trylock_retry); + +void _raw_write_lock_wait(arch_rwlock_t *rw) +{ + int count = spin_retry; + + while (1) { + if (count-- <= 0) { + smp_yield(); + count = spin_retry; + } + if (!arch_write_can_lock(rw)) + continue; + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) + return; + } +} +EXPORT_SYMBOL(_raw_write_lock_wait); + +void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) +{ + int count = spin_retry; + + 
local_irq_restore(flags); + while (1) { + if (count-- <= 0) { + smp_yield(); + count = spin_retry; + } + if (!arch_write_can_lock(rw)) + continue; + local_irq_disable(); + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) + return; + } +} +EXPORT_SYMBOL(_raw_write_lock_wait_flags); + +int _raw_write_trylock_retry(arch_rwlock_t *rw) +{ + int count = spin_retry; + + while (count-- > 0) { + if (!arch_write_can_lock(rw)) + continue; + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) + return 1; + } + return 0; +} +EXPORT_SYMBOL(_raw_write_trylock_retry); diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c new file mode 100644 index 00000000..4143b7c1 --- /dev/null +++ b/arch/s390/lib/string.c @@ -0,0 +1,389 @@ +/* + * arch/s390/lib/string.c + * Optimized string functions + * + * S390 version + * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#define IN_ARCH_STRING_C 1 + +#include <linux/types.h> +#include <linux/module.h> + +/* + * Helper functions to find the end of a string + */ +static inline char *__strend(const char *s) +{ + register unsigned long r0 asm("0") = 0; + + asm volatile ("0: srst %0,%1\n" + " jo 0b" + : "+d" (r0), "+a" (s) : : "cc" ); + return (char *) r0; +} + +static inline char *__strnend(const char *s, size_t n) +{ + register unsigned long r0 asm("0") = 0; + const char *p = s + n; + + asm volatile ("0: srst %0,%1\n" + " jo 0b" + : "+d" (p), "+a" (s) : "d" (r0) : "cc" ); + return (char *) p; +} + +/** + * strlen - Find the length of a string + * @s: The string to be sized + * + * returns the length of @s + */ +size_t strlen(const char *s) +{ +#if __GNUC__ < 4 + return __strend(s) - s; +#else + return __builtin_strlen(s); +#endif +} +EXPORT_SYMBOL(strlen); + +/** + * strnlen - Find the length of a length-limited string + * @s: The string to be sized + * @n: The maximum number of bytes to search + * + * returns the minimum of the length of 
@s and @n + */ +size_t strnlen(const char * s, size_t n) +{ + return __strnend(s, n) - s; +} +EXPORT_SYMBOL(strnlen); + +/** + * strcpy - Copy a %NUL terminated string + * @dest: Where to copy the string to + * @src: Where to copy the string from + * + * returns a pointer to @dest + */ +char *strcpy(char *dest, const char *src) +{ +#if __GNUC__ < 4 + register int r0 asm("0") = 0; + char *ret = dest; + + asm volatile ("0: mvst %0,%1\n" + " jo 0b" + : "+&a" (dest), "+&a" (src) : "d" (r0) + : "cc", "memory" ); + return ret; +#else + return __builtin_strcpy(dest, src); +#endif +} +EXPORT_SYMBOL(strcpy); + +/** + * strlcpy - Copy a %NUL terminated string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @size: size of destination buffer + * + * Compatible with *BSD: the result is always a valid + * NUL-terminated string that fits in the buffer (unless, + * of course, the buffer size is zero). It does not pad + * out the result like strncpy() does. + */ +size_t strlcpy(char *dest, const char *src, size_t size) +{ + size_t ret = __strend(src) - src; + + if (size) { + size_t len = (ret >= size) ? size-1 : ret; + dest[len] = '\0'; + __builtin_memcpy(dest, src, len); + } + return ret; +} +EXPORT_SYMBOL(strlcpy); + +/** + * strncpy - Copy a length-limited, %NUL-terminated string + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @n: The maximum number of bytes to copy + * + * The result is not %NUL-terminated if the source exceeds + * @n bytes. 
+ */ +char *strncpy(char *dest, const char *src, size_t n) +{ + size_t len = __strnend(src, n) - src; + __builtin_memset(dest + len, 0, n - len); + __builtin_memcpy(dest, src, len); + return dest; +} +EXPORT_SYMBOL(strncpy); + +/** + * strcat - Append one %NUL-terminated string to another + * @dest: The string to be appended to + * @src: The string to append to it + * + * returns a pointer to @dest + */ +char *strcat(char *dest, const char *src) +{ + register int r0 asm("0") = 0; + unsigned long dummy; + char *ret = dest; + + asm volatile ("0: srst %0,%1\n" + " jo 0b\n" + "1: mvst %0,%2\n" + " jo 1b" + : "=&a" (dummy), "+a" (dest), "+a" (src) + : "d" (r0), "0" (0UL) : "cc", "memory" ); + return ret; +} +EXPORT_SYMBOL(strcat); + +/** + * strlcat - Append a length-limited, %NUL-terminated string to another + * @dest: The string to be appended to + * @src: The string to append to it + * @n: The size of the destination buffer. + */ +size_t strlcat(char *dest, const char *src, size_t n) +{ + size_t dsize = __strend(dest) - dest; + size_t len = __strend(src) - src; + size_t res = dsize + len; + + if (dsize < n) { + dest += dsize; + n -= dsize; + if (len >= n) + len = n - 1; + dest[len] = '\0'; + __builtin_memcpy(dest, src, len); + } + return res; +} +EXPORT_SYMBOL(strlcat); + +/** + * strncat - Append a length-limited, %NUL-terminated string to another + * @dest: The string to be appended to + * @src: The string to append to it + * @n: The maximum numbers of bytes to copy + * + * returns a pointer to @dest + * + * Note that in contrast to strncpy, strncat ensures the result is + * terminated. 
+ */ +char *strncat(char *dest, const char *src, size_t n) +{ + size_t len = __strnend(src, n) - src; + char *p = __strend(dest); + + p[len] = '\0'; + __builtin_memcpy(p, src, len); + return dest; +} +EXPORT_SYMBOL(strncat); + +/** + * strcmp - Compare two strings + * @cs: One string + * @ct: Another string + * + * returns 0 if @cs and @ct are equal, + * < 0 if @cs is less than @ct + * > 0 if @cs is greater than @ct + */ +int strcmp(const char *cs, const char *ct) +{ + register int r0 asm("0") = 0; + int ret = 0; + + asm volatile ("0: clst %2,%3\n" + " jo 0b\n" + " je 1f\n" + " ic %0,0(%2)\n" + " ic %1,0(%3)\n" + " sr %0,%1\n" + "1:" + : "+d" (ret), "+d" (r0), "+a" (cs), "+a" (ct) + : : "cc" ); + return ret; +} +EXPORT_SYMBOL(strcmp); + +/** + * strrchr - Find the last occurrence of a character in a string + * @s: The string to be searched + * @c: The character to search for + * + * returns a pointer to the last occurrence of @c in @s, or %NULL if @c + * is not found; the terminating %NUL is searched as well + */ +char * strrchr(const char * s, int c) +{ + size_t len = __strend(s) - s; + + /* + * Scan from the terminating NUL down to and including s[0]; the + * post-decrement test keeps the unsigned index from wrapping + * before index 0 has been examined. + */ + do { + if (s[len] == (char) c) + return (char *) s + len; + } while (len-- > 0); + return NULL; +} +EXPORT_SYMBOL(strrchr); + +/** + * strstr - Find the first substring in a %NUL terminated string + * @s1: The string to be searched + * @s2: The string to search for + */ +char * strstr(const char * s1,const char * s2) +{ + int l1, l2; + + l2 = __strend(s2) - s2; + if (!l2) + return (char *) s1; + l1 = __strend(s1) - s1; + while (l1-- >= l2) { + register unsigned long r2 asm("2") = (unsigned long) s1; + register unsigned long r3 asm("3") = (unsigned long) l2; + register unsigned long r4 asm("4") = (unsigned long) s2; + register unsigned long r5 asm("5") = (unsigned long) l2; + int cc; + + asm volatile ("0: clcle %1,%3,0\n" + " jo 0b\n" + " ipm %0\n" + " srl %0,28" + : "=&d" (cc), "+a" (r2), "+a" (r3), + "+a" (r4), "+a" (r5) : : "cc" ); + if (!cc) + return (char *) s1; + s1++; + } + return NULL; +} +EXPORT_SYMBOL(strstr); + +/** + * memchr - Find a character in an area of memory. 
+ * @s: The memory area + * @c: The byte to search for + * @n: The size of the area. + * + * returns the address of the first occurrence of @c, or %NULL + * if @c is not found + */ +void *memchr(const void *s, int c, size_t n) +{ + register int r0 asm("0") = (char) c; + const void *ret = s + n; + + asm volatile ("0: srst %0,%1\n" + " jo 0b\n" + " jl 1f\n" + " la %0,0\n" + "1:" + : "+a" (ret), "+&a" (s) : "d" (r0) : "cc" ); + return (void *) ret; +} +EXPORT_SYMBOL(memchr); + +/** + * memcmp - Compare two areas of memory + * @cs: One area of memory + * @ct: Another area of memory + * @n: The size of the area. + * + * returns 0 if the compare sets condition code 0, otherwise the + * difference of the bytes the two operand addresses point to after + * the compare + */ +int memcmp(const void *cs, const void *ct, size_t n) +{ + register unsigned long r2 asm("2") = (unsigned long) cs; + register unsigned long r3 asm("3") = (unsigned long) n; + register unsigned long r4 asm("4") = (unsigned long) ct; + register unsigned long r5 asm("5") = (unsigned long) n; + int ret; + + asm volatile ("0: clcle %1,%3,0\n" + " jo 0b\n" + " ipm %0\n" + " srl %0,28" + : "=&d" (ret), "+a" (r2), "+a" (r3), "+a" (r4), "+a" (r5) + : : "cc" ); + if (ret) + ret = *(char *) r2 - *(char *) r4; + return ret; +} +EXPORT_SYMBOL(memcmp); + +/** + * memscan - Find a character in an area of memory. + * @s: The memory area + * @c: The byte to search for + * @n: The size of the area. + * + * returns the address of the first occurrence of @c, or 1 byte past + * the area if @c is not found + */ +void *memscan(void *s, int c, size_t n) +{ + register int r0 asm("0") = (char) c; + const void *ret = s + n; + + asm volatile ("0: srst %0,%1\n" + " jo 0b\n" + : "+a" (ret), "+&a" (s) : "d" (r0) : "cc" ); + return (void *) ret; +} +EXPORT_SYMBOL(memscan); + +/** + * memcpy - Copy one area of memory to another + * @dest: Where to copy to + * @src: Where to copy from + * @n: The size of the area. 
+ * + * returns a pointer to @dest + */ +void *memcpy(void *dest, const void *src, size_t n) +{ + return __builtin_memcpy(dest, src, n); +} +EXPORT_SYMBOL(memcpy); + +/** + * memset - Fill a region of memory with the given value + * @s: Pointer to the start of the area. + * @c: The byte to fill the area with + * @n: The size of the area. + * + * returns a pointer to @s + */ +void *memset(void *s, int c, size_t n) +{ + char *xs; + + if (c == 0) + return __builtin_memset(s, 0, n); + + xs = (char *) s; + if (n > 0) + do { + *xs++ = c; + } while (--n > 0); + return s; +} +EXPORT_SYMBOL(memset); diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h new file mode 100644 index 00000000..1d2536cb --- /dev/null +++ b/arch/s390/lib/uaccess.h @@ -0,0 +1,23 @@ +/* + * arch/s390/uaccess.h + * + * Copyright IBM Corp. 2007 + * + */ + +#ifndef __ARCH_S390_LIB_UACCESS_H +#define __ARCH_S390_LIB_UACCESS_H + +extern size_t copy_from_user_std(size_t, const void __user *, void *); +extern size_t copy_to_user_std(size_t, void __user *, const void *); +extern size_t strnlen_user_std(size_t, const char __user *); +extern size_t strncpy_from_user_std(size_t, const char __user *, char *); +extern int futex_atomic_cmpxchg_std(u32 *, u32 __user *, u32, u32); +extern int futex_atomic_op_std(int, u32 __user *, int, int *); + +extern size_t copy_from_user_pt(size_t, const void __user *, void *); +extern size_t copy_to_user_pt(size_t, void __user *, const void *); +extern int futex_atomic_op_pt(int, u32 __user *, int, int *); +extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32); + +#endif /* __ARCH_S390_LIB_UACCESS_H */ diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c new file mode 100644 index 00000000..60455f10 --- /dev/null +++ b/arch/s390/lib/uaccess_mvcos.c @@ -0,0 +1,227 @@ +/* + * arch/s390/lib/uaccess_mvcos.c + * + * Optimized user space space access functions based on mvcos. + * + * Copyright (C) IBM Corp. 
2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Gerald Schaefer (gerald.schaefer@de.ibm.com) + */ + +#include <linux/errno.h> +#include <linux/mm.h> +#include <asm/uaccess.h> +#include <asm/futex.h> +#include "uaccess.h" + +#ifndef __s390x__ +#define AHI "ahi" +#define ALR "alr" +#define CLR "clr" +#define LHI "lhi" +#define SLR "slr" +#else +#define AHI "aghi" +#define ALR "algr" +#define CLR "clgr" +#define LHI "lghi" +#define SLR "slgr" +#endif + +static size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x) +{ + register unsigned long reg0 asm("0") = 0x81UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" + "9: jz 7f\n" + "1:"ALR" %0,%3\n" + " "SLR" %1,%3\n" + " "SLR" %2,%3\n" + " j 0b\n" + "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ + " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 4f\n" + "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n" + "10:"SLR" %0,%4\n" + " "ALR" %2,%4\n" + "4:"LHI" %4,-1\n" + " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */ + " bras %3,6f\n" /* memset loop */ + " xc 0(1,%2),0(%2)\n" + "5: xc 0(256,%2),0(%2)\n" + " la %2,256(%2)\n" + "6:"AHI" %4,-256\n" + " jnm 5b\n" + " ex %4,0(%3)\n" + " j 8f\n" + "7:"SLR" %0,%0\n" + "8: \n" + EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +static size_t copy_from_user_mvcos_check(size_t size, const void __user *ptr, void *x) +{ + if (size <= 256) + return copy_from_user_std(size, ptr, x); + return copy_from_user_mvcos(size, ptr, x); +} + +static size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x) +{ + register unsigned long reg0 asm("0") = 0x810000UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + asm volatile( + "0: .insn 
ss,0xc80000000000,0(%0,%1),0(%2),0\n" + "6: jz 4f\n" + "1:"ALR" %0,%3\n" + " "SLR" %1,%3\n" + " "SLR" %2,%3\n" + " j 0b\n" + "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ + " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n" + "7:"SLR" %0,%4\n" + " j 5f\n" + "4:"SLR" %0,%0\n" + "5: \n" + EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +static size_t copy_to_user_mvcos_check(size_t size, void __user *ptr, + const void *x) +{ + if (size <= 256) + return copy_to_user_std(size, ptr, x); + return copy_to_user_mvcos(size, ptr, x); +} + +static size_t copy_in_user_mvcos(size_t size, void __user *to, + const void __user *from) +{ + register unsigned long reg0 asm("0") = 0x810081UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + /* FIXME: copy with reduced length. */ + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" + " jz 2f\n" + "1:"ALR" %0,%3\n" + " "SLR" %1,%3\n" + " "SLR" %2,%3\n" + " j 0b\n" + "2:"SLR" %0,%0\n" + "3: \n" + EX_TABLE(0b,3b) + : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +static size_t clear_user_mvcos(size_t size, void __user *to) +{ + register unsigned long reg0 asm("0") = 0x810000UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" + " jz 4f\n" + "1:"ALR" %0,%2\n" + " "SLR" %1,%2\n" + " j 0b\n" + "2: la %3,4095(%1)\n"/* %4 = to + 4095 */ + " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */ + " "SLR" %3,%1\n" + " "CLR" %0,%3\n" /* copy crosses next page boundary? 
*/ + " jnh 5f\n" + "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" + " "SLR" %0,%3\n" + " j 5f\n" + "4:"SLR" %0,%0\n" + "5: \n" + EX_TABLE(0b,2b) EX_TABLE(3b,5b) + : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) + : "a" (empty_zero_page), "d" (reg0) : "cc", "memory"); + return size; +} + +static size_t strnlen_user_mvcos(size_t count, const char __user *src) +{ + char buf[256]; + int rc; + size_t done, len, len_str; + + done = 0; + do { + len = min(count - done, (size_t) 256); + rc = uaccess.copy_from_user(len, src + done, buf); + if (unlikely(rc == len)) + return 0; + len -= rc; + len_str = strnlen(buf, len); + done += len_str; + } while ((len_str == len) && (done < count)); + return done + 1; +} + +static size_t strncpy_from_user_mvcos(size_t count, const char __user *src, + char *dst) +{ + int rc; + size_t done, len, len_str; + + done = 0; + do { + len = min(count - done, (size_t) 4096); + rc = uaccess.copy_from_user(len, src + done, dst); + if (unlikely(rc == len)) + return -EFAULT; + len -= rc; + len_str = strnlen(dst, len); + done += len_str; + } while ((len_str == len) && (done < count)); + return done; +} + +struct uaccess_ops uaccess_mvcos = { + .copy_from_user = copy_from_user_mvcos_check, + .copy_from_user_small = copy_from_user_std, + .copy_to_user = copy_to_user_mvcos_check, + .copy_to_user_small = copy_to_user_std, + .copy_in_user = copy_in_user_mvcos, + .clear_user = clear_user_mvcos, + .strnlen_user = strnlen_user_std, + .strncpy_from_user = strncpy_from_user_std, + .futex_atomic_op = futex_atomic_op_std, + .futex_atomic_cmpxchg = futex_atomic_cmpxchg_std, +}; + +struct uaccess_ops uaccess_mvcos_switch = { + .copy_from_user = copy_from_user_mvcos, + .copy_from_user_small = copy_from_user_mvcos, + .copy_to_user = copy_to_user_mvcos, + .copy_to_user_small = copy_to_user_mvcos, + .copy_in_user = copy_in_user_mvcos, + .clear_user = clear_user_mvcos, + .strnlen_user = strnlen_user_mvcos, + .strncpy_from_user = strncpy_from_user_mvcos, + 
.futex_atomic_op = futex_atomic_op_pt, + .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt, +}; diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c new file mode 100644 index 00000000..342ae35a --- /dev/null +++ b/arch/s390/lib/uaccess_pt.c @@ -0,0 +1,406 @@ +/* + * arch/s390/lib/uaccess_pt.c + * + * User access functions based on page table walks for enhanced + * system layout without hardware support. + * + * Copyright IBM Corp. 2006 + * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com) + */ + +#include <linux/errno.h> +#include <linux/hardirq.h> +#include <linux/mm.h> +#include <asm/uaccess.h> +#include <asm/futex.h> +#include "uaccess.h" + +static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + return (pte_t *) 0x3a; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud) || unlikely(pud_bad(*pud))) + return (pte_t *) 0x3b; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + return (pte_t *) 0x10; + + return pte_offset_map(pmd, addr); +} + +static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, + size_t n, int write_user) +{ + struct mm_struct *mm = current->mm; + unsigned long offset, pfn, done, size; + pte_t *pte; + void *from, *to; + + done = 0; +retry: + spin_lock(&mm->page_table_lock); + do { + pte = follow_table(mm, uaddr); + if ((unsigned long) pte < 0x1000) + goto fault; + if (!pte_present(*pte)) { + pte = (pte_t *) 0x11; + goto fault; + } else if (write_user && !pte_write(*pte)) { + pte = (pte_t *) 0x04; + goto fault; + } + + pfn = pte_pfn(*pte); + offset = uaddr & (PAGE_SIZE - 1); + size = min(n - done, PAGE_SIZE - offset); + if (write_user) { + to = (void *)((pfn << PAGE_SHIFT) + offset); + from = kptr + done; + } else { + from = (void *)((pfn << PAGE_SHIFT) + offset); + to = kptr + done; + } + memcpy(to, from, size); + done += 
size; + uaddr += size; + } while (done < n); + spin_unlock(&mm->page_table_lock); + return n - done; +fault: + spin_unlock(&mm->page_table_lock); + if (__handle_fault(uaddr, (unsigned long) pte, write_user)) + return n - done; + goto retry; +} + +/* + * Do DAT for user address by page table walk, return kernel address. + * This function needs to be called with current->mm->page_table_lock held. + */ +static __always_inline unsigned long __dat_user_addr(unsigned long uaddr) +{ + struct mm_struct *mm = current->mm; + unsigned long pfn; + pte_t *pte; + int rc; + +retry: + pte = follow_table(mm, uaddr); + if ((unsigned long) pte < 0x1000) + goto fault; + if (!pte_present(*pte)) { + pte = (pte_t *) 0x11; + goto fault; + } + + pfn = pte_pfn(*pte); + return (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1)); +fault: + spin_unlock(&mm->page_table_lock); + rc = __handle_fault(uaddr, (unsigned long) pte, 0); + spin_lock(&mm->page_table_lock); + if (!rc) + goto retry; + return 0; +} + +size_t copy_from_user_pt(size_t n, const void __user *from, void *to) +{ + size_t rc; + + if (segment_eq(get_fs(), KERNEL_DS)) { + memcpy(to, (void __kernel __force *) from, n); + return 0; + } + rc = __user_copy_pt((unsigned long) from, to, n, 0); + if (unlikely(rc)) + memset(to + n - rc, 0, rc); + return rc; +} + +size_t copy_to_user_pt(size_t n, void __user *to, const void *from) +{ + if (segment_eq(get_fs(), KERNEL_DS)) { + memcpy((void __kernel __force *) to, from, n); + return 0; + } + return __user_copy_pt((unsigned long) to, (void *) from, n, 1); +} + +static size_t clear_user_pt(size_t n, void __user *to) +{ + long done, size, ret; + + if (segment_eq(get_fs(), KERNEL_DS)) { + memset((void __kernel __force *) to, 0, n); + return 0; + } + done = 0; + do { + if (n - done > PAGE_SIZE) + size = PAGE_SIZE; + else + size = n - done; + ret = __user_copy_pt((unsigned long) to + done, + &empty_zero_page, size, 1); + done += size; + if (ret) + return ret + n - done; + } while (done < n); + return 0; 
+} + +static size_t strnlen_user_pt(size_t count, const char __user *src) +{ + char *addr; + unsigned long uaddr = (unsigned long) src; + struct mm_struct *mm = current->mm; + unsigned long offset, pfn, done, len; + pte_t *pte; + size_t len_str; + + if (segment_eq(get_fs(), KERNEL_DS)) + return strnlen((const char __kernel __force *) src, count) + 1; + done = 0; +retry: + spin_lock(&mm->page_table_lock); + do { + pte = follow_table(mm, uaddr); + if ((unsigned long) pte < 0x1000) + goto fault; + if (!pte_present(*pte)) { + pte = (pte_t *) 0x11; + goto fault; + } + + pfn = pte_pfn(*pte); + offset = uaddr & (PAGE_SIZE-1); + addr = (char *)(pfn << PAGE_SHIFT) + offset; + len = min(count - done, PAGE_SIZE - offset); + len_str = strnlen(addr, len); + done += len_str; + uaddr += len_str; + } while ((len_str == len) && (done < count)); + spin_unlock(&mm->page_table_lock); + return done + 1; +fault: + spin_unlock(&mm->page_table_lock); + if (__handle_fault(uaddr, (unsigned long) pte, 0)) + return 0; + goto retry; +} + +static size_t strncpy_from_user_pt(size_t count, const char __user *src, + char *dst) +{ + size_t n = strnlen_user_pt(count, src); + + if (!n) + return -EFAULT; + if (n > count) + n = count; + if (segment_eq(get_fs(), KERNEL_DS)) { + memcpy(dst, (const char __kernel __force *) src, n); + if (dst[n-1] == '\0') + return n-1; + else + return n; + } + if (__user_copy_pt((unsigned long) src, dst, n, 0)) + return -EFAULT; + if (dst[n-1] == '\0') + return n-1; + else + return n; +} + +static size_t copy_in_user_pt(size_t n, void __user *to, + const void __user *from) +{ + struct mm_struct *mm = current->mm; + unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to, + uaddr, done, size, error_code; + unsigned long uaddr_from = (unsigned long) from; + unsigned long uaddr_to = (unsigned long) to; + pte_t *pte_from, *pte_to; + int write_user; + + if (segment_eq(get_fs(), KERNEL_DS)) { + memcpy((void __force *) to, (void __force *) from, n); + return 0; + } + 
done = 0; +retry: + spin_lock(&mm->page_table_lock); + do { + write_user = 0; + uaddr = uaddr_from; + pte_from = follow_table(mm, uaddr_from); + error_code = (unsigned long) pte_from; + if (error_code < 0x1000) + goto fault; + if (!pte_present(*pte_from)) { + error_code = 0x11; + goto fault; + } + + write_user = 1; + uaddr = uaddr_to; + pte_to = follow_table(mm, uaddr_to); + error_code = (unsigned long) pte_to; + if (error_code < 0x1000) + goto fault; + if (!pte_present(*pte_to)) { + error_code = 0x11; + goto fault; + } else if (!pte_write(*pte_to)) { + error_code = 0x04; + goto fault; + } + + pfn_from = pte_pfn(*pte_from); + pfn_to = pte_pfn(*pte_to); + offset_from = uaddr_from & (PAGE_SIZE-1); + /* the destination offset is taken from the destination address */ + offset_to = uaddr_to & (PAGE_SIZE-1); + offset_max = max(offset_from, offset_to); + size = min(n - done, PAGE_SIZE - offset_max); + + memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to, + (void *)(pfn_from << PAGE_SHIFT) + offset_from, size); + done += size; + uaddr_from += size; + uaddr_to += size; + } while (done < n); + spin_unlock(&mm->page_table_lock); + return n - done; +fault: + spin_unlock(&mm->page_table_lock); + if (__handle_fault(uaddr, error_code, write_user)) + return n - done; + goto retry; +} + +#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ + asm volatile("0: l %1,0(%6)\n" \ + "1: " insn \ + "2: cs %1,%2,0(%6)\n" \ + "3: jl 1b\n" \ + " lhi %0,0\n" \ + "4:\n" \ + EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ + : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ + "=m" (*uaddr) \ + : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ + "m" (*uaddr) : "cc" ); + +static int __futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) +{ + int oldval = 0, newval, ret; + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("lr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("lr %2,%1\nar %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("lr 
%2,%1\nor %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("lr %2,%1\nnr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("lr %2,%1\nxr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + if (ret == 0) + *old = oldval; + return ret; +} + +/* + * Translate the user address with __dat_user_addr() under the page + * table lock, take a reference on the page, then run the futex + * operation on the translated address. + */ +int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) +{ + int ret; + + if (segment_eq(get_fs(), KERNEL_DS)) + return __futex_atomic_op_pt(op, uaddr, oparg, old); + spin_lock(&current->mm->page_table_lock); + uaddr = (u32 __force __user *) + __dat_user_addr((__force unsigned long) uaddr); + if (!uaddr) { + spin_unlock(&current->mm->page_table_lock); + return -EFAULT; + } + get_page(virt_to_page(uaddr)); + spin_unlock(&current->mm->page_table_lock); + ret = __futex_atomic_op_pt(op, uaddr, oparg, old); + put_page(virt_to_page(uaddr)); + return ret; +} + +static int __futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret; + + asm volatile("0: cs %1,%4,0(%5)\n" + "1: la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+d" (oldval), "=m" (*uaddr) + : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) + : "cc", "memory" ); + *uval = oldval; + return ret; +} + +/* + * Same address translation and page reference handling as + * futex_atomic_op_pt(), followed by the compare-and-swap helper. + */ +int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret; + + if (segment_eq(get_fs(), KERNEL_DS)) + return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); + spin_lock(&current->mm->page_table_lock); + uaddr = (u32 __force __user *) + __dat_user_addr((__force unsigned long) uaddr); + if (!uaddr) { + spin_unlock(&current->mm->page_table_lock); + return -EFAULT; + } + get_page(virt_to_page(uaddr)); + spin_unlock(&current->mm->page_table_lock); + ret = __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); + put_page(virt_to_page(uaddr)); + return ret; +} + +struct uaccess_ops uaccess_pt = { + .copy_from_user = copy_from_user_pt, + .copy_from_user_small = 
copy_from_user_pt, + .copy_to_user = copy_to_user_pt, + .copy_to_user_small = copy_to_user_pt, + .copy_in_user = copy_in_user_pt, + .clear_user = clear_user_pt, + .strnlen_user = strnlen_user_pt, + .strncpy_from_user = strncpy_from_user_pt, + .futex_atomic_op = futex_atomic_op_pt, + .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt, +}; diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c new file mode 100644 index 00000000..bb1a7eed --- /dev/null +++ b/arch/s390/lib/uaccess_std.c @@ -0,0 +1,319 @@ +/* + * arch/s390/lib/uaccess_std.c + * + * Standard user space access functions based on mvcp/mvcs and doing + * interesting things in the secondary space mode. + * + * Copyright (C) IBM Corp. 2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Gerald Schaefer (gerald.schaefer@de.ibm.com) + */ + +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/uaccess.h> +#include <asm/futex.h> +#include "uaccess.h" + +#ifndef __s390x__ +#define AHI "ahi" +#define ALR "alr" +#define CLR "clr" +#define LHI "lhi" +#define SLR "slr" +#else +#define AHI "aghi" +#define ALR "algr" +#define CLR "clgr" +#define LHI "lghi" +#define SLR "slgr" +#endif + +size_t copy_from_user_std(size_t size, const void __user *ptr, void *x) +{ + unsigned long tmp1, tmp2; + + tmp1 = -256UL; + asm volatile( + "0: mvcp 0(%0,%2),0(%1),%3\n" + "10:jz 8f\n" + "1:"ALR" %0,%3\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "2: mvcp 0(%0,%2),0(%1),%3\n" + "11:jnz 1b\n" + " j 8f\n" + "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ + " "LHI" %3,-4096\n" + " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? 
*/ + " jnh 5f\n" + "4: mvcp 0(%4,%2),0(%1),%3\n" + "12:"SLR" %0,%4\n" + " "ALR" %2,%4\n" + "5:"LHI" %4,-1\n" + " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */ + " bras %3,7f\n" /* memset loop */ + " xc 0(1,%2),0(%2)\n" + "6: xc 0(256,%2),0(%2)\n" + " la %2,256(%2)\n" + "7:"AHI" %4,-256\n" + " jnm 6b\n" + " ex %4,0(%3)\n" + " j 9f\n" + "8:"SLR" %0,%0\n" + "9: \n" + EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b) + EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +static size_t copy_from_user_std_check(size_t size, const void __user *ptr, + void *x) +{ + if (size <= 1024) + return copy_from_user_std(size, ptr, x); + return copy_from_user_pt(size, ptr, x); +} + +size_t copy_to_user_std(size_t size, void __user *ptr, const void *x) +{ + unsigned long tmp1, tmp2; + + tmp1 = -256UL; + asm volatile( + "0: mvcs 0(%0,%1),0(%2),%3\n" + "7: jz 5f\n" + "1:"ALR" %0,%3\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "2: mvcs 0(%0,%1),0(%2),%3\n" + "8: jnz 1b\n" + " j 5f\n" + "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ + " "LHI" %3,-4096\n" + " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? 
*/ + " jnh 6f\n" + "4: mvcs 0(%4,%1),0(%2),%3\n" + "9:"SLR" %0,%4\n" + " j 6f\n" + "5:"SLR" %0,%0\n" + "6: \n" + EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) + EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +static size_t copy_to_user_std_check(size_t size, void __user *ptr, + const void *x) +{ + if (size <= 1024) + return copy_to_user_std(size, ptr, x); + return copy_to_user_pt(size, ptr, x); +} + +static size_t copy_in_user_std(size_t size, void __user *to, + const void __user *from) +{ + unsigned long tmp1; + + asm volatile( + " sacf 256\n" + " "AHI" %0,-1\n" + " jo 5f\n" + " bras %3,3f\n" + "0:"AHI" %0,257\n" + "1: mvc 0(1,%1),0(%2)\n" + " la %1,1(%1)\n" + " la %2,1(%2)\n" + " "AHI" %0,-1\n" + " jnz 1b\n" + " j 5f\n" + "2: mvc 0(256,%1),0(%2)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "3:"AHI" %0,-256\n" + " jnm 2b\n" + "4: ex %0,1b-0b(%3)\n" + "5: "SLR" %0,%0\n" + "6: sacf 0\n" + EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) + : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) + : : "cc", "memory"); + return size; +} + +static size_t clear_user_std(size_t size, void __user *to) +{ + unsigned long tmp1, tmp2; + + asm volatile( + " sacf 256\n" + " "AHI" %0,-1\n" + " jo 5f\n" + " bras %3,3f\n" + " xc 0(1,%1),0(%1)\n" + "0:"AHI" %0,257\n" + " la %2,255(%1)\n" /* %2 = ptr + 255 */ + " srl %2,12\n" + " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */ + " "SLR" %2,%1\n" + " "CLR" %0,%2\n" /* clear crosses next page boundary? 
*/ + " jnh 5f\n" + " "AHI" %2,-1\n" + "1: ex %2,0(%3)\n" + " "AHI" %2,1\n" + " "SLR" %0,%2\n" + " j 5f\n" + "2: xc 0(256,%1),0(%1)\n" + " la %1,256(%1)\n" + "3:"AHI" %0,-256\n" + " jnm 2b\n" + "4: ex %0,0(%3)\n" + "5: "SLR" %0,%0\n" + "6: sacf 0\n" + EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) + : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +size_t strnlen_user_std(size_t size, const char __user *src) +{ + register unsigned long reg0 asm("0") = 0UL; + unsigned long tmp1, tmp2; + + asm volatile( + " la %2,0(%1)\n" + " la %3,0(%0,%1)\n" + " "SLR" %0,%0\n" + " sacf 256\n" + "0: srst %3,%2\n" + " jo 0b\n" + " la %0,1(%3)\n" /* strnlen_user results includes \0 */ + " "SLR" %0,%1\n" + "1: sacf 0\n" + EX_TABLE(0b,1b) + : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst) +{ + register unsigned long reg0 asm("0") = 0UL; + unsigned long tmp1, tmp2; + + asm volatile( + " la %3,0(%1)\n" + " la %4,0(%0,%1)\n" + " sacf 256\n" + "0: srst %4,%3\n" + " jo 0b\n" + " sacf 0\n" + " la %0,0(%4)\n" + " jh 1f\n" /* found \0 in string ? 
*/ + " "AHI" %4,1\n" /* include \0 in copy */ + "1:"SLR" %0,%1\n" /* %0 = return length (without \0) */ + " "SLR" %4,%1\n" /* %4 = copy length (including \0) */ + "2: mvcp 0(%4,%2),0(%1),%5\n" + " jz 9f\n" + "3:"AHI" %4,-256\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "4: mvcp 0(%4,%2),0(%1),%5\n" + " jnz 3b\n" + " j 9f\n" + "7: sacf 0\n" + "8:"LHI" %0,%6\n" + "9:\n" + EX_TABLE(0b,7b) EX_TABLE(2b,8b) EX_TABLE(4b,8b) + : "+a" (size), "+a" (src), "+d" (dst), "=a" (tmp1), "=a" (tmp2) + : "d" (reg0), "K" (-EFAULT) : "cc", "memory"); + return size; +} + +#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ + asm volatile( \ + " sacf 256\n" \ + "0: l %1,0(%6)\n" \ + "1:"insn \ + "2: cs %1,%2,0(%6)\n" \ + "3: jl 1b\n" \ + " lhi %0,0\n" \ + "4: sacf 0\n" \ + EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ + : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ + "=m" (*uaddr) \ + : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ + "m" (*uaddr) : "cc"); + +int futex_atomic_op_std(int op, u32 __user *uaddr, int oparg, int *old) +{ + int oldval = 0, newval, ret; + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("lr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("lr %2,%1\nar %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("lr %2,%1\nor %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("lr %2,%1\nnr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("lr %2,%1\nxr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + *old = oldval; + return ret; +} + +int futex_atomic_cmpxchg_std(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret; + + asm volatile( + " sacf 256\n" + "0: cs %1,%4,0(%5)\n" + "1: la %0,0\n" + "2: sacf 0\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+d" (oldval), "=m" (*uaddr) + : "0" 
(-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) + : "cc", "memory" ); + *uval = oldval; + return ret; +} + +struct uaccess_ops uaccess_std = { + .copy_from_user = copy_from_user_std_check, + .copy_from_user_small = copy_from_user_std, + .copy_to_user = copy_to_user_std_check, + .copy_to_user_small = copy_to_user_std, + .copy_in_user = copy_in_user_std, + .clear_user = clear_user_std, + .strnlen_user = strnlen_user_std, + .strncpy_from_user = strncpy_from_user_std, + .futex_atomic_op = futex_atomic_op_std, + .futex_atomic_cmpxchg = futex_atomic_cmpxchg_std, +}; diff --git a/arch/s390/lib/ucmpdi2.c b/arch/s390/lib/ucmpdi2.c new file mode 100644 index 00000000..3e05ff53 --- /dev/null +++ b/arch/s390/lib/ucmpdi2.c @@ -0,0 +1,26 @@ +#include <linux/module.h> + +union ull_union { + unsigned long long ull; + struct { + unsigned int high; + unsigned int low; + } ui; +}; + +int __ucmpdi2(unsigned long long a, unsigned long long b) +{ + union ull_union au = {.ull = a}; + union ull_union bu = {.ull = b}; + + if (au.ui.high < bu.ui.high) + return 0; + else if (au.ui.high > bu.ui.high) + return 2; + if (au.ui.low < bu.ui.low) + return 0; + else if (au.ui.low > bu.ui.low) + return 2; + return 1; +} +EXPORT_SYMBOL(__ucmpdi2); diff --git a/arch/s390/lib/usercopy.c b/arch/s390/lib/usercopy.c new file mode 100644 index 00000000..14b363fe --- /dev/null +++ b/arch/s390/lib/usercopy.c @@ -0,0 +1,8 @@ +#include <linux/module.h> +#include <linux/bug.h> + +void copy_from_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/s390/math-emu/Makefile b/arch/s390/math-emu/Makefile new file mode 100644 index 00000000..51d39954 --- /dev/null +++ b/arch/s390/math-emu/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the FPU instruction emulation. 
+# + +obj-$(CONFIG_MATHEMU) := math.o + +ccflags-y := -I$(src) -Iinclude/math-emu -w diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c new file mode 100644 index 00000000..cd4e9c16 --- /dev/null +++ b/arch/s390/math-emu/math.c @@ -0,0 +1,2255 @@ +/* + * arch/s390/math-emu/math.c + * + * S390 version + * Copyright (C) 1999-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * + * 'math.c' emulates IEEE instructions on a S390 processor + * that does not have the IEEE fpu (all processors before G5). + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <asm/uaccess.h> +#include <asm/lowcore.h> + +#include <asm/sfp-util.h> +#include <math-emu/soft-fp.h> +#include <math-emu/single.h> +#include <math-emu/double.h> +#include <math-emu/quad.h> + +/* + * I miss a macro to round a floating point number to the + * nearest integer in the same floating point format. + */ +#define _FP_TO_FPINT_ROUND(fs, wc, X) \ + do { \ + switch (X##_c) \ + { \ + case FP_CLS_NORMAL: \ + if (X##_e > _FP_FRACBITS_##fs + _FP_EXPBIAS_##fs) \ + { /* floating point number has no bits after the dot. */ \ + } \ + else if (X##_e <= _FP_FRACBITS_##fs + _FP_EXPBIAS_##fs && \ + X##_e > _FP_EXPBIAS_##fs) \ + { /* some bits before the dot, some after it. */ \ + _FP_FRAC_SRS_##wc(X, _FP_WFRACBITS_##fs, \ + X##_e - _FP_EXPBIAS_##fs \ + + _FP_FRACBITS_##fs); \ + _FP_ROUND(wc, X); \ + _FP_FRAC_SLL_##wc(X, X##_e - _FP_EXPBIAS_##fs \ + + _FP_FRACBITS_##fs); \ + } \ + else \ + { /* all bits after the dot. 
*/ \ + FP_SET_EXCEPTION(FP_EX_INEXACT); \ + X##_c = FP_CLS_ZERO; \ + } \ + break; \ + case FP_CLS_NAN: \ + case FP_CLS_INF: \ + case FP_CLS_ZERO: \ + break; \ + } \ + } while (0) + +#define FP_TO_FPINT_ROUND_S(X) _FP_TO_FPINT_ROUND(S,1,X) +#define FP_TO_FPINT_ROUND_D(X) _FP_TO_FPINT_ROUND(D,2,X) +#define FP_TO_FPINT_ROUND_Q(X) _FP_TO_FPINT_ROUND(Q,4,X) + +typedef union { + long double ld; + struct { + __u64 high; + __u64 low; + } w; +} mathemu_ldcv; + +#ifdef CONFIG_SYSCTL +int sysctl_ieee_emulation_warnings=1; +#endif + +#define mathemu_put_user(x, p) \ + do { \ + if (put_user((x),(p))) \ + return SIGSEGV; \ + } while (0) + +#define mathemu_get_user(x, p) \ + do { \ + if (get_user((x),(p))) \ + return SIGSEGV; \ + } while (0) + +#define mathemu_copy_from_user(d, s, n)\ + do { \ + if (copy_from_user((d),(s),(n)) != 0) \ + return SIGSEGV; \ + } while (0) + +#define mathemu_copy_to_user(d, s, n) \ + do { \ + if (copy_to_user((d),(s),(n)) != 0) \ + return SIGSEGV; \ + } while (0) + +static void display_emulation_not_implemented(struct pt_regs *regs, char *instr) +{ + __u16 *location; + +#ifdef CONFIG_SYSCTL + if(sysctl_ieee_emulation_warnings) +#endif + { + location = (__u16 *)(regs->psw.addr-S390_lowcore.pgm_ilc); + printk("%s ieee fpu instruction not emulated " + "process name: %s pid: %d \n", + instr, current->comm, current->pid); + printk("%s's PSW: %08lx %08lx\n", instr, + (unsigned long) regs->psw.mask, + (unsigned long) location); + } +} + +static inline void emu_set_CC (struct pt_regs *regs, int cc) +{ + regs->psw.mask = (regs->psw.mask & 0xFFFFCFFF) | ((cc&3) << 12); +} + +/* + * Set the condition code in the user psw. + * 0 : Result is zero + * 1 : Result is less than zero + * 2 : Result is greater than zero + * 3 : Result is NaN or INF + */ +static inline void emu_set_CC_cs(struct pt_regs *regs, int class, int sign) +{ + switch (class) { + case FP_CLS_NORMAL: + case FP_CLS_INF: + emu_set_CC(regs, sign ? 
1 : 2); + break; + case FP_CLS_ZERO: + emu_set_CC(regs, 0); + break; + case FP_CLS_NAN: + emu_set_CC(regs, 3); + break; + } +} + +/* Add long double */ +static int emu_axbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + cvt.w.high = current->thread.fp_regs.fprs[rx].ui; + cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QB, &cvt.ld); + FP_ADD_Q(QR, QA, QB); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + emu_set_CC_cs(regs, QR_c, QR_s); + return _fex; +} + +/* Add double */ +static int emu_adbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d); + FP_UNPACK_DP(DB, &current->thread.fp_regs.fprs[ry].d); + FP_ADD_D(DR, DA, DB); + FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR); + emu_set_CC_cs(regs, DR_c, DR_s); + return _fex; +} + +/* Add double */ +static int emu_adb (struct pt_regs *regs, int rx, double *val) { + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d); + FP_UNPACK_DP(DB, val); + FP_ADD_D(DR, DA, DB); + FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR); + emu_set_CC_cs(regs, DR_c, DR_s); + return _fex; +} + +/* Add float */ +static int emu_aebr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f); + FP_UNPACK_SP(SB, &current->thread.fp_regs.fprs[ry].f); + FP_ADD_S(SR, SA, SB); + 
FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR); + emu_set_CC_cs(regs, SR_c, SR_s); + return _fex; +} + +/* Add float */ +static int emu_aeb (struct pt_regs *regs, int rx, float *val) { + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f); + FP_UNPACK_SP(SB, val); + FP_ADD_S(SR, SA, SB); + FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR); + emu_set_CC_cs(regs, SR_c, SR_s); + return _fex; +} + +/* Compare long double */ +static int emu_cxbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QA); FP_DECL_Q(QB); + mathemu_ldcv cvt; + int IR; + + cvt.w.high = current->thread.fp_regs.fprs[rx].ui; + cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui; + FP_UNPACK_RAW_QP(QA, &cvt.ld); + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_RAW_QP(QB, &cvt.ld); + FP_CMP_Q(IR, QA, QB, 3); + /* + * IR == -1 if DA < DB, IR == 0 if DA == DB, + * IR == 1 if DA > DB and IR == 3 if unordered + */ + emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR); + return 0; +} + +/* Compare double */ +static int emu_cdbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_D(DB); + int IR; + + FP_UNPACK_RAW_DP(DA, &current->thread.fp_regs.fprs[rx].d); + FP_UNPACK_RAW_DP(DB, &current->thread.fp_regs.fprs[ry].d); + FP_CMP_D(IR, DA, DB, 3); + /* + * IR == -1 if DA < DB, IR == 0 if DA == DB, + * IR == 1 if DA > DB and IR == 3 if unordered + */ + emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR); + return 0; +} + +/* Compare double */ +static int emu_cdb (struct pt_regs *regs, int rx, double *val) { + FP_DECL_D(DA); FP_DECL_D(DB); + int IR; + + FP_UNPACK_RAW_DP(DA, &current->thread.fp_regs.fprs[rx].d); + FP_UNPACK_RAW_DP(DB, val); + FP_CMP_D(IR, DA, DB, 3); + /* + * IR == -1 if DA < DB, IR == 0 if DA == DB, + * IR == 1 if DA > DB and IR == 3 if unordered + */ + emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 
2 : IR); + return 0; +} + +/* Compare float */ +static int emu_cebr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_S(SA); FP_DECL_S(SB); + int IR; + + FP_UNPACK_RAW_SP(SA, &current->thread.fp_regs.fprs[rx].f); + FP_UNPACK_RAW_SP(SB, &current->thread.fp_regs.fprs[ry].f); + FP_CMP_S(IR, SA, SB, 3); + /* + * IR == -1 if DA < DB, IR == 0 if DA == DB, + * IR == 1 if DA > DB and IR == 3 if unordered + */ + emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR); + return 0; +} + +/* Compare float */ +static int emu_ceb (struct pt_regs *regs, int rx, float *val) { + FP_DECL_S(SA); FP_DECL_S(SB); + int IR; + + FP_UNPACK_RAW_SP(SA, &current->thread.fp_regs.fprs[rx].f); + FP_UNPACK_RAW_SP(SB, val); + FP_CMP_S(IR, SA, SB, 3); + /* + * IR == -1 if DA < DB, IR == 0 if DA == DB, + * IR == 1 if DA > DB and IR == 3 if unordered + */ + emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR); + return 0; +} + +/* Compare and signal long double */ +static int emu_kxbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QA); FP_DECL_Q(QB); + FP_DECL_EX; + mathemu_ldcv cvt; + int IR; + + cvt.w.high = current->thread.fp_regs.fprs[rx].ui; + cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui; + FP_UNPACK_RAW_QP(QA, &cvt.ld); + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QB, &cvt.ld); + FP_CMP_Q(IR, QA, QB, 3); + /* + * IR == -1 if DA < DB, IR == 0 if DA == DB, + * IR == 1 if DA > DB and IR == 3 if unordered + */ + emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 
2 : IR); + if (IR == 3) + FP_SET_EXCEPTION (FP_EX_INVALID); + return _fex; +} + +/* Compare and signal double */ +static int emu_kdbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_D(DB); + FP_DECL_EX; + int IR; + + FP_UNPACK_RAW_DP(DA, &current->thread.fp_regs.fprs[rx].d); + FP_UNPACK_RAW_DP(DB, &current->thread.fp_regs.fprs[ry].d); + FP_CMP_D(IR, DA, DB, 3); + /* + * IR == -1 if DA < DB, IR == 0 if DA == DB, + * IR == 1 if DA > DB and IR == 3 if unordered + */ + emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR); + if (IR == 3) + FP_SET_EXCEPTION (FP_EX_INVALID); + return _fex; +} + +/* Compare and signal double */ +static int emu_kdb (struct pt_regs *regs, int rx, double *val) { + FP_DECL_D(DA); FP_DECL_D(DB); + FP_DECL_EX; + int IR; + + FP_UNPACK_RAW_DP(DA, &current->thread.fp_regs.fprs[rx].d); + FP_UNPACK_RAW_DP(DB, val); + FP_CMP_D(IR, DA, DB, 3); + /* + * IR == -1 if DA < DB, IR == 0 if DA == DB, + * IR == 1 if DA > DB and IR == 3 if unordered + */ + emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR); + if (IR == 3) + FP_SET_EXCEPTION (FP_EX_INVALID); + return _fex; +} + +/* Compare and signal float */ +static int emu_kebr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_S(SA); FP_DECL_S(SB); + FP_DECL_EX; + int IR; + + FP_UNPACK_RAW_SP(SA, &current->thread.fp_regs.fprs[rx].f); + FP_UNPACK_RAW_SP(SB, &current->thread.fp_regs.fprs[ry].f); + FP_CMP_S(IR, SA, SB, 3); + /* + * IR == -1 if DA < DB, IR == 0 if DA == DB, + * IR == 1 if DA > DB and IR == 3 if unordered + */ + emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 
2 : IR); + if (IR == 3) + FP_SET_EXCEPTION (FP_EX_INVALID); + return _fex; +} + +/* Compare and signal float */ +static int emu_keb (struct pt_regs *regs, int rx, float *val) { + FP_DECL_S(SA); FP_DECL_S(SB); + FP_DECL_EX; + int IR; + + FP_UNPACK_RAW_SP(SA, &current->thread.fp_regs.fprs[rx].f); + FP_UNPACK_RAW_SP(SB, val); + FP_CMP_S(IR, SA, SB, 3); + /* + * IR == -1 if DA < DB, IR == 0 if DA == DB, + * IR == 1 if DA > DB and IR == 3 if unordered + */ + emu_set_CC(regs, (IR == -1) ? 1 : (IR == 1) ? 2 : IR); + if (IR == 3) + FP_SET_EXCEPTION (FP_EX_INVALID); + return _fex; +} + +/* Convert from fixed long double */ +static int emu_cxfbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + __s32 si; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + si = regs->gprs[ry]; + FP_FROM_INT_Q(QR, si, 32, int); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + return _fex; +} + +/* Convert from fixed double */ +static int emu_cdfbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DR); + FP_DECL_EX; + __s32 si; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + si = regs->gprs[ry]; + FP_FROM_INT_D(DR, si, 32, int); + FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR); + return _fex; +} + +/* Convert from fixed float */ +static int emu_cefbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_S(SR); + FP_DECL_EX; + __s32 si; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + si = regs->gprs[ry]; + FP_FROM_INT_S(SR, si, 32, int); + FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR); + return _fex; +} + +/* Convert to fixed long double */ +static int emu_cfxbr (struct pt_regs *regs, int rx, int ry, int mask) { + FP_DECL_Q(QA); + FP_DECL_EX; + mathemu_ldcv cvt; + __s32 si; + int mode; + + if (mask == 0) + mode = current->thread.fp_regs.fpc & 3; + else if (mask == 1) + mode = FP_RND_NEAREST; + else + mode = mask - 4; + cvt.w.high = 
current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + FP_TO_INT_ROUND_Q(si, QA, 32, 1); + regs->gprs[rx] = si; + emu_set_CC_cs(regs, QA_c, QA_s); + return _fex; +} + +/* Convert to fixed double */ +static int emu_cfdbr (struct pt_regs *regs, int rx, int ry, int mask) { + FP_DECL_D(DA); + FP_DECL_EX; + __s32 si; + int mode; + + if (mask == 0) + mode = current->thread.fp_regs.fpc & 3; + else if (mask == 1) + mode = FP_RND_NEAREST; + else + mode = mask - 4; + FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[ry].d); + FP_TO_INT_ROUND_D(si, DA, 32, 1); + regs->gprs[rx] = si; + emu_set_CC_cs(regs, DA_c, DA_s); + return _fex; +} + +/* Convert to fixed float */ +static int emu_cfebr (struct pt_regs *regs, int rx, int ry, int mask) { + FP_DECL_S(SA); + FP_DECL_EX; + __s32 si; + int mode; + + if (mask == 0) + mode = current->thread.fp_regs.fpc & 3; + else if (mask == 1) + mode = FP_RND_NEAREST; + else + mode = mask - 4; + FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[ry].f); + FP_TO_INT_ROUND_S(si, SA, 32, 1); + regs->gprs[rx] = si; + emu_set_CC_cs(regs, SA_c, SA_s); + return _fex; +} + +/* Divide long double */ +static int emu_dxbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + cvt.w.high = current->thread.fp_regs.fprs[rx].ui; + cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QB, &cvt.ld); + FP_DIV_Q(QR, QA, QB); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + return _fex; +} + +/* Divide double */ +static int emu_ddbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = 
current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d); + FP_UNPACK_DP(DB, &current->thread.fp_regs.fprs[ry].d); + FP_DIV_D(DR, DA, DB); + FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR); + return _fex; +} + +/* Divide double */ +static int emu_ddb (struct pt_regs *regs, int rx, double *val) { + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[rx].d); + FP_UNPACK_DP(DB, val); + FP_DIV_D(DR, DA, DB); + FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR); + return _fex; +} + +/* Divide float */ +static int emu_debr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f); + FP_UNPACK_SP(SB, &current->thread.fp_regs.fprs[ry].f); + FP_DIV_S(SR, SA, SB); + FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR); + return _fex; +} + +/* Divide float */ +static int emu_deb (struct pt_regs *regs, int rx, float *val) { + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[rx].f); + FP_UNPACK_SP(SB, val); + FP_DIV_S(SR, SA, SB); + FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR); + return _fex; +} + +/* Divide to integer double */ +static int emu_didbr (struct pt_regs *regs, int rx, int ry, int mask) { + display_emulation_not_implemented(regs, "didbr"); + return 0; +} + +/* Divide to integer float */ +static int emu_diebr (struct pt_regs *regs, int rx, int ry, int mask) { + display_emulation_not_implemented(regs, "diebr"); + return 0; +} + +/* Extract fpc */ +static int emu_efpc (struct pt_regs *regs, int rx, int ry) { + regs->gprs[rx] = current->thread.fp_regs.fpc; + return 0; +} + +/* Load and test long double */ +static int emu_ltxbr (struct pt_regs *regs, int rx, int ry) { + s390_fp_regs *fp_regs = 
&current->thread.fp_regs; + mathemu_ldcv cvt; + FP_DECL_Q(QA); + FP_DECL_EX; + + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + fp_regs->fprs[rx].ui = fp_regs->fprs[ry].ui; + fp_regs->fprs[rx+2].ui = fp_regs->fprs[ry+2].ui; + emu_set_CC_cs(regs, QA_c, QA_s); + return _fex; +} + +/* Load and test double */ +static int emu_ltdbr (struct pt_regs *regs, int rx, int ry) { + s390_fp_regs *fp_regs = &current->thread.fp_regs; + FP_DECL_D(DA); + FP_DECL_EX; + + FP_UNPACK_DP(DA, &fp_regs->fprs[ry].d); + fp_regs->fprs[rx].ui = fp_regs->fprs[ry].ui; + emu_set_CC_cs(regs, DA_c, DA_s); + return _fex; +} + +/* Load and test float */ +static int emu_ltebr (struct pt_regs *regs, int rx, int ry) { + s390_fp_regs *fp_regs = &current->thread.fp_regs; + FP_DECL_S(SA); + FP_DECL_EX; + + FP_UNPACK_SP(SA, &fp_regs->fprs[ry].f); + fp_regs->fprs[rx].ui = fp_regs->fprs[ry].ui; + emu_set_CC_cs(regs, SA_c, SA_s); + return _fex; +} + +/* Load complement long double */ +static int emu_lcxbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QA); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + FP_NEG_Q(QR, QA); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + emu_set_CC_cs(regs, QR_c, QR_s); + return _fex; +} + +/* Load complement double */ +static int emu_lcdbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[ry].d); + FP_NEG_D(DR, DA); + FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR); + emu_set_CC_cs(regs, DR_c, DR_s); + return _fex; +} + +/* Load complement float */ +static int emu_lcebr (struct pt_regs *regs, int 
rx, int ry) { + FP_DECL_S(SA); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[ry].f); + FP_NEG_S(SR, SA); + FP_PACK_SP(&current->thread.fp_regs.fprs[rx].f, SR); + emu_set_CC_cs(regs, SR_c, SR_s); + return _fex; +} + +/* Load floating point integer long double */ +static int emu_fixbr (struct pt_regs *regs, int rx, int ry, int mask) { + s390_fp_regs *fp_regs = &current->thread.fp_regs; + FP_DECL_Q(QA); + FP_DECL_EX; + mathemu_ldcv cvt; + __s32 si; + int mode; + + if (mask == 0) + mode = fp_regs->fpc & 3; + else if (mask == 1) + mode = FP_RND_NEAREST; + else + mode = mask - 4; + cvt.w.high = fp_regs->fprs[ry].ui; + cvt.w.low = fp_regs->fprs[ry+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + FP_TO_FPINT_ROUND_Q(QA); + FP_PACK_QP(&cvt.ld, QA); + fp_regs->fprs[rx].ui = cvt.w.high; + fp_regs->fprs[rx+2].ui = cvt.w.low; + return _fex; +} + +/* Load floating point integer double */ +static int emu_fidbr (struct pt_regs *regs, int rx, int ry, int mask) { + /* FIXME: rounding mode !! 
*/ + s390_fp_regs *fp_regs = &current->thread.fp_regs; + FP_DECL_D(DA); + FP_DECL_EX; + __s32 si; + int mode; + + if (mask == 0) + mode = fp_regs->fpc & 3; + else if (mask == 1) + mode = FP_RND_NEAREST; + else + mode = mask - 4; + FP_UNPACK_DP(DA, &fp_regs->fprs[ry].d); + FP_TO_FPINT_ROUND_D(DA); + FP_PACK_DP(&fp_regs->fprs[rx].d, DA); + return _fex; +} + +/* Load floating point integer float */ +static int emu_fiebr (struct pt_regs *regs, int rx, int ry, int mask) { + s390_fp_regs *fp_regs = &current->thread.fp_regs; + FP_DECL_S(SA); + FP_DECL_EX; + __s32 si; + int mode; + + if (mask == 0) + mode = fp_regs->fpc & 3; + else if (mask == 1) + mode = FP_RND_NEAREST; + else + mode = mask - 4; + FP_UNPACK_SP(SA, &fp_regs->fprs[ry].f); + FP_TO_FPINT_ROUND_S(SA); + FP_PACK_SP(&fp_regs->fprs[rx].f, SA); + return _fex; +} + +/* Load lengthened double to long double */ +static int emu_lxdbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, &current->thread.fp_regs.fprs[ry].d); + FP_CONV (Q, D, 4, 2, QR, DA); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + return _fex; +} + +/* Load lengthened double to long double */ +static int emu_lxdb (struct pt_regs *regs, int rx, double *val) { + FP_DECL_D(DA); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, val); + FP_CONV (Q, D, 4, 2, QR, DA); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + return _fex; +} + +/* Load lengthened float to long double */ +static int emu_lxebr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_S(SA); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, 
&current->thread.fp_regs.fprs[ry].f); + FP_CONV (Q, S, 4, 1, QR, SA); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + return _fex; +} + +/* Load lengthened float to long double */ +static int emu_lxeb (struct pt_regs *regs, int rx, float *val) { + FP_DECL_S(SA); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, val); + FP_CONV (Q, S, 4, 1, QR, SA); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + return _fex; +} + +/* Load lengthened float to double */ +static int emu_ldebr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_S(SA); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, &current->thread.fp_regs.fprs[ry].f); + FP_CONV (D, S, 2, 1, DR, SA); + FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR); + return _fex; +} + +/* Load lengthened float to double */ +static int emu_ldeb (struct pt_regs *regs, int rx, float *val) { + FP_DECL_S(SA); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, val); + FP_CONV (D, S, 2, 1, DR, SA); + FP_PACK_DP(&current->thread.fp_regs.fprs[rx].d, DR); + return _fex; +} + +/* Load negative long double */ +static int emu_lnxbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QA); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + if (QA_s == 0) { + FP_NEG_Q(QR, QA); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + } else { + current->thread.fp_regs.fprs[rx].ui = + current->thread.fp_regs.fprs[ry].ui; + current->thread.fp_regs.fprs[rx+2].ui 
= + current->thread.fp_regs.fprs[ry+2].ui; + } + emu_set_CC_cs(regs, QR_c, QR_s); + return _fex; +} + +/* Load negative double */ +static int emu_lndbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[ry].d); + if (DA_s == 0) { + FP_NEG_D(DR, DA); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rx].d, DR); + } else + current->thread.fp_regs.fprs[rx].ui = + current->thread.fp_regs.fprs[ry].ui; + emu_set_CC_cs(regs, DR_c, DR_s); + return _fex; +} + +/* Load negative float */ +static int emu_lnebr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_S(SA); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[ry].f); + if (SA_s == 0) { + FP_NEG_S(SR, SA); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rx].f, SR); + } else + current->thread.fp_regs.fprs[rx].ui = + current->thread.fp_regs.fprs[ry].ui; + emu_set_CC_cs(regs, SR_c, SR_s); + return _fex; +} + +/* Load positive long double */ +static int emu_lpxbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QA); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + if (QA_s != 0) { + FP_NEG_Q(QR, QA); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + } else{ + current->thread.fp_regs.fprs[rx].ui = + current->thread.fp_regs.fprs[ry].ui; + current->thread.fp_regs.fprs[rx+2].ui = + current->thread.fp_regs.fprs[ry+2].ui; + } + emu_set_CC_cs(regs, QR_c, QR_s); + return _fex; +} + +/* Load positive double */ +static int emu_lpdbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + 
FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[ry].d); + if (DA_s != 0) { + FP_NEG_D(DR, DA); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rx].d, DR); + } else + current->thread.fp_regs.fprs[rx].ui = + current->thread.fp_regs.fprs[ry].ui; + emu_set_CC_cs(regs, DR_c, DR_s); + return _fex; +} + +/* Load positive float */ +static int emu_lpebr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_S(SA); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[ry].f); + if (SA_s != 0) { + FP_NEG_S(SR, SA); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rx].f, SR); + } else + current->thread.fp_regs.fprs[rx].ui = + current->thread.fp_regs.fprs[ry].ui; + emu_set_CC_cs(regs, SR_c, SR_s); + return _fex; +} + +/* Load rounded long double to double */ +static int emu_ldxbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QA); FP_DECL_D(DR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + FP_CONV (D, Q, 2, 4, DR, QA); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rx].f, DR); + return _fex; +} + +/* Load rounded long double to float */ +static int emu_lexbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QA); FP_DECL_S(SR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + FP_CONV (S, Q, 1, 4, SR, QA); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rx].f, SR); + return _fex; +} + +/* Load rounded double to float */ +static int emu_ledbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[ry].d); + FP_CONV (S, D, 1, 2, SR, DA); + 
FP_PACK_SP(¤t->thread.fp_regs.fprs[rx].f, SR); + return _fex; +} + +/* Multiply long double */ +static int emu_mxbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + cvt.w.high = current->thread.fp_regs.fprs[rx].ui; + cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QB, &cvt.ld); + FP_MUL_Q(QR, QA, QB); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + return _fex; +} + +/* Multiply double */ +static int emu_mdbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[rx].d); + FP_UNPACK_DP(DB, ¤t->thread.fp_regs.fprs[ry].d); + FP_MUL_D(DR, DA, DB); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rx].d, DR); + return _fex; +} + +/* Multiply double */ +static int emu_mdb (struct pt_regs *regs, int rx, double *val) { + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[rx].d); + FP_UNPACK_DP(DB, val); + FP_MUL_D(DR, DA, DB); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rx].d, DR); + return _fex; +} + +/* Multiply double to long double */ +static int emu_mxdbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[rx].d); + FP_CONV (Q, D, 4, 2, QA, DA); + FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[ry].d); + FP_CONV (Q, D, 4, 2, QB, DA); + FP_MUL_Q(QR, QA, QB); + FP_PACK_QP(&cvt.ld, QR); + 
current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + return _fex; +} + +/* Multiply double to long double */ +static int emu_mxdb (struct pt_regs *regs, int rx, long double *val) { + FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + cvt.w.high = current->thread.fp_regs.fprs[rx].ui; + cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + FP_UNPACK_QP(QB, val); + FP_MUL_Q(QR, QA, QB); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + return _fex; +} + +/* Multiply float */ +static int emu_meebr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[rx].f); + FP_UNPACK_SP(SB, ¤t->thread.fp_regs.fprs[ry].f); + FP_MUL_S(SR, SA, SB); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rx].f, SR); + return _fex; +} + +/* Multiply float */ +static int emu_meeb (struct pt_regs *regs, int rx, float *val) { + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[rx].f); + FP_UNPACK_SP(SB, val); + FP_MUL_S(SR, SA, SB); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rx].f, SR); + return _fex; +} + +/* Multiply float to double */ +static int emu_mdebr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_S(SA); FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[rx].f); + FP_CONV (D, S, 2, 1, DA, SA); + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[ry].f); + FP_CONV (D, S, 2, 1, DB, SA); + FP_MUL_D(DR, DA, DB); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rx].d, DR); + return _fex; +} + +/* Multiply float to double */ 
+static int emu_mdeb (struct pt_regs *regs, int rx, float *val) { + FP_DECL_S(SA); FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[rx].f); + FP_CONV (D, S, 2, 1, DA, SA); + FP_UNPACK_SP(SA, val); + FP_CONV (D, S, 2, 1, DB, SA); + FP_MUL_D(DR, DA, DB); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rx].d, DR); + return _fex; +} + +/* Multiply and add double */ +static int emu_madbr (struct pt_regs *regs, int rx, int ry, int rz) { + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[rx].d); + FP_UNPACK_DP(DB, ¤t->thread.fp_regs.fprs[ry].d); + FP_UNPACK_DP(DC, ¤t->thread.fp_regs.fprs[rz].d); + FP_MUL_D(DR, DA, DB); + FP_ADD_D(DR, DR, DC); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rz].d, DR); + return _fex; +} + +/* Multiply and add double */ +static int emu_madb (struct pt_regs *regs, int rx, double *val, int rz) { + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[rx].d); + FP_UNPACK_DP(DB, val); + FP_UNPACK_DP(DC, ¤t->thread.fp_regs.fprs[rz].d); + FP_MUL_D(DR, DA, DB); + FP_ADD_D(DR, DR, DC); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rz].d, DR); + return _fex; +} + +/* Multiply and add float */ +static int emu_maebr (struct pt_regs *regs, int rx, int ry, int rz) { + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[rx].f); + FP_UNPACK_SP(SB, ¤t->thread.fp_regs.fprs[ry].f); + FP_UNPACK_SP(SC, ¤t->thread.fp_regs.fprs[rz].f); + FP_MUL_S(SR, SA, SB); + FP_ADD_S(SR, SR, SC); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rz].f, SR); + return _fex; +} + +/* Multiply and add float */ +static int emu_maeb (struct pt_regs *regs, int 
rx, float *val, int rz) { + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[rx].f); + FP_UNPACK_SP(SB, val); + FP_UNPACK_SP(SC, ¤t->thread.fp_regs.fprs[rz].f); + FP_MUL_S(SR, SA, SB); + FP_ADD_S(SR, SR, SC); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rz].f, SR); + return _fex; +} + +/* Multiply and subtract double */ +static int emu_msdbr (struct pt_regs *regs, int rx, int ry, int rz) { + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[rx].d); + FP_UNPACK_DP(DB, ¤t->thread.fp_regs.fprs[ry].d); + FP_UNPACK_DP(DC, ¤t->thread.fp_regs.fprs[rz].d); + FP_MUL_D(DR, DA, DB); + FP_SUB_D(DR, DR, DC); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rz].d, DR); + return _fex; +} + +/* Multiply and subtract double */ +static int emu_msdb (struct pt_regs *regs, int rx, double *val, int rz) { + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[rx].d); + FP_UNPACK_DP(DB, val); + FP_UNPACK_DP(DC, ¤t->thread.fp_regs.fprs[rz].d); + FP_MUL_D(DR, DA, DB); + FP_SUB_D(DR, DR, DC); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rz].d, DR); + return _fex; +} + +/* Multiply and subtract float */ +static int emu_msebr (struct pt_regs *regs, int rx, int ry, int rz) { + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[rx].f); + FP_UNPACK_SP(SB, ¤t->thread.fp_regs.fprs[ry].f); + FP_UNPACK_SP(SC, ¤t->thread.fp_regs.fprs[rz].f); + FP_MUL_S(SR, SA, SB); + FP_SUB_S(SR, SR, SC); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rz].f, SR); + return _fex; +} + +/* Multiply and subtract float */ +static int emu_mseb (struct pt_regs *regs, int rx, float 
*val, int rz) { + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[rx].f); + FP_UNPACK_SP(SB, val); + FP_UNPACK_SP(SC, ¤t->thread.fp_regs.fprs[rz].f); + FP_MUL_S(SR, SA, SB); + FP_SUB_S(SR, SR, SC); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rz].f, SR); + return _fex; +} + +/* Set floating point control word */ +static int emu_sfpc (struct pt_regs *regs, int rx, int ry) { + __u32 temp; + + temp = regs->gprs[rx]; + if ((temp & ~FPC_VALID_MASK) != 0) + return SIGILL; + current->thread.fp_regs.fpc = temp; + return 0; +} + +/* Square root long double */ +static int emu_sqxbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QA); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + FP_SQRT_Q(QR, QA); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + emu_set_CC_cs(regs, QR_c, QR_s); + return _fex; +} + +/* Square root double */ +static int emu_sqdbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[ry].d); + FP_SQRT_D(DR, DA); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rx].d, DR); + emu_set_CC_cs(regs, DR_c, DR_s); + return _fex; +} + +/* Square root double */ +static int emu_sqdb (struct pt_regs *regs, int rx, double *val) { + FP_DECL_D(DA); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, val); + FP_SQRT_D(DR, DA); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rx].d, DR); + emu_set_CC_cs(regs, DR_c, DR_s); + return _fex; +} + +/* Square root float */ +static int emu_sqebr (struct pt_regs *regs, int rx, int ry) 
{ + FP_DECL_S(SA); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[ry].f); + FP_SQRT_S(SR, SA); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rx].f, SR); + emu_set_CC_cs(regs, SR_c, SR_s); + return _fex; +} + +/* Square root float */ +static int emu_sqeb (struct pt_regs *regs, int rx, float *val) { + FP_DECL_S(SA); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, val); + FP_SQRT_S(SR, SA); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rx].f, SR); + emu_set_CC_cs(regs, SR_c, SR_s); + return _fex; +} + +/* Subtract long double */ +static int emu_sxbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR); + FP_DECL_EX; + mathemu_ldcv cvt; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + cvt.w.high = current->thread.fp_regs.fprs[rx].ui; + cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui; + FP_UNPACK_QP(QA, &cvt.ld); + cvt.w.high = current->thread.fp_regs.fprs[ry].ui; + cvt.w.low = current->thread.fp_regs.fprs[ry+2].ui; + FP_UNPACK_QP(QB, &cvt.ld); + FP_SUB_Q(QR, QA, QB); + FP_PACK_QP(&cvt.ld, QR); + current->thread.fp_regs.fprs[rx].ui = cvt.w.high; + current->thread.fp_regs.fprs[rx+2].ui = cvt.w.low; + emu_set_CC_cs(regs, QR_c, QR_s); + return _fex; +} + +/* Subtract double */ +static int emu_sdbr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, ¤t->thread.fp_regs.fprs[rx].d); + FP_UNPACK_DP(DB, ¤t->thread.fp_regs.fprs[ry].d); + FP_SUB_D(DR, DA, DB); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rx].d, DR); + emu_set_CC_cs(regs, DR_c, DR_s); + return _fex; +} + +/* Subtract double */ +static int emu_sdb (struct pt_regs *regs, int rx, double *val) { + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_DP(DA, 
¤t->thread.fp_regs.fprs[rx].d); + FP_UNPACK_DP(DB, val); + FP_SUB_D(DR, DA, DB); + FP_PACK_DP(¤t->thread.fp_regs.fprs[rx].d, DR); + emu_set_CC_cs(regs, DR_c, DR_s); + return _fex; +} + +/* Subtract float */ +static int emu_sebr (struct pt_regs *regs, int rx, int ry) { + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[rx].f); + FP_UNPACK_SP(SB, ¤t->thread.fp_regs.fprs[ry].f); + FP_SUB_S(SR, SA, SB); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rx].f, SR); + emu_set_CC_cs(regs, SR_c, SR_s); + return _fex; +} + +/* Subtract float */ +static int emu_seb (struct pt_regs *regs, int rx, float *val) { + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_EX; + int mode; + + mode = current->thread.fp_regs.fpc & 3; + FP_UNPACK_SP(SA, ¤t->thread.fp_regs.fprs[rx].f); + FP_UNPACK_SP(SB, val); + FP_SUB_S(SR, SA, SB); + FP_PACK_SP(¤t->thread.fp_regs.fprs[rx].f, SR); + emu_set_CC_cs(regs, SR_c, SR_s); + return _fex; +} + +/* Test data class long double */ +static int emu_tcxb (struct pt_regs *regs, int rx, long val) { + FP_DECL_Q(QA); + mathemu_ldcv cvt; + int bit; + + cvt.w.high = current->thread.fp_regs.fprs[rx].ui; + cvt.w.low = current->thread.fp_regs.fprs[rx+2].ui; + FP_UNPACK_RAW_QP(QA, &cvt.ld); + switch (QA_e) { + default: + bit = 8; /* normalized number */ + break; + case 0: + if (_FP_FRAC_ZEROP_4(QA)) + bit = 10; /* zero */ + else + bit = 6; /* denormalized number */ + break; + case _FP_EXPMAX_Q: + if (_FP_FRAC_ZEROP_4(QA)) + bit = 4; /* infinity */ + else if (_FP_FRAC_HIGH_RAW_Q(QA) & _FP_QNANBIT_Q) + bit = 2; /* quiet NAN */ + else + bit = 0; /* signaling NAN */ + break; + } + if (!QA_s) + bit++; + emu_set_CC(regs, ((__u32) val >> bit) & 1); + return 0; +} + +/* Test data class double */ +static int emu_tcdb (struct pt_regs *regs, int rx, long val) { + FP_DECL_D(DA); + int bit; + + FP_UNPACK_RAW_DP(DA, ¤t->thread.fp_regs.fprs[rx].d); + switch (DA_e) { + default: + 
bit = 8; /* normalized number */ + break; + case 0: + if (_FP_FRAC_ZEROP_2(DA)) + bit = 10; /* zero */ + else + bit = 6; /* denormalized number */ + break; + case _FP_EXPMAX_D: + if (_FP_FRAC_ZEROP_2(DA)) + bit = 4; /* infinity */ + else if (_FP_FRAC_HIGH_RAW_D(DA) & _FP_QNANBIT_D) + bit = 2; /* quiet NAN */ + else + bit = 0; /* signaling NAN */ + break; + } + if (!DA_s) + bit++; + emu_set_CC(regs, ((__u32) val >> bit) & 1); + return 0; +} + +/* Test data class float */ +static int emu_tceb (struct pt_regs *regs, int rx, long val) { + FP_DECL_S(SA); + int bit; + + FP_UNPACK_RAW_SP(SA, ¤t->thread.fp_regs.fprs[rx].f); + switch (SA_e) { + default: + bit = 8; /* normalized number */ + break; + case 0: + if (_FP_FRAC_ZEROP_1(SA)) + bit = 10; /* zero */ + else + bit = 6; /* denormalized number */ + break; + case _FP_EXPMAX_S: + if (_FP_FRAC_ZEROP_1(SA)) + bit = 4; /* infinity */ + else if (_FP_FRAC_HIGH_RAW_S(SA) & _FP_QNANBIT_S) + bit = 2; /* quiet NAN */ + else + bit = 0; /* signaling NAN */ + break; + } + if (!SA_s) + bit++; + emu_set_CC(regs, ((__u32) val >> bit) & 1); + return 0; +} + +static inline void emu_load_regd(int reg) { + if ((reg&9) != 0) /* test if reg in {0,2,4,6} */ + return; + asm volatile( /* load reg from fp_regs.fprs[reg] */ + " bras 1,0f\n" + " ld 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" (reg<<4),"a" (¤t->thread.fp_regs.fprs[reg].d) + : "1"); +} + +static inline void emu_load_rege(int reg) { + if ((reg&9) != 0) /* test if reg in {0,2,4,6} */ + return; + asm volatile( /* load reg from fp_regs.fprs[reg] */ + " bras 1,0f\n" + " le 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" (reg<<4), "a" (¤t->thread.fp_regs.fprs[reg].f) + : "1"); +} + +static inline void emu_store_regd(int reg) { + if ((reg&9) != 0) /* test if reg in {0,2,4,6} */ + return; + asm volatile( /* store reg to fp_regs.fprs[reg] */ + " bras 1,0f\n" + " std 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" (reg<<4), "a" (¤t->thread.fp_regs.fprs[reg].d) + : 
"1"); +} + + +static inline void emu_store_rege(int reg) { + if ((reg&9) != 0) /* test if reg in {0,2,4,6} */ + return; + asm volatile( /* store reg to fp_regs.fprs[reg] */ + " bras 1,0f\n" + " ste 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" (reg<<4), "a" (¤t->thread.fp_regs.fprs[reg].f) + : "1"); +} + +int math_emu_b3(__u8 *opcode, struct pt_regs * regs) { + int _fex = 0; + static const __u8 format_table[256] = { + [0x00] = 0x03,[0x01] = 0x03,[0x02] = 0x03,[0x03] = 0x03, + [0x04] = 0x0f,[0x05] = 0x0d,[0x06] = 0x0e,[0x07] = 0x0d, + [0x08] = 0x03,[0x09] = 0x03,[0x0a] = 0x03,[0x0b] = 0x03, + [0x0c] = 0x0f,[0x0d] = 0x03,[0x0e] = 0x06,[0x0f] = 0x06, + [0x10] = 0x02,[0x11] = 0x02,[0x12] = 0x02,[0x13] = 0x02, + [0x14] = 0x03,[0x15] = 0x02,[0x16] = 0x01,[0x17] = 0x03, + [0x18] = 0x02,[0x19] = 0x02,[0x1a] = 0x02,[0x1b] = 0x02, + [0x1c] = 0x02,[0x1d] = 0x02,[0x1e] = 0x05,[0x1f] = 0x05, + [0x40] = 0x01,[0x41] = 0x01,[0x42] = 0x01,[0x43] = 0x01, + [0x44] = 0x12,[0x45] = 0x0d,[0x46] = 0x11,[0x47] = 0x04, + [0x48] = 0x01,[0x49] = 0x01,[0x4a] = 0x01,[0x4b] = 0x01, + [0x4c] = 0x01,[0x4d] = 0x01,[0x53] = 0x06,[0x57] = 0x06, + [0x5b] = 0x05,[0x5f] = 0x05,[0x84] = 0x13,[0x8c] = 0x13, + [0x94] = 0x09,[0x95] = 0x08,[0x96] = 0x07,[0x98] = 0x0c, + [0x99] = 0x0b,[0x9a] = 0x0a + }; + static const void *jump_table[256]= { + [0x00] = emu_lpebr,[0x01] = emu_lnebr,[0x02] = emu_ltebr, + [0x03] = emu_lcebr,[0x04] = emu_ldebr,[0x05] = emu_lxdbr, + [0x06] = emu_lxebr,[0x07] = emu_mxdbr,[0x08] = emu_kebr, + [0x09] = emu_cebr, [0x0a] = emu_aebr, [0x0b] = emu_sebr, + [0x0c] = emu_mdebr,[0x0d] = emu_debr, [0x0e] = emu_maebr, + [0x0f] = emu_msebr,[0x10] = emu_lpdbr,[0x11] = emu_lndbr, + [0x12] = emu_ltdbr,[0x13] = emu_lcdbr,[0x14] = emu_sqebr, + [0x15] = emu_sqdbr,[0x16] = emu_sqxbr,[0x17] = emu_meebr, + [0x18] = emu_kdbr, [0x19] = emu_cdbr, [0x1a] = emu_adbr, + [0x1b] = emu_sdbr, [0x1c] = emu_mdbr, [0x1d] = emu_ddbr, + [0x1e] = emu_madbr,[0x1f] = emu_msdbr,[0x40] = emu_lpxbr, + [0x41] = 
emu_lnxbr,[0x42] = emu_ltxbr,[0x43] = emu_lcxbr, + [0x44] = emu_ledbr,[0x45] = emu_ldxbr,[0x46] = emu_lexbr, + [0x47] = emu_fixbr,[0x48] = emu_kxbr, [0x49] = emu_cxbr, + [0x4a] = emu_axbr, [0x4b] = emu_sxbr, [0x4c] = emu_mxbr, + [0x4d] = emu_dxbr, [0x53] = emu_diebr,[0x57] = emu_fiebr, + [0x5b] = emu_didbr,[0x5f] = emu_fidbr,[0x84] = emu_sfpc, + [0x8c] = emu_efpc, [0x94] = emu_cefbr,[0x95] = emu_cdfbr, + [0x96] = emu_cxfbr,[0x98] = emu_cfebr,[0x99] = emu_cfdbr, + [0x9a] = emu_cfxbr + }; + + switch (format_table[opcode[1]]) { + case 1: /* RRE format, long double operation */ + if (opcode[3] & 0x22) + return SIGILL; + emu_store_regd((opcode[3] >> 4) & 15); + emu_store_regd(((opcode[3] >> 4) & 15) + 2); + emu_store_regd(opcode[3] & 15); + emu_store_regd((opcode[3] & 15) + 2); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *,int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + emu_load_regd((opcode[3] >> 4) & 15); + emu_load_regd(((opcode[3] >> 4) & 15) + 2); + emu_load_regd(opcode[3] & 15); + emu_load_regd((opcode[3] & 15) + 2); + break; + case 2: /* RRE format, double operation */ + emu_store_regd((opcode[3] >> 4) & 15); + emu_store_regd(opcode[3] & 15); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + emu_load_regd((opcode[3] >> 4) & 15); + emu_load_regd(opcode[3] & 15); + break; + case 3: /* RRE format, float operation */ + emu_store_rege((opcode[3] >> 4) & 15); + emu_store_rege(opcode[3] & 15); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + emu_load_rege((opcode[3] >> 4) & 15); + emu_load_rege(opcode[3] & 15); + break; + case 4: /* RRF format, long double operation */ + if (opcode[3] & 0x22) + return SIGILL; + emu_store_regd((opcode[3] >> 4) & 15); + emu_store_regd(((opcode[3] >> 4) & 15) + 2); + 
emu_store_regd(opcode[3] & 15); + emu_store_regd((opcode[3] & 15) + 2); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15, opcode[2] >> 4); + emu_load_regd((opcode[3] >> 4) & 15); + emu_load_regd(((opcode[3] >> 4) & 15) + 2); + emu_load_regd(opcode[3] & 15); + emu_load_regd((opcode[3] & 15) + 2); + break; + case 5: /* RRF format, double operation */ + emu_store_regd((opcode[2] >> 4) & 15); + emu_store_regd((opcode[3] >> 4) & 15); + emu_store_regd(opcode[3] & 15); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15, opcode[2] >> 4); + emu_load_regd((opcode[2] >> 4) & 15); + emu_load_regd((opcode[3] >> 4) & 15); + emu_load_regd(opcode[3] & 15); + break; + case 6: /* RRF format, float operation */ + emu_store_rege((opcode[2] >> 4) & 15); + emu_store_rege((opcode[3] >> 4) & 15); + emu_store_rege(opcode[3] & 15); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15, opcode[2] >> 4); + emu_load_rege((opcode[2] >> 4) & 15); + emu_load_rege((opcode[3] >> 4) & 15); + emu_load_rege(opcode[3] & 15); + break; + case 7: /* RRE format, cxfbr instruction */ + /* call the emulation function */ + if (opcode[3] & 0x20) + return SIGILL; + _fex = ((int (*)(struct pt_regs *, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + emu_load_regd((opcode[3] >> 4) & 15); + emu_load_regd(((opcode[3] >> 4) & 15) + 2); + break; + case 8: /* RRE format, cdfbr instruction */ + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + emu_load_regd((opcode[3] >> 4) & 15); + break; + case 9: /* RRE format, cefbr instruction */ + /* call the emulation function */ + _fex = ((int 
(*)(struct pt_regs *, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + emu_load_rege((opcode[3] >> 4) & 15); + break; + case 10: /* RRF format, cfxbr instruction */ + if ((opcode[2] & 128) == 128 || (opcode[2] & 96) == 32) + /* mask of { 2,3,8-15 } is invalid */ + return SIGILL; + if (opcode[3] & 2) + return SIGILL; + emu_store_regd(opcode[3] & 15); + emu_store_regd((opcode[3] & 15) + 2); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15, opcode[2] >> 4); + break; + case 11: /* RRF format, cfdbr instruction */ + if ((opcode[2] & 128) == 128 || (opcode[2] & 96) == 32) + /* mask of { 2,3,8-15 } is invalid */ + return SIGILL; + emu_store_regd(opcode[3] & 15); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15, opcode[2] >> 4); + break; + case 12: /* RRF format, cfebr instruction */ + if ((opcode[2] & 128) == 128 || (opcode[2] & 96) == 32) + /* mask of { 2,3,8-15 } is invalid */ + return SIGILL; + emu_store_rege(opcode[3] & 15); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15, opcode[2] >> 4); + break; + case 13: /* RRE format, ldxbr & mdxbr instruction */ + /* double store but long double load */ + if (opcode[3] & 0x20) + return SIGILL; + emu_store_regd((opcode[3] >> 4) & 15); + emu_store_regd(opcode[3] & 15); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + emu_load_regd((opcode[3] >> 4) & 15); + emu_load_regd(((opcode[3] >> 4) & 15) + 2); + break; + case 14: /* RRE format, ldxbr & mdxbr instruction */ + /* float store but long double load */ + if (opcode[3] & 0x20) + return SIGILL; + emu_store_rege((opcode[3] >> 4) & 15); + 
emu_store_rege(opcode[3] & 15); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + emu_load_regd((opcode[3] >> 4) & 15); + emu_load_regd(((opcode[3] >> 4) & 15) + 2); + break; + case 15: /* RRE format, ldebr & mdebr instruction */ + /* float store but double load */ + emu_store_rege((opcode[3] >> 4) & 15); + emu_store_rege(opcode[3] & 15); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + emu_load_regd((opcode[3] >> 4) & 15); + break; + case 16: /* RRE format, ldxbr instruction */ + /* long double store but double load */ + if (opcode[3] & 2) + return SIGILL; + emu_store_regd(opcode[3] & 15); + emu_store_regd((opcode[3] & 15) + 2); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + emu_load_regd((opcode[3] >> 4) & 15); + break; + case 17: /* RRE format, ldxbr instruction */ + /* long double store but float load */ + if (opcode[3] & 2) + return SIGILL; + emu_store_regd(opcode[3] & 15); + emu_store_regd((opcode[3] & 15) + 2); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + emu_load_rege((opcode[3] >> 4) & 15); + break; + case 18: /* RRE format, ledbr instruction */ + /* double store but float load */ + emu_store_regd(opcode[3] & 15); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + emu_load_rege((opcode[3] >> 4) & 15); + break; + case 19: /* RRE format, efpc & sfpc instruction */ + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, int)) + jump_table[opcode[1]]) + (regs, opcode[3] >> 4, opcode[3] & 15); + break; + default: /* invalid 
operation */ + return SIGILL; + } + if (_fex != 0) { + current->thread.fp_regs.fpc |= _fex; + if (current->thread.fp_regs.fpc & (_fex << 8)) + return SIGFPE; + } + return 0; +} + +static void* calc_addr(struct pt_regs *regs, int rx, int rb, int disp) +{ + addr_t addr; + + rx &= 15; + rb &= 15; + addr = disp & 0xfff; + addr += (rx != 0) ? regs->gprs[rx] : 0; /* + index */ + addr += (rb != 0) ? regs->gprs[rb] : 0; /* + base */ + return (void*) addr; +} + +int math_emu_ed(__u8 *opcode, struct pt_regs * regs) { + int _fex = 0; + + static const __u8 format_table[256] = { + [0x04] = 0x06,[0x05] = 0x05,[0x06] = 0x07,[0x07] = 0x05, + [0x08] = 0x02,[0x09] = 0x02,[0x0a] = 0x02,[0x0b] = 0x02, + [0x0c] = 0x06,[0x0d] = 0x02,[0x0e] = 0x04,[0x0f] = 0x04, + [0x10] = 0x08,[0x11] = 0x09,[0x12] = 0x0a,[0x14] = 0x02, + [0x15] = 0x01,[0x17] = 0x02,[0x18] = 0x01,[0x19] = 0x01, + [0x1a] = 0x01,[0x1b] = 0x01,[0x1c] = 0x01,[0x1d] = 0x01, + [0x1e] = 0x03,[0x1f] = 0x03, + }; + static const void *jump_table[]= { + [0x04] = emu_ldeb,[0x05] = emu_lxdb,[0x06] = emu_lxeb, + [0x07] = emu_mxdb,[0x08] = emu_keb, [0x09] = emu_ceb, + [0x0a] = emu_aeb, [0x0b] = emu_seb, [0x0c] = emu_mdeb, + [0x0d] = emu_deb, [0x0e] = emu_maeb,[0x0f] = emu_mseb, + [0x10] = emu_tceb,[0x11] = emu_tcdb,[0x12] = emu_tcxb, + [0x14] = emu_sqeb,[0x15] = emu_sqdb,[0x17] = emu_meeb, + [0x18] = emu_kdb, [0x19] = emu_cdb, [0x1a] = emu_adb, + [0x1b] = emu_sdb, [0x1c] = emu_mdb, [0x1d] = emu_ddb, + [0x1e] = emu_madb,[0x1f] = emu_msdb + }; + + switch (format_table[opcode[5]]) { + case 1: /* RXE format, double constant */ { + __u64 *dxb, temp; + __u32 opc; + + emu_store_regd((opcode[1] >> 4) & 15); + opc = *((__u32 *) opcode); + dxb = (__u64 *) calc_addr(regs, opc >> 16, opc >> 12, opc); + mathemu_copy_from_user(&temp, dxb, 8); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, double *)) + jump_table[opcode[5]]) + (regs, opcode[1] >> 4, (double *) &temp); + emu_load_regd((opcode[1] >> 4) & 15); + break; + } 
+ case 2: /* RXE format, float constant */ { + __u32 *dxb, temp; + __u32 opc; + + emu_store_rege((opcode[1] >> 4) & 15); + opc = *((__u32 *) opcode); + dxb = (__u32 *) calc_addr(regs, opc >> 16, opc >> 12, opc); + mathemu_get_user(temp, dxb); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, float *)) + jump_table[opcode[5]]) + (regs, opcode[1] >> 4, (float *) &temp); + emu_load_rege((opcode[1] >> 4) & 15); + break; + } + case 3: /* RXF format, double constant */ { + __u64 *dxb, temp; + __u32 opc; + + emu_store_regd((opcode[1] >> 4) & 15); + emu_store_regd((opcode[4] >> 4) & 15); + opc = *((__u32 *) opcode); + dxb = (__u64 *) calc_addr(regs, opc >> 16, opc >> 12, opc); + mathemu_copy_from_user(&temp, dxb, 8); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, double *, int)) + jump_table[opcode[5]]) + (regs, opcode[1] >> 4, (double *) &temp, opcode[4] >> 4); + emu_load_regd((opcode[1] >> 4) & 15); + break; + } + case 4: /* RXF format, float constant */ { + __u32 *dxb, temp; + __u32 opc; + + emu_store_rege((opcode[1] >> 4) & 15); + emu_store_rege((opcode[4] >> 4) & 15); + opc = *((__u32 *) opcode); + dxb = (__u32 *) calc_addr(regs, opc >> 16, opc >> 12, opc); + mathemu_get_user(temp, dxb); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, float *, int)) + jump_table[opcode[5]]) + (regs, opcode[1] >> 4, (float *) &temp, opcode[4] >> 4); + emu_load_rege((opcode[4] >> 4) & 15); + break; + } + case 5: /* RXE format, double constant */ + /* store double and load long double */ + { + __u64 *dxb, temp; + __u32 opc; + if ((opcode[1] >> 4) & 0x20) + return SIGILL; + emu_store_regd((opcode[1] >> 4) & 15); + opc = *((__u32 *) opcode); + dxb = (__u64 *) calc_addr(regs, opc >> 16, opc >> 12, opc); + mathemu_copy_from_user(&temp, dxb, 8); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, double *)) + jump_table[opcode[5]]) + (regs, opcode[1] >> 4, (double *) &temp); + 
emu_load_regd((opcode[1] >> 4) & 15); + emu_load_regd(((opcode[1] >> 4) & 15) + 2); + break; + } + case 6: /* RXE format, float constant */ + /* store float and load double */ + { + __u32 *dxb, temp; + __u32 opc; + emu_store_rege((opcode[1] >> 4) & 15); + opc = *((__u32 *) opcode); + dxb = (__u32 *) calc_addr(regs, opc >> 16, opc >> 12, opc); + mathemu_get_user(temp, dxb); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, float *)) + jump_table[opcode[5]]) + (regs, opcode[1] >> 4, (float *) &temp); + emu_load_regd((opcode[1] >> 4) & 15); + break; + } + case 7: /* RXE format, float constant */ + /* store float and load long double */ + { + __u32 *dxb, temp; + __u32 opc; + if ((opcode[1] >> 4) & 0x20) + return SIGILL; + emu_store_rege((opcode[1] >> 4) & 15); + opc = *((__u32 *) opcode); + dxb = (__u32 *) calc_addr(regs, opc >> 16, opc >> 12, opc); + mathemu_get_user(temp, dxb); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, float *)) + jump_table[opcode[5]]) + (regs, opcode[1] >> 4, (float *) &temp); + emu_load_regd((opcode[1] >> 4) & 15); + emu_load_regd(((opcode[1] >> 4) & 15) + 2); + break; + } + case 8: /* RXE format, RX address used as int value */ { + __u64 dxb; + __u32 opc; + + emu_store_rege((opcode[1] >> 4) & 15); + opc = *((__u32 *) opcode); + dxb = (__u64) calc_addr(regs, opc >> 16, opc >> 12, opc); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, long)) + jump_table[opcode[5]]) + (regs, opcode[1] >> 4, dxb); + break; + } + case 9: /* RXE format, RX address used as int value */ { + __u64 dxb; + __u32 opc; + + emu_store_regd((opcode[1] >> 4) & 15); + opc = *((__u32 *) opcode); + dxb = (__u64) calc_addr(regs, opc >> 16, opc >> 12, opc); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, long)) + jump_table[opcode[5]]) + (regs, opcode[1] >> 4, dxb); + break; + } + case 10: /* RXE format, RX address used as int value */ { + __u64 dxb; + __u32 opc; 
+ + if ((opcode[1] >> 4) & 2) + return SIGILL; + emu_store_regd((opcode[1] >> 4) & 15); + emu_store_regd(((opcode[1] >> 4) & 15) + 2); + opc = *((__u32 *) opcode); + dxb = (__u64) calc_addr(regs, opc >> 16, opc >> 12, opc); + /* call the emulation function */ + _fex = ((int (*)(struct pt_regs *, int, long)) + jump_table[opcode[5]]) + (regs, opcode[1] >> 4, dxb); + break; + } + default: /* invalid operation */ + return SIGILL; + } + if (_fex != 0) { + current->thread.fp_regs.fpc |= _fex; + if (current->thread.fp_regs.fpc & (_fex << 8)) + return SIGFPE; + } + return 0; +} + +/* + * Emulate LDR Rx,Ry with Rx or Ry not in {0, 2, 4, 6} + */ +int math_emu_ldr(__u8 *opcode) { + s390_fp_regs *fp_regs = &current->thread.fp_regs; + __u16 opc = *((__u16 *) opcode); + + if ((opc & 0x90) == 0) { /* test if rx in {0,2,4,6} */ + /* we got an exception therefore ry can't be in {0,2,4,6} */ + asm volatile( /* load rx from fp_regs.fprs[ry] */ + " bras 1,0f\n" + " ld 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" (opc & 0xf0), "a" (&fp_regs->fprs[opc & 0xf].d) + : "1"); + } else if ((opc & 0x9) == 0) { /* test if ry in {0,2,4,6} */ + asm volatile ( /* store ry to fp_regs.fprs[rx] */ + " bras 1,0f\n" + " std 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" ((opc & 0xf) << 4), + "a" (&fp_regs->fprs[(opc & 0xf0)>>4].d) + : "1"); + } else /* move fp_regs.fprs[ry] to fp_regs.fprs[rx] */ + fp_regs->fprs[(opc & 0xf0) >> 4] = fp_regs->fprs[opc & 0xf]; + return 0; +} + +/* + * Emulate LER Rx,Ry with Rx or Ry not in {0, 2, 4, 6} + */ +int math_emu_ler(__u8 *opcode) { + s390_fp_regs *fp_regs = &current->thread.fp_regs; + __u16 opc = *((__u16 *) opcode); + + if ((opc & 0x90) == 0) { /* test if rx in {0,2,4,6} */ + /* we got an exception therefore ry can't be in {0,2,4,6} */ + asm volatile( /* load rx from fp_regs.fprs[ry] */ + " bras 1,0f\n" + " le 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" (opc & 0xf0), "a" (&fp_regs->fprs[opc & 0xf].f) + : "1"); + } else if ((opc & 0x9) == 
0) { /* test if ry in {0,2,4,6} */ + asm volatile( /* store ry to fp_regs.fprs[rx] */ + " bras 1,0f\n" + " ste 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" ((opc & 0xf) << 4), + "a" (&fp_regs->fprs[(opc & 0xf0) >> 4].f) + : "1"); + } else /* move fp_regs.fprs[ry] to fp_regs.fprs[rx] */ + fp_regs->fprs[(opc & 0xf0) >> 4] = fp_regs->fprs[opc & 0xf]; + return 0; +} + +/* + * Emulate LD R,D(X,B) with R not in {0, 2, 4, 6} + */ +int math_emu_ld(__u8 *opcode, struct pt_regs * regs) { + s390_fp_regs *fp_regs = &current->thread.fp_regs; + __u32 opc = *((__u32 *) opcode); + __u64 *dxb; + + dxb = (__u64 *) calc_addr(regs, opc >> 16, opc >> 12, opc); + mathemu_copy_from_user(&fp_regs->fprs[(opc >> 20) & 0xf].d, dxb, 8); + return 0; +} + +/* + * Emulate LE R,D(X,B) with R not in {0, 2, 4, 6} + */ +int math_emu_le(__u8 *opcode, struct pt_regs * regs) { + s390_fp_regs *fp_regs = &current->thread.fp_regs; + __u32 opc = *((__u32 *) opcode); + __u32 *mem, *dxb; + + dxb = (__u32 *) calc_addr(regs, opc >> 16, opc >> 12, opc); + mem = (__u32 *) (&fp_regs->fprs[(opc >> 20) & 0xf].f); + mathemu_get_user(mem[0], dxb); + return 0; +} + +/* + * Emulate STD R,D(X,B) with R not in {0, 2, 4, 6} + */ +int math_emu_std(__u8 *opcode, struct pt_regs * regs) { + s390_fp_regs *fp_regs = &current->thread.fp_regs; + __u32 opc = *((__u32 *) opcode); + __u64 *dxb; + + dxb = (__u64 *) calc_addr(regs, opc >> 16, opc >> 12, opc); + mathemu_copy_to_user(dxb, &fp_regs->fprs[(opc >> 20) & 0xf].d, 8); + return 0; +} + +/* + * Emulate STE R,D(X,B) with R not in {0, 2, 4, 6} + */ +int math_emu_ste(__u8 *opcode, struct pt_regs * regs) { + s390_fp_regs *fp_regs = &current->thread.fp_regs; + __u32 opc = *((__u32 *) opcode); + __u32 *mem, *dxb; + + dxb = (__u32 *) calc_addr(regs, opc >> 16, opc >> 12, opc); + mem = (__u32 *) (&fp_regs->fprs[(opc >> 20) & 0xf].f); + mathemu_put_user(mem[0], dxb); + return 0; +} + +/* + * Emulate LFPC D(B) + */ +int math_emu_lfpc(__u8 *opcode, struct pt_regs *regs) { + __u32 opc = *((__u32 *) 
opcode); + __u32 *dxb, temp; + + dxb= (__u32 *) calc_addr(regs, 0, opc>>12, opc); + mathemu_get_user(temp, dxb); + if ((temp & ~FPC_VALID_MASK) != 0) + return SIGILL; + current->thread.fp_regs.fpc = temp; + return 0; +} + +/* + * Emulate STFPC D(B) + */ +int math_emu_stfpc(__u8 *opcode, struct pt_regs *regs) { + __u32 opc = *((__u32 *) opcode); + __u32 *dxb; + + dxb= (__u32 *) calc_addr(regs, 0, opc>>12, opc); + mathemu_put_user(current->thread.fp_regs.fpc, dxb); + return 0; +} + +/* + * Emulate SRNM D(B) + */ +int math_emu_srnm(__u8 *opcode, struct pt_regs *regs) { + __u32 opc = *((__u32 *) opcode); + __u32 temp; + + temp = calc_addr(regs, 0, opc>>12, opc); + current->thread.fp_regs.fpc &= ~3; + current->thread.fp_regs.fpc |= (temp & 3); + return 0; +} + +/* broken compiler ... */ +long long +__negdi2 (long long u) +{ + + union lll { + long long ll; + long s[2]; + }; + + union lll w,uu; + + uu.ll = u; + + w.s[1] = -uu.s[1]; + w.s[0] = -uu.s[0] - ((int) w.s[1] != 0); + + return w.ll; +} diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile new file mode 100644 index 00000000..d98fe900 --- /dev/null +++ b/arch/s390/mm/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for the linux s390-specific parts of the memory manager. +# + +obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ + page-states.o gup.o +obj-$(CONFIG_CMM) += cmm.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c new file mode 100644 index 00000000..1f1dba9d --- /dev/null +++ b/arch/s390/mm/cmm.c @@ -0,0 +1,497 @@ +/* + * Collaborative memory management interface. 
+ * + * Copyright IBM Corp 2003,2010 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * + */ + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/gfp.h> +#include <linux/sched.h> +#include <linux/sysctl.h> +#include <linux/ctype.h> +#include <linux/swap.h> +#include <linux/kthread.h> +#include <linux/oom.h> +#include <linux/suspend.h> +#include <linux/uaccess.h> + +#include <asm/pgalloc.h> +#include <asm/diag.h> + +#ifdef CONFIG_CMM_IUCV +static char *cmm_default_sender = "VMRMSVM"; +#endif +static char *sender; +module_param(sender, charp, 0400); +MODULE_PARM_DESC(sender, + "Guest name that may send SMSG messages (default VMRMSVM)"); + +#include "../../../drivers/s390/net/smsgiucv.h" + +#define CMM_NR_PAGES ((PAGE_SIZE / sizeof(unsigned long)) - 2) + +struct cmm_page_array { + struct cmm_page_array *next; + unsigned long index; + unsigned long pages[CMM_NR_PAGES]; +}; + +static long cmm_pages; +static long cmm_timed_pages; +static volatile long cmm_pages_target; +static volatile long cmm_timed_pages_target; +static long cmm_timeout_pages; +static long cmm_timeout_seconds; +static int cmm_suspended; + +static struct cmm_page_array *cmm_page_list; +static struct cmm_page_array *cmm_timed_page_list; +static DEFINE_SPINLOCK(cmm_lock); + +static struct task_struct *cmm_thread_ptr; +static DECLARE_WAIT_QUEUE_HEAD(cmm_thread_wait); +static DEFINE_TIMER(cmm_timer, NULL, 0, 0); + +static void cmm_timer_fn(unsigned long); +static void cmm_set_timer(void); + +static long cmm_alloc_pages(long nr, long *counter, + struct cmm_page_array **list) +{ + struct cmm_page_array *pa, *npa; + unsigned long addr; + + while (nr) { + addr = __get_free_page(GFP_NOIO); + if (!addr) + break; + spin_lock(&cmm_lock); + pa = *list; + if (!pa || pa->index >= CMM_NR_PAGES) { + /* Need a new page for the page list. 
*/ + spin_unlock(&cmm_lock); + npa = (struct cmm_page_array *) + __get_free_page(GFP_NOIO); + if (!npa) { + free_page(addr); + break; + } + spin_lock(&cmm_lock); + pa = *list; + if (!pa || pa->index >= CMM_NR_PAGES) { + npa->next = pa; + npa->index = 0; + pa = npa; + *list = pa; + } else + free_page((unsigned long) npa); + } + diag10_range(addr >> PAGE_SHIFT, 1); + pa->pages[pa->index++] = addr; + (*counter)++; + spin_unlock(&cmm_lock); + nr--; + } + return nr; +} + +static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list) +{ + struct cmm_page_array *pa; + unsigned long addr; + + spin_lock(&cmm_lock); + pa = *list; + while (nr) { + if (!pa || pa->index <= 0) + break; + addr = pa->pages[--pa->index]; + if (pa->index == 0) { + pa = pa->next; + free_page((unsigned long) *list); + *list = pa; + } + free_page(addr); + (*counter)--; + nr--; + } + spin_unlock(&cmm_lock); + return nr; +} + +static int cmm_oom_notify(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + unsigned long *freed = parm; + long nr = 256; + + nr = cmm_free_pages(nr, &cmm_timed_pages, &cmm_timed_page_list); + if (nr > 0) + nr = cmm_free_pages(nr, &cmm_pages, &cmm_page_list); + cmm_pages_target = cmm_pages; + cmm_timed_pages_target = cmm_timed_pages; + *freed += 256 - nr; + return NOTIFY_OK; +} + +static struct notifier_block cmm_oom_nb = { + .notifier_call = cmm_oom_notify, +}; + +static int cmm_thread(void *dummy) +{ + int rc; + + while (1) { + rc = wait_event_interruptible(cmm_thread_wait, + (!cmm_suspended && (cmm_pages != cmm_pages_target || + cmm_timed_pages != cmm_timed_pages_target)) || + kthread_should_stop()); + if (kthread_should_stop() || rc == -ERESTARTSYS) { + cmm_pages_target = cmm_pages; + cmm_timed_pages_target = cmm_timed_pages; + break; + } + if (cmm_pages_target > cmm_pages) { + if (cmm_alloc_pages(1, &cmm_pages, &cmm_page_list)) + cmm_pages_target = cmm_pages; + } else if (cmm_pages_target < cmm_pages) { + cmm_free_pages(1, &cmm_pages, 
&cmm_page_list); + } + if (cmm_timed_pages_target > cmm_timed_pages) { + if (cmm_alloc_pages(1, &cmm_timed_pages, + &cmm_timed_page_list)) + cmm_timed_pages_target = cmm_timed_pages; + } else if (cmm_timed_pages_target < cmm_timed_pages) { + cmm_free_pages(1, &cmm_timed_pages, + &cmm_timed_page_list); + } + if (cmm_timed_pages > 0 && !timer_pending(&cmm_timer)) + cmm_set_timer(); + } + return 0; +} + +static void cmm_kick_thread(void) +{ + wake_up(&cmm_thread_wait); +} + +static void cmm_set_timer(void) +{ + if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) { + if (timer_pending(&cmm_timer)) + del_timer(&cmm_timer); + return; + } + if (timer_pending(&cmm_timer)) { + if (mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds*HZ)) + return; + } + cmm_timer.function = cmm_timer_fn; + cmm_timer.data = 0; + cmm_timer.expires = jiffies + cmm_timeout_seconds*HZ; + add_timer(&cmm_timer); +} + +static void cmm_timer_fn(unsigned long ignored) +{ + long nr; + + nr = cmm_timed_pages_target - cmm_timeout_pages; + if (nr < 0) + cmm_timed_pages_target = 0; + else + cmm_timed_pages_target = nr; + cmm_kick_thread(); + cmm_set_timer(); +} + +static void cmm_set_pages(long nr) +{ + cmm_pages_target = nr; + cmm_kick_thread(); +} + +static long cmm_get_pages(void) +{ + return cmm_pages; +} + +static void cmm_add_timed_pages(long nr) +{ + cmm_timed_pages_target += nr; + cmm_kick_thread(); +} + +static long cmm_get_timed_pages(void) +{ + return cmm_timed_pages; +} + +static void cmm_set_timeout(long nr, long seconds) +{ + cmm_timeout_pages = nr; + cmm_timeout_seconds = seconds; + cmm_set_timer(); +} + +static int cmm_skip_blanks(char *cp, char **endp) +{ + char *str; + + for (str = cp; *str == ' ' || *str == '\t'; str++) + ; + *endp = str; + return str != cp; +} + +static struct ctl_table cmm_table[]; + +static int cmm_pages_handler(ctl_table *ctl, int write, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + char buf[16], *p; + long nr; + int len; + + if (!*lenp || (*ppos 
&& !write)) { + *lenp = 0; + return 0; + } + + if (write) { + len = *lenp; + if (copy_from_user(buf, buffer, + len > sizeof(buf) ? sizeof(buf) : len)) + return -EFAULT; + buf[sizeof(buf) - 1] = '\0'; + cmm_skip_blanks(buf, &p); + nr = simple_strtoul(p, &p, 0); + if (ctl == &cmm_table[0]) + cmm_set_pages(nr); + else + cmm_add_timed_pages(nr); + } else { + if (ctl == &cmm_table[0]) + nr = cmm_get_pages(); + else + nr = cmm_get_timed_pages(); + len = sprintf(buf, "%ld\n", nr); + if (len > *lenp) + len = *lenp; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + } + *lenp = len; + *ppos += len; + return 0; +} + +static int cmm_timeout_handler(ctl_table *ctl, int write, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + char buf[64], *p; + long nr, seconds; + int len; + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + + if (write) { + len = *lenp; + if (copy_from_user(buf, buffer, + len > sizeof(buf) ? sizeof(buf) : len)) + return -EFAULT; + buf[sizeof(buf) - 1] = '\0'; + cmm_skip_blanks(buf, &p); + nr = simple_strtoul(p, &p, 0); + cmm_skip_blanks(p, &p); + seconds = simple_strtoul(p, &p, 0); + cmm_set_timeout(nr, seconds); + } else { + len = sprintf(buf, "%ld %ld\n", + cmm_timeout_pages, cmm_timeout_seconds); + if (len > *lenp) + len = *lenp; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + } + *lenp = len; + *ppos += len; + return 0; +} + +static struct ctl_table cmm_table[] = { + { + .procname = "cmm_pages", + .mode = 0644, + .proc_handler = cmm_pages_handler, + }, + { + .procname = "cmm_timed_pages", + .mode = 0644, + .proc_handler = cmm_pages_handler, + }, + { + .procname = "cmm_timeout", + .mode = 0644, + .proc_handler = cmm_timeout_handler, + }, + { } +}; + +static struct ctl_table cmm_dir_table[] = { + { + .procname = "vm", + .maxlen = 0, + .mode = 0555, + .child = cmm_table, + }, + { } +}; + +#ifdef CONFIG_CMM_IUCV +#define SMSG_PREFIX "CMM" +static void cmm_smsg_target(const char *from, char *msg) +{ + long nr, seconds; + 
+ if (strlen(sender) > 0 && strcmp(from, sender) != 0) + return; + if (!cmm_skip_blanks(msg + strlen(SMSG_PREFIX), &msg)) + return; + if (strncmp(msg, "SHRINK", 6) == 0) { + if (!cmm_skip_blanks(msg + 6, &msg)) + return; + nr = simple_strtoul(msg, &msg, 0); + cmm_skip_blanks(msg, &msg); + if (*msg == '\0') + cmm_set_pages(nr); + } else if (strncmp(msg, "RELEASE", 7) == 0) { + if (!cmm_skip_blanks(msg + 7, &msg)) + return; + nr = simple_strtoul(msg, &msg, 0); + cmm_skip_blanks(msg, &msg); + if (*msg == '\0') + cmm_add_timed_pages(nr); + } else if (strncmp(msg, "REUSE", 5) == 0) { + if (!cmm_skip_blanks(msg + 5, &msg)) + return; + nr = simple_strtoul(msg, &msg, 0); + if (!cmm_skip_blanks(msg, &msg)) + return; + seconds = simple_strtoul(msg, &msg, 0); + cmm_skip_blanks(msg, &msg); + if (*msg == '\0') + cmm_set_timeout(nr, seconds); + } +} +#endif + +static struct ctl_table_header *cmm_sysctl_header; + +static int cmm_suspend(void) +{ + cmm_suspended = 1; + cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); + cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); + return 0; +} + +static int cmm_resume(void) +{ + cmm_suspended = 0; + cmm_kick_thread(); + return 0; +} + +static int cmm_power_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + switch (event) { + case PM_POST_HIBERNATION: + return cmm_resume(); + case PM_HIBERNATION_PREPARE: + return cmm_suspend(); + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block cmm_power_notifier = { + .notifier_call = cmm_power_event, +}; + +static int __init cmm_init(void) +{ + int rc = -ENOMEM; + + cmm_sysctl_header = register_sysctl_table(cmm_dir_table); + if (!cmm_sysctl_header) + goto out_sysctl; +#ifdef CONFIG_CMM_IUCV + /* convert sender to uppercase characters */ + if (sender) { + int len = strlen(sender); + while (len--) + sender[len] = toupper(sender[len]); + } else { + sender = cmm_default_sender; + } + + rc = smsg_register_callback(SMSG_PREFIX, 
cmm_smsg_target); + if (rc < 0) + goto out_smsg; +#endif + rc = register_oom_notifier(&cmm_oom_nb); + if (rc < 0) + goto out_oom_notify; + rc = register_pm_notifier(&cmm_power_notifier); + if (rc) + goto out_pm; + cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); + rc = IS_ERR(cmm_thread_ptr) ? PTR_ERR(cmm_thread_ptr) : 0; + if (rc) + goto out_kthread; + return 0; + +out_kthread: + unregister_pm_notifier(&cmm_power_notifier); +out_pm: + unregister_oom_notifier(&cmm_oom_nb); +out_oom_notify: +#ifdef CONFIG_CMM_IUCV + smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target); +out_smsg: +#endif + unregister_sysctl_table(cmm_sysctl_header); +out_sysctl: + del_timer_sync(&cmm_timer); + return rc; +} +module_init(cmm_init); + +static void __exit cmm_exit(void) +{ + unregister_sysctl_table(cmm_sysctl_header); +#ifdef CONFIG_CMM_IUCV + smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target); +#endif + unregister_pm_notifier(&cmm_power_notifier); + unregister_oom_notifier(&cmm_oom_nb); + kthread_stop(cmm_thread_ptr); + del_timer_sync(&cmm_timer); + cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); + cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); +} +module_exit(cmm_exit); + +MODULE_LICENSE("GPL"); diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c new file mode 100644 index 00000000..075ddada --- /dev/null +++ b/arch/s390/mm/extmem.c @@ -0,0 +1,778 @@ +/* + * File...........: arch/s390/mm/extmem.c + * Author(s)......: Carsten Otte <cotte@de.ibm.com> + * Rob M van der Heij <rvdheij@nl.ibm.com> + * Steven Shultz <shultzss@us.ibm.com> + * Bugreports.to..: <Linux390@de.ibm.com> + * (C) IBM Corporation 2002-2004 + */ + +#define KMSG_COMPONENT "extmem" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/bootmem.h> +#include <linux/ctype.h> +#include 
<linux/ioport.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/ebcdic.h> +#include <asm/errno.h> +#include <asm/extmem.h> +#include <asm/cpcmd.h> +#include <asm/setup.h> + +#define DCSS_LOADSHR 0x00 +#define DCSS_LOADNSR 0x04 +#define DCSS_PURGESEG 0x08 +#define DCSS_FINDSEG 0x0c +#define DCSS_LOADNOLY 0x10 +#define DCSS_SEGEXT 0x18 +#define DCSS_LOADSHRX 0x20 +#define DCSS_LOADNSRX 0x24 +#define DCSS_FINDSEGX 0x2c +#define DCSS_SEGEXTX 0x38 +#define DCSS_FINDSEGA 0x0c + +struct qrange { + unsigned long start; /* last byte type */ + unsigned long end; /* last byte reserved */ +}; + +struct qout64 { + unsigned long segstart; + unsigned long segend; + int segcnt; + int segrcnt; + struct qrange range[6]; +}; + +#ifdef CONFIG_64BIT +struct qrange_old { + unsigned int start; /* last byte type */ + unsigned int end; /* last byte reserved */ +}; + +/* output area format for the Diag x'64' old subcode x'18' */ +struct qout64_old { + int segstart; + int segend; + int segcnt; + int segrcnt; + struct qrange_old range[6]; +}; +#endif + +struct qin64 { + char qopcode; + char rsrv1[3]; + char qrcode; + char rsrv2[3]; + char qname[8]; + unsigned int qoutptr; + short int qoutlen; +}; + +struct dcss_segment { + struct list_head list; + char dcss_name[8]; + char res_name[15]; + unsigned long start_addr; + unsigned long end; + atomic_t ref_count; + int do_nonshared; + unsigned int vm_segtype; + struct qrange range[6]; + int segcnt; + struct resource *res; +}; + +static DEFINE_MUTEX(dcss_lock); +static LIST_HEAD(dcss_list); +static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", + "EW/EN-MIXED" }; +static int loadshr_scode, loadnsr_scode, findseg_scode; +static int segext_scode, purgeseg_scode; +static int scode_set; + +/* set correct Diag x'64' subcodes. 
/*
 * set correct Diag x'64' subcodes.
 *
 * On 64-bit kernels a FINDSEGX request for a dummy name is issued as a
 * probe.  rc receives the condition code of the diagnose, or 3 when the
 * fixup code at label 1 runs.  Any value other than 3 selects the new
 * (*X) subcodes; otherwise, and always on 31-bit kernels, the old
 * subcodes are selected.
 *
 * Returns 0 on success or -ENOMEM if the probe buffer cannot be allocated.
 */
static int
dcss_set_subcodes(void)
{
#ifdef CONFIG_64BIT
	char *name = kmalloc(8 * sizeof(char), GFP_KERNEL | GFP_DMA);
	unsigned long rx, ry;
	int rc;

	if (name == NULL)
		return -ENOMEM;

	rx = (unsigned long) name;
	ry = DCSS_FINDSEGX;

	/* only the 6 bytes of "dummy\0" are written into the 8 byte buffer */
	strcpy(name, "dummy");
	/*
	 * NOTE(review): EX_TABLE(0b, 1b) presumably makes a program check
	 * raised by the diagnose resume at label 1, which loads 3 into rc -
	 * confirm against the EX_TABLE definition.
	 */
	asm volatile(
		" diag %0,%1,0x64\n"
		"0: ipm %2\n"
		" srl %2,28\n"
		" j 2f\n"
		"1: la %2,3\n"
		"2:\n"
		EX_TABLE(0b, 1b)
		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");

	kfree(name);
	/* Diag x'64' new subcodes are supported, set to new subcodes */
	if (rc != 3) {
		loadshr_scode = DCSS_LOADSHRX;
		loadnsr_scode = DCSS_LOADNSRX;
		purgeseg_scode = DCSS_PURGESEG;
		findseg_scode = DCSS_FINDSEGX;
		segext_scode = DCSS_SEGEXTX;
		return 0;
	}
#endif
	/* Diag x'64' new subcodes are not supported, set to old subcodes */
	/* note: the old set uses DCSS_LOADNOLY, not DCSS_LOADSHR, here */
	loadshr_scode = DCSS_LOADNOLY;
	loadnsr_scode = DCSS_LOADNSR;
	purgeseg_scode = DCSS_PURGESEG;
	findseg_scode = DCSS_FINDSEG;
	segext_scode = DCSS_SEGEXT;
	return 0;
}

/*
 * Create the 8 bytes, ebcdic VM segment name from
 * an ascii name.
 *
 * At most 8 characters of 'name' are copied and upper-cased, shorter names
 * are padded with blanks, and the result is converted in place with
 * ASCEBC().  No terminating NUL is written to 'dcss_name'.
 */
static void
dcss_mkname(char *name, char *dcss_name)
{
	int i;

	for (i = 0; i < 8; i++) {
		if (name[i] == '\0')
			break;
		dcss_name[i] = toupper(name[i]);
	};
	/* blank-pad the remainder */
	for (; i < 8; i++)
		dcss_name[i] = ' ';
	ASCEBC(dcss_name, 8);
}


/*
 * search all segments in dcss_list, and return the one
 * named *name. If not found, return NULL.
 *
 * The caller must hold dcss_lock (enforced with BUG_ON).  'name' is
 * converted with dcss_mkname() first, so the comparison is done on the
 * 8 byte EBCDIC form.
 */
static struct dcss_segment *
segment_by_name (char *name)
{
	char dcss_name[9];	/* only the first 8 bytes are used */
	struct list_head *l;
	struct dcss_segment *tmp, *retval = NULL;

	BUG_ON(!mutex_is_locked(&dcss_lock));
	dcss_mkname (name, dcss_name);
	list_for_each (l, &dcss_list) {
		tmp = list_entry (l, struct dcss_segment, list);
		if (memcmp(tmp->dcss_name, dcss_name, 8) == 0) {
			retval = tmp;
			break;
		}
	}
	return retval;
}
/*
 * Perform a function on a dcss segment.
 *
 * func      : pointer to the subcode variable to use.  It is dereferenced
 *             only after dcss_set_subcodes() has run, so callers can pass
 *             the address of a *_scode variable before it is initialised.
 * parameter : value placed in the first register operand of the diagnose
 * ret1/ret2 : receive the two register operands as left by the diagnose
 *
 * Returns the negative error code of dcss_set_subcodes() if the subcodes
 * could not be set up, otherwise the value obtained with "ipm; srl 28"
 * after the diagnose (callers store it as diag_cc).
 */
static inline int
dcss_diag(int *func, void *parameter,
           unsigned long *ret1, unsigned long *ret2)
{
	unsigned long rx, ry;
	int rc;

	/* select the subcode set on first use */
	if (scode_set == 0) {
		rc = dcss_set_subcodes();
		if (rc < 0)
			return rc;
		scode_set = 1;
	}
	rx = (unsigned long) parameter;
	ry = (unsigned long) *func;

#ifdef CONFIG_64BIT
	/* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */
	if (*func > DCSS_SEGEXT)
		asm volatile(
			" diag %0,%1,0x64\n"
			" ipm %2\n"
			" srl %2,28\n"
			: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
	/* 31-bit Diag x'64' old subcode, switch to 31-bit addressing mode */
	else
		asm volatile(
			" sam31\n"
			" diag %0,%1,0x64\n"
			" sam64\n"
			" ipm %2\n"
			" srl %2,28\n"
			: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
#else
	asm volatile(
		" diag %0,%1,0x64\n"
		" ipm %2\n"
		" srl %2,28\n"
		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
#endif
	*ret1 = rx;
	*ret2 = ry;
	return rc;
}

/*
 * Map a return code delivered by the diagnose to an errno value:
 * 44 becomes -ENOENT, everything else -EIO.
 */
static inline int
dcss_diag_translate_rc (int vm_rc) {
	if (vm_rc == 44)
		return -ENOENT;
	return -EIO;
}
+ * fills start_address, end and vm_segtype fields + */ +static int +query_segment_type (struct dcss_segment *seg) +{ + unsigned long dummy, vmrc; + int diag_cc, rc, i; + struct qout64 *qout; + struct qin64 *qin; + + qin = kmalloc(sizeof(*qin), GFP_KERNEL | GFP_DMA); + qout = kmalloc(sizeof(*qout), GFP_KERNEL | GFP_DMA); + if ((qin == NULL) || (qout == NULL)) { + rc = -ENOMEM; + goto out_free; + } + + /* initialize diag input parameters */ + qin->qopcode = DCSS_FINDSEGA; + qin->qoutptr = (unsigned long) qout; + qin->qoutlen = sizeof(struct qout64); + memcpy (qin->qname, seg->dcss_name, 8); + + diag_cc = dcss_diag(&segext_scode, qin, &dummy, &vmrc); + + if (diag_cc < 0) { + rc = diag_cc; + goto out_free; + } + if (diag_cc > 1) { + pr_warning("Querying a DCSS type failed with rc=%ld\n", vmrc); + rc = dcss_diag_translate_rc (vmrc); + goto out_free; + } + +#ifdef CONFIG_64BIT + /* Only old format of output area of Diagnose x'64' is supported, + copy data for the new format. */ + if (segext_scode == DCSS_SEGEXT) { + struct qout64_old *qout_old; + qout_old = kzalloc(sizeof(*qout_old), GFP_KERNEL | GFP_DMA); + if (qout_old == NULL) { + rc = -ENOMEM; + goto out_free; + } + memcpy(qout_old, qout, sizeof(struct qout64_old)); + qout->segstart = (unsigned long) qout_old->segstart; + qout->segend = (unsigned long) qout_old->segend; + qout->segcnt = qout_old->segcnt; + qout->segrcnt = qout_old->segrcnt; + + if (qout->segcnt > 6) + qout->segrcnt = 6; + for (i = 0; i < qout->segrcnt; i++) { + qout->range[i].start = + (unsigned long) qout_old->range[i].start; + qout->range[i].end = + (unsigned long) qout_old->range[i].end; + } + kfree(qout_old); + } +#endif + if (qout->segcnt > 6) { + rc = -EOPNOTSUPP; + goto out_free; + } + + if (qout->segcnt == 1) { + seg->vm_segtype = qout->range[0].start & 0xff; + } else { + /* multi-part segment. 
only one type supported here: + - all parts are contiguous + - all parts are either EW or EN type + - maximum 6 parts allowed */ + unsigned long start = qout->segstart >> PAGE_SHIFT; + for (i=0; i<qout->segcnt; i++) { + if (((qout->range[i].start & 0xff) != SEG_TYPE_EW) && + ((qout->range[i].start & 0xff) != SEG_TYPE_EN)) { + rc = -EOPNOTSUPP; + goto out_free; + } + if (start != qout->range[i].start >> PAGE_SHIFT) { + rc = -EOPNOTSUPP; + goto out_free; + } + start = (qout->range[i].end >> PAGE_SHIFT) + 1; + } + seg->vm_segtype = SEG_TYPE_EWEN; + } + + /* analyze diag output and update seg */ + seg->start_addr = qout->segstart; + seg->end = qout->segend; + + memcpy (seg->range, qout->range, 6*sizeof(struct qrange)); + seg->segcnt = qout->segcnt; + + rc = 0; + + out_free: + kfree(qin); + kfree(qout); + return rc; +} + +/* + * get info about a segment + * possible return values: + * -ENOSYS : we are not running on VM + * -EIO : could not perform query diagnose + * -ENOENT : no such segment + * -EOPNOTSUPP: multi-part segment cannot be used with linux + * -ENOMEM : out of memory + * 0 .. 
6 : type of segment as defined in include/asm-s390/extmem.h + */ +int +segment_type (char* name) +{ + int rc; + struct dcss_segment seg; + + if (!MACHINE_IS_VM) + return -ENOSYS; + + dcss_mkname(name, seg.dcss_name); + rc = query_segment_type (&seg); + if (rc < 0) + return rc; + return seg.vm_segtype; +} + +/* + * check if segment collides with other segments that are currently loaded + * returns 1 if this is the case, 0 if no collision was found + */ +static int +segment_overlaps_others (struct dcss_segment *seg) +{ + struct list_head *l; + struct dcss_segment *tmp; + + BUG_ON(!mutex_is_locked(&dcss_lock)); + list_for_each(l, &dcss_list) { + tmp = list_entry(l, struct dcss_segment, list); + if ((tmp->start_addr >> 20) > (seg->end >> 20)) + continue; + if ((tmp->end >> 20) < (seg->start_addr >> 20)) + continue; + if (seg == tmp) + continue; + return 1; + } + return 0; +} + +/* + * real segment loading function, called from segment_load + */ +static int +__segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *end) +{ + unsigned long start_addr, end_addr, dummy; + struct dcss_segment *seg; + int rc, diag_cc; + + start_addr = end_addr = 0; + seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA); + if (seg == NULL) { + rc = -ENOMEM; + goto out; + } + dcss_mkname (name, seg->dcss_name); + rc = query_segment_type (seg); + if (rc < 0) + goto out_free; + + if (loadshr_scode == DCSS_LOADSHRX) { + if (segment_overlaps_others(seg)) { + rc = -EBUSY; + goto out_free; + } + } + + rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); + + if (rc) + goto out_free; + + seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL); + if (seg->res == NULL) { + rc = -ENOMEM; + goto out_shared; + } + seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + seg->res->start = seg->start_addr; + seg->res->end = seg->end; + memcpy(&seg->res_name, seg->dcss_name, 8); + EBCASC(seg->res_name, 8); + seg->res_name[8] = '\0'; + strncat(seg->res_name, " (DCSS)", 7); 
+ seg->res->name = seg->res_name; + rc = seg->vm_segtype; + if (rc == SEG_TYPE_SC || + ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared)) + seg->res->flags |= IORESOURCE_READONLY; + if (request_resource(&iomem_resource, seg->res)) { + rc = -EBUSY; + kfree(seg->res); + goto out_shared; + } + + if (do_nonshared) + diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name, + &start_addr, &end_addr); + else + diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name, + &start_addr, &end_addr); + if (diag_cc < 0) { + dcss_diag(&purgeseg_scode, seg->dcss_name, + &dummy, &dummy); + rc = diag_cc; + goto out_resource; + } + if (diag_cc > 1) { + pr_warning("Loading DCSS %s failed with rc=%ld\n", name, + end_addr); + rc = dcss_diag_translate_rc(end_addr); + dcss_diag(&purgeseg_scode, seg->dcss_name, + &dummy, &dummy); + goto out_resource; + } + seg->start_addr = start_addr; + seg->end = end_addr; + seg->do_nonshared = do_nonshared; + atomic_set(&seg->ref_count, 1); + list_add(&seg->list, &dcss_list); + *addr = seg->start_addr; + *end = seg->end; + if (do_nonshared) + pr_info("DCSS %s of range %p to %p and type %s loaded as " + "exclusive-writable\n", name, (void*) seg->start_addr, + (void*) seg->end, segtype_string[seg->vm_segtype]); + else { + pr_info("DCSS %s of range %p to %p and type %s loaded in " + "shared access mode\n", name, (void*) seg->start_addr, + (void*) seg->end, segtype_string[seg->vm_segtype]); + } + goto out; + out_resource: + release_resource(seg->res); + kfree(seg->res); + out_shared: + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); + out_free: + kfree(seg); + out: + return rc; +} + +/* + * this function loads a DCSS segment + * name : name of the DCSS + * do_nonshared : 0 indicates that the dcss should be shared with other linux images + * 1 indicates that the dcss should be exclusive for this linux image + * addr : will be filled with start address of the segment + * end : will be filled with end address of the segment + * return 
values: + * -ENOSYS : we are not running on VM + * -EIO : could not perform query or load diagnose + * -ENOENT : no such segment + * -EOPNOTSUPP: multi-part segment cannot be used with linux + * -ENOSPC : segment cannot be used (overlaps with storage) + * -EBUSY : segment can temporarily not be used (overlaps with dcss) + * -ERANGE : segment cannot be used (exceeds kernel mapping range) + * -EPERM : segment is currently loaded with incompatible permissions + * -ENOMEM : out of memory + * 0 .. 6 : type of segment as defined in include/asm-s390/extmem.h + */ +int +segment_load (char *name, int do_nonshared, unsigned long *addr, + unsigned long *end) +{ + struct dcss_segment *seg; + int rc; + + if (!MACHINE_IS_VM) + return -ENOSYS; + + mutex_lock(&dcss_lock); + seg = segment_by_name (name); + if (seg == NULL) + rc = __segment_load (name, do_nonshared, addr, end); + else { + if (do_nonshared == seg->do_nonshared) { + atomic_inc(&seg->ref_count); + *addr = seg->start_addr; + *end = seg->end; + rc = seg->vm_segtype; + } else { + *addr = *end = 0; + rc = -EPERM; + } + } + mutex_unlock(&dcss_lock); + return rc; +} + +/* + * this function modifies the shared state of a DCSS segment. note that + * name : name of the DCSS + * do_nonshared : 0 indicates that the dcss should be shared with other linux images + * 1 indicates that the dcss should be exclusive for this linux image + * return values: + * -EIO : could not perform load diagnose (segment gone!) + * -ENOENT : no such segment (segment gone!) 
+ * -EAGAIN : segment is in use by other exploiters, try later + * -EINVAL : no segment with the given name is currently loaded - name invalid + * -EBUSY : segment can temporarily not be used (overlaps with dcss) + * 0 : operation succeeded + */ +int +segment_modify_shared (char *name, int do_nonshared) +{ + struct dcss_segment *seg; + unsigned long start_addr, end_addr, dummy; + int rc, diag_cc; + + start_addr = end_addr = 0; + mutex_lock(&dcss_lock); + seg = segment_by_name (name); + if (seg == NULL) { + rc = -EINVAL; + goto out_unlock; + } + if (do_nonshared == seg->do_nonshared) { + pr_info("DCSS %s is already in the requested access " + "mode\n", name); + rc = 0; + goto out_unlock; + } + if (atomic_read (&seg->ref_count) != 1) { + pr_warning("DCSS %s is in use and cannot be reloaded\n", + name); + rc = -EAGAIN; + goto out_unlock; + } + release_resource(seg->res); + if (do_nonshared) + seg->res->flags &= ~IORESOURCE_READONLY; + else + if (seg->vm_segtype == SEG_TYPE_SR || + seg->vm_segtype == SEG_TYPE_ER) + seg->res->flags |= IORESOURCE_READONLY; + + if (request_resource(&iomem_resource, seg->res)) { + pr_warning("DCSS %s overlaps with used memory resources " + "and cannot be reloaded\n", name); + rc = -EBUSY; + kfree(seg->res); + goto out_del_mem; + } + + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); + if (do_nonshared) + diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name, + &start_addr, &end_addr); + else + diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name, + &start_addr, &end_addr); + if (diag_cc < 0) { + rc = diag_cc; + goto out_del_res; + } + if (diag_cc > 1) { + pr_warning("Reloading DCSS %s failed with rc=%ld\n", name, + end_addr); + rc = dcss_diag_translate_rc(end_addr); + goto out_del_res; + } + seg->start_addr = start_addr; + seg->end = end_addr; + seg->do_nonshared = do_nonshared; + rc = 0; + goto out_unlock; + out_del_res: + release_resource(seg->res); + kfree(seg->res); + out_del_mem: + vmem_remove_mapping(seg->start_addr, seg->end - 
seg->start_addr + 1); + list_del(&seg->list); + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); + kfree(seg); + out_unlock: + mutex_unlock(&dcss_lock); + return rc; +} + +/* + * Decrease the use count of a DCSS segment and remove + * it from the address space if nobody is using it + * any longer. + */ +void +segment_unload(char *name) +{ + unsigned long dummy; + struct dcss_segment *seg; + + if (!MACHINE_IS_VM) + return; + + mutex_lock(&dcss_lock); + seg = segment_by_name (name); + if (seg == NULL) { + pr_err("Unloading unknown DCSS %s failed\n", name); + goto out_unlock; + } + if (atomic_dec_return(&seg->ref_count) != 0) + goto out_unlock; + release_resource(seg->res); + kfree(seg->res); + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); + list_del(&seg->list); + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); + kfree(seg); +out_unlock: + mutex_unlock(&dcss_lock); +} + +/* + * save segment content permanently + */ +void +segment_save(char *name) +{ + struct dcss_segment *seg; + char cmd1[160]; + char cmd2[80]; + int i, response; + + if (!MACHINE_IS_VM) + return; + + mutex_lock(&dcss_lock); + seg = segment_by_name (name); + + if (seg == NULL) { + pr_err("Saving unknown DCSS %s failed\n", name); + goto out; + } + + sprintf(cmd1, "DEFSEG %s", name); + for (i=0; i<seg->segcnt; i++) { + sprintf(cmd1+strlen(cmd1), " %lX-%lX %s", + seg->range[i].start >> PAGE_SHIFT, + seg->range[i].end >> PAGE_SHIFT, + segtype_string[seg->range[i].start & 0xff]); + } + sprintf(cmd2, "SAVESEG %s", name); + response = 0; + cpcmd(cmd1, NULL, 0, &response); + if (response) { + pr_err("Saving a DCSS failed with DEFSEG response code " + "%i\n", response); + goto out; + } + cpcmd(cmd2, NULL, 0, &response); + if (response) { + pr_err("Saving a DCSS failed with SAVESEG response code " + "%i\n", response); + goto out; + } +out: + mutex_unlock(&dcss_lock); +} + +/* + * print appropriate error message for segment_load()/segment_type() + * return code + */ 
+void segment_warning(int rc, char *seg_name) +{ + switch (rc) { + case -ENOENT: + pr_err("DCSS %s cannot be loaded or queried\n", seg_name); + break; + case -ENOSYS: + pr_err("DCSS %s cannot be loaded or queried without " + "z/VM\n", seg_name); + break; + case -EIO: + pr_err("Loading or querying DCSS %s resulted in a " + "hardware error\n", seg_name); + break; + case -EOPNOTSUPP: + pr_err("DCSS %s has multiple page ranges and cannot be " + "loaded or queried\n", seg_name); + break; + case -ENOSPC: + pr_err("DCSS %s overlaps with used storage and cannot " + "be loaded\n", seg_name); + break; + case -EBUSY: + pr_err("%s needs used memory resources and cannot be " + "loaded or queried\n", seg_name); + break; + case -EPERM: + pr_err("DCSS %s is already loaded in a different access " + "mode\n", seg_name); + break; + case -ENOMEM: + pr_err("There is not enough memory to load or query " + "DCSS %s\n", seg_name); + break; + case -ERANGE: + pr_err("DCSS %s exceeds the kernel mapping range (%lu) " + "and cannot be loaded\n", seg_name, VMEM_MAX_PHYS); + break; + default: + break; + } +} + +EXPORT_SYMBOL(segment_load); +EXPORT_SYMBOL(segment_unload); +EXPORT_SYMBOL(segment_save); +EXPORT_SYMBOL(segment_type); +EXPORT_SYMBOL(segment_modify_shared); +EXPORT_SYMBOL(segment_warning); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c new file mode 100644 index 00000000..4e668600 --- /dev/null +++ b/arch/s390/mm/fault.c @@ -0,0 +1,664 @@ +/* + * arch/s390/mm/fault.c + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Hartmut Penner (hp@de.ibm.com) + * Ulrich Weigand (uweigand@de.ibm.com) + * + * Derived from "arch/i386/mm/fault.c" + * Copyright (C) 1995 Linus Torvalds + */ + +#include <linux/kernel_stat.h> +#include <linux/perf_event.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include 
<linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/compat.h> +#include <linux/smp.h> +#include <linux/kdebug.h> +#include <linux/init.h> +#include <linux/console.h> +#include <linux/module.h> +#include <linux/hardirq.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/hugetlb.h> +#include <asm/asm-offsets.h> +#include <asm/pgtable.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/facility.h> +#include "../kernel/entry.h" + +#ifndef CONFIG_64BIT +#define __FAIL_ADDR_MASK 0x7ffff000 +#define __SUBCODE_MASK 0x0200 +#define __PF_RES_FIELD 0ULL +#else /* CONFIG_64BIT */ +#define __FAIL_ADDR_MASK -4096L +#define __SUBCODE_MASK 0x0600 +#define __PF_RES_FIELD 0x8000000000000000ULL +#endif /* CONFIG_64BIT */ + +#define VM_FAULT_BADCONTEXT 0x010000 +#define VM_FAULT_BADMAP 0x020000 +#define VM_FAULT_BADACCESS 0x040000 + +static unsigned long store_indication; + +void fault_init(void) +{ + if (test_facility(2) && test_facility(75)) + store_indication = 0xc00; +} + +static inline int notify_page_fault(struct pt_regs *regs) +{ + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (kprobes_built_in() && !user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; + preempt_enable(); + } + return ret; +} + + +/* + * Unlock any spinlocks which will prevent us from getting the + * message out. + */ +void bust_spinlocks(int yes) +{ + if (yes) { + oops_in_progress = 1; + } else { + int loglevel_save = console_loglevel; + console_unblank(); + oops_in_progress = 0; + /* + * OK, the message is on the console. Now we call printk() + * without oops_in_progress set so that printk will give klogd + * a poke. Hold onto your hats... + */ + console_loglevel = 15; + printk(" "); + console_loglevel = loglevel_save; + } +} + +/* + * Returns the address space associated with the fault. + * Returns 0 for kernel space and 1 for user space. 
+ */ +static inline int user_space_fault(unsigned long trans_exc_code) +{ + /* + * The lowest two bits of the translation exception + * identification indicate which paging table was used. + */ + trans_exc_code &= 3; + if (trans_exc_code == 2) + /* Access via secondary space, set_fs setting decides */ + return current->thread.mm_segment.ar4; + if (user_mode == HOME_SPACE_MODE) + /* User space if the access has been done via home space. */ + return trans_exc_code == 3; + /* + * If the user space is not the home space the kernel runs in home + * space. Access via secondary space has already been covered, + * access via primary space or access register is from user space + * and access via home space is from the kernel. + */ + return trans_exc_code != 3; +} + +static inline void report_user_fault(struct pt_regs *regs, long signr) +{ + if ((task_pid_nr(current) > 1) && !show_unhandled_signals) + return; + if (!unhandled_signal(current, signr)) + return; + if (!printk_ratelimit()) + return; + printk(KERN_ALERT "User process fault: interruption code 0x%X ", + regs->int_code); + print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); + printk(KERN_CONT "\n"); + printk(KERN_ALERT "failing address: %lX\n", + regs->int_parm_long & __FAIL_ADDR_MASK); + show_regs(regs); +} + +/* + * Send SIGSEGV to task. This is an external routine + * to keep the stack usage of do_page_fault small. + */ +static noinline void do_sigsegv(struct pt_regs *regs, int si_code) +{ + struct siginfo si; + + report_user_fault(regs, SIGSEGV); + si.si_signo = SIGSEGV; + si.si_code = si_code; + si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); + force_sig_info(SIGSEGV, &si, current); +} + +static noinline void do_no_context(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + unsigned long address; + + /* Are we prepared to handle this kernel fault? 
*/ + fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); + if (fixup) { + regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; + return; + } + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + address = regs->int_parm_long & __FAIL_ADDR_MASK; + if (!user_space_fault(regs->int_parm_long)) + printk(KERN_ALERT "Unable to handle kernel pointer dereference" + " at virtual kernel address %p\n", (void *)address); + else + printk(KERN_ALERT "Unable to handle kernel paging request" + " at virtual user address %p\n", (void *)address); + + die(regs, "Oops"); + do_exit(SIGKILL); +} + +static noinline void do_low_address(struct pt_regs *regs) +{ + /* Low-address protection hit in kernel mode means + NULL pointer write access in kernel mode. */ + if (regs->psw.mask & PSW_MASK_PSTATE) { + /* Low-address protection hit in user mode 'cannot happen'. */ + die (regs, "Low-address protection"); + do_exit(SIGKILL); + } + + do_no_context(regs); +} + +static noinline void do_sigbus(struct pt_regs *regs) +{ + struct task_struct *tsk = current; + struct siginfo si; + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRERR; + si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); + force_sig_info(SIGBUS, &si, tsk); +} + +static noinline void do_fault_error(struct pt_regs *regs, int fault) +{ + int si_code; + + switch (fault) { + case VM_FAULT_BADACCESS: + case VM_FAULT_BADMAP: + /* Bad memory access. Check if it is kernel or user space. */ + if (regs->psw.mask & PSW_MASK_PSTATE) { + /* User mode accesses just cause a SIGSEGV */ + si_code = (fault == VM_FAULT_BADMAP) ? 
+ SEGV_MAPERR : SEGV_ACCERR; + do_sigsegv(regs, si_code); + return; + } + case VM_FAULT_BADCONTEXT: + do_no_context(regs); + break; + default: /* fault & VM_FAULT_ERROR */ + if (fault & VM_FAULT_OOM) { + if (!(regs->psw.mask & PSW_MASK_PSTATE)) + do_no_context(regs); + else + pagefault_out_of_memory(); + } else if (fault & VM_FAULT_SIGBUS) { + /* Kernel mode? Handle exceptions or die */ + if (!(regs->psw.mask & PSW_MASK_PSTATE)) + do_no_context(regs); + else + do_sigbus(regs); + } else + BUG(); + break; + } +} + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + * + * interruption code (int_code): + * 04 Protection -> Write-Protection (suprression) + * 10 Segment translation -> Not present (nullification) + * 11 Page translation -> Not present (nullification) + * 3b Region third trans. -> Not present (nullification) + */ +static inline int do_exception(struct pt_regs *regs, int access) +{ + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct *vma; + unsigned long trans_exc_code; + unsigned long address; + unsigned int flags; + int fault; + + if (notify_page_fault(regs)) + return 0; + + tsk = current; + mm = tsk->mm; + trans_exc_code = regs->int_parm_long; + + /* + * Verify that the fault happened in user space, that + * we are not in an interrupt and that there is a + * user context. 
+ */ + fault = VM_FAULT_BADCONTEXT; + if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) + goto out; + + address = trans_exc_code & __FAIL_ADDR_MASK; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + flags = FAULT_FLAG_ALLOW_RETRY; + if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) + flags |= FAULT_FLAG_WRITE; + down_read(&mm->mmap_sem); + +#ifdef CONFIG_PGSTE + if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { + address = __gmap_fault(address, + (struct gmap *) S390_lowcore.gmap); + if (address == -EFAULT) { + fault = VM_FAULT_BADMAP; + goto out_up; + } + if (address == -ENOMEM) { + fault = VM_FAULT_OOM; + goto out_up; + } + } +#endif + +retry: + fault = VM_FAULT_BADMAP; + vma = find_vma(mm, address); + if (!vma) + goto out_up; + + if (unlikely(vma->vm_start > address)) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out_up; + if (expand_stack(vma, address)) + goto out_up; + } + + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ + fault = VM_FAULT_BADACCESS; + if (unlikely(!(vma->vm_flags & access))) + goto out_up; + + if (is_vm_hugetlb_page(vma)) + address &= HPAGE_MASK; + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + fault = handle_mm_fault(mm, vma, address, flags); + if (unlikely(fault & VM_FAULT_ERROR)) + goto out_up; + + /* + * Major/minor page fault accounting is only done on the + * initial attempt. If we go through a retry, it is extremely + * likely that the page will be found in page cache at that point. 
+ */ + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); + } + if (fault & VM_FAULT_RETRY) { + /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk + * of starvation. */ + flags &= ~FAULT_FLAG_ALLOW_RETRY; + down_read(&mm->mmap_sem); + goto retry; + } + } + /* + * The instruction that caused the program check will + * be repeated. Don't signal single step via SIGTRAP. + */ + clear_tsk_thread_flag(tsk, TIF_PER_TRAP); + fault = 0; +out_up: + up_read(&mm->mmap_sem); +out: + return fault; +} + +void __kprobes do_protection_exception(struct pt_regs *regs) +{ + unsigned long trans_exc_code; + int fault; + + trans_exc_code = regs->int_parm_long; + /* Protection exception is suppressing, decrement psw address. */ + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); + /* + * Check for low-address protection. This needs to be treated + * as a special case because the translation exception code + * field is not guaranteed to contain valid data in this case. 
+ */ + if (unlikely(!(trans_exc_code & 4))) { + do_low_address(regs); + return; + } + fault = do_exception(regs, VM_WRITE); + if (unlikely(fault)) + do_fault_error(regs, fault); +} + +void __kprobes do_dat_exception(struct pt_regs *regs) +{ + int access, fault; + + access = VM_READ | VM_EXEC | VM_WRITE; + fault = do_exception(regs, access); + if (unlikely(fault)) + do_fault_error(regs, fault); +} + +#ifdef CONFIG_64BIT +void __kprobes do_asce_exception(struct pt_regs *regs) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long trans_exc_code; + + trans_exc_code = regs->int_parm_long; + if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) + goto no_context; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK); + up_read(&mm->mmap_sem); + + if (vma) { + update_mm(mm, current); + return; + } + + /* User mode accesses just cause a SIGSEGV */ + if (regs->psw.mask & PSW_MASK_PSTATE) { + do_sigsegv(regs, SEGV_MAPERR); + return; + } + +no_context: + do_no_context(regs); +} +#endif + +int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) +{ + struct pt_regs regs; + int access, fault; + + regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK; + if (!irqs_disabled()) + regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; + regs.psw.addr = (unsigned long) __builtin_return_address(0); + regs.psw.addr |= PSW_ADDR_AMODE; + regs.int_code = pgm_int_code; + regs.int_parm_long = (uaddr & PAGE_MASK) | 2; + access = write ? VM_WRITE : VM_READ; + fault = do_exception(®s, access); + if (unlikely(fault)) { + if (fault & VM_FAULT_OOM) + return -EFAULT; + else if (fault & VM_FAULT_SIGBUS) + do_sigbus(®s); + } + return fault ? -EFAULT : 0; +} + +#ifdef CONFIG_PFAULT +/* + * 'pfault' pseudo page faults routines. 
+ */ +static int pfault_disable; + +static int __init nopfault(char *str) +{ + pfault_disable = 1; + return 1; +} + +__setup("nopfault", nopfault); + +struct pfault_refbk { + u16 refdiagc; + u16 reffcode; + u16 refdwlen; + u16 refversn; + u64 refgaddr; + u64 refselmk; + u64 refcmpmk; + u64 reserved; +} __attribute__ ((packed, aligned(8))); + +int pfault_init(void) +{ + struct pfault_refbk refbk = { + .refdiagc = 0x258, + .reffcode = 0, + .refdwlen = 5, + .refversn = 2, + .refgaddr = __LC_CURRENT_PID, + .refselmk = 1ULL << 48, + .refcmpmk = 1ULL << 48, + .reserved = __PF_RES_FIELD }; + int rc; + + if (pfault_disable) + return -1; + asm volatile( + " diag %1,%0,0x258\n" + "0: j 2f\n" + "1: la %0,8\n" + "2:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc"); + return rc; +} + +void pfault_fini(void) +{ + struct pfault_refbk refbk = { + .refdiagc = 0x258, + .reffcode = 1, + .refdwlen = 5, + .refversn = 2, + }; + + if (pfault_disable) + return; + asm volatile( + " diag %0,0,0x258\n" + "0:\n" + EX_TABLE(0b,0b) + : : "a" (&refbk), "m" (refbk) : "cc"); +} + +static DEFINE_SPINLOCK(pfault_lock); +static LIST_HEAD(pfault_list); + +static void pfault_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct task_struct *tsk; + __u16 subcode; + pid_t pid; + + /* + * Get the external interruption subcode & pfault + * initial/completion signal bit. VM stores this + * in the 'cpu address' field associated with the + * external interrupt. + */ + subcode = ext_code.subcode; + if ((subcode & 0xff00) != __SUBCODE_MASK) + return; + kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; + if (subcode & 0x0080) { + /* Get the token (= pid of the affected task). */ + pid = sizeof(void *) == 4 ? 
param32 : param64; + rcu_read_lock(); + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return; + } else { + tsk = current; + } + spin_lock(&pfault_lock); + if (subcode & 0x0080) { + /* signal bit is set -> a page has been swapped in by VM */ + if (tsk->thread.pfault_wait == 1) { + /* Initial interrupt was faster than the completion + * interrupt. pfault_wait is valid. Set pfault_wait + * back to zero and wake up the process. This can + * safely be done because the task is still sleeping + * and can't produce new pfaults. */ + tsk->thread.pfault_wait = 0; + list_del(&tsk->thread.list); + wake_up_process(tsk); + put_task_struct(tsk); + } else { + /* Completion interrupt was faster than initial + * interrupt. Set pfault_wait to -1 so the initial + * interrupt doesn't put the task to sleep. + * If the task is not running, ignore the completion + * interrupt since it must be a leftover of a PFAULT + * CANCEL operation which didn't remove all pending + * completion interrupts. */ + if (tsk->state == TASK_RUNNING) + tsk->thread.pfault_wait = -1; + } + put_task_struct(tsk); + } else { + /* signal bit not set -> a real page is missing. */ + if (tsk->thread.pfault_wait == 1) { + /* Already on the list with a reference: put to sleep */ + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_tsk_need_resched(tsk); + } else if (tsk->thread.pfault_wait == -1) { + /* Completion interrupt was faster than the initial + * interrupt (pfault_wait == -1). Set pfault_wait + * back to zero and exit. */ + tsk->thread.pfault_wait = 0; + } else { + /* Initial interrupt arrived before completion + * interrupt. Let the task sleep. + * An extra task reference is needed since a different + * cpu may set the task state to TASK_RUNNING again + * before the scheduler is reached. 
*/ + get_task_struct(tsk); + tsk->thread.pfault_wait = 1; + list_add(&tsk->thread.list, &pfault_list); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_tsk_need_resched(tsk); + } + } + spin_unlock(&pfault_lock); +} + +static int __cpuinit pfault_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + struct thread_struct *thread, *next; + struct task_struct *tsk; + + switch (action) { + case CPU_DEAD: + case CPU_DEAD_FROZEN: + spin_lock_irq(&pfault_lock); + list_for_each_entry_safe(thread, next, &pfault_list, list) { + thread->pfault_wait = 0; + list_del(&thread->list); + tsk = container_of(thread, struct task_struct, thread); + wake_up_process(tsk); + put_task_struct(tsk); + } + spin_unlock_irq(&pfault_lock); + break; + default: + break; + } + return NOTIFY_OK; +} + +static int __init pfault_irq_init(void) +{ + int rc; + + rc = register_external_interrupt(0x2603, pfault_interrupt); + if (rc) + goto out_extint; + rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; + if (rc) + goto out_pfault; + service_subclass_irq_register(); + hotcpu_notifier(pfault_cpu_notify, 0); + return 0; + +out_pfault: + unregister_external_interrupt(0x2603, pfault_interrupt); +out_extint: + pfault_disable = 1; + return rc; +} +early_initcall(pfault_irq_init); + +#endif /* CONFIG_PFAULT */ diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c new file mode 100644 index 00000000..65cb06e2 --- /dev/null +++ b/arch/s390/mm/gup.c @@ -0,0 +1,236 @@ +/* + * Lockless get_user_pages_fast for s390 + * + * Copyright IBM Corp. 2010 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/vmstat.h> +#include <linux/pagemap.h> +#include <linux/rwsem.h> +#include <asm/pgtable.h> + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. 
+ */ +static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask; + pte_t *ptep, pte; + struct page *page; + + mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL; + + ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); + do { + pte = *ptep; + barrier(); + if ((pte_val(pte) & mask) != 0) + return 0; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + if (!page_cache_get_speculative(page)) + return 0; + if (unlikely(pte_val(pte) != pte_val(*ptep))) { + put_page(page); + return 0; + } + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + + return 1; +} + +static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + struct page *head, *page, *tail; + int refs; + + result = write ? 0 : _SEGMENT_ENTRY_RO; + mask = result | _SEGMENT_ENTRY_INV; + if ((pmd_val(pmd) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); + + refs = 0; + head = pmd_page(pmd); + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + /* + * Any tail page need their mapcount reference taken before we + * return. 
+ */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; + } + + return 1; +} + + +static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp, pmd; + + pmdp = (pmd_t *) pudp; +#ifdef CONFIG_64BIT + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pmdp = (pmd_t *) pud_deref(pud); + pmdp += pmd_index(addr); +#endif + do { + pmd = *pmdp; + barrier(); + next = pmd_addr_end(addr, end); + if (pmd_none(pmd)) + return 0; + if (unlikely(pmd_huge(pmd))) { + if (!gup_huge_pmd(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } else if (!gup_pte_range(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp, pud; + + pudp = (pud_t *) pgdp; +#ifdef CONFIG_64BIT + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pudp = (pud_t *) pgd_deref(pgd); + pudp += pud_index(addr); +#endif + do { + pud = *pudp; + barrier(); + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr)) + return 0; + } while (pudp++, addr = next, addr != end); + + return 1; +} + +/** + * get_user_pages_fast() - pin user pages in memory + * @start: starting user address + * @nr_pages: number of pages from start to pin + * @write: whether pages will be written to + * @pages: array that receives pointers to the pages pinned. + * Should be at least nr_pages long. + * + * Attempt to pin user pages in memory without taking mm->mmap_sem. + * If not successful, it will fall back to taking the lock and + * calling get_user_pages(). + * + * Returns number of pages pinned. 
This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. + */ +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp, pgd; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (end < start) + goto slow_irqon; + + /* + * local_irq_disable() doesn't prevent pagetable teardown, but does + * prevent the pagetables from being freed on s390. + * + * So long as we atomically load page table pointers versus teardown, + * we can follow the address down to the the page and take a ref on it. + */ + local_irq_disable(); + pgdp = pgd_offset(mm, addr); + do { + pgd = *pgdp; + barrier(); + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; + + { + int ret; +slow: + local_irq_enable(); +slow_irqon: + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + + return ret; + } +} diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c new file mode 100644 index 00000000..597bb2d2 --- /dev/null +++ b/arch/s390/mm/hugetlbpage.c @@ -0,0 +1,130 @@ +/* + * IBM System z Huge TLB Page Support for Kernel. + * + * Copyright 2007 IBM Corp. 
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#include <linux/mm.h> +#include <linux/hugetlb.h> + + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *pteptr, pte_t pteval) +{ + pmd_t *pmdp = (pmd_t *) pteptr; + unsigned long mask; + + if (!MACHINE_HAS_HPAGE) { + pteptr = (pte_t *) pte_page(pteval)[1].index; + mask = pte_val(pteval) & + (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); + pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; + } + + pmd_val(*pmdp) = pte_val(pteval); +} + +int arch_prepare_hugepage(struct page *page) +{ + unsigned long addr = page_to_phys(page); + pte_t pte; + pte_t *ptep; + int i; + + if (MACHINE_HAS_HPAGE) + return 0; + + ptep = (pte_t *) pte_alloc_one(&init_mm, addr); + if (!ptep) + return -ENOMEM; + + pte = mk_pte(page, PAGE_RW); + for (i = 0; i < PTRS_PER_PTE; i++) { + set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte); + pte_val(pte) += PAGE_SIZE; + } + page[1].index = (unsigned long) ptep; + return 0; +} + +void arch_release_hugepage(struct page *page) +{ + pte_t *ptep; + + if (MACHINE_HAS_HPAGE) + return; + + ptep = (pte_t *) page[1].index; + if (!ptep) + return; + page_table_free(&init_mm, (unsigned long *) ptep); + page[1].index = 0; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp = NULL; + + pgdp = pgd_offset(mm, addr); + pudp = pud_alloc(mm, pgdp, addr); + if (pudp) + pmdp = pmd_alloc(mm, pudp, addr); + return (pte_t *) pmdp; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp = NULL; + + pgdp = pgd_offset(mm, addr); + if (pgd_present(*pgdp)) { + pudp = pud_offset(pgdp, addr); + if (pud_present(*pudp)) + pmdp = pmd_offset(pudp, addr); + } + return (pte_t *) pmdp; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +struct page *follow_huge_addr(struct mm_struct *mm, 
unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + if (!MACHINE_HAS_HPAGE) + return 0; + + return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmdp, int write) +{ + struct page *page; + + if (!MACHINE_HAS_HPAGE) + return NULL; + + page = pmd_page(*pmdp); + if (page) + page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); + return page; +} diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c new file mode 100644 index 00000000..2bea0605 --- /dev/null +++ b/arch/s390/mm/init.c @@ -0,0 +1,262 @@ +/* + * arch/s390/mm/init.c + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Hartmut Penner (hp@de.ibm.com) + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1995 Linus Torvalds + */ + +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/pagemap.h> +#include <linux/bootmem.h> +#include <linux/pfn.h> +#include <linux/poison.h> +#include <linux/initrd.h> +#include <linux/export.h> +#include <linux/gfp.h> +#include <asm/processor.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/dma.h> +#include <asm/lowcore.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/sections.h> +#include <asm/ctl_reg.h> + +pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); + +unsigned long empty_zero_page, zero_page_mask; +EXPORT_SYMBOL(empty_zero_page); + +static unsigned long setup_zero_pages(void) +{ + struct cpuid cpu_id; + unsigned int order; + unsigned long size; + struct page *page; + int i; + + 
get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x9672: /* g5 */ + case 0x2064: /* z900 */ + case 0x2066: /* z900 */ + case 0x2084: /* z990 */ + case 0x2086: /* z990 */ + case 0x2094: /* z9-109 */ + case 0x2096: /* z9-109 */ + order = 0; + break; + case 0x2097: /* z10 */ + case 0x2098: /* z10 */ + default: + order = 2; + break; + } + + empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!empty_zero_page) + panic("Out of memory in setup_zero_pages"); + + page = virt_to_page((void *) empty_zero_page); + split_page(page, order); + for (i = 1 << order; i > 0; i--) { + SetPageReserved(page); + page++; + } + + size = PAGE_SIZE << order; + zero_page_mask = (size - 1) & PAGE_MASK; + + return 1UL << order; +} + +/* + * paging_init() sets up the page tables + */ +void __init paging_init(void) +{ + unsigned long max_zone_pfns[MAX_NR_ZONES]; + unsigned long pgd_type, asce_bits; + + init_mm.pgd = swapper_pg_dir; +#ifdef CONFIG_64BIT + if (VMALLOC_END > (1UL << 42)) { + asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; + pgd_type = _REGION2_ENTRY_EMPTY; + } else { + asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + pgd_type = _REGION3_ENTRY_EMPTY; + } +#else + asce_bits = _ASCE_TABLE_LENGTH; + pgd_type = _SEGMENT_ENTRY_EMPTY; +#endif + S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; + clear_table((unsigned long *) init_mm.pgd, pgd_type, + sizeof(unsigned long)*2048); + vmem_map_init(); + + /* enable virtual mapping in kernel mode */ + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + __ctl_load(S390_lowcore.kernel_asce, 7, 7); + __ctl_load(S390_lowcore.kernel_asce, 13, 13); + arch_local_irq_restore(4UL << (BITS_PER_LONG - 8)); + + atomic_set(&init_mm.context.attach_count, 1); + + sparse_memory_present_with_active_regions(MAX_NUMNODES); + sparse_init(); + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + 
free_area_init_nodes(max_zone_pfns); + fault_init(); +} + +void __init mem_init(void) +{ + unsigned long codesize, reservedpages, datasize, initsize; + + max_mapnr = num_physpages = max_low_pfn; + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + + /* Setup guest page hinting */ + cmma_init(); + + /* this will put all low memory onto the freelists */ + totalram_pages += free_all_bootmem(); + totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */ + + reservedpages = 0; + + codesize = (unsigned long) &_etext - (unsigned long) &_text; + datasize = (unsigned long) &_edata - (unsigned long) &_etext; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", + nr_free_pages() << (PAGE_SHIFT-10), + max_mapnr << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >>10, + initsize >> 10); + printk("Write protected kernel read-only data: %#lx - %#lx\n", + (unsigned long)&_stext, + PFN_ALIGN((unsigned long)&_eshared) - 1); +} + +#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long address; + int i; + + for (i = 0; i < numpages; i++) { + address = page_to_phys(page + i); + pgd = pgd_offset_k(address); + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + pte = pte_offset_kernel(pmd, address); + if (!enable) { + __ptep_ipte(address, pte); + pte_val(*pte) = _PAGE_TYPE_EMPTY; + continue; + } + *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW)); + /* Flush cpu write queue. 
*/ + mb(); + } +} +#endif + +void free_init_pages(char *what, unsigned long begin, unsigned long end) +{ + unsigned long addr = begin; + + if (begin >= end) + return; + for (; addr < end; addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + init_page_count(virt_to_page(addr)); + memset((void *)(addr & PAGE_MASK), POISON_FREE_INITMEM, + PAGE_SIZE); + free_page(addr); + totalram_pages++; + } + printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); +} + +void free_initmem(void) +{ + free_init_pages("unused kernel memory", + (unsigned long)&__init_begin, + (unsigned long)&__init_end); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + free_init_pages("initrd memory", start, end); +} +#endif + +#ifdef CONFIG_MEMORY_HOTPLUG +int arch_add_memory(int nid, u64 start, u64 size) +{ + unsigned long zone_start_pfn, zone_end_pfn, nr_pages; + unsigned long start_pfn = PFN_DOWN(start); + unsigned long size_pages = PFN_DOWN(size); + struct zone *zone; + int rc; + + rc = vmem_add_mapping(start, size); + if (rc) + return rc; + for_each_zone(zone) { + if (zone_idx(zone) != ZONE_MOVABLE) { + /* Add range within existing zone limits */ + zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone->zone_start_pfn + + zone->spanned_pages; + } else { + /* Add remaining range to ZONE_MOVABLE */ + zone_start_pfn = start_pfn; + zone_end_pfn = start_pfn + size_pages; + } + if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn) + continue; + nr_pages = (start_pfn + size_pages > zone_end_pfn) ? 
+ zone_end_pfn - start_pfn : size_pages; + rc = __add_pages(nid, zone, start_pfn, nr_pages); + if (rc) + break; + start_pfn += nr_pages; + size_pages -= nr_pages; + if (!size_pages) + break; + } + if (rc) + vmem_remove_mapping(start, size); + return rc; +} +#endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c new file mode 100644 index 00000000..e1335dc2 --- /dev/null +++ b/arch/s390/mm/maccess.c @@ -0,0 +1,168 @@ +/* + * Access kernel memory without faulting -- s390 specific implementation. + * + * Copyright IBM Corp. 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/gfp.h> +#include <asm/ctl_reg.h> + +/* + * This function writes to kernel memory bypassing DAT and possible + * write protection. It copies one to four bytes from src to dst + * using the stura instruction. + * Returns the number of bytes copied or -EFAULT. + */ +static long probe_kernel_write_odd(void *dst, const void *src, size_t size) +{ + unsigned long count, aligned; + int offset, mask; + int rc = -EFAULT; + + aligned = (unsigned long) dst & ~3UL; + offset = (unsigned long) dst & 3; + count = min_t(unsigned long, 4 - offset, size); + mask = (0xf << (4 - count)) & 0xf; + mask >>= offset; + asm volatile( + " bras 1,0f\n" + " icm 0,0,0(%3)\n" + "0: l 0,0(%1)\n" + " lra %1,0(%1)\n" + "1: ex %2,0(1)\n" + "2: stura 0,%1\n" + " la %0,0\n" + "3:\n" + EX_TABLE(0b,3b) EX_TABLE(1b,3b) EX_TABLE(2b,3b) + : "+d" (rc), "+a" (aligned) + : "a" (mask), "a" (src) : "cc", "memory", "0", "1"); + return rc ? rc : count; +} + +long probe_kernel_write(void *dst, const void *src, size_t size) +{ + long copied = 0; + + while (size) { + copied = probe_kernel_write_odd(dst, src, size); + if (copied < 0) + break; + dst += copied; + src += copied; + size -= copied; + } + return copied < 0 ? 
-EFAULT : 0; +} + +static int __memcpy_real(void *dest, void *src, size_t count) +{ + register unsigned long _dest asm("2") = (unsigned long) dest; + register unsigned long _len1 asm("3") = (unsigned long) count; + register unsigned long _src asm("4") = (unsigned long) src; + register unsigned long _len2 asm("5") = (unsigned long) count; + int rc = -EFAULT; + + asm volatile ( + "0: mvcle %1,%2,0x0\n" + "1: jo 0b\n" + " lhi %0,0x0\n" + "2:\n" + EX_TABLE(1b,2b) + : "+d" (rc), "+d" (_dest), "+d" (_src), "+d" (_len1), + "+d" (_len2), "=m" (*((long *) dest)) + : "m" (*((long *) src)) + : "cc", "memory"); + return rc; +} + +/* + * Copy memory in real mode (kernel to kernel) + */ +int memcpy_real(void *dest, void *src, size_t count) +{ + unsigned long flags; + int rc; + + if (!count) + return 0; + local_irq_save(flags); + __arch_local_irq_stnsm(0xfbUL); + rc = __memcpy_real(dest, src, count); + local_irq_restore(flags); + return rc; +} + +/* + * Copy memory to absolute zero + */ +void copy_to_absolute_zero(void *dest, void *src, size_t count) +{ + unsigned long cr0; + + BUG_ON((unsigned long) dest + count >= sizeof(struct _lowcore)); + preempt_disable(); + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); /* disable lowcore protection */ + memcpy_real(dest + store_prefix(), src, count); + __ctl_load(cr0, 0, 0); + preempt_enable(); +} + +/* + * Copy memory from kernel (real) to user (virtual) + */ +int copy_to_user_real(void __user *dest, void *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (memcpy_real(buf, src + offs, size)) + goto out; + if (copy_to_user(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} + +/* + * Copy memory from user (virtual) to kernel (real) + */ +int copy_from_user_real(void *dest, void __user *src, 
size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (copy_from_user(buf, src + offs, size)) + goto out; + if (memcpy_real(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c new file mode 100644 index 00000000..2857c484 --- /dev/null +++ b/arch/s390/mm/mmap.c @@ -0,0 +1,178 @@ +/* + * linux/arch/s390/mm/mmap.c + * + * flexible mmap layout support + * + * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * Started by Ingo Molnar <mingo@elte.hu> + */ + +#include <linux/personality.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/compat.h> +#include <asm/pgalloc.h> + +static unsigned long stack_maxrandom_size(void) +{ + if (!(current->flags & PF_RANDOMIZE)) + return 0; + if (current->personality & ADDR_NO_RANDOMIZE) + return 0; + return STACK_RND_MASK << PAGE_SHIFT; +} + +/* + * Top of mmap area (just below the process stack). 
+ * + * Leave at least a ~32 MB hole. + */ +#define MIN_GAP (32*1024*1024) +#define MAX_GAP (STACK_TOP/6*5) + +static inline int mmap_is_legacy(void) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + return 1; + return sysctl_legacy_va_layout; +} + +static unsigned long mmap_rnd(void) +{ + if (!(current->flags & PF_RANDOMIZE)) + return 0; + /* 8MB randomization for mmap_base */ + return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; +} + +static inline unsigned long mmap_base(void) +{ + unsigned long gap = rlimit(RLIMIT_STACK); + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + gap &= PAGE_MASK; + return STACK_TOP - stack_maxrandom_size() - mmap_rnd() - gap; +} + +#ifndef CONFIG_64BIT + +/* + * This function, called very early during the creation of a new + * process VM image, sets up which VM layout function to use: + */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + /* + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: + */ + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} + +#else + +int s390_mmap_check(unsigned long addr, unsigned long len) +{ + if (!is_compat_task() && + len >= TASK_SIZE && TASK_SIZE < (1UL << 53)) + return crst_table_upgrade(current->mm, 1UL << 53); + return 0; +} + +static unsigned long +s390_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + unsigned long area; + int rc; + + area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); + if (!(area & ~PAGE_MASK)) + return area; + if (area == -ENOMEM && !is_compat_task() 
&& TASK_SIZE < (1UL << 53)) { + /* Upgrade the page table to 4 levels and retry. */ + rc = crst_table_upgrade(mm, 1UL << 53); + if (rc) + return (unsigned long) rc; + area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); + } + return area; +} + +static unsigned long +s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct mm_struct *mm = current->mm; + unsigned long area; + int rc; + + area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags); + if (!(area & ~PAGE_MASK)) + return area; + if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) { + /* Upgrade the page table to 4 levels and retry. */ + rc = crst_table_upgrade(mm, 1UL << 53); + if (rc) + return (unsigned long) rc; + area = arch_get_unmapped_area_topdown(filp, addr, len, + pgoff, flags); + } + return area; +} +/* + * This function, called very early during the creation of a new + * process VM image, sets up which VM layout function to use: + */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + /* + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: + */ + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE; + mm->get_unmapped_area = s390_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(); + mm->get_unmapped_area = s390_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} + +#endif diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c new file mode 100644 index 00000000..a90d45e9 --- /dev/null +++ b/arch/s390/mm/page-states.c @@ -0,0 +1,114 @@ +/* + * Copyright IBM Corp. 2008 + * + * Guest page hinting for unused pages. 
+ * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/gfp.h> +#include <linux/init.h> + +#define ESSA_SET_STABLE 1 +#define ESSA_SET_UNUSED 2 + +static int cmma_flag = 1; + +static int __init cmma(char *str) +{ + char *parm; + + parm = strstrip(str); + if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) { + cmma_flag = 1; + return 1; + } + cmma_flag = 0; + if (strcmp(parm, "no") == 0 || strcmp(parm, "off") == 0) + return 1; + return 0; +} +__setup("cmma=", cmma); + +void __init cmma_init(void) +{ + register unsigned long tmp asm("0") = 0; + register int rc asm("1") = -EOPNOTSUPP; + + if (!cmma_flag) + return; + asm volatile( + " .insn rrf,0xb9ab0000,%1,%1,0,0\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+&d" (rc), "+&d" (tmp)); + if (rc) + cmma_flag = 0; +} + +static inline void set_page_unstable(struct page *page, int order) +{ + int i, rc; + + for (i = 0; i < (1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" (page_to_phys(page + i)), + "i" (ESSA_SET_UNUSED)); +} + +void arch_free_page(struct page *page, int order) +{ + if (!cmma_flag) + return; + set_page_unstable(page, order); +} + +static inline void set_page_stable(struct page *page, int order) +{ + int i, rc; + + for (i = 0; i < (1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" (page_to_phys(page + i)), + "i" (ESSA_SET_STABLE)); +} + +void arch_alloc_page(struct page *page, int order) +{ + if (!cmma_flag) + return; + set_page_stable(page, order); +} + +void arch_set_page_states(int make_stable) +{ + unsigned long flags, order, t; + struct list_head *l; + struct page *page; + struct zone *zone; + + if (!cmma_flag) + return; + if (make_stable) + drain_local_pages(NULL); + for_each_populated_zone(zone) { + spin_lock_irqsave(&zone->lock, flags); + for_each_migratetype_order(order, t) { + 
list_for_each(l, &zone->free_area[order].free_list[t]) { + page = list_entry(l, struct page, lru); + if (make_stable) + set_page_stable(page, order); + else + set_page_unstable(page, order); + } + } + spin_unlock_irqrestore(&zone->lock, flags); + } +} diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c new file mode 100644 index 00000000..b36537a5 --- /dev/null +++ b/arch/s390/mm/pageattr.c @@ -0,0 +1,62 @@ +/* + * Copyright IBM Corp. 2011 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <asm/cacheflush.h> +#include <asm/pgtable.h> + +/* + * Apply @set to the init_mm page table entry of each of @numpages pages + * starting at @addr. For every page the old entry is passed to + * __ptep_ipte() before the modified entry is stored. Pages that sit + * behind a huge pmd have no pte to modify; they trigger a one-time + * warning and are skipped. + */ +static void change_page_attr(unsigned long addr, int numpages, + pte_t (*set) (pte_t)) +{ + pte_t *ptep, pte; + pmd_t *pmdp; + pud_t *pudp; + pgd_t *pgdp; + int i; + + /* addr advances in the loop header so that "continue" cannot skip it */ + for (i = 0; i < numpages; i++, addr += PAGE_SIZE) { + pgdp = pgd_offset(&init_mm, addr); + pudp = pud_offset(pgdp, addr); + pmdp = pmd_offset(pudp, addr); + if (pmd_huge(*pmdp)) { + WARN_ON_ONCE(1); + continue; + } + ptep = pte_offset_kernel(pmdp, addr); + + pte = *ptep; + pte = set(pte); + __ptep_ipte(addr, ptep); + *ptep = pte; + } +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + change_page_attr(addr, numpages, pte_wrprotect); + return 0; +} +EXPORT_SYMBOL_GPL(set_memory_ro); + +int set_memory_rw(unsigned long addr, int numpages) +{ + change_page_attr(addr, numpages, pte_mkwrite); + return 0; +} +EXPORT_SYMBOL_GPL(set_memory_rw); + +/* not possible */ +int set_memory_nx(unsigned long addr, int numpages) +{ + return 0; +} +EXPORT_SYMBOL_GPL(set_memory_nx); + +int set_memory_x(unsigned long addr, int numpages) +{ + return 0; +} diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c new file mode 100644 index 00000000..6e765bf0 --- /dev/null +++ b/arch/s390/mm/pgtable.c @@ -0,0 +1,871 @@ +/* + * Copyright IBM Corp. 
2007,2011 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/smp.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <linux/quicklist.h> +#include <linux/rcupdate.h> +#include <linux/slab.h> + +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> + +#ifndef CONFIG_64BIT +#define ALLOC_ORDER 1 +#define FRAG_MASK 0x0f +#else +#define ALLOC_ORDER 2 +#define FRAG_MASK 0x03 +#endif + + +unsigned long *crst_table_alloc(struct mm_struct *mm) +{ + struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + + if (!page) + return NULL; + return (unsigned long *) page_to_phys(page); +} + +void crst_table_free(struct mm_struct *mm, unsigned long *table) +{ + free_pages((unsigned long) table, ALLOC_ORDER); +} + +#ifdef CONFIG_64BIT +int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) +{ + unsigned long *table, *pgd; + unsigned long entry; + + BUG_ON(limit > (1UL << 53)); +repeat: + table = crst_table_alloc(mm); + if (!table) + return -ENOMEM; + spin_lock_bh(&mm->page_table_lock); + if (mm->context.asce_limit < limit) { + pgd = (unsigned long *) mm->pgd; + if (mm->context.asce_limit <= (1UL << 31)) { + entry = _REGION3_ENTRY_EMPTY; + mm->context.asce_limit = 1UL << 42; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION3; + } else { + entry = _REGION2_ENTRY_EMPTY; + mm->context.asce_limit = 1UL << 53; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION2; + } + crst_table_init(table, entry); + pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); + mm->pgd = (pgd_t *) table; + mm->task_size = mm->context.asce_limit; + table = NULL; + } + spin_unlock_bh(&mm->page_table_lock); 
+ if (table) + crst_table_free(mm, table); + if (mm->context.asce_limit < limit) + goto repeat; + update_mm(mm, current); + return 0; +} + +void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) +{ + pgd_t *pgd; + + if (mm->context.asce_limit <= limit) + return; + __tlb_flush_mm(mm); + while (mm->context.asce_limit > limit) { + pgd = mm->pgd; + switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { + case _REGION_ENTRY_TYPE_R2: + mm->context.asce_limit = 1UL << 42; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION3; + break; + case _REGION_ENTRY_TYPE_R3: + mm->context.asce_limit = 1UL << 31; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_SEGMENT; + break; + default: + BUG(); + } + mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); + mm->task_size = mm->context.asce_limit; + crst_table_free(mm, (unsigned long *) pgd); + } + update_mm(mm, current); +} +#endif + +#ifdef CONFIG_PGSTE + +/** + * gmap_alloc - allocate a guest address space + * @mm: pointer to the parent mm_struct + * + * Returns a guest address space structure. 
+ */ +struct gmap *gmap_alloc(struct mm_struct *mm) +{ + struct gmap *gmap; + struct page *page; + unsigned long *table; + + gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); + if (!gmap) + goto out; + INIT_LIST_HEAD(&gmap->crst_list); + gmap->mm = mm; + page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + if (!page) + goto out_free; + list_add(&page->lru, &gmap->crst_list); + table = (unsigned long *) page_to_phys(page); + crst_table_init(table, _REGION1_ENTRY_EMPTY); + gmap->table = table; + gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); + list_add(&gmap->list, &mm->context.gmap_list); + return gmap; + +out_free: + kfree(gmap); +out: + return NULL; +} +EXPORT_SYMBOL_GPL(gmap_alloc); + +static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) +{ + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct page *page; + + if (*table & _SEGMENT_ENTRY_INV) + return 0; + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + list_for_each_entry(rmap, &mp->mapper, list) { + if (rmap->entry != table) + continue; + list_del(&rmap->list); + kfree(rmap); + break; + } + *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + return 1; +} + +static void gmap_flush_tlb(struct gmap *gmap) +{ + if (MACHINE_HAS_IDTE) + __tlb_flush_idte((unsigned long) gmap->table | + _ASCE_TYPE_REGION1); + else + __tlb_flush_global(); +} + +/** + * gmap_free - free a guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_free(struct gmap *gmap) +{ + struct page *page, *next; + unsigned long *table; + int i; + + + /* Flush tlb. */ + if (MACHINE_HAS_IDTE) + __tlb_flush_idte((unsigned long) gmap->table | + _ASCE_TYPE_REGION1); + else + __tlb_flush_global(); + + /* Free all segment & region tables. 
*/ + down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { + table = (unsigned long *) page_to_phys(page); + if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) + /* Remove gmap rmap structures for segment table. */ + for (i = 0; i < PTRS_PER_PMD; i++, table++) + gmap_unlink_segment(gmap, table); + __free_pages(page, ALLOC_ORDER); + } + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + list_del(&gmap->list); + kfree(gmap); +} +EXPORT_SYMBOL_GPL(gmap_free); + +/** + * gmap_enable - switch primary space to the guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_enable(struct gmap *gmap) +{ + S390_lowcore.gmap = (unsigned long) gmap; +} +EXPORT_SYMBOL_GPL(gmap_enable); + +/** + * gmap_disable - switch back to the standard primary address space + * @gmap: pointer to the guest address space structure + */ +void gmap_disable(struct gmap *gmap) +{ + S390_lowcore.gmap = 0UL; +} +EXPORT_SYMBOL_GPL(gmap_disable); + +/* + * gmap_alloc_table is assumed to be called with mmap_sem held + */ +static int gmap_alloc_table(struct gmap *gmap, + unsigned long *table, unsigned long init) +{ + struct page *page; + unsigned long *new; + + /* since we don't free the gmap table until gmap_free we can unlock */ + spin_unlock(&gmap->mm->page_table_lock); + page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + spin_lock(&gmap->mm->page_table_lock); + if (!page) + return -ENOMEM; + new = (unsigned long *) page_to_phys(page); + crst_table_init(new, init); + if (*table & _REGION_ENTRY_INV) { + list_add(&page->lru, &gmap->crst_list); + *table = (unsigned long) new | _REGION_ENTRY_LENGTH | + (*table & _REGION_ENTRY_TYPE_MASK); + } else + __free_pages(page, ALLOC_ORDER); + return 0; +} + +/** + * gmap_unmap_segment - unmap segment from the guest address space + * @gmap: pointer to the guest address space structure + * @to: address in the guest address space + * 
@len: length of the memory area to unmap + * + * Returns 0 if the unmap succeded, -EINVAL if not. + */ +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) +{ + unsigned long *table; + unsigned long off; + int flush; + + if ((to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || to + len < to) + return -EINVAL; + + flush = 0; + down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); + for (off = 0; off < len; off += PMD_SIZE) { + /* Walk the guest addr space page table */ + table = gmap->table + (((to + off) >> 53) & 0x7ff); + if (*table & _REGION_ENTRY_INV) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 42) & 0x7ff); + if (*table & _REGION_ENTRY_INV) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 31) & 0x7ff); + if (*table & _REGION_ENTRY_INV) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 20) & 0x7ff); + + /* Clear segment table entry in guest address space. */ + flush |= gmap_unlink_segment(gmap, table); + *table = _SEGMENT_ENTRY_INV; + } +out: + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + return 0; +} +EXPORT_SYMBOL_GPL(gmap_unmap_segment); + +/** + * gmap_mmap_segment - map a segment to the guest address space + * @gmap: pointer to the guest address space structure + * @from: source address in the parent address space + * @to: target address in the guest address space + * + * Returns 0 if the mmap succeded, -EINVAL or -ENOMEM if not. 
+ */ +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len) +{ + unsigned long *table; + unsigned long off; + int flush; + + if ((from | to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || from + len > PGDIR_SIZE || + from + len < from || to + len < to) + return -EINVAL; + + flush = 0; + down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); + for (off = 0; off < len; off += PMD_SIZE) { + /* Walk the gmap address space page table */ + table = gmap->table + (((to + off) >> 53) & 0x7ff); + if ((*table & _REGION_ENTRY_INV) && + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) + goto out_unmap; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 42) & 0x7ff); + if ((*table & _REGION_ENTRY_INV) && + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) + goto out_unmap; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 31) & 0x7ff); + if ((*table & _REGION_ENTRY_INV) && + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) + goto out_unmap; + table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 20) & 0x7ff); + + /* Store 'from' address in an invalid segment table entry. 
*/ + flush |= gmap_unlink_segment(gmap, table); + *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); + } + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + return 0; + +out_unmap: + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + gmap_unmap_segment(gmap, to, len); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(gmap_map_segment); + +/* + * this function is assumed to be called with mmap_sem held + */ +unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long *table, vmaddr, segment; + struct mm_struct *mm; + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct vm_area_struct *vma; + struct page *page; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + current->thread.gmap_addr = address; + mm = gmap->mm; + /* Walk the gmap address space page table */ + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return -EFAULT; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return -EFAULT; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return -EFAULT; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + + /* Convert the gmap address to an mm address. 
*/ + segment = *table; + if (likely(!(segment & _SEGMENT_ENTRY_INV))) { + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + return mp->vmaddr | (address & ~PMD_MASK); + } else if (segment & _SEGMENT_ENTRY_RO) { + vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; + vma = find_vma(mm, vmaddr); + if (!vma || vma->vm_start > vmaddr) + return -EFAULT; + + /* Walk the parent mm page table */ + pgd = pgd_offset(mm, vmaddr); + pud = pud_alloc(mm, pgd, vmaddr); + if (!pud) + return -ENOMEM; + pmd = pmd_alloc(mm, pud, vmaddr); + if (!pmd) + return -ENOMEM; + if (!pmd_present(*pmd) && + __pte_alloc(mm, vma, pmd, vmaddr)) + return -ENOMEM; + /* pmd now points to a valid segment table entry. */ + rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); + if (!rmap) + return -ENOMEM; + /* Link gmap segment table entry location to page table. */ + page = pmd_page(*pmd); + mp = (struct gmap_pgtable *) page->index; + rmap->entry = table; + spin_lock(&mm->page_table_lock); + list_add(&rmap->list, &mp->mapper); + spin_unlock(&mm->page_table_lock); + /* Set gmap segment table entry to page table. 
*/ + *table = pmd_val(*pmd) & PAGE_MASK; + return vmaddr | (address & ~PMD_MASK); + } + return -EFAULT; +} + +unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_fault(address, gmap); + up_read(&gmap->mm->mmap_sem); + + return rc; +} +EXPORT_SYMBOL_GPL(gmap_fault); + +void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) +{ + + unsigned long *table, address, size; + struct vm_area_struct *vma; + struct gmap_pgtable *mp; + struct page *page; + + down_read(&gmap->mm->mmap_sem); + address = from; + while (address < to) { + /* Walk the gmap address space page table */ + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + vma = find_vma(gmap->mm, mp->vmaddr); + size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); + zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), + size, NULL); + address = (address + PMD_SIZE) & PMD_MASK; + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + +void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) +{ + struct gmap_rmap *rmap, *next; + struct gmap_pgtable *mp; + struct 
page *page; + int flush; + + flush = 0; + spin_lock(&mm->page_table_lock); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + list_for_each_entry_safe(rmap, next, &mp->mapper, list) { + *rmap->entry = + _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + list_del(&rmap->list); + kfree(rmap); + flush = 1; + } + spin_unlock(&mm->page_table_lock); + if (flush) + __tlb_flush_global(); +} + +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, + unsigned long vmaddr) +{ + struct page *page; + unsigned long *table; + struct gmap_pgtable *mp; + + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + if (!page) + return NULL; + mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT); + if (!mp) { + __free_page(page); + return NULL; + } + pgtable_page_ctor(page); + mp->vmaddr = vmaddr & PMD_MASK; + INIT_LIST_HEAD(&mp->mapper); + page->index = (unsigned long) mp; + atomic_set(&page->_mapcount, 3); + table = (unsigned long *) page_to_phys(page); + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + return table; +} + +static inline void page_table_free_pgste(unsigned long *table) +{ + struct page *page; + struct gmap_pgtable *mp; + + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + BUG_ON(!list_empty(&mp->mapper)); + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + kfree(mp); + __free_page(page); +} + +#else /* CONFIG_PGSTE */ + +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, + unsigned long vmaddr) +{ + return NULL; +} + +static inline void page_table_free_pgste(unsigned long *table) +{ +} + +static inline void gmap_unmap_notifier(struct mm_struct *mm, + unsigned long *table) +{ +} + +#endif /* CONFIG_PGSTE */ + +static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) +{ + unsigned int old, new; + + do { + old = atomic_read(v); + new = old ^ bits; + } while 
(atomic_cmpxchg(v, old, new) != old); + return new; +} + +/* + * page table entry allocation/free routines. + */ +unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) +{ + struct page *page; + unsigned long *table; + unsigned int mask, bit; + + if (mm_has_pgste(mm)) + return page_table_alloc_pgste(mm, vmaddr); + /* Allocate fragments of a 4K page as 1K/2K page table */ + spin_lock_bh(&mm->context.list_lock); + mask = FRAG_MASK; + if (!list_empty(&mm->context.pgtable_list)) { + page = list_first_entry(&mm->context.pgtable_list, + struct page, lru); + table = (unsigned long *) page_to_phys(page); + mask = atomic_read(&page->_mapcount); + mask = mask | (mask >> 4); + } + if ((mask & FRAG_MASK) == FRAG_MASK) { + spin_unlock_bh(&mm->context.list_lock); + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + if (!page) + return NULL; + pgtable_page_ctor(page); + atomic_set(&page->_mapcount, 1); + table = (unsigned long *) page_to_phys(page); + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + spin_lock_bh(&mm->context.list_lock); + list_add(&page->lru, &mm->context.pgtable_list); + } else { + for (bit = 1; mask & bit; bit <<= 1) + table += PTRS_PER_PTE; + mask = atomic_xor_bits(&page->_mapcount, bit); + if ((mask & FRAG_MASK) == FRAG_MASK) + list_del(&page->lru); + } + spin_unlock_bh(&mm->context.list_lock); + return table; +} + +void page_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page; + unsigned int bit, mask; + + if (mm_has_pgste(mm)) { + gmap_unmap_notifier(mm, table); + return page_table_free_pgste(table); + } + /* Free 1K/2K page table fragment of a 4K page */ + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); + spin_lock_bh(&mm->context.list_lock); + if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) + list_del(&page->lru); + mask = atomic_xor_bits(&page->_mapcount, bit); + if (mask & FRAG_MASK) + list_add(&page->lru, 
&mm->context.pgtable_list); + spin_unlock_bh(&mm->context.list_lock); + if (mask == 0) { + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); + } +} + +static void __page_table_free_rcu(void *table, unsigned bit) +{ + struct page *page; + + if (bit == FRAG_MASK) + return page_table_free_pgste(table); + /* Free 1K/2K page table fragment of a 4K page */ + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (atomic_xor_bits(&page->_mapcount, bit) == 0) { + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); + } +} + +void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) +{ + struct mm_struct *mm; + struct page *page; + unsigned int bit, mask; + + mm = tlb->mm; + if (mm_has_pgste(mm)) { + gmap_unmap_notifier(mm, table); + table = (unsigned long *) (__pa(table) | FRAG_MASK); + tlb_remove_table(tlb, table); + return; + } + bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + spin_lock_bh(&mm->context.list_lock); + if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) + list_del(&page->lru); + mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4)); + if (mask & FRAG_MASK) + list_add_tail(&page->lru, &mm->context.pgtable_list); + spin_unlock_bh(&mm->context.list_lock); + table = (unsigned long *) (__pa(table) | (bit << 4)); + tlb_remove_table(tlb, table); +} + +void __tlb_remove_table(void *_table) +{ + const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK; + void *table = (void *)((unsigned long) _table & ~mask); + unsigned type = (unsigned long) _table & mask; + + if (type) + __page_table_free_rcu(table, type); + else + free_pages((unsigned long) table, ALLOC_ORDER); +} + +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be 
actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely + * on IRQ disabling. See the comment near struct mmu_table_batch. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +void tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + __tlb_flush_mm(tlb->mm); + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch == NULL) { + *batch = (struct mmu_table_batch *) + __get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + __tlb_flush_mm(tlb->mm); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_table_flush(tlb); +} + +/* + * switch on pgstes for its userspace process (for kvm) + */ +int s390_enable_sie(void) +{ + struct task_struct *tsk = current; + struct mm_struct *mm, *old_mm; + + /* Do we have switched amode? If no, we cannot do sie */ + if (user_mode == HOME_SPACE_MODE) + return -EINVAL; + + /* Do we have pgstes? 
if yes, we are done */ + if (mm_has_pgste(tsk->mm)) + return 0; + + /* lets check if we are allowed to replace the mm */ + task_lock(tsk); + if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || +#ifdef CONFIG_AIO + !hlist_empty(&tsk->mm->ioctx_list) || +#endif + tsk->mm != tsk->active_mm) { + task_unlock(tsk); + return -EINVAL; + } + task_unlock(tsk); + + /* we copy the mm and let dup_mm create the page tables with_pgstes */ + tsk->mm->context.alloc_pgste = 1; + mm = dup_mm(tsk); + tsk->mm->context.alloc_pgste = 0; + if (!mm) + return -ENOMEM; + + /* Now lets check again if something happened */ + task_lock(tsk); + if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || +#ifdef CONFIG_AIO + !hlist_empty(&tsk->mm->ioctx_list) || +#endif + tsk->mm != tsk->active_mm) { + mmput(mm); + task_unlock(tsk); + return -EINVAL; + } + + /* ok, we are alone. No ptrace, no threads, etc. */ + old_mm = tsk->mm; + tsk->mm = tsk->active_mm = mm; + preempt_disable(); + update_mm(mm, tsk); + atomic_inc(&mm->context.attach_count); + atomic_dec(&old_mm->context.attach_count); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + preempt_enable(); + task_unlock(tsk); + mmput(old_mm); + return 0; +} +EXPORT_SYMBOL_GPL(s390_enable_sie); + +#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION) +bool kernel_page_present(struct page *page) +{ + unsigned long addr; + int cc; + + addr = page_to_phys(page); + asm volatile( + " lra %1,0(%1)\n" + " ipm %0\n" + " srl %0,28" + : "=d" (cc), "+a" (addr) : : "cc"); + return cc == 0; +} +#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c new file mode 100644 index 00000000..4799383e --- /dev/null +++ b/arch/s390/mm/vmem.c @@ -0,0 +1,388 @@ +/* + * arch/s390/mm/vmem.c + * + * Copyright IBM Corp. 
2006 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/bootmem.h> +#include <linux/pfn.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/hugetlb.h> +#include <linux/slab.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/setup.h> +#include <asm/tlbflush.h> +#include <asm/sections.h> + /* Held by the functions in this file that walk or modify mem_segs. */ +static DEFINE_MUTEX(vmem_mutex); + /* One entry of mem_segs: the physical range [start, start + size). */ +struct memory_segment { + struct list_head list; + unsigned long start; + unsigned long size; +}; + +static LIST_HEAD(mem_segs); + /* Allocate 2^order pages: from the page allocator once slab is available, from bootmem before that. */ +static void __ref *vmem_alloc_pages(unsigned int order) +{ + if (slab_is_available()) + return (void *)__get_free_pages(GFP_KERNEL, order); + return alloc_bootmem_pages((1 << order) * PAGE_SIZE); +} + /* Allocate an order-2 table filled with _REGION3_ENTRY_EMPTY; returns NULL on failure and always when CONFIG_64BIT is not set. */ +static inline pud_t *vmem_pud_alloc(void) +{ + pud_t *pud = NULL; + +#ifdef CONFIG_64BIT + pud = vmem_alloc_pages(2); + if (!pud) + return NULL; + clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4); +#endif + return pud; +} + /* Same as vmem_pud_alloc() but the table is filled with _SEGMENT_ENTRY_EMPTY. */ +static inline pmd_t *vmem_pmd_alloc(void) +{ + pmd_t *pmd = NULL; + +#ifdef CONFIG_64BIT + pmd = vmem_alloc_pages(2); + if (!pmd) + return NULL; + clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4); +#endif + return pmd; +} + /* Allocate a page table (page_table_alloc() on init_mm once slab is available, bootmem before that) and fill it with _PAGE_TYPE_EMPTY; returns NULL on failure. */ +static pte_t __ref *vmem_pte_alloc(unsigned long address) +{ + pte_t *pte; + + if (slab_is_available()) + pte = (pte_t *) page_table_alloc(&init_mm, address); + else + pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t)); + if (!pte) + return NULL; + clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, + PTRS_PER_PTE * sizeof(pte_t)); + return pte; +} + +/* + * Add a physical memory range to the 1:1 mapping. 
+ */ +static int vmem_add_mem(unsigned long start, unsigned long size, int ro) +{ + unsigned long address; + pgd_t *pg_dir; + pud_t *pu_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + int ret = -ENOMEM; + /* Map [start, start + size) page by page, allocating missing table levels; a non-zero @ro maps with _PAGE_RO. Any allocation failure leaves ret at -ENOMEM. */ + for (address = start; address < start + size; address += PAGE_SIZE) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + pu_dir = vmem_pud_alloc(); + if (!pu_dir) + goto out; + pgd_populate(&init_mm, pg_dir, pu_dir); + } + + pu_dir = pud_offset(pg_dir, address); + if (pud_none(*pu_dir)) { + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + goto out; + pud_populate(&init_mm, pu_dir, pm_dir); + } + + pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); + pm_dir = pmd_offset(pu_dir, address); + /* Use a large segment entry when the machine supports it, the address is HPAGE aligned, the whole huge page lies inside the range and the address is not below HPAGE_SIZE. */ +#ifdef __s390x__ + if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) && + (address + HPAGE_SIZE <= start + size) && + (address >= HPAGE_SIZE)) { + pte_val(pte) |= _SEGMENT_ENTRY_LARGE; + pmd_val(*pm_dir) = pte_val(pte); + address += HPAGE_SIZE - PAGE_SIZE; + continue; + } +#endif + if (pmd_none(*pm_dir)) { + pt_dir = vmem_pte_alloc(address); + if (!pt_dir) + goto out; + pmd_populate(&init_mm, pm_dir, pt_dir); + } + + pt_dir = pte_offset_kernel(pm_dir, address); + *pt_dir = pte; + } + ret = 0; +out: /* the flush covers the whole range on success and on failure alike */ + flush_tlb_kernel_range(start, start + size); + return ret; +} + +/* + * Remove a physical memory range from the 1:1 mapping. + * Currently only invalidates page table entries. 
+ */ +static void vmem_remove_range(unsigned long start, unsigned long size) +{ + unsigned long address; + pgd_t *pg_dir; + pud_t *pu_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + + pte_val(pte) = _PAGE_TYPE_EMPTY; + for (address = start; address < start + size; address += PAGE_SIZE) { + pg_dir = pgd_offset_k(address); + pu_dir = pud_offset(pg_dir, address); + if (pud_none(*pu_dir)) + continue; + pm_dir = pmd_offset(pu_dir, address); + if (pmd_none(*pm_dir)) + continue; + + if (pmd_huge(*pm_dir)) { + pmd_clear(pm_dir); + address += HPAGE_SIZE - PAGE_SIZE; + continue; + } + + pt_dir = pte_offset_kernel(pm_dir, address); + *pt_dir = pte; + } + flush_tlb_kernel_range(start, start + size); +} + +/* + * Add a backed mem_map array to the virtual mem_map array. + */ +int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) +{ + unsigned long address, start_addr, end_addr; + pgd_t *pg_dir; + pud_t *pu_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + int ret = -ENOMEM; + + start_addr = (unsigned long) start; + end_addr = (unsigned long) (start + nr); + + for (address = start_addr; address < end_addr; address += PAGE_SIZE) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + pu_dir = vmem_pud_alloc(); + if (!pu_dir) + goto out; + pgd_populate(&init_mm, pg_dir, pu_dir); + } + + pu_dir = pud_offset(pg_dir, address); + if (pud_none(*pu_dir)) { + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + goto out; + pud_populate(&init_mm, pu_dir, pm_dir); + } + + pm_dir = pmd_offset(pu_dir, address); + if (pmd_none(*pm_dir)) { + pt_dir = vmem_pte_alloc(address); + if (!pt_dir) + goto out; + pmd_populate(&init_mm, pm_dir, pt_dir); + } + + pt_dir = pte_offset_kernel(pm_dir, address); + if (pte_none(*pt_dir)) { + unsigned long new_page; + + new_page =__pa(vmem_alloc_pages(0)); + if (!new_page) + goto out; + pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL); + *pt_dir = pte; + } + } + memset(start, 0, nr * sizeof(struct page)); + ret = 0; +out: + 
flush_tlb_kernel_range(start_addr, end_addr); + return ret; +} + +/* + * Add memory segment to the segment list if it doesn't overlap with + * an already present segment. + */ +static int insert_memory_segment(struct memory_segment *seg) +{ + struct memory_segment *tmp; + + if (seg->start + seg->size > VMEM_MAX_PHYS || + seg->start + seg->size < seg->start) + return -ERANGE; + + list_for_each_entry(tmp, &mem_segs, list) { + if (seg->start >= tmp->start + tmp->size) + continue; + if (seg->start + seg->size <= tmp->start) + continue; + return -ENOSPC; + } + list_add(&seg->list, &mem_segs); + return 0; +} + +/* + * Remove memory segment from the segment list. + */ +static void remove_memory_segment(struct memory_segment *seg) +{ + list_del(&seg->list); +} + +static void __remove_shared_memory(struct memory_segment *seg) +{ + remove_memory_segment(seg); + vmem_remove_range(seg->start, seg->size); +} + +int vmem_remove_mapping(unsigned long start, unsigned long size) +{ + struct memory_segment *seg; + int ret; + + mutex_lock(&vmem_mutex); + + ret = -ENOENT; + list_for_each_entry(seg, &mem_segs, list) { + if (seg->start == start && seg->size == size) + break; + } + + if (seg->start != start || seg->size != size) + goto out; + + ret = 0; + __remove_shared_memory(seg); + kfree(seg); +out: + mutex_unlock(&vmem_mutex); + return ret; +} + +int vmem_add_mapping(unsigned long start, unsigned long size) +{ + struct memory_segment *seg; + int ret; + + mutex_lock(&vmem_mutex); + ret = -ENOMEM; + seg = kzalloc(sizeof(*seg), GFP_KERNEL); + if (!seg) + goto out; + seg->start = start; + seg->size = size; + + ret = insert_memory_segment(seg); + if (ret) + goto out_free; + + ret = vmem_add_mem(start, size, 0); + if (ret) + goto out_remove; + goto out; + +out_remove: + __remove_shared_memory(seg); +out_free: + kfree(seg); +out: + mutex_unlock(&vmem_mutex); + return ret; +} + +/* + * map whole physical memory to virtual memory (identity mapping) + * we reserve enough space in the vmalloc 
area for vmemmap to hotplug + * additional memory segments. + */ +void __init vmem_map_init(void) +{ + unsigned long ro_start, ro_end; + unsigned long start, end; + int i; + /* [ro_start, ro_end) is the page-aligned range from _stext to _eshared; chunk parts inside it are mapped with ro = 1, everything else with ro = 0. NOTE(review): the return values of vmem_add_mem() are not checked here. */ + ro_start = ((unsigned long)&_stext) & PAGE_MASK; + ro_end = PFN_ALIGN((unsigned long)&_eshared); + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; + start = memory_chunk[i].addr; + end = memory_chunk[i].addr + memory_chunk[i].size; + if (start >= ro_end || end <= ro_start) /* chunk does not overlap the read-only range */ + vmem_add_mem(start, end - start, 0); + else if (start >= ro_start && end <= ro_end) /* chunk lies entirely inside it */ + vmem_add_mem(start, end - start, 1); + else if (start >= ro_start) { /* chunk starts inside and ends behind it */ + vmem_add_mem(start, ro_end - start, 1); + vmem_add_mem(ro_end, end - ro_end, 0); + } else if (end < ro_end) { /* chunk starts before and ends inside it */ + vmem_add_mem(start, ro_start - start, 0); + vmem_add_mem(ro_start, end - ro_start, 1); + } else { /* chunk starts before it and reaches or passes ro_end */ + vmem_add_mem(start, ro_start - start, 0); + vmem_add_mem(ro_start, ro_end - ro_start, 1); + vmem_add_mem(ro_end, end - ro_end, 0); + } + } +} + +/* + * Convert memory chunk array to a memory segment list so there is a single + * list that contains both r/w memory and shared memory segments. 
+ */ +static int __init vmem_convert_memory_chunk(void) +{ + struct memory_segment *seg; + int i; + + mutex_lock(&vmem_mutex); + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (!memory_chunk[i].size) + continue; + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; + seg = kzalloc(sizeof(*seg), GFP_KERNEL); + if (!seg) + panic("Out of memory...\n"); + seg->start = memory_chunk[i].addr; + seg->size = memory_chunk[i].size; + insert_memory_segment(seg); /* NOTE(review): the return value is ignored; if insertion fails seg is neither listed nor freed - confirm this is intended */ + } + mutex_unlock(&vmem_mutex); + return 0; +} + +core_initcall(vmem_convert_memory_chunk); diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile new file mode 100644 index 00000000..524c4b61 --- /dev/null +++ b/arch/s390/oprofile/Makefile @@ -0,0 +1,10 @@ +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-$(CONFIG_64BIT) += hwsampler.o diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c new file mode 100644 index 00000000..bc4b84a3 --- /dev/null +++ b/arch/s390/oprofile/backtrace.c @@ -0,0 +1,79 @@ +/** + * arch/s390/oprofile/backtrace.c + * + * S390 Version + * Copyright (C) 2005 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Author(s): Andreas Krebbel <Andreas.Krebbel@de.ibm.com> + */ + +#include <linux/oprofile.h> + +#include <asm/processor.h> /* for struct stack_frame */ + /* Walk stack frames starting at @sp while they stay between @low and @high, passing gprs[8] of each frame (masked with PSW_ADDR_INSN) to oprofile_add_trace() and decrementing *depth per entry. Returns the stack pointer at which the walk stopped. */ +static unsigned long +__show_trace(unsigned int *depth, unsigned long sp, + unsigned long low, unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + + while (*depth) { + sp = sp & PSW_ADDR_INSN; + if (sp < low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + (*depth)--; + oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN); + + /* Follow the backchain. 
*/ + while (*depth) { + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + (*depth)--; + oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN); + + } + + if (*depth == 0) + break; + + /* Zero backchain detected, check for interrupt frame. */ + sp = (unsigned long) (sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *) sp; + (*depth)--; + oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN); /* NOTE(review): this records sf->gprs[8] again although regs was just set up - confirm the interrupted PSW address was not intended */ + low = sp; + sp = regs->gprs[15]; + } + return sp; +} + /* Record up to @depth backtrace entries for @regs: first within the async stack range, then within [thread_info, thread_info + THREAD_SIZE). Nothing is recorded for user-mode @regs or when the first frame has a zero back chain. */ +void s390_backtrace(struct pt_regs * const regs, unsigned int depth) +{ + unsigned long head; + struct stack_frame* head_sf; + + if (user_mode (regs)) + return; + + head = regs->gprs[15]; + head_sf = (struct stack_frame*)head; + + if (!head_sf->back_chain) + return; + + head = head_sf->back_chain; + + head = __show_trace(&depth, head, S390_lowcore.async_stack - ASYNC_SIZE, + S390_lowcore.async_stack); + + __show_trace(&depth, head, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); +} diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c new file mode 100644 index 00000000..c6646de0 --- /dev/null +++ b/arch/s390/oprofile/hwsampler.c @@ -0,0 +1,1240 @@ +/** + * arch/s390/oprofile/hwsampler.c + * + * Copyright IBM Corp. 
2010 + * Author: Heinz Graalfs <graalfs@de.ibm.com> + */ + +#include <linux/kernel_stat.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/workqueue.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/semaphore.h> +#include <linux/oom.h> +#include <linux/oprofile.h> + +#include <asm/facility.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> + +#include "hwsampler.h" +#include "op_counter.h" + +#define MAX_NUM_SDB 511 +#define MIN_NUM_SDB 1 + +#define ALERT_REQ_MASK 0x4000000000000000ul +#define BUFFER_FULL_MASK 0x8000000000000000ul + +DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); + +struct hws_execute_parms { + void *buffer; + signed int rc; +}; + +DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); +EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer); + +static DEFINE_MUTEX(hws_sem); +static DEFINE_MUTEX(hws_sem_oom); + +static unsigned char hws_flush_all; +static unsigned int hws_oom; +static struct workqueue_struct *hws_wq; + +static unsigned int hws_state; +enum { + HWS_INIT = 1, + HWS_DEALLOCATED, + HWS_STOPPED, + HWS_STARTED, + HWS_STOPPING }; + +/* set to 1 if called by kernel during memory allocation */ +static unsigned char oom_killer_was_active; +/* size of SDBT and SDB as of allocate API */ +static unsigned long num_sdbt = 100; +static unsigned long num_sdb = 511; +/* sampling interval (machine cycles) */ +static unsigned long interval; + +static unsigned long min_sampler_rate; +static unsigned long max_sampler_rate; + /* Execute the instruction encoded as .insn s,0xB2870000 on @buffer. Returns 0 if the resulting condition code is 0, -EINVAL otherwise; cc is preset to 1 so that a fault fixed up through EX_TABLE also yields -EINVAL. */ +static int ssctl(void *buffer) +{ + int cc; + + /* set in order to detect a program check */ + cc = 1; + + asm volatile( + "0: .insn s,0xB2870000,0(%1)\n" + "1: ipm %0\n" + " srl %0,28\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "+d" (cc), "+a" (buffer) + : "m" (*((struct hws_ssctl_request_block *)buffer)) + : "cc", "memory"); + + return cc ? 
-EINVAL : 0 ; +} + /* Execute the instruction encoded as .insn s,0xB2860000 on @buffer; returns 0 on success, -EINVAL otherwise. */ +static int qsi(void *buffer) +{ + int cc; + cc = 1; + /* NOTE(review): cc is an output-only (=d) operand here, unlike the read-write (+d) operand in ssctl(), so the preset value of 1 is not guaranteed to reach the fixup path - confirm */ + asm volatile( + "0: .insn s,0xB2860000,0(%1)\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "=d" (cc), "+a" (buffer) + : "m" (*((struct hws_qsi_info_block *)buffer)) + : "cc", "memory"); + + return cc ? -EINVAL : 0; +} + /* smp_call_function_single() callback: run qsi() on the buffer in @parms and store the result in its rc field. */ +static void execute_qsi(void *parms) +{ + struct hws_execute_parms *ep = parms; + + ep->rc = qsi(ep->buffer); +} + /* smp_call_function_single() callback: run ssctl() on the buffer in @parms and store the result in its rc field. */ +static void execute_ssctl(void *parms) +{ + struct hws_execute_parms *ep = parms; + + ep->rc = ssctl(ep->buffer); +} + /* Clear es and cs in the SSCTL request block of @cpu, run ssctl() there, then re-read the state with qsi() and complain if es or cs is still set. Returns the ssctl() result. */ +static int smp_ctl_ssctl_stop(int cpu) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.es = 0; + cb->ssctl.cs = 0; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) { + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + dump_stack(); + } + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + if (cb->qsi.es || cb->qsi.cs) { + printk(KERN_EMERG "CPUMF sampling did not stop properly.\n"); + dump_stack(); + } + + return rc; +} + /* Like smp_ctl_ssctl_stop() but leaves es set and only clears cs; complains if qsi() still reports cs. Returns the ssctl() result. */ +static int smp_ctl_ssctl_deactivate(int cpu) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.es = 1; + cb->ssctl.cs = 0; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + if (cb->qsi.cs) + printk(KERN_EMERG "CPUMF sampling was not set inactive.\n"); + + return rc; +} + /* Program the request block of @cpu with the first SDBT, its first entry and @interval, set es and cs, and run ssctl() followed by qsi() on that CPU. Returns the ssctl() result; a qsi() failure is only logged. */ +static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.h = 1; + cb->ssctl.tear = cb->first_sdbt; + cb->ssctl.dear 
= *(unsigned long *) cb->first_sdbt; + cb->ssctl.interval = interval; + cb->ssctl.es = 1; + cb->ssctl.cs = 1; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + if (ep.rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu); + + return rc; +} + /* Run qsi() on @cpu into that CPU's qsi block and return its result. */ +static int smp_ctl_qsi(int cpu) +{ + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + return ep.rc; +} + /* Return the address @v + PAGE_SIZE - sizeof(struct hws_trailer_entry) as an unsigned long pointer. */ +static inline unsigned long *trailer_entry_ptr(unsigned long v) +{ + void *ret; + + ret = (void *)v; + ret += PAGE_SIZE; + ret -= sizeof(struct hws_trailer_entry); + + return (unsigned long *) ret; +} + /* External interrupt handler: for interrupts with CPU_MF_INT_SF_MASK bits set, account the interrupt, OR @param32 into the CPU buffer's ext_params and queue the buffer's worker if the workqueue exists. */ +static void hws_ext_handler(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct hws_cpu_buffer *cb = &__get_cpu_var(sampler_cpu_buffer); + + if (!(param32 & CPU_MF_INT_SF_MASK)) + return; + + kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; + atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); /* NOTE(review): the read and the xchg are two separate atomic operations, so the OR as a whole is not atomic - confirm no concurrent writer can interleave */ + + if (hws_wq) + queue_work(hws_wq, &cb->worker); +} + +static void worker(struct work_struct *work); + +static void add_samples_to_oprofile(unsigned cpu, unsigned long *, + unsigned long *dear); + /* Zero the whole per-CPU sampler buffer of every online CPU. */ +static void init_all_cpu_buffers(void) +{ + int cpu; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + memset(cb, 0, sizeof(struct hws_cpu_buffer)); + } +} + /* Returns 1 if bit 0 of the entry at @s is set, 0 otherwise; get_next_sdbt() masks the same bit off to obtain the linked table address. */ +static int is_link_entry(unsigned long *s) +{ + return *s & 0x1ul ? 
1 : 0; +} + /* Return the entry at @s with bit 0 cleared, as a pointer. */ +static unsigned long *get_next_sdbt(unsigned long *s) +{ + return (unsigned long *) (*s & ~0x1ul); +} + /* Reset the bookkeeping fields of every online CPU's sampler buffer; always returns 0. */ +static int prepare_cpu_buffers(void) +{ + int cpu; + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + atomic_set(&cb->ext_params, 0); + cb->worker_entry = 0; + cb->sample_overflow = 0; + cb->req_alert = 0; + cb->incorrect_sdbt_entry = 0; + cb->invalid_entry_address = 0; + cb->loss_of_sample_data = 0; + cb->sample_auth_change_alert = 0; + cb->finish = 0; + cb->oom = 0; + cb->stop_mode = 0; + } + + return rc; +} + +/* + * allocate_sdbt() - allocate sampler memory + * @cpu: the cpu for which sampler memory is allocated + * + * A 4K page is allocated for each requested SDBT. + * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs. + * Set ALERT_REQ mask in each SDBs trailer. + * Returns zero if successful, <0 otherwise. + */ +static int allocate_sdbt(int cpu) +{ + int j, k, rc; + unsigned long *sdbt; + unsigned long sdb; + unsigned long *tail; + unsigned long *trailer; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (cb->first_sdbt) + return -EINVAL; + + sdbt = NULL; + tail = sdbt; + + for (j = 0; j < num_sdbt; j++) { + sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); + + mutex_lock(&hws_sem_oom); + /* OOM killer might have been activated */ + barrier(); + if (oom_killer_was_active || !sdbt) { + if (sdbt) + free_page((unsigned long)sdbt); + + goto allocate_sdbt_error; + } + if (cb->first_sdbt == 0) + cb->first_sdbt = (unsigned long)sdbt; + + /* link current page to tail of chain */ + if (tail) + *tail = (unsigned long)(void *)sdbt + 1; + + mutex_unlock(&hws_sem_oom); + + for (k = 0; k < num_sdb; k++) { + /* get and set SDB page */ + sdb = get_zeroed_page(GFP_KERNEL); + + mutex_lock(&hws_sem_oom); + /* OOM killer might have been activated */ + barrier(); + if (oom_killer_was_active || !sdb) { + if (sdb) + free_page(sdb); + + 
goto allocate_sdbt_error; + } + *sdbt = sdb; + trailer = trailer_entry_ptr(*sdbt); + *trailer = ALERT_REQ_MASK; + sdbt++; + mutex_unlock(&hws_sem_oom); + } + tail = sdbt; + } + mutex_lock(&hws_sem_oom); + if (oom_killer_was_active) + goto allocate_sdbt_error; + + rc = 0; + if (tail) + *tail = (unsigned long) + ((void *)cb->first_sdbt) + 1; + +allocate_sdbt_exit: + mutex_unlock(&hws_sem_oom); + return rc; + +allocate_sdbt_error: + rc = -ENOMEM; + goto allocate_sdbt_exit; +} + +/* + * deallocate_sdbt() - deallocate all sampler memory + * + * For each online CPU all SDBT trees are deallocated. + * Returns the number of freed pages. + */ +static int deallocate_sdbt(void) +{ + int cpu; + int counter; + + counter = 0; + + for_each_online_cpu(cpu) { + unsigned long start; + unsigned long sdbt; + unsigned long *curr; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (!cb->first_sdbt) + continue; + + sdbt = cb->first_sdbt; + curr = (unsigned long *) sdbt; + start = sdbt; + + /* we'll free the SDBT after all SDBs are processed... 
*/ + while (1) { + if (!*curr || !sdbt) + break; + + /* watch for link entry reset if found */ + if (is_link_entry(curr)) { + curr = get_next_sdbt(curr); + if (sdbt) + free_page(sdbt); + + /* we are done if we reach the start */ + if ((unsigned long) curr == start) + break; + else + sdbt = (unsigned long) curr; + } else { + /* process SDB pointer */ + if (*curr) { + free_page(*curr); + curr++; + } + } + counter++; + } + cb->first_sdbt = 0; + } + return counter; +} + +static int start_sampling(int cpu) +{ + int rc; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_ssctl_enable_activate(cpu, interval); + if (rc) { + printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu); + goto start_exit; + } + + rc = -EINVAL; + if (!cb->qsi.es) { + printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu); + goto start_exit; + } + + if (!cb->qsi.cs) { + printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu); + goto start_exit; + } + + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n", + cpu, interval); + + rc = 0; + +start_exit: + return rc; +} + +static int stop_sampling(int cpu) +{ + unsigned long v; + int rc; + struct hws_cpu_buffer *cb; + + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (!rc && !cb->qsi.es) + printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu); + + rc = smp_ctl_ssctl_stop(cpu); + if (rc) { + printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n", + cpu, rc); + goto stop_exit; + } + + printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu); + +stop_exit: + v = cb->req_alert; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert," + " count=%lu.\n", cpu, v); + + v = cb->loss_of_sample_data; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data," + " count=%lu.\n", cpu, v); + + v = cb->invalid_entry_address; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry 
address," + " count=%lu.\n", cpu, v); + + v = cb->incorrect_sdbt_entry; + if (v) + printk(KERN_ERR + "hwsampler: CPU %d CPUMF Incorrect SDBT address," + " count=%lu.\n", cpu, v); + + v = cb->sample_auth_change_alert; + if (v) + printk(KERN_ERR + "hwsampler: CPU %d CPUMF Sample authorization change," + " count=%lu.\n", cpu, v); + + return rc; +} + +static int check_hardware_prerequisites(void) +{ + if (!test_facility(68)) + return -EOPNOTSUPP; + return 0; +} +/* + * hws_oom_callback() - the OOM callback function + * + * In case the callback is invoked during memory allocation for the + * hw sampler, all obtained memory is deallocated and a flag is set + * so main sampler memory allocation can exit with a failure code. + * In case the callback is invoked during sampling the hw sampler + * is deactivated for all CPUs. + */ +static int hws_oom_callback(struct notifier_block *nfb, + unsigned long dummy, void *parm) +{ + unsigned long *freed; + int cpu; + struct hws_cpu_buffer *cb; + + freed = parm; + + mutex_lock(&hws_sem_oom); + + if (hws_state == HWS_DEALLOCATED) { + /* during memory allocation */ + if (oom_killer_was_active == 0) { + oom_killer_was_active = 1; + *freed += deallocate_sdbt(); + } + } else { + int i; + cpu = get_cpu(); + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (!cb->oom) { + for_each_online_cpu(i) { + smp_ctl_ssctl_deactivate(i); + cb->oom = 1; + } + cb->finish = 1; + + printk(KERN_INFO + "hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n", + cpu); + } + } + + mutex_unlock(&hws_sem_oom); + + return NOTIFY_OK; +} + +static struct notifier_block hws_oom_notifier = { + .notifier_call = hws_oom_callback +}; + +static int hws_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + /* We do not have sampler space available for all possible CPUs. + All CPUs should be online when hw sampling is activated. */ + return (hws_state <= HWS_DEALLOCATED) ? 
NOTIFY_OK : NOTIFY_BAD; +} + +static struct notifier_block hws_cpu_notifier = { + .notifier_call = hws_cpu_callback +}; + +/** + * hwsampler_deactivate() - set hardware sampling temporarily inactive + * @cpu: specifies the CPU to be set inactive. + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_deactivate(unsigned int cpu) +{ + /* + * Deactivate hw sampling temporarily and flush the buffer + * by pushing all the pending samples to oprofile buffer. + * + * This function can be called under one of the following conditions: + * Memory unmap, task is exiting. + */ + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + mutex_lock(&hws_sem); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (hws_state == HWS_STARTED) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (cb->qsi.cs) { + rc = smp_ctl_ssctl_deactivate(cpu); + if (rc) { + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu); + cb->finish = 1; + hws_state = HWS_STOPPING; + } else { + hws_flush_all = 1; + /* Add work to queue to read pending samples.*/ + queue_work_on(cpu, hws_wq, &cb->worker); + } + } + } + mutex_unlock(&hws_sem); + + if (hws_wq) + flush_workqueue(hws_wq); + + return rc; +} + +/** + * hwsampler_activate() - activate/resume hardware sampling which was deactivated + * @cpu: specifies the CPU to be set active. + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_activate(unsigned int cpu) +{ + /* + * Re-activate hw sampling. This should be called in pair with + * hwsampler_deactivate(). 
+ */ + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + mutex_lock(&hws_sem); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (hws_state == HWS_STARTED) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (!cb->qsi.cs) { + hws_flush_all = 0; + rc = smp_ctl_ssctl_enable_activate(cpu, interval); + if (rc) { + printk(KERN_ERR + "CPU %d, CPUMF activate sampling failed.\n", + cpu); + } + } + } + + mutex_unlock(&hws_sem); + + return rc; +} + +static int check_qsi_on_setup(void) +{ + int rc; + unsigned int cpu; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (rc) + return -EOPNOTSUPP; + + if (!cb->qsi.as) { + printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n"); + return -EINVAL; + } + + if (cb->qsi.es) { + printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n"); + rc = smp_ctl_ssctl_stop(cpu); + if (rc) + return -EINVAL; + + printk(KERN_INFO + "CPU %d, CPUMF Sampling stopped now.\n", cpu); + } + } + return 0; +} + +static int check_qsi_on_start(void) +{ + unsigned int cpu; + int rc; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + + if (!cb->qsi.as) + return -EINVAL; + + if (cb->qsi.es) + return -EINVAL; + + if (cb->qsi.cs) + return -EINVAL; + } + return 0; +} + +static void worker_on_start(unsigned int cpu) +{ + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + cb->worker_entry = cb->first_sdbt; +} + +static int worker_check_error(unsigned int cpu, int ext_params) +{ + int rc; + unsigned long *sdbt; + struct hws_cpu_buffer *cb; + + rc = 0; + cb = &per_cpu(sampler_cpu_buffer, cpu); + sdbt = (unsigned long *) cb->worker_entry; + + if (!sdbt || !*sdbt) + return -EINVAL; + + if (ext_params & CPU_MF_INT_SF_PRA) + cb->req_alert++; + + if (ext_params & CPU_MF_INT_SF_LSDA) + cb->loss_of_sample_data++; + + if (ext_params & CPU_MF_INT_SF_IAE) { + 
cb->invalid_entry_address++; + rc = -EINVAL; + } + + if (ext_params & CPU_MF_INT_SF_ISE) { + cb->incorrect_sdbt_entry++; + rc = -EINVAL; + } + + if (ext_params & CPU_MF_INT_SF_SACA) { + cb->sample_auth_change_alert++; + rc = -EINVAL; + } + + return rc; +} + +static void worker_on_finish(unsigned int cpu) +{ + int rc, i; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (cb->finish) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (cb->qsi.es) { + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n", + cpu); + rc = smp_ctl_ssctl_stop(cpu); + if (rc) + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Deactivation failed.\n", + cpu); + + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (!cb->finish) { + cb->finish = 1; + queue_work_on(i, hws_wq, + &cb->worker); + } + } + } + } +} + +static void worker_on_interrupt(unsigned int cpu) +{ + unsigned long *sdbt; + unsigned char done; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + sdbt = (unsigned long *) cb->worker_entry; + + done = 0; + /* do not proceed if stop was entered, + * forget the buffers not yet processed */ + while (!done && !cb->stop_mode) { + unsigned long *trailer; + struct hws_trailer_entry *te; + unsigned long *dear = 0; + + trailer = trailer_entry_ptr(*sdbt); + /* leave loop if no more work to do */ + if (!(*trailer & BUFFER_FULL_MASK)) { + done = 1; + if (!hws_flush_all) + continue; + } + + te = (struct hws_trailer_entry *)trailer; + cb->sample_overflow += te->overflow; + + add_samples_to_oprofile(cpu, sdbt, dear); + + /* reset trailer */ + xchg((unsigned char *) te, 0x40); + + /* advance to next sdb slot in current sdbt */ + sdbt++; + /* in case link bit is set use address w/o link bit */ + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + cb->worker_entry = (unsigned long)sdbt; + } +} + +static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, + unsigned long *dear) +{ + struct hws_data_entry 
*sample_data_ptr; + unsigned long *trailer; + + trailer = trailer_entry_ptr(*sdbt); + if (dear) { + if (dear > trailer) + return; + trailer = dear; + } + + sample_data_ptr = (struct hws_data_entry *)(*sdbt); + + while ((unsigned long *)sample_data_ptr < trailer) { + struct pt_regs *regs = NULL; + struct task_struct *tsk = NULL; + + /* + * Check sampling mode, 1 indicates basic (=customer) sampling + * mode. + */ + if (sample_data_ptr->def != 1) { + /* sample slot is not yet written */ + break; + } else { + /* make sure we don't use it twice, + * the next time the sampler will set it again */ + sample_data_ptr->def = 0; + } + + /* Get pt_regs. */ + if (sample_data_ptr->P == 1) { + /* userspace sample */ + unsigned int pid = sample_data_ptr->prim_asn; + if (!counter_config.user) + goto skip_sample; + rcu_read_lock(); + tsk = pid_task(find_vpid(pid), PIDTYPE_PID); + if (tsk) + regs = task_pt_regs(tsk); + rcu_read_unlock(); + } else { + /* kernelspace sample */ + if (!counter_config.kernel) + goto skip_sample; + regs = task_pt_regs(current); + } + + mutex_lock(&hws_sem); + oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, + !sample_data_ptr->P, tsk); + mutex_unlock(&hws_sem); + skip_sample: + sample_data_ptr++; + } +} + +static void worker(struct work_struct *work) +{ + unsigned int cpu; + int ext_params; + struct hws_cpu_buffer *cb; + + cb = container_of(work, struct hws_cpu_buffer, worker); + cpu = smp_processor_id(); + ext_params = atomic_xchg(&cb->ext_params, 0); + + if (!cb->worker_entry) + worker_on_start(cpu); + + if (worker_check_error(cpu, ext_params)) + return; + + if (!cb->finish) + worker_on_interrupt(cpu); + + if (cb->finish) + worker_on_finish(cpu); +} + +/** + * hwsampler_allocate() - allocate memory for the hardware sampler + * @sdbt: number of SDBTs per online CPU (must be > 0) + * @sdb: number of SDBs per SDBT (minimum 1, maximum 511) + * + * Returns 0 on success, !0 on failure. 
+ */ +int hwsampler_allocate(unsigned long sdbt, unsigned long sdb) +{ + int cpu, rc; + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state != HWS_DEALLOCATED) + goto allocate_exit; + + if (sdbt < 1) + goto allocate_exit; + + if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB) + goto allocate_exit; + + num_sdbt = sdbt; + num_sdb = sdb; + + oom_killer_was_active = 0; + register_oom_notifier(&hws_oom_notifier); + + for_each_online_cpu(cpu) { + if (allocate_sdbt(cpu)) { + unregister_oom_notifier(&hws_oom_notifier); + goto allocate_error; + } + } + unregister_oom_notifier(&hws_oom_notifier); + if (oom_killer_was_active) + goto allocate_error; + + hws_state = HWS_STOPPED; + rc = 0; + +allocate_exit: + mutex_unlock(&hws_sem); + return rc; + +allocate_error: + rc = -ENOMEM; + printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n"); + goto allocate_exit; +} + +/** + * hwsampler_deallocate() - deallocate hardware sampler memory + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_deallocate(void) +{ + int rc; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state != HWS_STOPPED) + goto deallocate_exit; + + measurement_alert_subclass_unregister(); + deallocate_sdbt(); + + hws_state = HWS_DEALLOCATED; + rc = 0; + +deallocate_exit: + mutex_unlock(&hws_sem); + + return rc; +} + +unsigned long hwsampler_query_min_interval(void) +{ + return min_sampler_rate; +} + +unsigned long hwsampler_query_max_interval(void) +{ + return max_sampler_rate; +} + +unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) +{ + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + return cb->sample_overflow; +} + +int hwsampler_setup(void) +{ + int rc; + int cpu; + struct hws_cpu_buffer *cb; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state) + goto setup_exit; + + hws_state = HWS_INIT; + + init_all_cpu_buffers(); + + rc = check_hardware_prerequisites(); + if (rc) + goto setup_exit; + + rc = check_qsi_on_setup(); + if (rc) + goto 
setup_exit; + + rc = -EINVAL; + hws_wq = create_workqueue("hwsampler"); + if (!hws_wq) + goto setup_exit; + + register_cpu_notifier(&hws_cpu_notifier); + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + INIT_WORK(&cb->worker, worker); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (min_sampler_rate != cb->qsi.min_sampl_rate) { + if (min_sampler_rate) { + printk(KERN_WARNING + "hwsampler: different min sampler rate values.\n"); + if (min_sampler_rate < cb->qsi.min_sampl_rate) + min_sampler_rate = + cb->qsi.min_sampl_rate; + } else + min_sampler_rate = cb->qsi.min_sampl_rate; + } + if (max_sampler_rate != cb->qsi.max_sampl_rate) { + if (max_sampler_rate) { + printk(KERN_WARNING + "hwsampler: different max sampler rate values.\n"); + if (max_sampler_rate > cb->qsi.max_sampl_rate) + max_sampler_rate = + cb->qsi.max_sampl_rate; + } else + max_sampler_rate = cb->qsi.max_sampl_rate; + } + } + register_external_interrupt(0x1407, hws_ext_handler); + + hws_state = HWS_DEALLOCATED; + rc = 0; + +setup_exit: + mutex_unlock(&hws_sem); + return rc; +} + +int hwsampler_shutdown(void) +{ + int rc; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) { + mutex_unlock(&hws_sem); + + if (hws_wq) + flush_workqueue(hws_wq); + + mutex_lock(&hws_sem); + + if (hws_state == HWS_STOPPED) { + measurement_alert_subclass_unregister(); + deallocate_sdbt(); + } + if (hws_wq) { + destroy_workqueue(hws_wq); + hws_wq = NULL; + } + + unregister_external_interrupt(0x1407, hws_ext_handler); + hws_state = HWS_INIT; + rc = 0; + } + mutex_unlock(&hws_sem); + + unregister_cpu_notifier(&hws_cpu_notifier); + + return rc; +} + +/** + * hwsampler_start_all() - start hardware sampling on all online CPUs + * @rate: specifies the used interval when samples are taken + * + * Returns 0 on success, !0 on failure. 
+ */ +int hwsampler_start_all(unsigned long rate) +{ + int rc, cpu; + + mutex_lock(&hws_sem); + + hws_oom = 0; + + rc = -EINVAL; + if (hws_state != HWS_STOPPED) + goto start_all_exit; + + interval = rate; + + /* fail if rate is not valid */ + if (interval < min_sampler_rate || interval > max_sampler_rate) + goto start_all_exit; + + rc = check_qsi_on_start(); + if (rc) + goto start_all_exit; + + rc = prepare_cpu_buffers(); + if (rc) + goto start_all_exit; + + for_each_online_cpu(cpu) { + rc = start_sampling(cpu); + if (rc) + break; + } + if (rc) { + for_each_online_cpu(cpu) { + stop_sampling(cpu); + } + goto start_all_exit; + } + hws_state = HWS_STARTED; + rc = 0; + +start_all_exit: + mutex_unlock(&hws_sem); + + if (rc) + return rc; + + register_oom_notifier(&hws_oom_notifier); + hws_oom = 1; + hws_flush_all = 0; + /* now let them in, 1407 CPUMF external interrupts */ + measurement_alert_subclass_register(); + + return 0; +} + +/** + * hwsampler_stop_all() - stop hardware sampling on all online CPUs + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_stop_all(void) +{ + int tmp_rc, rc, cpu; + struct hws_cpu_buffer *cb; + + mutex_lock(&hws_sem); + + rc = 0; + if (hws_state == HWS_INIT) { + mutex_unlock(&hws_sem); + return rc; + } + hws_state = HWS_STOPPING; + mutex_unlock(&hws_sem); + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + cb->stop_mode = 1; + tmp_rc = stop_sampling(cpu); + if (tmp_rc) + rc = tmp_rc; + } + + if (hws_wq) + flush_workqueue(hws_wq); + + mutex_lock(&hws_sem); + if (hws_oom) { + unregister_oom_notifier(&hws_oom_notifier); + hws_oom = 0; + } + hws_state = HWS_STOPPED; + mutex_unlock(&hws_sem); + + return rc; +} diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h new file mode 100644 index 00000000..1912f3bb --- /dev/null +++ b/arch/s390/oprofile/hwsampler.h @@ -0,0 +1,113 @@ +/* + * CPUMF HW sampler functions and internal structures + * + * Copyright IBM Corp. 
2010 + * Author(s): Heinz Graalfs <graalfs@de.ibm.com> + */ + +#ifndef HWSAMPLER_H_ +#define HWSAMPLER_H_ + +#include <linux/workqueue.h> + +struct hws_qsi_info_block /* QUERY SAMPLING information block */ +{ /* Bit(s) */ + unsigned int b0_13:14; /* 0-13: zeros */ + unsigned int as:1; /* 14: sampling authorisation control*/ + unsigned int b15_21:7; /* 15-21: zeros */ + unsigned int es:1; /* 22: sampling enable control */ + unsigned int b23_29:7; /* 23-29: zeros */ + unsigned int cs:1; /* 30: sampling activation control */ + unsigned int:1; /* 31: reserved */ + unsigned int bsdes:16; /* 4-5: size of sampling entry */ + unsigned int:16; /* 6-7: reserved */ + unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ + unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ + unsigned long tear; /* 24-31: TEAR contents */ + unsigned long dear; /* 32-39: DEAR contents */ + unsigned int rsvrd0; /* 40-43: reserved */ + unsigned int cpu_speed; /* 44-47: CPU speed */ + unsigned long long rsvrd1; /* 48-55: reserved */ + unsigned long long rsvrd2; /* 56-63: reserved */ +}; + +struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */ +{ /* bytes 0 - 7 Bit(s) */ + unsigned int s:1; /* 0: maximum buffer indicator */ + unsigned int h:1; /* 1: part. 
level reserved for VM use*/ + unsigned long b2_53:52; /* 2-53: zeros */ + unsigned int es:1; /* 54: sampling enable control */ + unsigned int b55_61:7; /* 55-61: - zeros */ + unsigned int cs:1; /* 62: sampling activation control */ + unsigned int b63:1; /* 63: zero */ + unsigned long interval; /* 8-15: sampling interval */ + unsigned long tear; /* 16-23: TEAR contents */ + unsigned long dear; /* 24-31: DEAR contents */ + /* 32-63: */ + unsigned long rsvrd1; /* reserved */ + unsigned long rsvrd2; /* reserved */ + unsigned long rsvrd3; /* reserved */ + unsigned long rsvrd4; /* reserved */ +}; + +struct hws_cpu_buffer { + unsigned long first_sdbt; /* @ of 1st SDB-Table for this CP*/ + unsigned long worker_entry; + unsigned long sample_overflow; /* taken from SDB ... */ + struct hws_qsi_info_block qsi; + struct hws_ssctl_request_block ssctl; + struct work_struct worker; + atomic_t ext_params; + unsigned long req_alert; + unsigned long loss_of_sample_data; + unsigned long invalid_entry_address; + unsigned long incorrect_sdbt_entry; + unsigned long sample_auth_change_alert; + unsigned int finish:1; + unsigned int oom:1; + unsigned int stop_mode:1; +}; + +struct hws_data_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:4; /* 16-19 reserved */ + unsigned int U:4; /* 20-23 Number of unique instruct. */ + unsigned int z:2; /* zeros */ + unsigned int T:1; /* 26 PSW DAT mode */ + unsigned int W:1; /* 27 PSW wait state */ + unsigned int P:1; /* 28 PSW Problem state */ + unsigned int AS:2; /* 29-30 PSW address-space control */ + unsigned int I:1; /* 31 entry valid or invalid */ + unsigned int:16; + unsigned int prim_asn:16; /* primary ASN */ + unsigned long long ia; /* Instruction Address */ + unsigned long long lpp; /* Logical-Partition Program Param. */ + unsigned long long vpp; /* Virtual-Machine Program Param. 
*/ +}; + +struct hws_trailer_entry { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned long:62; /* 2 - 63: Reserved */ + unsigned long overflow; /* 64 - sample Overflow count */ + unsigned long timestamp; /* 16 - time-stamp */ + unsigned long timestamp1; /* */ + unsigned long reserved1; /* 32 -Reserved */ + unsigned long reserved2; /* */ + unsigned long progusage1; /* 48 - reserved for programming use */ + unsigned long progusage2; /* */ +}; + +int hwsampler_setup(void); +int hwsampler_shutdown(void); +int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); +int hwsampler_deallocate(void); +unsigned long hwsampler_query_min_interval(void); +unsigned long hwsampler_query_max_interval(void); +int hwsampler_start_all(unsigned long interval); +int hwsampler_stop_all(void); +int hwsampler_deactivate(unsigned int cpu); +int hwsampler_activate(unsigned int cpu); +unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu); + +#endif /*HWSAMPLER_H_*/ diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c new file mode 100644 index 00000000..2297be40 --- /dev/null +++ b/arch/s390/oprofile/init.c @@ -0,0 +1,509 @@ +/** + * arch/s390/oprofile/init.c + * + * S390 Version + * Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Thomas Spatzier (tspat@de.ibm.com) + * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) + * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) + * + * @remark Copyright 2002-2011 OProfile authors + */ + +#include <linux/oprofile.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <asm/processor.h> + +#include "../../../drivers/oprofile/oprof.h" + +extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); + +#ifdef CONFIG_64BIT + +#include "hwsampler.h" +#include "op_counter.h" + 
+#define DEFAULT_INTERVAL 4127518 + +#define DEFAULT_SDBT_BLOCKS 1 +#define DEFAULT_SDB_BLOCKS 511 + +static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; +static unsigned long oprofile_min_interval; +static unsigned long oprofile_max_interval; + +static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; +static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; + +static int hwsampler_enabled; +static int hwsampler_running; /* start_mutex must be held to change */ +static int hwsampler_available; + +static struct oprofile_operations timer_ops; + +struct op_counter_config counter_config; + +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, +}; +static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) +{ + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing timer to be returned " + "as cpu type\n"); + } else { + force_cpu_type = 0; + } + + return 0; +} +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); +MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" + "(report cpu_type \"timer\""); + +static int oprofile_hwsampler_start(void) +{ + int retval; + + hwsampler_running = hwsampler_enabled; + + if (!hwsampler_running) + return timer_ops.start(); + + retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); + if (retval) + return retval; + + retval = hwsampler_start_all(oprofile_hw_interval); + if (retval) + hwsampler_deallocate(); + + return retval; +} + +static void oprofile_hwsampler_stop(void) +{ + if (!hwsampler_running) { + timer_ops.stop(); + return; + } + + hwsampler_stop_all(); + hwsampler_deallocate(); + return; +} + +/* + * File ops used for: + * /dev/oprofile/0/enabled + * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) + */ + +static ssize_t hwsampler_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); 
+} + +static ssize_t hwsampler_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_enabled = val; + + return count; +} + +static const struct file_operations hwsampler_fops = { + .read = hwsampler_read, + .write = hwsampler_write, +}; + +/* + * File ops used for: + * /dev/oprofile/0/count + * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) + * + * Make sure that the value is within the hardware range. + */ + +static ssize_t hw_interval_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, + count, offset); +} + +static ssize_t hw_interval_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + if (val < oprofile_min_interval) + oprofile_hw_interval = oprofile_min_interval; + else if (val > oprofile_max_interval) + oprofile_hw_interval = oprofile_max_interval; + else + oprofile_hw_interval = val; + + return count; +} + +static const struct file_operations hw_interval_fops = { + .read = hw_interval_read, + .write = hw_interval_write, +}; + +/* + * File ops used for: + * /dev/oprofile/0/event + * Only a single event with number 0 is supported with this counter. + * + * /dev/oprofile/0/unit_mask + * This is a dummy file needed by the user space tools. + * No value other than 0 is accepted or returned. 
+ */ + +static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(0, buf, count, offset); +} + +static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + if (val != 0) + return -EINVAL; + return count; +} + +static const struct file_operations zero_fops = { + .read = hwsampler_zero_read, + .write = hwsampler_zero_write, +}; + +/* /dev/oprofile/0/kernel file ops. */ + +static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.kernel, + buf, count, offset); +} + +static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.kernel = val; + + return count; +} + +static const struct file_operations kernel_fops = { + .read = hwsampler_kernel_read, + .write = hwsampler_kernel_write, +}; + +/* /dev/oprofile/0/user file ops. 
*/ + +static ssize_t hwsampler_user_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.user, + buf, count, offset); +} + +static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.user = val; + + return count; +} + +static const struct file_operations user_fops = { + .read = hwsampler_user_read, + .write = hwsampler_user_write, +}; + + +/* + * File ops used for: /dev/oprofile/timer/enabled + * The value always has to be the inverted value of hwsampler_enabled. So + * no separate variable is created. That way we do not need locking. + */ + +static ssize_t timer_enabled_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); +} + +static ssize_t timer_enabled_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + /* Timer cannot be disabled without having hardware sampling. 
*/ + if (val == 0 && !hwsampler_available) + return -EINVAL; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_enabled = !val; + + return count; +} + +static const struct file_operations timer_enabled_fops = { + .read = timer_enabled_read, + .write = timer_enabled_write, +}; + + +static int oprofile_create_hwsampling_files(struct super_block *sb, + struct dentry *root) +{ + struct dentry *dir; + + dir = oprofilefs_mkdir(sb, root, "timer"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops); + + if (!hwsampler_available) + return 0; + + /* reinitialize default values */ + hwsampler_enabled = 1; + counter_config.kernel = 1; + counter_config.user = 1; + + if (!force_cpu_type) { + /* + * Create the counter file system. A single virtual + * counter is created which can be used to + * enable/disable hardware sampling dynamically from + * user space. The user space will configure a single + * counter with a single event. The value of 'event' + * and 'unit_mask' are not evaluated by the kernel code + * and can only be set to 0. + */ + + dir = oprofilefs_mkdir(sb, root, "0"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops); + oprofilefs_create_file(sb, dir, "event", &zero_fops); + oprofilefs_create_file(sb, dir, "count", &hw_interval_fops); + oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops); + oprofilefs_create_file(sb, dir, "kernel", &kernel_fops); + oprofilefs_create_file(sb, dir, "user", &user_fops); + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + + } else { + /* + * Hardware sampling can be used but the cpu_type is + * forced to timer in order to deal with legacy user + * space tools. The /dev/oprofile/hwsampling fs is + * provided in that case. 
+ */ + dir = oprofilefs_mkdir(sb, root, "hwsampling"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "hwsampler", + &hwsampler_fops); + oprofilefs_create_file(sb, dir, "hw_interval", + &hw_interval_fops); + oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval", + &oprofile_min_interval); + oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval", + &oprofile_max_interval); + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } + return 0; +} + +static int oprofile_hwsampler_init(struct oprofile_operations *ops) +{ + /* + * Initialize the timer mode infrastructure as well in order + * to be able to switch back dynamically. oprofile_timer_init + * is not supposed to fail. + */ + if (oprofile_timer_init(ops)) + BUG(); + + memcpy(&timer_ops, ops, sizeof(timer_ops)); + ops->create_files = oprofile_create_hwsampling_files; + + /* + * If the user space tools do not support newer cpu types, + * the force_cpu_type module parameter + * can be used to always return \"timer\" as cpu type. + */ + if (force_cpu_type != timer) { + struct cpuid id; + + get_cpu_id (&id); + + switch (id.machine) { + case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; + case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; + default: return -ENODEV; + } + } + + if (hwsampler_setup()) + return -ENODEV; + + /* + * Query the range for the sampling interval from the + * hardware. 
+ */ + oprofile_min_interval = hwsampler_query_min_interval(); + if (oprofile_min_interval == 0) + return -ENODEV; + oprofile_max_interval = hwsampler_query_max_interval(); + if (oprofile_max_interval == 0) + return -ENODEV; + + /* The initial value should be sane */ + if (oprofile_hw_interval < oprofile_min_interval) + oprofile_hw_interval = oprofile_min_interval; + if (oprofile_hw_interval > oprofile_max_interval) + oprofile_hw_interval = oprofile_max_interval; + + printk(KERN_INFO "oprofile: System z hardware sampling " + "facility found.\n"); + + ops->start = oprofile_hwsampler_start; + ops->stop = oprofile_hwsampler_stop; + + return 0; +} + +static void oprofile_hwsampler_exit(void) +{ + hwsampler_shutdown(); +} + +#endif /* CONFIG_64BIT */ + +int __init oprofile_arch_init(struct oprofile_operations *ops) +{ + ops->backtrace = s390_backtrace; + +#ifdef CONFIG_64BIT + + /* + * -ENODEV is not reported to the caller. The module itself + * will use the timer mode sampling as fallback and this is + * always available. + */ + hwsampler_available = oprofile_hwsampler_init(ops) == 0; + + return 0; +#else + return -ENODEV; +#endif +} + +void oprofile_arch_exit(void) +{ +#ifdef CONFIG_64BIT + oprofile_hwsampler_exit(); +#endif +} diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h new file mode 100644 index 00000000..1a8d3ca0 --- /dev/null +++ b/arch/s390/oprofile/op_counter.h @@ -0,0 +1,23 @@ +/** + * arch/s390/oprofile/op_counter.h + * + * Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) + * + * @remark Copyright 2011 OProfile authors + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +struct op_counter_config { + /* `enabled' maps to the hwsampler_file variable. */ + /* `count' maps to the oprofile_hw_interval variable. */ + /* `event' and `unit_mask' are unused. 
*/ + unsigned long kernel; + unsigned long user; +}; + +extern struct op_counter_config counter_config; + +#endif /* OP_COUNTER_H */ |