diff options
Diffstat (limited to 'arch/tile')
204 files changed, 51894 insertions, 0 deletions
diff --git a/arch/tile/Kbuild b/arch/tile/Kbuild new file mode 100644 index 00000000..a9b92271 --- /dev/null +++ b/arch/tile/Kbuild @@ -0,0 +1,3 @@ + +obj-y += kernel/ +obj-y += mm/ diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig new file mode 100644 index 00000000..74239dd7 --- /dev/null +++ b/arch/tile/Kconfig @@ -0,0 +1,383 @@ +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/kconfig-language.txt. + +config TILE + def_bool y + select HAVE_KVM if !TILEGX + select GENERIC_FIND_FIRST_BIT + select USE_GENERIC_SMP_HELPERS + select CC_OPTIMIZE_FOR_SIZE + select HAVE_GENERIC_HARDIRQS + select GENERIC_IRQ_PROBE + select GENERIC_PENDING_IRQ if SMP + select GENERIC_IRQ_SHOW + select HAVE_SYSCALL_WRAPPERS if TILEGX + select SYS_HYPERVISOR + select ARCH_HAVE_NMI_SAFE_CMPXCHG + +# FIXME: investigate whether we need/want these options. +# select HAVE_IOREMAP_PROT +# select HAVE_OPTPROBES +# select HAVE_REGS_AND_STACK_ACCESS_API +# select HAVE_HW_BREAKPOINT +# select PERF_EVENTS +# select HAVE_USER_RETURN_NOTIFIER +# config NO_BOOTMEM +# config ARCH_SUPPORTS_DEBUG_PAGEALLOC +# config HUGETLB_PAGE_SIZE_VARIABLE + +config MMU + def_bool y + +config GENERIC_CSUM + def_bool y + +config SEMAPHORE_SLEEPERS + def_bool y + +config HAVE_ARCH_ALLOC_REMAP + def_bool y + +config HAVE_SETUP_PER_CPU_AREA + def_bool y + +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y + +config SYS_SUPPORTS_HUGETLBFS + def_bool y + +config GENERIC_CLOCKEVENTS + def_bool y + +# FIXME: tilegx can implement a more efficient rwsem. +config RWSEM_GENERIC_SPINLOCK + def_bool y + +# We have a very flat architecture from a migration point of view, +# so save boot time by presetting this (particularly useful on tile-sim). +config DEFAULT_MIGRATION_COST + int + default "10000000" + +# We only support gcc 4.4 and above, so this should work. +config ARCH_SUPPORTS_OPTIMIZED_INLINING + def_bool y + +config ARCH_PHYS_ADDR_T_64BIT + def_bool y + +config ARCH_DMA_ADDR_T_64BIT + def_bool y + +config NEED_DMA_MAP_STATE + def_bool y + +config LOCKDEP_SUPPORT + def_bool y + +config STACKTRACE_SUPPORT + def_bool y + select STACKTRACE + +# We use discontigmem for now; at some point we may want to switch +# to sparsemem (Tilera bug 7996). +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + +config TRACE_IRQFLAGS_SUPPORT + def_bool y + +config STRICT_DEVMEM + def_bool y + +# SMP is required for Tilera Linux. +config SMP + def_bool y + +# Allow checking for compile-time determined overflow errors in +# copy_from_user(). There are still unprovable places in the +# generic code as of 2.6.34, so this option is not really compatible +# with -Werror, which is more useful in general. +config DEBUG_COPY_FROM_USER + def_bool n + +config HVC_TILE + select HVC_DRIVER + def_bool y + +# Please note: TILE-Gx support is not yet finalized; this is +# the preliminary support. TILE-Gx drivers are only provided +# with the alpha or beta test versions for Tilera customers. +config TILEGX + depends on EXPERIMENTAL + bool "Building with TILE-Gx (64-bit) compiler and toolchain" + +config 64BIT + depends on TILEGX + def_bool y + +config ARCH_DEFCONFIG + string + default "arch/tile/configs/tilepro_defconfig" if !TILEGX + default "arch/tile/configs/tilegx_defconfig" if TILEGX + +source "init/Kconfig" + +menu "Tilera-specific configuration" + +config NR_CPUS + int "Maximum number of tiles (2-255)" + range 2 255 + depends on SMP + default "64" + ---help--- + Building with 64 is the recommended value, but a slightly + smaller kernel memory footprint results from using a smaller + value on chips with fewer tiles. + +source "kernel/time/Kconfig" + +source "kernel/Kconfig.hz" + +config KEXEC + bool "kexec system call" + ---help--- + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. It is used + to implement the "mboot" Tilera booter. + + The name comes from the similarity to the exec system call. + +config COMPAT + bool "Support 32-bit TILE-Gx binaries in addition to 64-bit" + depends on TILEGX + select COMPAT_BINFMT_ELF + default y + ---help--- + If enabled, the kernel will support running TILE-Gx binaries + that were built with the -m32 option. + +config SYSVIPC_COMPAT + def_bool y + depends on COMPAT && SYSVIPC + +# We do not currently support disabling HIGHMEM on tile64 and tilepro. +config HIGHMEM + bool # "Support for more than 512 MB of RAM" + default !TILEGX + ---help--- + Linux can use the full amount of RAM in the system by + default. However, the address space of TILE processors is + only 4 Gigabytes large. That means that, if you have a large + amount of physical memory, not all of it can be "permanently + mapped" by the kernel. The physical memory that's not + permanently mapped is called "high memory". + + If you are compiling a kernel which will never run on a + machine with more than 512 MB total physical RAM, answer + "false" here. This will result in the kernel mapping all of + physical memory into the top 1 GB of virtual memory space. + + If unsure, say "true". + +# We do not currently support disabling NUMA. +config NUMA + bool # "NUMA Memory Allocation and Scheduler Support" + depends on SMP && DISCONTIGMEM + default y + ---help--- + NUMA memory allocation is required for TILE processors + unless booting with memory striping enabled in the + hypervisor, or with only a single memory controller. + It is recommended that this option always be enabled. + +config NODES_SHIFT + int "Log base 2 of the max number of memory controllers" + default 2 + depends on NEED_MULTIPLE_NODES + ---help--- + By default, 2, i.e. 2^2 == 4 DDR2 controllers. + In a system with more controllers, this value should be raised. + +choice + depends on !TILEGX + prompt "Memory split" if EXPERT + default VMSPLIT_3G + ---help--- + Select the desired split between kernel and user memory. + + If the address range available to the kernel is less than the + physical memory installed, the remaining memory will be available + as "high memory". Accessing high memory is a little more costly + than low memory, as it needs to be mapped into the kernel first. + Note that increasing the kernel address space limits the range + available to user programs, making the address space there + tighter. Selecting anything other than the default 3G/1G split + will also likely make your kernel incompatible with binary-only + kernel modules. + + If you are not absolutely sure what you are doing, leave this + option alone! + + config VMSPLIT_3_75G + bool "3.75G/0.25G user/kernel split (no kernel networking)" + config VMSPLIT_3_5G + bool "3.5G/0.5G user/kernel split" + config VMSPLIT_3G + bool "3G/1G user/kernel split" + config VMSPLIT_2_75G + bool "2.75G/1.25G user/kernel split (for full 1G low memory)" + config VMSPLIT_2_5G + bool "2.5G/1.5G user/kernel split" + config VMSPLIT_2_25G + bool "2.25G/1.75G user/kernel split" + config VMSPLIT_2G + bool "2G/2G user/kernel split" + config VMSPLIT_1G + bool "1G/3G user/kernel split" +endchoice + +config PAGE_OFFSET + hex + depends on !64BIT + default 0xF0000000 if VMSPLIT_3_75G + default 0xE0000000 if VMSPLIT_3_5G + default 0xB0000000 if VMSPLIT_2_75G + default 0xA0000000 if VMSPLIT_2_5G + default 0x90000000 if VMSPLIT_2_25G + default 0x80000000 if VMSPLIT_2G + default 0x40000000 if VMSPLIT_1G + default 0xC0000000 + +source "mm/Kconfig" + +config CMDLINE_BOOL + bool "Built-in kernel command line" + default n + ---help--- + Allow for specifying boot arguments to the kernel at + build time. On some systems (e.g. embedded ones), it is + necessary or convenient to provide some or all of the + kernel boot arguments with the kernel itself (that is, + to not rely on the boot loader to provide them.) + + To compile command line arguments into the kernel, + set this option to 'Y', then fill in the + the boot arguments in CONFIG_CMDLINE. + + Systems with fully functional boot loaders (e.g. mboot, or + if booting over PCI) should leave this option set to 'N'. + +config CMDLINE + string "Built-in kernel command string" + depends on CMDLINE_BOOL + default "" + ---help--- + Enter arguments here that should be compiled into the kernel + image and used at boot time. If the boot loader provides a + command line at boot time, it is appended to this string to + form the full kernel command line, when the system boots. + + However, you can use the CONFIG_CMDLINE_OVERRIDE option to + change this behavior. + + In most cases, the command line (whether built-in or provided + by the boot loader) should specify the device for the root + file system. + +config CMDLINE_OVERRIDE + bool "Built-in command line overrides boot loader arguments" + default n + depends on CMDLINE_BOOL + ---help--- + Set this option to 'Y' to have the kernel ignore the boot loader + command line, and use ONLY the built-in command line. + + This is used to work around broken boot loaders. This should + be set to 'N' under normal conditions. + +config VMALLOC_RESERVE + hex + default 0x1000000 + +config HARDWALL + bool "Hardwall support to allow access to user dynamic network" + default y + +config KERNEL_PL + int "Processor protection level for kernel" + range 1 2 + default "1" + ---help--- + This setting determines the processor protection level the + kernel will be built to run at. Generally you should use + the default value here. + +endmenu # Tilera-specific configuration + +menu "Bus options" + +config PCI + bool "PCI support" + default y + select PCI_DOMAINS + select GENERIC_PCI_IOMAP + ---help--- + Enable PCI root complex support, so PCIe endpoint devices can + be attached to the Tile chip. Many, but not all, PCI devices + are supported under Tilera's root complex driver. + +config PCI_DOMAINS + bool + +config NO_IOMEM + def_bool !PCI + +config NO_IOPORT + def_bool !PCI + +source "drivers/pci/Kconfig" + +config HOTPLUG + bool "Support for hot-pluggable devices" + ---help--- + Say Y here if you want to plug devices into your computer while + the system is running, and be able to use them quickly. In many + cases, the devices can likewise be unplugged at any time too. + One well-known example of this is USB. + +source "drivers/pci/hotplug/Kconfig" + +endmenu + +menu "Executable file formats" + +# only elf supported +config KCORE_ELF + def_bool y + depends on PROC_FS + +source "fs/Kconfig.binfmt" + +endmenu + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "arch/tile/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" + +source "arch/tile/kvm/Kconfig" diff --git a/arch/tile/Kconfig.debug b/arch/tile/Kconfig.debug new file mode 100644 index 00000000..ddbfc332 --- /dev/null +++ b/arch/tile/Kconfig.debug @@ -0,0 +1,34 @@ +menu "Kernel hacking" + +source "lib/Kconfig.debug" + +config EARLY_PRINTK + bool "Early printk" if EXPERT && DEBUG_KERNEL + default y + help + Write kernel log output directly via the hypervisor console. + + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. For normal operation + it is not recommended because it looks ugly and doesn't cooperate + with klogd/syslogd. You should normally N here, + unless you want to debug such a crash. + +config DEBUG_STACKOVERFLOW + bool "Check for stack overflows" + depends on DEBUG_KERNEL + help + This option will cause messages to be printed if free stack space + drops below a certain limit. + +config DEBUG_EXTRA_FLAGS + string "Additional compiler arguments when building with '-g'" + depends on DEBUG_INFO + default "" + help + Debug info can be large, and flags like + `-femit-struct-debug-baseonly' can reduce the kernel file + size and build time noticeably. Such flags are often + helpful if the main use of debug info is line number info. + +endmenu diff --git a/arch/tile/Makefile b/arch/tile/Makefile new file mode 100644 index 00000000..9520bc5a --- /dev/null +++ b/arch/tile/Makefile @@ -0,0 +1,68 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. Remember to do have actions +# for "archclean" and "archdep" for cleaning up and making dependencies for +# this architecture + +# If building with TILERA_ROOT set (i.e. using the Tilera Multicore +# Development Environment) we can set CROSS_COMPILE based on that. +# If we're not cross-compiling, make sure we're on the right architecture. +# Only bother to test for a few common targets, to avoid useless errors. +ifeq ($(CROSS_COMPILE),) + ifdef TILERA_ROOT + CROSS_COMPILE := $(TILERA_ROOT)/bin/tile- + else + goals := $(if $(MAKECMDGOALS), $(MAKECMDGOALS), all) + ifneq ($(strip $(filter vmlinux modules all,$(goals))),) + HOST_ARCH := $(shell uname -m) + ifneq ($(HOST_ARCH),$(ARCH)) +$(error Set TILERA_ROOT or CROSS_COMPILE when building $(ARCH) on $(HOST_ARCH)) + endif + endif + endif +endif + +ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"") +KBUILD_CFLAGS += $(CONFIG_DEBUG_EXTRA_FLAGS) +endif + +LIBGCC_PATH := \ + $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name) + +# Provide the path to use for "make defconfig". +KBUILD_DEFCONFIG := $(ARCH)_defconfig + +# Used as a file extension when useful, e.g. head_$(BITS).o +# Not needed for (e.g.) "$(CC) -m32" since the compiler automatically +# uses the right default anyway. +export BITS +ifeq ($(CONFIG_TILEGX),y) +BITS := 64 +else +BITS := 32 +endif + +head-y := arch/tile/kernel/head_$(BITS).o + +libs-y += arch/tile/lib/ +libs-y += $(LIBGCC_PATH) + +# See arch/tile/Kbuild for content of core part of the kernel +core-y += arch/tile/ + +ifdef TILERA_ROOT +INSTALL_PATH ?= $(TILERA_ROOT)/tile/boot +endif + +install: + install -D -m 755 vmlinux $(INSTALL_PATH)/vmlinux-$(KERNELRELEASE) + install -D -m 644 .config $(INSTALL_PATH)/config-$(KERNELRELEASE) + install -D -m 644 System.map $(INSTALL_PATH)/System.map-$(KERNELRELEASE) + +define archhelp + echo ' install - install kernel into $(INSTALL_PATH)' +endef diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig new file mode 100644 index 00000000..b8d99aca --- /dev/null +++ b/arch/tile/configs/tilegx_defconfig @@ -0,0 +1,587 @@ +CONFIG_TILEGX=y +CONFIG_EXPERIMENTAL=y +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_FHANDLE=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_AUDIT=y +CONFIG_LOG_BUF_SHIFT=19 +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_CGROUP=y +CONFIG_NAMESPACES=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +CONFIG_CFQ_GROUP_IOSCHED=y +CONFIG_NR_CPUS=100 +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HZ_100=y +CONFIG_PCI_DEBUG=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_STATISTICS=y +CONFIG_NET_KEY=m +CONFIG_NET_KEY_MIGRATE=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_TCP_MD5SIG=y +CONFIG_IPV6=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_PIMSM_V2=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_ZONES=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=m +CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NETFILTER_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NOTRACK=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_VS=m +CONFIG_IP_VS_IPV6=y +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_PROTO_SCTP=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_NF_CONNTRACK_IPV4=m +# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_IP_NF_QUEUE=m +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_IP6_NF_QUEUE=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_BRIDGE_EBT_T_FILTER=m +CONFIG_BRIDGE_EBT_T_NAT=m +CONFIG_BRIDGE_EBT_802_3=m +CONFIG_BRIDGE_EBT_AMONG=m +CONFIG_BRIDGE_EBT_ARP=m +CONFIG_BRIDGE_EBT_IP=m +CONFIG_BRIDGE_EBT_IP6=m +CONFIG_BRIDGE_EBT_LIMIT=m +CONFIG_BRIDGE_EBT_MARK=m +CONFIG_BRIDGE_EBT_PKTTYPE=m +CONFIG_BRIDGE_EBT_STP=m +CONFIG_BRIDGE_EBT_VLAN=m +CONFIG_BRIDGE_EBT_ARPREPLY=m +CONFIG_BRIDGE_EBT_DNAT=m +CONFIG_BRIDGE_EBT_MARK_T=m +CONFIG_BRIDGE_EBT_REDIRECT=m +CONFIG_BRIDGE_EBT_SNAT=m +CONFIG_BRIDGE_EBT_LOG=m +CONFIG_BRIDGE_EBT_ULOG=m +CONFIG_BRIDGE_EBT_NFLOG=m +CONFIG_RDS=m +CONFIG_RDS_TCP=m +CONFIG_BRIDGE=m +CONFIG_NET_DSA=y +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_PHONET=m +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_CLS_IND=y +CONFIG_DCB=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_SX8=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_ATA_OVER_ETH=m +CONFIG_RAID_ATTRS=m +CONFIG_SCSI_TGT=m +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SAS_ATA=y +CONFIG_SCSI_MVSAS=y +# CONFIG_SCSI_MVSAS_DEBUG is not set +CONFIG_SCSI_MVSAS_TASKLET=y +CONFIG_ATA=y +CONFIG_SATA_SIL24=y +# CONFIG_ATA_SFF is not set +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID10=m +CONFIG_MD_RAID456=m +CONFIG_MULTICORE_RAID456=y +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_DEBUG=y +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m +CONFIG_DM_UEVENT=y +CONFIG_FUSION=y +CONFIG_FUSION_SAS=y +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETPOLL_TRAP=y +CONFIG_TUN=y +CONFIG_VETH=m +CONFIG_NET_DSA_MV88E6060=y +CONFIG_NET_DSA_MV88E6131=y +CONFIG_NET_DSA_MV88E6123_61_65=y +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_NET_VENDOR_ADAPTEC is not set +# CONFIG_NET_VENDOR_ALTEON is not set +# CONFIG_NET_VENDOR_AMD is not set +# CONFIG_NET_VENDOR_ATHEROS is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_BROCADE is not set +# CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_CISCO is not set +# CONFIG_NET_VENDOR_DEC is not set +# CONFIG_NET_VENDOR_DLINK is not set +# CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_EXAR is not set +# CONFIG_NET_VENDOR_HP is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MELLANOX is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_MYRI is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NVIDIA is not set +# CONFIG_NET_VENDOR_OKI is not set +# CONFIG_NET_PACKET_ENGINE is not set +# CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_REALTEK is not set +# CONFIG_NET_VENDOR_RDC is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_SILAN is not set +# CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SMSC is not set +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SUN is not set +# CONFIG_NET_VENDOR_TEHUTI is not set +# CONFIG_NET_VENDOR_TI is not set +# CONFIG_TILE_NET is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_WLAN is not set +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_TIMERIOMEM=m +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +# CONFIG_VGA_ARB is not set +# CONFIG_HID_SUPPORT is not set +CONFIG_USB=y +# CONFIG_USB_DEVICE_CLASS is not set +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_USB_LIBUSUAL=y +CONFIG_EDAC=y +CONFIG_EDAC_MM_EDAC=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_TILE=y +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V2=y +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=y +CONFIG_CUSE=m +CONFIG_FSCACHE=m +CONFIG_FSCACHE_STATS=y +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_IOCHARSET="ascii" +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_ECRYPT_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_NFS_V4_1=y +CONFIG_NFS_FSCACHE=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_CIFS_FSCACHE=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +CONFIG_DLM_DEBUG=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_FS=y +CONFIG_HEADERS_CHECK=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_REDUCED=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y +CONFIG_DYNAMIC_DEBUG=y +CONFIG_ASYNC_RAID6_TEST=m +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_NETWORK_XFRM=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_PCRYPT=m +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32C=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ZLIB=m +CONFIG_CRYPTO_LZO=m diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig new file mode 100644 index 00000000..2b1fd318 --- /dev/null +++ b/arch/tile/configs/tilepro_defconfig @@ -0,0 +1,579 @@ +CONFIG_EXPERIMENTAL=y +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_FHANDLE=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_AUDIT=y +CONFIG_LOG_BUF_SHIFT=19 +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_CGROUP=y +CONFIG_NAMESPACES=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +CONFIG_CFQ_GROUP_IOSCHED=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HZ_100=y +CONFIG_PCI_DEBUG=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_STATISTICS=y +CONFIG_NET_KEY=m +CONFIG_NET_KEY_MIGRATE=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_TCP_MD5SIG=y +CONFIG_IPV6=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_PIMSM_V2=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_ZONES=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=m +CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NETFILTER_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NOTRACK=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_VS=m +CONFIG_IP_VS_IPV6=y +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_PROTO_SCTP=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_NF_CONNTRACK_IPV4=m +# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_IP_NF_QUEUE=m +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_IP6_NF_QUEUE=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_BRIDGE_EBT_T_FILTER=m +CONFIG_BRIDGE_EBT_T_NAT=m +CONFIG_BRIDGE_EBT_802_3=m +CONFIG_BRIDGE_EBT_AMONG=m +CONFIG_BRIDGE_EBT_ARP=m +CONFIG_BRIDGE_EBT_IP=m +CONFIG_BRIDGE_EBT_IP6=m +CONFIG_BRIDGE_EBT_LIMIT=m +CONFIG_BRIDGE_EBT_MARK=m +CONFIG_BRIDGE_EBT_PKTTYPE=m +CONFIG_BRIDGE_EBT_STP=m +CONFIG_BRIDGE_EBT_VLAN=m +CONFIG_BRIDGE_EBT_ARPREPLY=m +CONFIG_BRIDGE_EBT_DNAT=m +CONFIG_BRIDGE_EBT_MARK_T=m +CONFIG_BRIDGE_EBT_REDIRECT=m +CONFIG_BRIDGE_EBT_SNAT=m +CONFIG_BRIDGE_EBT_LOG=m +CONFIG_BRIDGE_EBT_ULOG=m +CONFIG_BRIDGE_EBT_NFLOG=m +CONFIG_RDS=m +CONFIG_RDS_TCP=m +CONFIG_BRIDGE=m +CONFIG_NET_DSA=y +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_PHONET=m +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_CLS_IND=y +CONFIG_DCB=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_SX8=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_ATA_OVER_ETH=m +CONFIG_RAID_ATTRS=m +CONFIG_SCSI_TGT=m +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_ATA=y +CONFIG_SATA_SIL24=y +# CONFIG_ATA_SFF is not set +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID10=m +CONFIG_MD_RAID456=m +CONFIG_MULTICORE_RAID456=y +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_DEBUG=y +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m +CONFIG_DM_UEVENT=y +CONFIG_FUSION=y +CONFIG_FUSION_SAS=y +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETPOLL_TRAP=y +CONFIG_TUN=y +CONFIG_VETH=m +CONFIG_NET_DSA_MV88E6060=y +CONFIG_NET_DSA_MV88E6131=y +CONFIG_NET_DSA_MV88E6123_61_65=y +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_NET_VENDOR_ADAPTEC is not set +# CONFIG_NET_VENDOR_ALTEON is not set +# CONFIG_NET_VENDOR_AMD is not set +# CONFIG_NET_VENDOR_ATHEROS is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_BROCADE is not set +# CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_CISCO is not set +# CONFIG_NET_VENDOR_DEC is not set +# CONFIG_NET_VENDOR_DLINK is not set +# CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_EXAR is not set +# CONFIG_NET_VENDOR_HP is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MELLANOX is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_MYRI is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NVIDIA is not set +# CONFIG_NET_VENDOR_OKI is not set +# CONFIG_NET_PACKET_ENGINE is not set +# CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_REALTEK is not set +# CONFIG_NET_VENDOR_RDC is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_SILAN is not set +# CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SMSC is not set +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SUN is not set +# CONFIG_NET_VENDOR_TEHUTI is not set +# CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_WLAN is not set +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_TIMERIOMEM=m +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +# CONFIG_VGA_ARB is not set +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_EDAC=y +CONFIG_EDAC_MM_EDAC=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_TILE=y +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V2=y +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=y +CONFIG_CUSE=m +CONFIG_FSCACHE=m +CONFIG_FSCACHE_STATS=y +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_IOCHARSET="ascii" +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_ECRYPT_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_NFS_V4_1=y +CONFIG_NFS_FSCACHE=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_CIFS_FSCACHE=y +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +CONFIG_DLM_DEBUG=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +CONFIG_FRAME_WARN=2048 +CONFIG_MAGIC_SYSRQ=y +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_FS=y +CONFIG_HEADERS_CHECK=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_REDUCED=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y +CONFIG_DYNAMIC_DEBUG=y +CONFIG_ASYNC_RAID6_TEST=m +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_NETWORK_XFRM=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_PCRYPT=m +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32C=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ZLIB=m +CONFIG_CRYPTO_LZO=m +CONFIG_CRC_CCITT=m +CONFIG_CRC7=m diff --git a/arch/tile/include/arch/Kbuild b/arch/tile/include/arch/Kbuild new file mode 100644 index 00000000..9c0ea24c --- /dev/null +++ b/arch/tile/include/arch/Kbuild @@ -0,0 +1,17 @@ +header-y += abi.h +header-y += chip.h +header-y += chip_tile64.h +header-y += chip_tilegx.h +header-y += chip_tilepro.h +header-y += icache.h +header-y += interrupts.h +header-y += interrupts_32.h +header-y += interrupts_64.h +header-y += opcode.h +header-y += opcode_tilegx.h +header-y += opcode_tilepro.h +header-y += sim.h +header-y += sim_def.h +header-y += spr_def.h +header-y += spr_def_32.h +header-y += spr_def_64.h diff --git a/arch/tile/include/arch/abi.h b/arch/tile/include/arch/abi.h new file mode 100644 index 00000000..c55a3d43 --- /dev/null +++ b/arch/tile/include/arch/abi.h @@ -0,0 +1,141 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file + * + * ABI-related register definitions. + */ + +#ifndef __ARCH_ABI_H__ + +#if !defined __need_int_reg_t && !defined __DOXYGEN__ +# define __ARCH_ABI_H__ +# include <arch/chip.h> +#endif + +/* Provide the basic machine types. */ +#ifndef __INT_REG_BITS + +/** Number of bits in a register. */ +#if defined __tilegx__ +# define __INT_REG_BITS 64 +#elif defined __tilepro__ +# define __INT_REG_BITS 32 +#elif !defined __need_int_reg_t +# include <arch/chip.h> +# define __INT_REG_BITS CHIP_WORD_SIZE() +#else +# error Unrecognized architecture with __need_int_reg_t +#endif + +#if __INT_REG_BITS == 64 + +#ifndef __ASSEMBLER__ +/** Unsigned type that can hold a register. */ +typedef unsigned long long __uint_reg_t; + +/** Signed type that can hold a register. */ +typedef long long __int_reg_t; +#endif + +/** String prefix to use for printf(). */ +#define __INT_REG_FMT "ll" + +#else + +#ifndef __ASSEMBLER__ +/** Unsigned type that can hold a register. */ +typedef unsigned long __uint_reg_t; + +/** Signed type that can hold a register. */ +typedef long __int_reg_t; +#endif + +/** String prefix to use for printf(). */ +#define __INT_REG_FMT "l" + +#endif +#endif /* __INT_REG_BITS */ + + +#ifndef __need_int_reg_t + + +#ifndef __ASSEMBLER__ +/** Unsigned type that can hold a register. */ +typedef __uint_reg_t uint_reg_t; + +/** Signed type that can hold a register. */ +typedef __int_reg_t int_reg_t; +#endif + +/** String prefix to use for printf(). */ +#define INT_REG_FMT __INT_REG_FMT + +/** Number of bits in a register. */ +#define INT_REG_BITS __INT_REG_BITS + + +/* Registers 0 - 55 are "normal", but some perform special roles. */ + +#define TREG_FP 52 /**< Frame pointer. */ +#define TREG_TP 53 /**< Thread pointer. */ +#define TREG_SP 54 /**< Stack pointer. */ +#define TREG_LR 55 /**< Link to calling function PC. */ + +/** Index of last normal general-purpose register. */ +#define TREG_LAST_GPR 55 + +/* Registers 56 - 62 are "special" network registers. */ + +#define TREG_SN 56 /**< Static network access. */ +#define TREG_IDN0 57 /**< IDN demux 0 access. */ +#define TREG_IDN1 58 /**< IDN demux 1 access. */ +#define TREG_UDN0 59 /**< UDN demux 0 access. */ +#define TREG_UDN1 60 /**< UDN demux 1 access. */ +#define TREG_UDN2 61 /**< UDN demux 2 access. */ +#define TREG_UDN3 62 /**< UDN demux 3 access. */ + +/* Register 63 is the "special" zero register. */ + +#define TREG_ZERO 63 /**< "Zero" register; always reads as "0". */ + + +/** By convention, this register is used to hold the syscall number. */ +#define TREG_SYSCALL_NR 10 + +/** Name of register that holds the syscall number, for use in assembly. */ +#define TREG_SYSCALL_NR_NAME r10 + + +/** + * The ABI requires callers to allocate a caller state save area of + * this many bytes at the bottom of each stack frame. + */ +#define C_ABI_SAVE_AREA_SIZE (2 * (INT_REG_BITS / 8)) + +/** + * The operand to an 'info' opcode directing the backtracer to not + * try to find the calling frame. + */ +#define INFO_OP_CANNOT_BACKTRACE 2 + + +#endif /* !__need_int_reg_t */ + +/* Make sure we later can get all the definitions and declarations. */ +#undef __need_int_reg_t + +#endif /* !__ARCH_ABI_H__ */ diff --git a/arch/tile/include/arch/chip.h b/arch/tile/include/arch/chip.h new file mode 100644 index 00000000..926d3db0 --- /dev/null +++ b/arch/tile/include/arch/chip.h @@ -0,0 +1,23 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#if __tile_chip__ == 0 +#include <arch/chip_tile64.h> +#elif __tile_chip__ == 1 +#include <arch/chip_tilepro.h> +#elif defined(__tilegx__) +#include <arch/chip_tilegx.h> +#else +#error Unexpected Tilera chip type +#endif diff --git a/arch/tile/include/arch/chip_tile64.h b/arch/tile/include/arch/chip_tile64.h new file mode 100644 index 00000000..261aaba0 --- /dev/null +++ b/arch/tile/include/arch/chip_tile64.h @@ -0,0 +1,258 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* + * @file + * Global header file. + * This header file specifies defines for TILE64. + */ + +#ifndef __ARCH_CHIP_H__ +#define __ARCH_CHIP_H__ + +/** Specify chip version. + * When possible, prefer the CHIP_xxx symbols below for future-proofing. + * This is intended for cross-compiling; native compilation should + * use the predefined __tile_chip__ symbol. + */ +#define TILE_CHIP 0 + +/** Specify chip revision. + * This provides for the case of a respin of a particular chip type; + * the normal value for this symbol is "0". + * This is intended for cross-compiling; native compilation should + * use the predefined __tile_chip_rev__ symbol. + */ +#define TILE_CHIP_REV 0 + +/** The name of this architecture. */ +#define CHIP_ARCH_NAME "tile64" + +/** The ELF e_machine type for binaries for this chip. */ +#define CHIP_ELF_TYPE() EM_TILE64 + +/** The alternate ELF e_machine type for binaries for this chip. */ +#define CHIP_COMPAT_ELF_TYPE() 0x2506 + +/** What is the native word size of the machine? */ +#define CHIP_WORD_SIZE() 32 + +/** How many bits of a virtual address are used. Extra bits must be + * the sign extension of the low bits. + */ +#define CHIP_VA_WIDTH() 32 + +/** How many bits are in a physical address? */ +#define CHIP_PA_WIDTH() 36 + +/** Size of the L2 cache, in bytes. */ +#define CHIP_L2_CACHE_SIZE() 65536 + +/** Log size of an L2 cache line in bytes. */ +#define CHIP_L2_LOG_LINE_SIZE() 6 + +/** Size of an L2 cache line, in bytes. */ +#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE()) + +/** Associativity of the L2 cache. */ +#define CHIP_L2_ASSOC() 2 + +/** Size of the L1 data cache, in bytes. */ +#define CHIP_L1D_CACHE_SIZE() 8192 + +/** Log size of an L1 data cache line in bytes. */ +#define CHIP_L1D_LOG_LINE_SIZE() 4 + +/** Size of an L1 data cache line, in bytes. */ +#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE()) + +/** Associativity of the L1 data cache. */ +#define CHIP_L1D_ASSOC() 2 + +/** Size of the L1 instruction cache, in bytes. */ +#define CHIP_L1I_CACHE_SIZE() 8192 + +/** Log size of an L1 instruction cache line in bytes. */ +#define CHIP_L1I_LOG_LINE_SIZE() 6 + +/** Size of an L1 instruction cache line, in bytes. */ +#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE()) + +/** Associativity of the L1 instruction cache. */ +#define CHIP_L1I_ASSOC() 1 + +/** Stride with which flush instructions must be issued. */ +#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE() + +/** Stride with which inv instructions must be issued. */ +#define CHIP_INV_STRIDE() CHIP_L1D_LINE_SIZE() + +/** Stride with which finv instructions must be issued. */ +#define CHIP_FINV_STRIDE() CHIP_L1D_LINE_SIZE() + +/** Can the local cache coherently cache data that is homed elsewhere? */ +#define CHIP_HAS_COHERENT_LOCAL_CACHE() 0 + +/** How many simultaneous outstanding victims can the L2 cache have? */ +#define CHIP_MAX_OUTSTANDING_VICTIMS() 2 + +/** Does the TLB support the NC and NOALLOC bits? */ +#define CHIP_HAS_NC_AND_NOALLOC_BITS() 0 + +/** Does the chip support hash-for-home caching? */ +#define CHIP_HAS_CBOX_HOME_MAP() 0 + +/** Number of entries in the chip's home map tables. */ +/* #define CHIP_CBOX_HOME_MAP_SIZE() -- does not apply to chip 0 */ + +/** Do uncacheable requests miss in the cache regardless of whether + * there is matching data? */ +#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 0 + +/** Does the mf instruction wait for victims? */ +#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 1 + +/** Does the chip have an "inv" instruction that doesn't also flush? */ +#define CHIP_HAS_INV() 0 + +/** Does the chip have a "wh64" instruction? */ +#define CHIP_HAS_WH64() 0 + +/** Does this chip have a 'dword_align' instruction? */ +#define CHIP_HAS_DWORD_ALIGN() 0 + +/** Number of performance counters. */ +#define CHIP_PERFORMANCE_COUNTERS() 2 + +/** Does this chip have auxiliary performance counters? */ +#define CHIP_HAS_AUX_PERF_COUNTERS() 0 + +/** Is the CBOX_MSR1 SPR supported? */ +#define CHIP_HAS_CBOX_MSR1() 0 + +/** Is the TILE_RTF_HWM SPR supported? */ +#define CHIP_HAS_TILE_RTF_HWM() 0 + +/** Is the TILE_WRITE_PENDING SPR supported? */ +#define CHIP_HAS_TILE_WRITE_PENDING() 0 + +/** Is the PROC_STATUS SPR supported? */ +#define CHIP_HAS_PROC_STATUS_SPR() 0 + +/** Is the DSTREAM_PF SPR supported? */ +#define CHIP_HAS_DSTREAM_PF() 0 + +/** Log of the number of mshims we have. */ +#define CHIP_LOG_NUM_MSHIMS() 2 + +/** Are the bases of the interrupt vector areas fixed? */ +#define CHIP_HAS_FIXED_INTVEC_BASE() 1 + +/** Are the interrupt masks split up into 2 SPRs? */ +#define CHIP_HAS_SPLIT_INTR_MASK() 1 + +/** Is the cycle count split up into 2 SPRs? */ +#define CHIP_HAS_SPLIT_CYCLE() 1 + +/** Does the chip have a static network? */ +#define CHIP_HAS_SN() 1 + +/** Does the chip have a static network processor? */ +#define CHIP_HAS_SN_PROC() 1 + +/** Size of the L1 static network processor instruction cache, in bytes. */ +#define CHIP_L1SNI_CACHE_SIZE() 2048 + +/** Does the chip have DMA support in each tile? */ +#define CHIP_HAS_TILE_DMA() 1 + +/** Does the chip have the second revision of the directly accessible + * dynamic networks? This encapsulates a number of characteristics, + * including the absence of the catch-all, the absence of inline message + * tags, the absence of support for network context-switching, and so on. + */ +#define CHIP_HAS_REV1_XDN() 0 + +/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */ +#define CHIP_HAS_CMPEXCH() 0 + +/** Does the chip have memory-mapped I/O support? */ +#define CHIP_HAS_MMIO() 0 + +/** Does the chip have post-completion interrupts? */ +#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0 + +/** Does the chip have native single step support? */ +#define CHIP_HAS_SINGLE_STEP() 0 + +#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */ + +/** How many entries are present in the instruction TLB? */ +#define CHIP_ITLB_ENTRIES() 8 + +/** How many entries are present in the data TLB? */ +#define CHIP_DTLB_ENTRIES() 16 + +/** How many MAF entries does the XAUI shim have? */ +#define CHIP_XAUI_MAF_ENTRIES() 16 + +/** Does the memory shim have a source-id table? */ +#define CHIP_HAS_MSHIM_SRCID_TABLE() 1 + +/** Does the L1 instruction cache clear on reset? */ +#define CHIP_HAS_L1I_CLEAR_ON_RESET() 0 + +/** Does the chip come out of reset with valid coordinates on all tiles? + * Note that if defined, this also implies that the upper left is 1,1. + */ +#define CHIP_HAS_VALID_TILE_COORD_RESET() 0 + +/** Does the chip have unified packet formats? */ +#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 0 + +/** Does the chip support write reordering? */ +#define CHIP_HAS_WRITE_REORDERING() 0 + +/** Does the chip support Y-X routing as well as X-Y? */ +#define CHIP_HAS_Y_X_ROUTING() 0 + +/** Is INTCTRL_3 managed with the correct MPL? */ +#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 0 + +/** Is it possible to configure the chip to be big-endian? */ +#define CHIP_HAS_BIG_ENDIAN_CONFIG() 0 + +/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */ +#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0 + +/** Is the DIAG_TRACE_WAY SPR supported? */ +#define CHIP_HAS_DIAG_TRACE_WAY() 0 + +/** Is the MEM_STRIPE_CONFIG SPR supported? */ +#define CHIP_HAS_MEM_STRIPE_CONFIG() 0 + +/** Are the TLB_PERF SPRs supported? */ +#define CHIP_HAS_TLB_PERF() 0 + +/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */ +#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0 + +/** Does the chip support rev1 DMA packets? */ +#define CHIP_HAS_REV1_DMA_PACKETS() 0 + +/** Does the chip have an IPI shim? */ +#define CHIP_HAS_IPI() 0 + +#endif /* !__OPEN_SOURCE__ */ +#endif /* __ARCH_CHIP_H__ */ diff --git a/arch/tile/include/arch/chip_tilegx.h b/arch/tile/include/arch/chip_tilegx.h new file mode 100644 index 00000000..ea8e4f2c --- /dev/null +++ b/arch/tile/include/arch/chip_tilegx.h @@ -0,0 +1,258 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* + * @file + * Global header file. + * This header file specifies defines for TILE-Gx. + */ + +#ifndef __ARCH_CHIP_H__ +#define __ARCH_CHIP_H__ + +/** Specify chip version. + * When possible, prefer the CHIP_xxx symbols below for future-proofing. + * This is intended for cross-compiling; native compilation should + * use the predefined __tile_chip__ symbol. + */ +#define TILE_CHIP 10 + +/** Specify chip revision. + * This provides for the case of a respin of a particular chip type; + * the normal value for this symbol is "0". + * This is intended for cross-compiling; native compilation should + * use the predefined __tile_chip_rev__ symbol. + */ +#define TILE_CHIP_REV 0 + +/** The name of this architecture. */ +#define CHIP_ARCH_NAME "tilegx" + +/** The ELF e_machine type for binaries for this chip. */ +#define CHIP_ELF_TYPE() EM_TILEGX + +/** The alternate ELF e_machine type for binaries for this chip. */ +#define CHIP_COMPAT_ELF_TYPE() 0x2597 + +/** What is the native word size of the machine? */ +#define CHIP_WORD_SIZE() 64 + +/** How many bits of a virtual address are used. Extra bits must be + * the sign extension of the low bits. + */ +#define CHIP_VA_WIDTH() 42 + +/** How many bits are in a physical address? */ +#define CHIP_PA_WIDTH() 40 + +/** Size of the L2 cache, in bytes. */ +#define CHIP_L2_CACHE_SIZE() 262144 + +/** Log size of an L2 cache line in bytes. */ +#define CHIP_L2_LOG_LINE_SIZE() 6 + +/** Size of an L2 cache line, in bytes. */ +#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE()) + +/** Associativity of the L2 cache. */ +#define CHIP_L2_ASSOC() 8 + +/** Size of the L1 data cache, in bytes. */ +#define CHIP_L1D_CACHE_SIZE() 32768 + +/** Log size of an L1 data cache line in bytes. */ +#define CHIP_L1D_LOG_LINE_SIZE() 6 + +/** Size of an L1 data cache line, in bytes. */ +#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE()) + +/** Associativity of the L1 data cache. */ +#define CHIP_L1D_ASSOC() 2 + +/** Size of the L1 instruction cache, in bytes. */ +#define CHIP_L1I_CACHE_SIZE() 32768 + +/** Log size of an L1 instruction cache line in bytes. */ +#define CHIP_L1I_LOG_LINE_SIZE() 6 + +/** Size of an L1 instruction cache line, in bytes. */ +#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE()) + +/** Associativity of the L1 instruction cache. */ +#define CHIP_L1I_ASSOC() 2 + +/** Stride with which flush instructions must be issued. */ +#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE() + +/** Stride with which inv instructions must be issued. */ +#define CHIP_INV_STRIDE() CHIP_L2_LINE_SIZE() + +/** Stride with which finv instructions must be issued. */ +#define CHIP_FINV_STRIDE() CHIP_L2_LINE_SIZE() + +/** Can the local cache coherently cache data that is homed elsewhere? */ +#define CHIP_HAS_COHERENT_LOCAL_CACHE() 1 + +/** How many simultaneous outstanding victims can the L2 cache have? */ +#define CHIP_MAX_OUTSTANDING_VICTIMS() 128 + +/** Does the TLB support the NC and NOALLOC bits? */ +#define CHIP_HAS_NC_AND_NOALLOC_BITS() 1 + +/** Does the chip support hash-for-home caching? */ +#define CHIP_HAS_CBOX_HOME_MAP() 1 + +/** Number of entries in the chip's home map tables. */ +#define CHIP_CBOX_HOME_MAP_SIZE() 128 + +/** Do uncacheable requests miss in the cache regardless of whether + * there is matching data? */ +#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 1 + +/** Does the mf instruction wait for victims? */ +#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 0 + +/** Does the chip have an "inv" instruction that doesn't also flush? */ +#define CHIP_HAS_INV() 1 + +/** Does the chip have a "wh64" instruction? */ +#define CHIP_HAS_WH64() 1 + +/** Does this chip have a 'dword_align' instruction? */ +#define CHIP_HAS_DWORD_ALIGN() 0 + +/** Number of performance counters. */ +#define CHIP_PERFORMANCE_COUNTERS() 4 + +/** Does this chip have auxiliary performance counters? */ +#define CHIP_HAS_AUX_PERF_COUNTERS() 1 + +/** Is the CBOX_MSR1 SPR supported? */ +#define CHIP_HAS_CBOX_MSR1() 0 + +/** Is the TILE_RTF_HWM SPR supported? */ +#define CHIP_HAS_TILE_RTF_HWM() 1 + +/** Is the TILE_WRITE_PENDING SPR supported? */ +#define CHIP_HAS_TILE_WRITE_PENDING() 0 + +/** Is the PROC_STATUS SPR supported? */ +#define CHIP_HAS_PROC_STATUS_SPR() 1 + +/** Is the DSTREAM_PF SPR supported? */ +#define CHIP_HAS_DSTREAM_PF() 1 + +/** Log of the number of mshims we have. */ +#define CHIP_LOG_NUM_MSHIMS() 2 + +/** Are the bases of the interrupt vector areas fixed? */ +#define CHIP_HAS_FIXED_INTVEC_BASE() 0 + +/** Are the interrupt masks split up into 2 SPRs? */ +#define CHIP_HAS_SPLIT_INTR_MASK() 0 + +/** Is the cycle count split up into 2 SPRs? */ +#define CHIP_HAS_SPLIT_CYCLE() 0 + +/** Does the chip have a static network? */ +#define CHIP_HAS_SN() 0 + +/** Does the chip have a static network processor? */ +#define CHIP_HAS_SN_PROC() 0 + +/** Size of the L1 static network processor instruction cache, in bytes. */ +/* #define CHIP_L1SNI_CACHE_SIZE() -- does not apply to chip 10 */ + +/** Does the chip have DMA support in each tile? */ +#define CHIP_HAS_TILE_DMA() 0 + +/** Does the chip have the second revision of the directly accessible + * dynamic networks? This encapsulates a number of characteristics, + * including the absence of the catch-all, the absence of inline message + * tags, the absence of support for network context-switching, and so on. + */ +#define CHIP_HAS_REV1_XDN() 1 + +/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */ +#define CHIP_HAS_CMPEXCH() 1 + +/** Does the chip have memory-mapped I/O support? */ +#define CHIP_HAS_MMIO() 1 + +/** Does the chip have post-completion interrupts? */ +#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 1 + +/** Does the chip have native single step support? */ +#define CHIP_HAS_SINGLE_STEP() 1 + +#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */ + +/** How many entries are present in the instruction TLB? */ +#define CHIP_ITLB_ENTRIES() 16 + +/** How many entries are present in the data TLB? */ +#define CHIP_DTLB_ENTRIES() 32 + +/** How many MAF entries does the XAUI shim have? */ +#define CHIP_XAUI_MAF_ENTRIES() 32 + +/** Does the memory shim have a source-id table? */ +#define CHIP_HAS_MSHIM_SRCID_TABLE() 0 + +/** Does the L1 instruction cache clear on reset? */ +#define CHIP_HAS_L1I_CLEAR_ON_RESET() 1 + +/** Does the chip come out of reset with valid coordinates on all tiles? + * Note that if defined, this also implies that the upper left is 1,1. + */ +#define CHIP_HAS_VALID_TILE_COORD_RESET() 1 + +/** Does the chip have unified packet formats? */ +#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 1 + +/** Does the chip support write reordering? */ +#define CHIP_HAS_WRITE_REORDERING() 1 + +/** Does the chip support Y-X routing as well as X-Y? */ +#define CHIP_HAS_Y_X_ROUTING() 1 + +/** Is INTCTRL_3 managed with the correct MPL? */ +#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 1 + +/** Is it possible to configure the chip to be big-endian? */ +#define CHIP_HAS_BIG_ENDIAN_CONFIG() 1 + +/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */ +#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0 + +/** Is the DIAG_TRACE_WAY SPR supported? */ +#define CHIP_HAS_DIAG_TRACE_WAY() 0 + +/** Is the MEM_STRIPE_CONFIG SPR supported? */ +#define CHIP_HAS_MEM_STRIPE_CONFIG() 1 + +/** Are the TLB_PERF SPRs supported? */ +#define CHIP_HAS_TLB_PERF() 1 + +/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */ +#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0 + +/** Does the chip support rev1 DMA packets? */ +#define CHIP_HAS_REV1_DMA_PACKETS() 1 + +/** Does the chip have an IPI shim? */ +#define CHIP_HAS_IPI() 1 + +#endif /* !__OPEN_SOURCE__ */ +#endif /* __ARCH_CHIP_H__ */ diff --git a/arch/tile/include/arch/chip_tilepro.h b/arch/tile/include/arch/chip_tilepro.h new file mode 100644 index 00000000..70017699 --- /dev/null +++ b/arch/tile/include/arch/chip_tilepro.h @@ -0,0 +1,258 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* + * @file + * Global header file. + * This header file specifies defines for TILEPro. + */ + +#ifndef __ARCH_CHIP_H__ +#define __ARCH_CHIP_H__ + +/** Specify chip version. + * When possible, prefer the CHIP_xxx symbols below for future-proofing. + * This is intended for cross-compiling; native compilation should + * use the predefined __tile_chip__ symbol. + */ +#define TILE_CHIP 1 + +/** Specify chip revision. + * This provides for the case of a respin of a particular chip type; + * the normal value for this symbol is "0". + * This is intended for cross-compiling; native compilation should + * use the predefined __tile_chip_rev__ symbol. + */ +#define TILE_CHIP_REV 0 + +/** The name of this architecture. */ +#define CHIP_ARCH_NAME "tilepro" + +/** The ELF e_machine type for binaries for this chip. */ +#define CHIP_ELF_TYPE() EM_TILEPRO + +/** The alternate ELF e_machine type for binaries for this chip. */ +#define CHIP_COMPAT_ELF_TYPE() 0x2507 + +/** What is the native word size of the machine? */ +#define CHIP_WORD_SIZE() 32 + +/** How many bits of a virtual address are used. Extra bits must be + * the sign extension of the low bits. + */ +#define CHIP_VA_WIDTH() 32 + +/** How many bits are in a physical address? */ +#define CHIP_PA_WIDTH() 36 + +/** Size of the L2 cache, in bytes. */ +#define CHIP_L2_CACHE_SIZE() 65536 + +/** Log size of an L2 cache line in bytes. */ +#define CHIP_L2_LOG_LINE_SIZE() 6 + +/** Size of an L2 cache line, in bytes. */ +#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE()) + +/** Associativity of the L2 cache. */ +#define CHIP_L2_ASSOC() 4 + +/** Size of the L1 data cache, in bytes. */ +#define CHIP_L1D_CACHE_SIZE() 8192 + +/** Log size of an L1 data cache line in bytes. */ +#define CHIP_L1D_LOG_LINE_SIZE() 4 + +/** Size of an L1 data cache line, in bytes. */ +#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE()) + +/** Associativity of the L1 data cache. */ +#define CHIP_L1D_ASSOC() 2 + +/** Size of the L1 instruction cache, in bytes. */ +#define CHIP_L1I_CACHE_SIZE() 16384 + +/** Log size of an L1 instruction cache line in bytes. */ +#define CHIP_L1I_LOG_LINE_SIZE() 6 + +/** Size of an L1 instruction cache line, in bytes. */ +#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE()) + +/** Associativity of the L1 instruction cache. */ +#define CHIP_L1I_ASSOC() 1 + +/** Stride with which flush instructions must be issued. */ +#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE() + +/** Stride with which inv instructions must be issued. */ +#define CHIP_INV_STRIDE() CHIP_L2_LINE_SIZE() + +/** Stride with which finv instructions must be issued. */ +#define CHIP_FINV_STRIDE() CHIP_L2_LINE_SIZE() + +/** Can the local cache coherently cache data that is homed elsewhere? */ +#define CHIP_HAS_COHERENT_LOCAL_CACHE() 1 + +/** How many simultaneous outstanding victims can the L2 cache have? */ +#define CHIP_MAX_OUTSTANDING_VICTIMS() 4 + +/** Does the TLB support the NC and NOALLOC bits? */ +#define CHIP_HAS_NC_AND_NOALLOC_BITS() 1 + +/** Does the chip support hash-for-home caching? */ +#define CHIP_HAS_CBOX_HOME_MAP() 1 + +/** Number of entries in the chip's home map tables. */ +#define CHIP_CBOX_HOME_MAP_SIZE() 64 + +/** Do uncacheable requests miss in the cache regardless of whether + * there is matching data? */ +#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 1 + +/** Does the mf instruction wait for victims? */ +#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 0 + +/** Does the chip have an "inv" instruction that doesn't also flush? */ +#define CHIP_HAS_INV() 1 + +/** Does the chip have a "wh64" instruction? */ +#define CHIP_HAS_WH64() 1 + +/** Does this chip have a 'dword_align' instruction? */ +#define CHIP_HAS_DWORD_ALIGN() 1 + +/** Number of performance counters. */ +#define CHIP_PERFORMANCE_COUNTERS() 4 + +/** Does this chip have auxiliary performance counters? */ +#define CHIP_HAS_AUX_PERF_COUNTERS() 1 + +/** Is the CBOX_MSR1 SPR supported? */ +#define CHIP_HAS_CBOX_MSR1() 1 + +/** Is the TILE_RTF_HWM SPR supported? */ +#define CHIP_HAS_TILE_RTF_HWM() 1 + +/** Is the TILE_WRITE_PENDING SPR supported? */ +#define CHIP_HAS_TILE_WRITE_PENDING() 1 + +/** Is the PROC_STATUS SPR supported? */ +#define CHIP_HAS_PROC_STATUS_SPR() 1 + +/** Is the DSTREAM_PF SPR supported? */ +#define CHIP_HAS_DSTREAM_PF() 0 + +/** Log of the number of mshims we have. */ +#define CHIP_LOG_NUM_MSHIMS() 2 + +/** Are the bases of the interrupt vector areas fixed? */ +#define CHIP_HAS_FIXED_INTVEC_BASE() 1 + +/** Are the interrupt masks split up into 2 SPRs? */ +#define CHIP_HAS_SPLIT_INTR_MASK() 1 + +/** Is the cycle count split up into 2 SPRs? */ +#define CHIP_HAS_SPLIT_CYCLE() 1 + +/** Does the chip have a static network? */ +#define CHIP_HAS_SN() 1 + +/** Does the chip have a static network processor? */ +#define CHIP_HAS_SN_PROC() 0 + +/** Size of the L1 static network processor instruction cache, in bytes. */ +/* #define CHIP_L1SNI_CACHE_SIZE() -- does not apply to chip 1 */ + +/** Does the chip have DMA support in each tile? */ +#define CHIP_HAS_TILE_DMA() 1 + +/** Does the chip have the second revision of the directly accessible + * dynamic networks? This encapsulates a number of characteristics, + * including the absence of the catch-all, the absence of inline message + * tags, the absence of support for network context-switching, and so on. + */ +#define CHIP_HAS_REV1_XDN() 0 + +/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */ +#define CHIP_HAS_CMPEXCH() 0 + +/** Does the chip have memory-mapped I/O support? */ +#define CHIP_HAS_MMIO() 0 + +/** Does the chip have post-completion interrupts? */ +#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0 + +/** Does the chip have native single step support? */ +#define CHIP_HAS_SINGLE_STEP() 0 + +#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */ + +/** How many entries are present in the instruction TLB? */ +#define CHIP_ITLB_ENTRIES() 16 + +/** How many entries are present in the data TLB? */ +#define CHIP_DTLB_ENTRIES() 16 + +/** How many MAF entries does the XAUI shim have? */ +#define CHIP_XAUI_MAF_ENTRIES() 32 + +/** Does the memory shim have a source-id table? */ +#define CHIP_HAS_MSHIM_SRCID_TABLE() 0 + +/** Does the L1 instruction cache clear on reset? */ +#define CHIP_HAS_L1I_CLEAR_ON_RESET() 1 + +/** Does the chip come out of reset with valid coordinates on all tiles? + * Note that if defined, this also implies that the upper left is 1,1. + */ +#define CHIP_HAS_VALID_TILE_COORD_RESET() 1 + +/** Does the chip have unified packet formats? */ +#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 1 + +/** Does the chip support write reordering? */ +#define CHIP_HAS_WRITE_REORDERING() 1 + +/** Does the chip support Y-X routing as well as X-Y? */ +#define CHIP_HAS_Y_X_ROUTING() 1 + +/** Is INTCTRL_3 managed with the correct MPL? */ +#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 1 + +/** Is it possible to configure the chip to be big-endian? */ +#define CHIP_HAS_BIG_ENDIAN_CONFIG() 1 + +/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */ +#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 1 + +/** Is the DIAG_TRACE_WAY SPR supported? */ +#define CHIP_HAS_DIAG_TRACE_WAY() 1 + +/** Is the MEM_STRIPE_CONFIG SPR supported? */ +#define CHIP_HAS_MEM_STRIPE_CONFIG() 1 + +/** Are the TLB_PERF SPRs supported? */ +#define CHIP_HAS_TLB_PERF() 1 + +/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */ +#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 1 + +/** Does the chip support rev1 DMA packets? */ +#define CHIP_HAS_REV1_DMA_PACKETS() 1 + +/** Does the chip have an IPI shim? */ +#define CHIP_HAS_IPI() 0 + +#endif /* !__OPEN_SOURCE__ */ +#endif /* __ARCH_CHIP_H__ */ diff --git a/arch/tile/include/arch/icache.h b/arch/tile/include/arch/icache.h new file mode 100644 index 00000000..762eafa8 --- /dev/null +++ b/arch/tile/include/arch/icache.h @@ -0,0 +1,93 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +/** + * @file + * + * Support for invalidating bytes in the instruction cache. + */ + +#ifndef __ARCH_ICACHE_H__ +#define __ARCH_ICACHE_H__ + +#include <arch/chip.h> + + +/** + * Invalidate the instruction cache for the given range of memory. + * + * @param addr The start of memory to be invalidated. + * @param size The number of bytes to be invalidated. + * @param page_size The system's page size, e.g. getpagesize() in userspace. + * This value must be a power of two no larger than the page containing + * the code to be invalidated. If the value is smaller than the actual page + * size, this function will still work, but may run slower than necessary. + */ +static __inline void +invalidate_icache(const void* addr, unsigned long size, + unsigned long page_size) +{ + const unsigned long cache_way_size = + CHIP_L1I_CACHE_SIZE() / CHIP_L1I_ASSOC(); + unsigned long max_useful_size; + const char* start, *end; + long num_passes; + + if (__builtin_expect(size == 0, 0)) + return; + +#ifdef __tilegx__ + /* Limit the number of bytes visited to avoid redundant iterations. */ + max_useful_size = (page_size < cache_way_size) ? page_size : cache_way_size; + + /* No PA aliasing is possible, so one pass always suffices. */ + num_passes = 1; +#else + /* Limit the number of bytes visited to avoid redundant iterations. */ + max_useful_size = cache_way_size; + + /* + * Compute how many passes we need (we'll treat 0 as if it were 1). + * This works because we know the page size is a power of two. + */ + num_passes = cache_way_size >> __builtin_ctzl(page_size); +#endif + + if (__builtin_expect(size > max_useful_size, 0)) + size = max_useful_size; + + /* Locate the first and last bytes to be invalidated. */ + start = (const char *)((unsigned long)addr & -CHIP_L1I_LINE_SIZE()); + end = (const char*)addr + size - 1; + + __insn_mf(); + + do + { + const char* p; + + for (p = start; p <= end; p += CHIP_L1I_LINE_SIZE()) + __insn_icoh(p); + + start += page_size; + end += page_size; + } + while (--num_passes > 0); + + __insn_drain(); +} + + +#endif /* __ARCH_ICACHE_H__ */ diff --git a/arch/tile/include/arch/interrupts.h b/arch/tile/include/arch/interrupts.h new file mode 100644 index 00000000..20f8f07d --- /dev/null +++ b/arch/tile/include/arch/interrupts.h @@ -0,0 +1,19 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifdef __tilegx__ +#include <arch/interrupts_64.h> +#else +#include <arch/interrupts_32.h> +#endif diff --git a/arch/tile/include/arch/interrupts_32.h b/arch/tile/include/arch/interrupts_32.h new file mode 100644 index 00000000..96b57105 --- /dev/null +++ b/arch/tile/include/arch/interrupts_32.h @@ -0,0 +1,307 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef __ARCH_INTERRUPTS_H__ +#define __ARCH_INTERRUPTS_H__ + +/** Mask for an interrupt. */ +/* Note: must handle breaking interrupts into high and low words manually. */ +#define INT_MASK_LO(intno) (1 << (intno)) +#define INT_MASK_HI(intno) (1 << ((intno) - 32)) + +#ifndef __ASSEMBLER__ +#define INT_MASK(intno) (1ULL << (intno)) +#endif + + +/** Where a given interrupt executes */ +#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8)) + +/** Where to store a vector for a given interrupt. */ +#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0) + +/** The base address of user-level interrupts. */ +#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0) + + +/** Additional synthetic interrupt. */ +#define INT_BREAKPOINT (63) + +#define INT_ITLB_MISS 0 +#define INT_MEM_ERROR 1 +#define INT_ILL 2 +#define INT_GPV 3 +#define INT_SN_ACCESS 4 +#define INT_IDN_ACCESS 5 +#define INT_UDN_ACCESS 6 +#define INT_IDN_REFILL 7 +#define INT_UDN_REFILL 8 +#define INT_IDN_COMPLETE 9 +#define INT_UDN_COMPLETE 10 +#define INT_SWINT_3 11 +#define INT_SWINT_2 12 +#define INT_SWINT_1 13 +#define INT_SWINT_0 14 +#define INT_UNALIGN_DATA 15 +#define INT_DTLB_MISS 16 +#define INT_DTLB_ACCESS 17 +#define INT_DMATLB_MISS 18 +#define INT_DMATLB_ACCESS 19 +#define INT_SNITLB_MISS 20 +#define INT_SN_NOTIFY 21 +#define INT_SN_FIREWALL 22 +#define INT_IDN_FIREWALL 23 +#define INT_UDN_FIREWALL 24 +#define INT_TILE_TIMER 25 +#define INT_IDN_TIMER 26 +#define INT_UDN_TIMER 27 +#define INT_DMA_NOTIFY 28 +#define INT_IDN_CA 29 +#define INT_UDN_CA 30 +#define INT_IDN_AVAIL 31 +#define INT_UDN_AVAIL 32 +#define INT_PERF_COUNT 33 +#define INT_INTCTRL_3 34 +#define INT_INTCTRL_2 35 +#define INT_INTCTRL_1 36 +#define INT_INTCTRL_0 37 +#define INT_BOOT_ACCESS 38 +#define INT_WORLD_ACCESS 39 +#define INT_I_ASID 40 +#define INT_D_ASID 41 +#define INT_DMA_ASID 42 +#define INT_SNI_ASID 43 +#define INT_DMA_CPL 44 +#define INT_SN_CPL 45 +#define INT_DOUBLE_FAULT 46 +#define INT_SN_STATIC_ACCESS 47 +#define INT_AUX_PERF_COUNT 48 + +#define NUM_INTERRUPTS 49 + +#ifndef __ASSEMBLER__ +#define QUEUED_INTERRUPTS ( \ + INT_MASK(INT_MEM_ERROR) | \ + INT_MASK(INT_DMATLB_MISS) | \ + INT_MASK(INT_DMATLB_ACCESS) | \ + INT_MASK(INT_SNITLB_MISS) | \ + INT_MASK(INT_SN_NOTIFY) | \ + INT_MASK(INT_SN_FIREWALL) | \ + INT_MASK(INT_IDN_FIREWALL) | \ + INT_MASK(INT_UDN_FIREWALL) | \ + INT_MASK(INT_TILE_TIMER) | \ + INT_MASK(INT_IDN_TIMER) | \ + INT_MASK(INT_UDN_TIMER) | \ + INT_MASK(INT_DMA_NOTIFY) | \ + INT_MASK(INT_IDN_CA) | \ + INT_MASK(INT_UDN_CA) | \ + INT_MASK(INT_IDN_AVAIL) | \ + INT_MASK(INT_UDN_AVAIL) | \ + INT_MASK(INT_PERF_COUNT) | \ + INT_MASK(INT_INTCTRL_3) | \ + INT_MASK(INT_INTCTRL_2) | \ + INT_MASK(INT_INTCTRL_1) | \ + INT_MASK(INT_INTCTRL_0) | \ + INT_MASK(INT_BOOT_ACCESS) | \ + INT_MASK(INT_WORLD_ACCESS) | \ + INT_MASK(INT_I_ASID) | \ + INT_MASK(INT_D_ASID) | \ + INT_MASK(INT_DMA_ASID) | \ + INT_MASK(INT_SNI_ASID) | \ + INT_MASK(INT_DMA_CPL) | \ + INT_MASK(INT_SN_CPL) | \ + INT_MASK(INT_DOUBLE_FAULT) | \ + INT_MASK(INT_AUX_PERF_COUNT) | \ + 0) +#define NONQUEUED_INTERRUPTS ( \ + INT_MASK(INT_ITLB_MISS) | \ + INT_MASK(INT_ILL) | \ + INT_MASK(INT_GPV) | \ + INT_MASK(INT_SN_ACCESS) | \ + INT_MASK(INT_IDN_ACCESS) | \ + INT_MASK(INT_UDN_ACCESS) | \ + INT_MASK(INT_IDN_REFILL) | \ + INT_MASK(INT_UDN_REFILL) | \ + INT_MASK(INT_IDN_COMPLETE) | \ + INT_MASK(INT_UDN_COMPLETE) | \ + INT_MASK(INT_SWINT_3) | \ + INT_MASK(INT_SWINT_2) | \ + INT_MASK(INT_SWINT_1) | \ + INT_MASK(INT_SWINT_0) | \ + INT_MASK(INT_UNALIGN_DATA) | \ + INT_MASK(INT_DTLB_MISS) | \ + INT_MASK(INT_DTLB_ACCESS) | \ + INT_MASK(INT_SN_STATIC_ACCESS) | \ + 0) +#define CRITICAL_MASKED_INTERRUPTS ( \ + INT_MASK(INT_MEM_ERROR) | \ + INT_MASK(INT_DMATLB_MISS) | \ + INT_MASK(INT_DMATLB_ACCESS) | \ + INT_MASK(INT_SNITLB_MISS) | \ + INT_MASK(INT_SN_NOTIFY) | \ + INT_MASK(INT_SN_FIREWALL) | \ + INT_MASK(INT_IDN_FIREWALL) | \ + INT_MASK(INT_UDN_FIREWALL) | \ + INT_MASK(INT_TILE_TIMER) | \ + INT_MASK(INT_IDN_TIMER) | \ + INT_MASK(INT_UDN_TIMER) | \ + INT_MASK(INT_DMA_NOTIFY) | \ + INT_MASK(INT_IDN_CA) | \ + INT_MASK(INT_UDN_CA) | \ + INT_MASK(INT_IDN_AVAIL) | \ + INT_MASK(INT_UDN_AVAIL) | \ + INT_MASK(INT_PERF_COUNT) | \ + INT_MASK(INT_INTCTRL_3) | \ + INT_MASK(INT_INTCTRL_2) | \ + INT_MASK(INT_INTCTRL_1) | \ + INT_MASK(INT_INTCTRL_0) | \ + INT_MASK(INT_AUX_PERF_COUNT) | \ + 0) +#define CRITICAL_UNMASKED_INTERRUPTS ( \ + INT_MASK(INT_ITLB_MISS) | \ + INT_MASK(INT_ILL) | \ + INT_MASK(INT_GPV) | \ + INT_MASK(INT_SN_ACCESS) | \ + INT_MASK(INT_IDN_ACCESS) | \ + INT_MASK(INT_UDN_ACCESS) | \ + INT_MASK(INT_IDN_REFILL) | \ + INT_MASK(INT_UDN_REFILL) | \ + INT_MASK(INT_IDN_COMPLETE) | \ + INT_MASK(INT_UDN_COMPLETE) | \ + INT_MASK(INT_SWINT_3) | \ + INT_MASK(INT_SWINT_2) | \ + INT_MASK(INT_SWINT_1) | \ + INT_MASK(INT_SWINT_0) | \ + INT_MASK(INT_UNALIGN_DATA) | \ + INT_MASK(INT_DTLB_MISS) | \ + INT_MASK(INT_DTLB_ACCESS) | \ + INT_MASK(INT_BOOT_ACCESS) | \ + INT_MASK(INT_WORLD_ACCESS) | \ + INT_MASK(INT_I_ASID) | \ + INT_MASK(INT_D_ASID) | \ + INT_MASK(INT_DMA_ASID) | \ + INT_MASK(INT_SNI_ASID) | \ + INT_MASK(INT_DMA_CPL) | \ + INT_MASK(INT_SN_CPL) | \ + INT_MASK(INT_DOUBLE_FAULT) | \ + INT_MASK(INT_SN_STATIC_ACCESS) | \ + 0) +#define MASKABLE_INTERRUPTS ( \ + INT_MASK(INT_MEM_ERROR) | \ + INT_MASK(INT_IDN_REFILL) | \ + INT_MASK(INT_UDN_REFILL) | \ + INT_MASK(INT_IDN_COMPLETE) | \ + INT_MASK(INT_UDN_COMPLETE) | \ + INT_MASK(INT_DMATLB_MISS) | \ + INT_MASK(INT_DMATLB_ACCESS) | \ + INT_MASK(INT_SNITLB_MISS) | \ + INT_MASK(INT_SN_NOTIFY) | \ + INT_MASK(INT_SN_FIREWALL) | \ + INT_MASK(INT_IDN_FIREWALL) | \ + INT_MASK(INT_UDN_FIREWALL) | \ + INT_MASK(INT_TILE_TIMER) | \ + INT_MASK(INT_IDN_TIMER) | \ + INT_MASK(INT_UDN_TIMER) | \ + INT_MASK(INT_DMA_NOTIFY) | \ + INT_MASK(INT_IDN_CA) | \ + INT_MASK(INT_UDN_CA) | \ + INT_MASK(INT_IDN_AVAIL) | \ + INT_MASK(INT_UDN_AVAIL) | \ + INT_MASK(INT_PERF_COUNT) | \ + INT_MASK(INT_INTCTRL_3) | \ + INT_MASK(INT_INTCTRL_2) | \ + INT_MASK(INT_INTCTRL_1) | \ + INT_MASK(INT_INTCTRL_0) | \ + INT_MASK(INT_AUX_PERF_COUNT) | \ + 0) +#define UNMASKABLE_INTERRUPTS ( \ + INT_MASK(INT_ITLB_MISS) | \ + INT_MASK(INT_ILL) | \ + INT_MASK(INT_GPV) | \ + INT_MASK(INT_SN_ACCESS) | \ + INT_MASK(INT_IDN_ACCESS) | \ + INT_MASK(INT_UDN_ACCESS) | \ + INT_MASK(INT_SWINT_3) | \ + INT_MASK(INT_SWINT_2) | \ + INT_MASK(INT_SWINT_1) | \ + INT_MASK(INT_SWINT_0) | \ + INT_MASK(INT_UNALIGN_DATA) | \ + INT_MASK(INT_DTLB_MISS) | \ + INT_MASK(INT_DTLB_ACCESS) | \ + INT_MASK(INT_BOOT_ACCESS) | \ + INT_MASK(INT_WORLD_ACCESS) | \ + INT_MASK(INT_I_ASID) | \ + INT_MASK(INT_D_ASID) | \ + INT_MASK(INT_DMA_ASID) | \ + INT_MASK(INT_SNI_ASID) | \ + INT_MASK(INT_DMA_CPL) | \ + INT_MASK(INT_SN_CPL) | \ + INT_MASK(INT_DOUBLE_FAULT) | \ + INT_MASK(INT_SN_STATIC_ACCESS) | \ + 0) +#define SYNC_INTERRUPTS ( \ + INT_MASK(INT_ITLB_MISS) | \ + INT_MASK(INT_ILL) | \ + INT_MASK(INT_GPV) | \ + INT_MASK(INT_SN_ACCESS) | \ + INT_MASK(INT_IDN_ACCESS) | \ + INT_MASK(INT_UDN_ACCESS) | \ + INT_MASK(INT_IDN_REFILL) | \ + INT_MASK(INT_UDN_REFILL) | \ + INT_MASK(INT_IDN_COMPLETE) | \ + INT_MASK(INT_UDN_COMPLETE) | \ + INT_MASK(INT_SWINT_3) | \ + INT_MASK(INT_SWINT_2) | \ + INT_MASK(INT_SWINT_1) | \ + INT_MASK(INT_SWINT_0) | \ + INT_MASK(INT_UNALIGN_DATA) | \ + INT_MASK(INT_DTLB_MISS) | \ + INT_MASK(INT_DTLB_ACCESS) | \ + INT_MASK(INT_SN_STATIC_ACCESS) | \ + 0) +#define NON_SYNC_INTERRUPTS ( \ + INT_MASK(INT_MEM_ERROR) | \ + INT_MASK(INT_DMATLB_MISS) | \ + INT_MASK(INT_DMATLB_ACCESS) | \ + INT_MASK(INT_SNITLB_MISS) | \ + INT_MASK(INT_SN_NOTIFY) | \ + INT_MASK(INT_SN_FIREWALL) | \ + INT_MASK(INT_IDN_FIREWALL) | \ + INT_MASK(INT_UDN_FIREWALL) | \ + INT_MASK(INT_TILE_TIMER) | \ + INT_MASK(INT_IDN_TIMER) | \ + INT_MASK(INT_UDN_TIMER) | \ + INT_MASK(INT_DMA_NOTIFY) | \ + INT_MASK(INT_IDN_CA) | \ + INT_MASK(INT_UDN_CA) | \ + INT_MASK(INT_IDN_AVAIL) | \ + INT_MASK(INT_UDN_AVAIL) | \ + INT_MASK(INT_PERF_COUNT) | \ + INT_MASK(INT_INTCTRL_3) | \ + INT_MASK(INT_INTCTRL_2) | \ + INT_MASK(INT_INTCTRL_1) | \ + INT_MASK(INT_INTCTRL_0) | \ + INT_MASK(INT_BOOT_ACCESS) | \ + INT_MASK(INT_WORLD_ACCESS) | \ + INT_MASK(INT_I_ASID) | \ + INT_MASK(INT_D_ASID) | \ + INT_MASK(INT_DMA_ASID) | \ + INT_MASK(INT_SNI_ASID) | \ + INT_MASK(INT_DMA_CPL) | \ + INT_MASK(INT_SN_CPL) | \ + INT_MASK(INT_DOUBLE_FAULT) | \ + INT_MASK(INT_AUX_PERF_COUNT) | \ + 0) +#endif /* !__ASSEMBLER__ */ +#endif /* !__ARCH_INTERRUPTS_H__ */ diff --git a/arch/tile/include/arch/interrupts_64.h b/arch/tile/include/arch/interrupts_64.h new file mode 100644 index 00000000..5bb58b2e --- /dev/null +++ b/arch/tile/include/arch/interrupts_64.h @@ -0,0 +1,276 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef __ARCH_INTERRUPTS_H__ +#define __ARCH_INTERRUPTS_H__ + +/** Mask for an interrupt. */ +#ifdef __ASSEMBLER__ +/* Note: must handle breaking interrupts into high and low words manually. */ +#define INT_MASK(intno) (1 << (intno)) +#else +#define INT_MASK(intno) (1ULL << (intno)) +#endif + + +/** Where a given interrupt executes */ +#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8)) + +/** Where to store a vector for a given interrupt. */ +#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0) + +/** The base address of user-level interrupts. */ +#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0) + + +/** Additional synthetic interrupt. */ +#define INT_BREAKPOINT (63) + +#define INT_MEM_ERROR 0 +#define INT_SINGLE_STEP_3 1 +#define INT_SINGLE_STEP_2 2 +#define INT_SINGLE_STEP_1 3 +#define INT_SINGLE_STEP_0 4 +#define INT_IDN_COMPLETE 5 +#define INT_UDN_COMPLETE 6 +#define INT_ITLB_MISS 7 +#define INT_ILL 8 +#define INT_GPV 9 +#define INT_IDN_ACCESS 10 +#define INT_UDN_ACCESS 11 +#define INT_SWINT_3 12 +#define INT_SWINT_2 13 +#define INT_SWINT_1 14 +#define INT_SWINT_0 15 +#define INT_ILL_TRANS 16 +#define INT_UNALIGN_DATA 17 +#define INT_DTLB_MISS 18 +#define INT_DTLB_ACCESS 19 +#define INT_IDN_FIREWALL 20 +#define INT_UDN_FIREWALL 21 +#define INT_TILE_TIMER 22 +#define INT_AUX_TILE_TIMER 23 +#define INT_IDN_TIMER 24 +#define INT_UDN_TIMER 25 +#define INT_IDN_AVAIL 26 +#define INT_UDN_AVAIL 27 +#define INT_IPI_3 28 +#define INT_IPI_2 29 +#define INT_IPI_1 30 +#define INT_IPI_0 31 +#define INT_PERF_COUNT 32 +#define INT_AUX_PERF_COUNT 33 +#define INT_INTCTRL_3 34 +#define INT_INTCTRL_2 35 +#define INT_INTCTRL_1 36 +#define INT_INTCTRL_0 37 +#define INT_BOOT_ACCESS 38 +#define INT_WORLD_ACCESS 39 +#define INT_I_ASID 40 +#define INT_D_ASID 41 +#define INT_DOUBLE_FAULT 42 + +#define NUM_INTERRUPTS 43 + +#ifndef __ASSEMBLER__ +#define QUEUED_INTERRUPTS ( \ + INT_MASK(INT_MEM_ERROR) | \ + INT_MASK(INT_IDN_COMPLETE) | \ + INT_MASK(INT_UDN_COMPLETE) | \ + INT_MASK(INT_IDN_FIREWALL) | \ + INT_MASK(INT_UDN_FIREWALL) | \ + INT_MASK(INT_TILE_TIMER) | \ + INT_MASK(INT_AUX_TILE_TIMER) | \ + INT_MASK(INT_IDN_TIMER) | \ + INT_MASK(INT_UDN_TIMER) | \ + INT_MASK(INT_IDN_AVAIL) | \ + INT_MASK(INT_UDN_AVAIL) | \ + INT_MASK(INT_IPI_3) | \ + INT_MASK(INT_IPI_2) | \ + INT_MASK(INT_IPI_1) | \ + INT_MASK(INT_IPI_0) | \ + INT_MASK(INT_PERF_COUNT) | \ + INT_MASK(INT_AUX_PERF_COUNT) | \ + INT_MASK(INT_INTCTRL_3) | \ + INT_MASK(INT_INTCTRL_2) | \ + INT_MASK(INT_INTCTRL_1) | \ + INT_MASK(INT_INTCTRL_0) | \ + INT_MASK(INT_BOOT_ACCESS) | \ + INT_MASK(INT_WORLD_ACCESS) | \ + INT_MASK(INT_I_ASID) | \ + INT_MASK(INT_D_ASID) | \ + INT_MASK(INT_DOUBLE_FAULT) | \ + 0) +#define NONQUEUED_INTERRUPTS ( \ + INT_MASK(INT_SINGLE_STEP_3) | \ + INT_MASK(INT_SINGLE_STEP_2) | \ + INT_MASK(INT_SINGLE_STEP_1) | \ + INT_MASK(INT_SINGLE_STEP_0) | \ + INT_MASK(INT_ITLB_MISS) | \ + INT_MASK(INT_ILL) | \ + INT_MASK(INT_GPV) | \ + INT_MASK(INT_IDN_ACCESS) | \ + INT_MASK(INT_UDN_ACCESS) | \ + INT_MASK(INT_SWINT_3) | \ + INT_MASK(INT_SWINT_2) | \ + INT_MASK(INT_SWINT_1) | \ + INT_MASK(INT_SWINT_0) | \ + INT_MASK(INT_ILL_TRANS) | \ + INT_MASK(INT_UNALIGN_DATA) | \ + INT_MASK(INT_DTLB_MISS) | \ + INT_MASK(INT_DTLB_ACCESS) | \ + 0) +#define CRITICAL_MASKED_INTERRUPTS ( \ + INT_MASK(INT_MEM_ERROR) | \ + INT_MASK(INT_SINGLE_STEP_3) | \ + INT_MASK(INT_SINGLE_STEP_2) | \ + INT_MASK(INT_SINGLE_STEP_1) | \ + INT_MASK(INT_SINGLE_STEP_0) | \ + INT_MASK(INT_IDN_COMPLETE) | \ + INT_MASK(INT_UDN_COMPLETE) | \ + INT_MASK(INT_IDN_FIREWALL) | \ + INT_MASK(INT_UDN_FIREWALL) | \ + INT_MASK(INT_TILE_TIMER) | \ + INT_MASK(INT_AUX_TILE_TIMER) | \ + INT_MASK(INT_IDN_TIMER) | \ + INT_MASK(INT_UDN_TIMER) | \ + INT_MASK(INT_IDN_AVAIL) | \ + INT_MASK(INT_UDN_AVAIL) | \ + INT_MASK(INT_IPI_3) | \ + INT_MASK(INT_IPI_2) | \ + INT_MASK(INT_IPI_1) | \ + INT_MASK(INT_IPI_0) | \ + INT_MASK(INT_PERF_COUNT) | \ + INT_MASK(INT_AUX_PERF_COUNT) | \ + INT_MASK(INT_INTCTRL_3) | \ + INT_MASK(INT_INTCTRL_2) | \ + INT_MASK(INT_INTCTRL_1) | \ + INT_MASK(INT_INTCTRL_0) | \ + 0) +#define CRITICAL_UNMASKED_INTERRUPTS ( \ + INT_MASK(INT_ITLB_MISS) | \ + INT_MASK(INT_ILL) | \ + INT_MASK(INT_GPV) | \ + INT_MASK(INT_IDN_ACCESS) | \ + INT_MASK(INT_UDN_ACCESS) | \ + INT_MASK(INT_SWINT_3) | \ + INT_MASK(INT_SWINT_2) | \ + INT_MASK(INT_SWINT_1) | \ + INT_MASK(INT_SWINT_0) | \ + INT_MASK(INT_ILL_TRANS) | \ + INT_MASK(INT_UNALIGN_DATA) | \ + INT_MASK(INT_DTLB_MISS) | \ + INT_MASK(INT_DTLB_ACCESS) | \ + INT_MASK(INT_BOOT_ACCESS) | \ + INT_MASK(INT_WORLD_ACCESS) | \ + INT_MASK(INT_I_ASID) | \ + INT_MASK(INT_D_ASID) | \ + INT_MASK(INT_DOUBLE_FAULT) | \ + 0) +#define MASKABLE_INTERRUPTS ( \ + INT_MASK(INT_MEM_ERROR) | \ + INT_MASK(INT_SINGLE_STEP_3) | \ + INT_MASK(INT_SINGLE_STEP_2) | \ + INT_MASK(INT_SINGLE_STEP_1) | \ + INT_MASK(INT_SINGLE_STEP_0) | \ + INT_MASK(INT_IDN_COMPLETE) | \ + INT_MASK(INT_UDN_COMPLETE) | \ + INT_MASK(INT_IDN_FIREWALL) | \ + INT_MASK(INT_UDN_FIREWALL) | \ + INT_MASK(INT_TILE_TIMER) | \ + INT_MASK(INT_AUX_TILE_TIMER) | \ + INT_MASK(INT_IDN_TIMER) | \ + INT_MASK(INT_UDN_TIMER) | \ + INT_MASK(INT_IDN_AVAIL) | \ + INT_MASK(INT_UDN_AVAIL) | \ + INT_MASK(INT_IPI_3) | \ + INT_MASK(INT_IPI_2) | \ + INT_MASK(INT_IPI_1) | \ + INT_MASK(INT_IPI_0) | \ + INT_MASK(INT_PERF_COUNT) | \ + INT_MASK(INT_AUX_PERF_COUNT) | \ + INT_MASK(INT_INTCTRL_3) | \ + INT_MASK(INT_INTCTRL_2) | \ + INT_MASK(INT_INTCTRL_1) | \ + INT_MASK(INT_INTCTRL_0) | \ + 0) +#define UNMASKABLE_INTERRUPTS ( \ + INT_MASK(INT_ITLB_MISS) | \ + INT_MASK(INT_ILL) | \ + INT_MASK(INT_GPV) | \ + INT_MASK(INT_IDN_ACCESS) | \ + INT_MASK(INT_UDN_ACCESS) | \ + INT_MASK(INT_SWINT_3) | \ + INT_MASK(INT_SWINT_2) | \ + INT_MASK(INT_SWINT_1) | \ + INT_MASK(INT_SWINT_0) | \ + INT_MASK(INT_ILL_TRANS) | \ + INT_MASK(INT_UNALIGN_DATA) | \ + INT_MASK(INT_DTLB_MISS) | \ + INT_MASK(INT_DTLB_ACCESS) | \ + INT_MASK(INT_BOOT_ACCESS) | \ + INT_MASK(INT_WORLD_ACCESS) | \ + INT_MASK(INT_I_ASID) | \ + INT_MASK(INT_D_ASID) | \ + INT_MASK(INT_DOUBLE_FAULT) | \ + 0) +#define SYNC_INTERRUPTS ( \ + INT_MASK(INT_SINGLE_STEP_3) | \ + INT_MASK(INT_SINGLE_STEP_2) | \ + INT_MASK(INT_SINGLE_STEP_1) | \ + INT_MASK(INT_SINGLE_STEP_0) | \ + INT_MASK(INT_IDN_COMPLETE) | \ + INT_MASK(INT_UDN_COMPLETE) | \ + INT_MASK(INT_ITLB_MISS) | \ + INT_MASK(INT_ILL) | \ + INT_MASK(INT_GPV) | \ + INT_MASK(INT_IDN_ACCESS) | \ + INT_MASK(INT_UDN_ACCESS) | \ + INT_MASK(INT_SWINT_3) | \ + INT_MASK(INT_SWINT_2) | \ + INT_MASK(INT_SWINT_1) | \ + INT_MASK(INT_SWINT_0) | \ + INT_MASK(INT_ILL_TRANS) | \ + INT_MASK(INT_UNALIGN_DATA) | \ + INT_MASK(INT_DTLB_MISS) | \ + INT_MASK(INT_DTLB_ACCESS) | \ + 0) +#define NON_SYNC_INTERRUPTS ( \ + INT_MASK(INT_MEM_ERROR) | \ + INT_MASK(INT_IDN_FIREWALL) | \ + INT_MASK(INT_UDN_FIREWALL) | \ + INT_MASK(INT_TILE_TIMER) | \ + INT_MASK(INT_AUX_TILE_TIMER) | \ + INT_MASK(INT_IDN_TIMER) | \ + INT_MASK(INT_UDN_TIMER) | \ + INT_MASK(INT_IDN_AVAIL) | \ + INT_MASK(INT_UDN_AVAIL) | \ + INT_MASK(INT_IPI_3) | \ + INT_MASK(INT_IPI_2) | \ + INT_MASK(INT_IPI_1) | \ + INT_MASK(INT_IPI_0) | \ + INT_MASK(INT_PERF_COUNT) | \ + INT_MASK(INT_AUX_PERF_COUNT) | \ + INT_MASK(INT_INTCTRL_3) | \ + INT_MASK(INT_INTCTRL_2) | \ + INT_MASK(INT_INTCTRL_1) | \ + INT_MASK(INT_INTCTRL_0) | \ + INT_MASK(INT_BOOT_ACCESS) | \ + INT_MASK(INT_WORLD_ACCESS) | \ + INT_MASK(INT_I_ASID) | \ + INT_MASK(INT_D_ASID) | \ + INT_MASK(INT_DOUBLE_FAULT) | \ + 0) +#endif /* !__ASSEMBLER__ */ +#endif /* !__ARCH_INTERRUPTS_H__ */ diff --git a/arch/tile/include/arch/opcode.h b/arch/tile/include/arch/opcode.h new file mode 100644 index 00000000..92d15229 --- /dev/null +++ b/arch/tile/include/arch/opcode.h @@ -0,0 +1,21 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#if defined(__tilepro__) +#include <arch/opcode_tilepro.h> +#elif defined(__tilegx__) +#include <arch/opcode_tilegx.h> +#else +#error Unexpected Tilera chip type +#endif diff --git a/arch/tile/include/arch/opcode_tilegx.h b/arch/tile/include/arch/opcode_tilegx.h new file mode 100644 index 00000000..c14d02c8 --- /dev/null +++ b/arch/tile/include/arch/opcode_tilegx.h @@ -0,0 +1,1405 @@ +/* TILE-Gx opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * + * + * + */ + +#ifndef __ARCH_OPCODE_H__ +#define __ARCH_OPCODE_H__ + +#ifndef __ASSEMBLER__ + +typedef unsigned long long tilegx_bundle_bits; + +/* These are the bits that determine if a bundle is in the X encoding. */ +#define TILEGX_BUNDLE_MODE_MASK ((tilegx_bundle_bits)3 << 62) + +enum +{ + /* Maximum number of instructions in a bundle (2 for X, 3 for Y). */ + TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE = 3, + + /* How many different pipeline encodings are there? X0, X1, Y0, Y1, Y2. */ + TILEGX_NUM_PIPELINE_ENCODINGS = 5, + + /* Log base 2 of TILEGX_BUNDLE_SIZE_IN_BYTES. */ + TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES = 3, + + /* Instructions take this many bytes. */ + TILEGX_BUNDLE_SIZE_IN_BYTES = 1 << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES, + + /* Log base 2 of TILEGX_BUNDLE_ALIGNMENT_IN_BYTES. */ + TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3, + + /* Bundles should be aligned modulo this number of bytes. */ + TILEGX_BUNDLE_ALIGNMENT_IN_BYTES = + (1 << TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES), + + /* Number of registers (some are magic, such as network I/O). */ + TILEGX_NUM_REGISTERS = 64, +}; + +/* Make a few "tile_" variables to simplify common code between + architectures. */ + +typedef tilegx_bundle_bits tile_bundle_bits; +#define TILE_BUNDLE_SIZE_IN_BYTES TILEGX_BUNDLE_SIZE_IN_BYTES +#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES +#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \ + TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES + +/* 64-bit pattern for a { bpt ; nop } bundle. */ +#define TILEGX_BPT_BUNDLE 0x286a44ae51485000ULL + +static __inline unsigned int +get_BFEnd_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_BFOpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 24)) & 0xf); +} + +static __inline unsigned int +get_BFStart_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3f); +} + +static __inline unsigned int +get_BrOff_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 37)) & 0x0001ffc0); +} + +static __inline unsigned int +get_BrType_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 54)) & 0x1f); +} + +static __inline unsigned int +get_Dest_Imm8_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 43)) & 0x000000c0); +} + +static __inline unsigned int +get_Dest_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3f); +} + +static __inline unsigned int +get_Dest_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x3f); +} + +static __inline unsigned int +get_Dest_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3f); +} + +static __inline unsigned int +get_Dest_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x3f); +} + +static __inline unsigned int +get_Imm16_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xffff); +} + +static __inline unsigned int +get_Imm16_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xffff); +} + +static __inline unsigned int +get_Imm8OpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 20)) & 0xff); +} + +static __inline unsigned int +get_Imm8OpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 51)) & 0xff); +} + +static __inline unsigned int +get_Imm8_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xff); +} + +static __inline unsigned int +get_Imm8_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xff); +} + +static __inline unsigned int +get_Imm8_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xff); +} + +static __inline unsigned int +get_Imm8_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xff); +} + +static __inline unsigned int +get_JumpOff_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x7ffffff); +} + +static __inline unsigned int +get_JumpOpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 58)) & 0x1); +} + +static __inline unsigned int +get_MF_Imm14_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3fff); +} + +static __inline unsigned int +get_MT_Imm14_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 37)) & 0x00003fc0); +} + +static __inline unsigned int +get_Mode(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 62)) & 0x3); +} + +static __inline unsigned int +get_Opcode_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 28)) & 0x7); +} + +static __inline unsigned int +get_Opcode_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 59)) & 0x7); +} + +static __inline unsigned int +get_Opcode_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 27)) & 0xf); +} + +static __inline unsigned int +get_Opcode_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 58)) & 0xf); +} + +static __inline unsigned int +get_Opcode_Y2(tilegx_bundle_bits n) +{ + return (((n >> 26)) & 0x00000001) | + (((unsigned int)(n >> 56)) & 0x00000002); +} + +static __inline unsigned int +get_RRROpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3ff); +} + +static __inline unsigned int +get_RRROpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3ff); +} + +static __inline unsigned int +get_RRROpcodeExtension_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3); +} + +static __inline unsigned int +get_RRROpcodeExtension_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3); +} + +static __inline unsigned int +get_ShAmt_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_ShAmt_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_ShAmt_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_ShAmt_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_ShiftOpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3ff); +} + +static __inline unsigned int +get_ShiftOpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3ff); +} + +static __inline unsigned int +get_ShiftOpcodeExtension_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3); +} + +static __inline unsigned int +get_ShiftOpcodeExtension_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3); +} + +static __inline unsigned int +get_SrcA_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 6)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 6)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y2(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 20)) & 0x3f); +} + +static __inline unsigned int +get_SrcBDest_Y2(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 51)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_UnaryOpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_UnaryOpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_UnaryOpcodeExtension_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_UnaryOpcodeExtension_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + + +static __inline int +sign_extend(int n, int num_bits) +{ + int shift = (int)(sizeof(int) * 8 - num_bits); + return (n << shift) >> shift; +} + + + +static __inline tilegx_bundle_bits +create_BFEnd_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_BFOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xf) << 24); +} + +static __inline tilegx_bundle_bits +create_BFStart_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 18); +} + +static __inline tilegx_bundle_bits +create_BrOff_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilegx_bundle_bits)(n & 0x0001ffc0)) << 37); +} + +static __inline tilegx_bundle_bits +create_BrType_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x1f)) << 54); +} + +static __inline tilegx_bundle_bits +create_Dest_Imm8_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilegx_bundle_bits)(n & 0x000000c0)) << 43); +} + +static __inline tilegx_bundle_bits +create_Dest_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 0); +} + +static __inline tilegx_bundle_bits +create_Dest_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 31); +} + +static __inline tilegx_bundle_bits +create_Dest_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 0); +} + +static __inline tilegx_bundle_bits +create_Dest_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 31); +} + +static __inline tilegx_bundle_bits +create_Imm16_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xffff) << 12); +} + +static __inline tilegx_bundle_bits +create_Imm16_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xffff)) << 43); +} + +static __inline tilegx_bundle_bits +create_Imm8OpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 20); +} + +static __inline tilegx_bundle_bits +create_Imm8OpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xff)) << 51); +} + +static __inline tilegx_bundle_bits +create_Imm8_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 12); +} + +static __inline tilegx_bundle_bits +create_Imm8_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xff)) << 43); +} + +static __inline tilegx_bundle_bits +create_Imm8_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 12); +} + +static __inline tilegx_bundle_bits +create_Imm8_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xff)) << 43); +} + +static __inline tilegx_bundle_bits +create_JumpOff_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x7ffffff)) << 31); +} + +static __inline tilegx_bundle_bits +create_JumpOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x1)) << 58); +} + +static __inline tilegx_bundle_bits +create_MF_Imm14_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3fff)) << 37); +} + +static __inline tilegx_bundle_bits +create_MT_Imm14_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilegx_bundle_bits)(n & 0x00003fc0)) << 37); +} + +static __inline tilegx_bundle_bits +create_Mode(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3)) << 62); +} + +static __inline tilegx_bundle_bits +create_Opcode_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x7) << 28); +} + +static __inline tilegx_bundle_bits +create_Opcode_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x7)) << 59); +} + +static __inline tilegx_bundle_bits +create_Opcode_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xf) << 27); +} + +static __inline tilegx_bundle_bits +create_Opcode_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xf)) << 58); +} + +static __inline tilegx_bundle_bits +create_Opcode_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x00000001) << 26) | + (((tilegx_bundle_bits)(n & 0x00000002)) << 56); +} + +static __inline tilegx_bundle_bits +create_RRROpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3ff) << 18); +} + +static __inline tilegx_bundle_bits +create_RRROpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3ff)) << 49); +} + +static __inline tilegx_bundle_bits +create_RRROpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 18); +} + +static __inline tilegx_bundle_bits +create_RRROpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3)) << 49); +} + +static __inline tilegx_bundle_bits +create_ShAmt_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_ShAmt_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_ShAmt_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_ShAmt_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_ShiftOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3ff) << 18); +} + +static __inline tilegx_bundle_bits +create_ShiftOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3ff)) << 49); +} + +static __inline tilegx_bundle_bits +create_ShiftOpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 18); +} + +static __inline tilegx_bundle_bits +create_ShiftOpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3)) << 49); +} + +static __inline tilegx_bundle_bits +create_SrcA_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 6); +} + +static __inline tilegx_bundle_bits +create_SrcA_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 37); +} + +static __inline tilegx_bundle_bits +create_SrcA_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 6); +} + +static __inline tilegx_bundle_bits +create_SrcA_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 37); +} + +static __inline tilegx_bundle_bits +create_SrcA_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 20); +} + +static __inline tilegx_bundle_bits +create_SrcBDest_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 51); +} + +static __inline tilegx_bundle_bits +create_SrcB_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_SrcB_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_SrcB_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_SrcB_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_UnaryOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_UnaryOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_UnaryOpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_UnaryOpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + + +enum +{ + ADDI_IMM8_OPCODE_X0 = 1, + ADDI_IMM8_OPCODE_X1 = 1, + ADDI_OPCODE_Y0 = 0, + ADDI_OPCODE_Y1 = 1, + ADDLI_OPCODE_X0 = 1, + ADDLI_OPCODE_X1 = 0, + ADDXI_IMM8_OPCODE_X0 = 2, + ADDXI_IMM8_OPCODE_X1 = 2, + ADDXI_OPCODE_Y0 = 1, + ADDXI_OPCODE_Y1 = 2, + ADDXLI_OPCODE_X0 = 2, + ADDXLI_OPCODE_X1 = 1, + ADDXSC_RRR_0_OPCODE_X0 = 1, + ADDXSC_RRR_0_OPCODE_X1 = 1, + ADDX_RRR_0_OPCODE_X0 = 2, + ADDX_RRR_0_OPCODE_X1 = 2, + ADDX_RRR_0_OPCODE_Y0 = 0, + ADDX_SPECIAL_0_OPCODE_Y1 = 0, + ADD_RRR_0_OPCODE_X0 = 3, + ADD_RRR_0_OPCODE_X1 = 3, + ADD_RRR_0_OPCODE_Y0 = 1, + ADD_SPECIAL_0_OPCODE_Y1 = 1, + ANDI_IMM8_OPCODE_X0 = 3, + ANDI_IMM8_OPCODE_X1 = 3, + ANDI_OPCODE_Y0 = 2, + ANDI_OPCODE_Y1 = 3, + AND_RRR_0_OPCODE_X0 = 4, + AND_RRR_0_OPCODE_X1 = 4, + AND_RRR_5_OPCODE_Y0 = 0, + AND_RRR_5_OPCODE_Y1 = 0, + BEQZT_BRANCH_OPCODE_X1 = 16, + BEQZ_BRANCH_OPCODE_X1 = 17, + BFEXTS_BF_OPCODE_X0 = 4, + BFEXTU_BF_OPCODE_X0 = 5, + BFINS_BF_OPCODE_X0 = 6, + BF_OPCODE_X0 = 3, + BGEZT_BRANCH_OPCODE_X1 = 18, + BGEZ_BRANCH_OPCODE_X1 = 19, + BGTZT_BRANCH_OPCODE_X1 = 20, + BGTZ_BRANCH_OPCODE_X1 = 21, + BLBCT_BRANCH_OPCODE_X1 = 22, + BLBC_BRANCH_OPCODE_X1 = 23, + BLBST_BRANCH_OPCODE_X1 = 24, + BLBS_BRANCH_OPCODE_X1 = 25, + BLEZT_BRANCH_OPCODE_X1 = 26, + BLEZ_BRANCH_OPCODE_X1 = 27, + BLTZT_BRANCH_OPCODE_X1 = 28, + BLTZ_BRANCH_OPCODE_X1 = 29, + BNEZT_BRANCH_OPCODE_X1 = 30, + BNEZ_BRANCH_OPCODE_X1 = 31, + BRANCH_OPCODE_X1 = 2, + CMOVEQZ_RRR_0_OPCODE_X0 = 5, + CMOVEQZ_RRR_4_OPCODE_Y0 = 0, + CMOVNEZ_RRR_0_OPCODE_X0 = 6, + CMOVNEZ_RRR_4_OPCODE_Y0 = 1, + CMPEQI_IMM8_OPCODE_X0 = 4, + CMPEQI_IMM8_OPCODE_X1 = 4, + CMPEQI_OPCODE_Y0 = 3, + CMPEQI_OPCODE_Y1 = 4, + CMPEQ_RRR_0_OPCODE_X0 = 7, + CMPEQ_RRR_0_OPCODE_X1 = 5, + CMPEQ_RRR_3_OPCODE_Y0 = 0, + CMPEQ_RRR_3_OPCODE_Y1 = 2, + CMPEXCH4_RRR_0_OPCODE_X1 = 6, + CMPEXCH_RRR_0_OPCODE_X1 = 7, + CMPLES_RRR_0_OPCODE_X0 = 8, + CMPLES_RRR_0_OPCODE_X1 = 8, + CMPLES_RRR_2_OPCODE_Y0 = 0, + CMPLES_RRR_2_OPCODE_Y1 = 0, + CMPLEU_RRR_0_OPCODE_X0 = 9, + CMPLEU_RRR_0_OPCODE_X1 = 9, + CMPLEU_RRR_2_OPCODE_Y0 = 1, + CMPLEU_RRR_2_OPCODE_Y1 = 1, + CMPLTSI_IMM8_OPCODE_X0 = 5, + CMPLTSI_IMM8_OPCODE_X1 = 5, + CMPLTSI_OPCODE_Y0 = 4, + CMPLTSI_OPCODE_Y1 = 5, + CMPLTS_RRR_0_OPCODE_X0 = 10, + CMPLTS_RRR_0_OPCODE_X1 = 10, + CMPLTS_RRR_2_OPCODE_Y0 = 2, + CMPLTS_RRR_2_OPCODE_Y1 = 2, + CMPLTUI_IMM8_OPCODE_X0 = 6, + CMPLTUI_IMM8_OPCODE_X1 = 6, + CMPLTU_RRR_0_OPCODE_X0 = 11, + CMPLTU_RRR_0_OPCODE_X1 = 11, + CMPLTU_RRR_2_OPCODE_Y0 = 3, + CMPLTU_RRR_2_OPCODE_Y1 = 3, + CMPNE_RRR_0_OPCODE_X0 = 12, + CMPNE_RRR_0_OPCODE_X1 = 12, + CMPNE_RRR_3_OPCODE_Y0 = 1, + CMPNE_RRR_3_OPCODE_Y1 = 3, + CMULAF_RRR_0_OPCODE_X0 = 13, + CMULA_RRR_0_OPCODE_X0 = 14, + CMULFR_RRR_0_OPCODE_X0 = 15, + CMULF_RRR_0_OPCODE_X0 = 16, + CMULHR_RRR_0_OPCODE_X0 = 17, + CMULH_RRR_0_OPCODE_X0 = 18, + CMUL_RRR_0_OPCODE_X0 = 19, + CNTLZ_UNARY_OPCODE_X0 = 1, + CNTLZ_UNARY_OPCODE_Y0 = 1, + CNTTZ_UNARY_OPCODE_X0 = 2, + CNTTZ_UNARY_OPCODE_Y0 = 2, + CRC32_32_RRR_0_OPCODE_X0 = 20, + CRC32_8_RRR_0_OPCODE_X0 = 21, + DBLALIGN2_RRR_0_OPCODE_X0 = 22, + DBLALIGN2_RRR_0_OPCODE_X1 = 13, + DBLALIGN4_RRR_0_OPCODE_X0 = 23, + DBLALIGN4_RRR_0_OPCODE_X1 = 14, + DBLALIGN6_RRR_0_OPCODE_X0 = 24, + DBLALIGN6_RRR_0_OPCODE_X1 = 15, + DBLALIGN_RRR_0_OPCODE_X0 = 25, + DRAIN_UNARY_OPCODE_X1 = 1, + DTLBPR_UNARY_OPCODE_X1 = 2, + EXCH4_RRR_0_OPCODE_X1 = 16, + EXCH_RRR_0_OPCODE_X1 = 17, + FDOUBLE_ADDSUB_RRR_0_OPCODE_X0 = 26, + FDOUBLE_ADD_FLAGS_RRR_0_OPCODE_X0 = 27, + FDOUBLE_MUL_FLAGS_RRR_0_OPCODE_X0 = 28, + FDOUBLE_PACK1_RRR_0_OPCODE_X0 = 29, + FDOUBLE_PACK2_RRR_0_OPCODE_X0 = 30, + FDOUBLE_SUB_FLAGS_RRR_0_OPCODE_X0 = 31, + FDOUBLE_UNPACK_MAX_RRR_0_OPCODE_X0 = 32, + FDOUBLE_UNPACK_MIN_RRR_0_OPCODE_X0 = 33, + FETCHADD4_RRR_0_OPCODE_X1 = 18, + FETCHADDGEZ4_RRR_0_OPCODE_X1 = 19, + FETCHADDGEZ_RRR_0_OPCODE_X1 = 20, + FETCHADD_RRR_0_OPCODE_X1 = 21, + FETCHAND4_RRR_0_OPCODE_X1 = 22, + FETCHAND_RRR_0_OPCODE_X1 = 23, + FETCHOR4_RRR_0_OPCODE_X1 = 24, + FETCHOR_RRR_0_OPCODE_X1 = 25, + FINV_UNARY_OPCODE_X1 = 3, + FLUSHWB_UNARY_OPCODE_X1 = 4, + FLUSH_UNARY_OPCODE_X1 = 5, + FNOP_UNARY_OPCODE_X0 = 3, + FNOP_UNARY_OPCODE_X1 = 6, + FNOP_UNARY_OPCODE_Y0 = 3, + FNOP_UNARY_OPCODE_Y1 = 8, + FSINGLE_ADD1_RRR_0_OPCODE_X0 = 34, + FSINGLE_ADDSUB2_RRR_0_OPCODE_X0 = 35, + FSINGLE_MUL1_RRR_0_OPCODE_X0 = 36, + FSINGLE_MUL2_RRR_0_OPCODE_X0 = 37, + FSINGLE_PACK1_UNARY_OPCODE_X0 = 4, + FSINGLE_PACK1_UNARY_OPCODE_Y0 = 4, + FSINGLE_PACK2_RRR_0_OPCODE_X0 = 38, + FSINGLE_SUB1_RRR_0_OPCODE_X0 = 39, + ICOH_UNARY_OPCODE_X1 = 7, + ILL_UNARY_OPCODE_X1 = 8, + ILL_UNARY_OPCODE_Y1 = 9, + IMM8_OPCODE_X0 = 4, + IMM8_OPCODE_X1 = 3, + INV_UNARY_OPCODE_X1 = 9, + IRET_UNARY_OPCODE_X1 = 10, + JALRP_UNARY_OPCODE_X1 = 11, + JALRP_UNARY_OPCODE_Y1 = 10, + JALR_UNARY_OPCODE_X1 = 12, + JALR_UNARY_OPCODE_Y1 = 11, + JAL_JUMP_OPCODE_X1 = 0, + JRP_UNARY_OPCODE_X1 = 13, + JRP_UNARY_OPCODE_Y1 = 12, + JR_UNARY_OPCODE_X1 = 14, + JR_UNARY_OPCODE_Y1 = 13, + JUMP_OPCODE_X1 = 4, + J_JUMP_OPCODE_X1 = 1, + LD1S_ADD_IMM8_OPCODE_X1 = 7, + LD1S_OPCODE_Y2 = 0, + LD1S_UNARY_OPCODE_X1 = 15, + LD1U_ADD_IMM8_OPCODE_X1 = 8, + LD1U_OPCODE_Y2 = 1, + LD1U_UNARY_OPCODE_X1 = 16, + LD2S_ADD_IMM8_OPCODE_X1 = 9, + LD2S_OPCODE_Y2 = 2, + LD2S_UNARY_OPCODE_X1 = 17, + LD2U_ADD_IMM8_OPCODE_X1 = 10, + LD2U_OPCODE_Y2 = 3, + LD2U_UNARY_OPCODE_X1 = 18, + LD4S_ADD_IMM8_OPCODE_X1 = 11, + LD4S_OPCODE_Y2 = 1, + LD4S_UNARY_OPCODE_X1 = 19, + LD4U_ADD_IMM8_OPCODE_X1 = 12, + LD4U_OPCODE_Y2 = 2, + LD4U_UNARY_OPCODE_X1 = 20, + LDNA_UNARY_OPCODE_X1 = 21, + LDNT1S_ADD_IMM8_OPCODE_X1 = 13, + LDNT1S_UNARY_OPCODE_X1 = 22, + LDNT1U_ADD_IMM8_OPCODE_X1 = 14, + LDNT1U_UNARY_OPCODE_X1 = 23, + LDNT2S_ADD_IMM8_OPCODE_X1 = 15, + LDNT2S_UNARY_OPCODE_X1 = 24, + LDNT2U_ADD_IMM8_OPCODE_X1 = 16, + LDNT2U_UNARY_OPCODE_X1 = 25, + LDNT4S_ADD_IMM8_OPCODE_X1 = 17, + LDNT4S_UNARY_OPCODE_X1 = 26, + LDNT4U_ADD_IMM8_OPCODE_X1 = 18, + LDNT4U_UNARY_OPCODE_X1 = 27, + LDNT_ADD_IMM8_OPCODE_X1 = 19, + LDNT_UNARY_OPCODE_X1 = 28, + LD_ADD_IMM8_OPCODE_X1 = 20, + LD_OPCODE_Y2 = 3, + LD_UNARY_OPCODE_X1 = 29, + LNK_UNARY_OPCODE_X1 = 30, + LNK_UNARY_OPCODE_Y1 = 14, + LWNA_ADD_IMM8_OPCODE_X1 = 21, + MFSPR_IMM8_OPCODE_X1 = 22, + MF_UNARY_OPCODE_X1 = 31, + MM_BF_OPCODE_X0 = 7, + MNZ_RRR_0_OPCODE_X0 = 40, + MNZ_RRR_0_OPCODE_X1 = 26, + MNZ_RRR_4_OPCODE_Y0 = 2, + MNZ_RRR_4_OPCODE_Y1 = 2, + MODE_OPCODE_YA2 = 1, + MODE_OPCODE_YB2 = 2, + MODE_OPCODE_YC2 = 3, + MTSPR_IMM8_OPCODE_X1 = 23, + MULAX_RRR_0_OPCODE_X0 = 41, + MULAX_RRR_3_OPCODE_Y0 = 2, + MULA_HS_HS_RRR_0_OPCODE_X0 = 42, + MULA_HS_HS_RRR_9_OPCODE_Y0 = 0, + MULA_HS_HU_RRR_0_OPCODE_X0 = 43, + MULA_HS_LS_RRR_0_OPCODE_X0 = 44, + MULA_HS_LU_RRR_0_OPCODE_X0 = 45, + MULA_HU_HU_RRR_0_OPCODE_X0 = 46, + MULA_HU_HU_RRR_9_OPCODE_Y0 = 1, + MULA_HU_LS_RRR_0_OPCODE_X0 = 47, + MULA_HU_LU_RRR_0_OPCODE_X0 = 48, + MULA_LS_LS_RRR_0_OPCODE_X0 = 49, + MULA_LS_LS_RRR_9_OPCODE_Y0 = 2, + MULA_LS_LU_RRR_0_OPCODE_X0 = 50, + MULA_LU_LU_RRR_0_OPCODE_X0 = 51, + MULA_LU_LU_RRR_9_OPCODE_Y0 = 3, + MULX_RRR_0_OPCODE_X0 = 52, + MULX_RRR_3_OPCODE_Y0 = 3, + MUL_HS_HS_RRR_0_OPCODE_X0 = 53, + MUL_HS_HS_RRR_8_OPCODE_Y0 = 0, + MUL_HS_HU_RRR_0_OPCODE_X0 = 54, + MUL_HS_LS_RRR_0_OPCODE_X0 = 55, + MUL_HS_LU_RRR_0_OPCODE_X0 = 56, + MUL_HU_HU_RRR_0_OPCODE_X0 = 57, + MUL_HU_HU_RRR_8_OPCODE_Y0 = 1, + MUL_HU_LS_RRR_0_OPCODE_X0 = 58, + MUL_HU_LU_RRR_0_OPCODE_X0 = 59, + MUL_LS_LS_RRR_0_OPCODE_X0 = 60, + MUL_LS_LS_RRR_8_OPCODE_Y0 = 2, + MUL_LS_LU_RRR_0_OPCODE_X0 = 61, + MUL_LU_LU_RRR_0_OPCODE_X0 = 62, + MUL_LU_LU_RRR_8_OPCODE_Y0 = 3, + MZ_RRR_0_OPCODE_X0 = 63, + MZ_RRR_0_OPCODE_X1 = 27, + MZ_RRR_4_OPCODE_Y0 = 3, + MZ_RRR_4_OPCODE_Y1 = 3, + NAP_UNARY_OPCODE_X1 = 32, + NOP_UNARY_OPCODE_X0 = 5, + NOP_UNARY_OPCODE_X1 = 33, + NOP_UNARY_OPCODE_Y0 = 5, + NOP_UNARY_OPCODE_Y1 = 15, + NOR_RRR_0_OPCODE_X0 = 64, + NOR_RRR_0_OPCODE_X1 = 28, + NOR_RRR_5_OPCODE_Y0 = 1, + NOR_RRR_5_OPCODE_Y1 = 1, + ORI_IMM8_OPCODE_X0 = 7, + ORI_IMM8_OPCODE_X1 = 24, + OR_RRR_0_OPCODE_X0 = 65, + OR_RRR_0_OPCODE_X1 = 29, + OR_RRR_5_OPCODE_Y0 = 2, + OR_RRR_5_OPCODE_Y1 = 2, + PCNT_UNARY_OPCODE_X0 = 6, + PCNT_UNARY_OPCODE_Y0 = 6, + REVBITS_UNARY_OPCODE_X0 = 7, + REVBITS_UNARY_OPCODE_Y0 = 7, + REVBYTES_UNARY_OPCODE_X0 = 8, + REVBYTES_UNARY_OPCODE_Y0 = 8, + ROTLI_SHIFT_OPCODE_X0 = 1, + ROTLI_SHIFT_OPCODE_X1 = 1, + ROTLI_SHIFT_OPCODE_Y0 = 0, + ROTLI_SHIFT_OPCODE_Y1 = 0, + ROTL_RRR_0_OPCODE_X0 = 66, + ROTL_RRR_0_OPCODE_X1 = 30, + ROTL_RRR_6_OPCODE_Y0 = 0, + ROTL_RRR_6_OPCODE_Y1 = 0, + RRR_0_OPCODE_X0 = 5, + RRR_0_OPCODE_X1 = 5, + RRR_0_OPCODE_Y0 = 5, + RRR_0_OPCODE_Y1 = 6, + RRR_1_OPCODE_Y0 = 6, + RRR_1_OPCODE_Y1 = 7, + RRR_2_OPCODE_Y0 = 7, + RRR_2_OPCODE_Y1 = 8, + RRR_3_OPCODE_Y0 = 8, + RRR_3_OPCODE_Y1 = 9, + RRR_4_OPCODE_Y0 = 9, + RRR_4_OPCODE_Y1 = 10, + RRR_5_OPCODE_Y0 = 10, + RRR_5_OPCODE_Y1 = 11, + RRR_6_OPCODE_Y0 = 11, + RRR_6_OPCODE_Y1 = 12, + RRR_7_OPCODE_Y0 = 12, + RRR_7_OPCODE_Y1 = 13, + RRR_8_OPCODE_Y0 = 13, + RRR_9_OPCODE_Y0 = 14, + SHIFT_OPCODE_X0 = 6, + SHIFT_OPCODE_X1 = 6, + SHIFT_OPCODE_Y0 = 15, + SHIFT_OPCODE_Y1 = 14, + SHL16INSLI_OPCODE_X0 = 7, + SHL16INSLI_OPCODE_X1 = 7, + SHL1ADDX_RRR_0_OPCODE_X0 = 67, + SHL1ADDX_RRR_0_OPCODE_X1 = 31, + SHL1ADDX_RRR_7_OPCODE_Y0 = 1, + SHL1ADDX_RRR_7_OPCODE_Y1 = 1, + SHL1ADD_RRR_0_OPCODE_X0 = 68, + SHL1ADD_RRR_0_OPCODE_X1 = 32, + SHL1ADD_RRR_1_OPCODE_Y0 = 0, + SHL1ADD_RRR_1_OPCODE_Y1 = 0, + SHL2ADDX_RRR_0_OPCODE_X0 = 69, + SHL2ADDX_RRR_0_OPCODE_X1 = 33, + SHL2ADDX_RRR_7_OPCODE_Y0 = 2, + SHL2ADDX_RRR_7_OPCODE_Y1 = 2, + SHL2ADD_RRR_0_OPCODE_X0 = 70, + SHL2ADD_RRR_0_OPCODE_X1 = 34, + SHL2ADD_RRR_1_OPCODE_Y0 = 1, + SHL2ADD_RRR_1_OPCODE_Y1 = 1, + SHL3ADDX_RRR_0_OPCODE_X0 = 71, + SHL3ADDX_RRR_0_OPCODE_X1 = 35, + SHL3ADDX_RRR_7_OPCODE_Y0 = 3, + SHL3ADDX_RRR_7_OPCODE_Y1 = 3, + SHL3ADD_RRR_0_OPCODE_X0 = 72, + SHL3ADD_RRR_0_OPCODE_X1 = 36, + SHL3ADD_RRR_1_OPCODE_Y0 = 2, + SHL3ADD_RRR_1_OPCODE_Y1 = 2, + SHLI_SHIFT_OPCODE_X0 = 2, + SHLI_SHIFT_OPCODE_X1 = 2, + SHLI_SHIFT_OPCODE_Y0 = 1, + SHLI_SHIFT_OPCODE_Y1 = 1, + SHLXI_SHIFT_OPCODE_X0 = 3, + SHLXI_SHIFT_OPCODE_X1 = 3, + SHLX_RRR_0_OPCODE_X0 = 73, + SHLX_RRR_0_OPCODE_X1 = 37, + SHL_RRR_0_OPCODE_X0 = 74, + SHL_RRR_0_OPCODE_X1 = 38, + SHL_RRR_6_OPCODE_Y0 = 1, + SHL_RRR_6_OPCODE_Y1 = 1, + SHRSI_SHIFT_OPCODE_X0 = 4, + SHRSI_SHIFT_OPCODE_X1 = 4, + SHRSI_SHIFT_OPCODE_Y0 = 2, + SHRSI_SHIFT_OPCODE_Y1 = 2, + SHRS_RRR_0_OPCODE_X0 = 75, + SHRS_RRR_0_OPCODE_X1 = 39, + SHRS_RRR_6_OPCODE_Y0 = 2, + SHRS_RRR_6_OPCODE_Y1 = 2, + SHRUI_SHIFT_OPCODE_X0 = 5, + SHRUI_SHIFT_OPCODE_X1 = 5, + SHRUI_SHIFT_OPCODE_Y0 = 3, + SHRUI_SHIFT_OPCODE_Y1 = 3, + SHRUXI_SHIFT_OPCODE_X0 = 6, + SHRUXI_SHIFT_OPCODE_X1 = 6, + SHRUX_RRR_0_OPCODE_X0 = 76, + SHRUX_RRR_0_OPCODE_X1 = 40, + SHRU_RRR_0_OPCODE_X0 = 77, + SHRU_RRR_0_OPCODE_X1 = 41, + SHRU_RRR_6_OPCODE_Y0 = 3, + SHRU_RRR_6_OPCODE_Y1 = 3, + SHUFFLEBYTES_RRR_0_OPCODE_X0 = 78, + ST1_ADD_IMM8_OPCODE_X1 = 25, + ST1_OPCODE_Y2 = 0, + ST1_RRR_0_OPCODE_X1 = 42, + ST2_ADD_IMM8_OPCODE_X1 = 26, + ST2_OPCODE_Y2 = 1, + ST2_RRR_0_OPCODE_X1 = 43, + ST4_ADD_IMM8_OPCODE_X1 = 27, + ST4_OPCODE_Y2 = 2, + ST4_RRR_0_OPCODE_X1 = 44, + STNT1_ADD_IMM8_OPCODE_X1 = 28, + STNT1_RRR_0_OPCODE_X1 = 45, + STNT2_ADD_IMM8_OPCODE_X1 = 29, + STNT2_RRR_0_OPCODE_X1 = 46, + STNT4_ADD_IMM8_OPCODE_X1 = 30, + STNT4_RRR_0_OPCODE_X1 = 47, + STNT_ADD_IMM8_OPCODE_X1 = 31, + STNT_RRR_0_OPCODE_X1 = 48, + ST_ADD_IMM8_OPCODE_X1 = 32, + ST_OPCODE_Y2 = 3, + ST_RRR_0_OPCODE_X1 = 49, + SUBXSC_RRR_0_OPCODE_X0 = 79, + SUBXSC_RRR_0_OPCODE_X1 = 50, + SUBX_RRR_0_OPCODE_X0 = 80, + SUBX_RRR_0_OPCODE_X1 = 51, + SUBX_RRR_0_OPCODE_Y0 = 2, + SUBX_RRR_0_OPCODE_Y1 = 2, + SUB_RRR_0_OPCODE_X0 = 81, + SUB_RRR_0_OPCODE_X1 = 52, + SUB_RRR_0_OPCODE_Y0 = 3, + SUB_RRR_0_OPCODE_Y1 = 3, + SWINT0_UNARY_OPCODE_X1 = 34, + SWINT1_UNARY_OPCODE_X1 = 35, + SWINT2_UNARY_OPCODE_X1 = 36, + SWINT3_UNARY_OPCODE_X1 = 37, + TBLIDXB0_UNARY_OPCODE_X0 = 9, + TBLIDXB0_UNARY_OPCODE_Y0 = 9, + TBLIDXB1_UNARY_OPCODE_X0 = 10, + TBLIDXB1_UNARY_OPCODE_Y0 = 10, + TBLIDXB2_UNARY_OPCODE_X0 = 11, + TBLIDXB2_UNARY_OPCODE_Y0 = 11, + TBLIDXB3_UNARY_OPCODE_X0 = 12, + TBLIDXB3_UNARY_OPCODE_Y0 = 12, + UNARY_RRR_0_OPCODE_X0 = 82, + UNARY_RRR_0_OPCODE_X1 = 53, + UNARY_RRR_1_OPCODE_Y0 = 3, + UNARY_RRR_1_OPCODE_Y1 = 3, + V1ADDI_IMM8_OPCODE_X0 = 8, + V1ADDI_IMM8_OPCODE_X1 = 33, + V1ADDUC_RRR_0_OPCODE_X0 = 83, + V1ADDUC_RRR_0_OPCODE_X1 = 54, + V1ADD_RRR_0_OPCODE_X0 = 84, + V1ADD_RRR_0_OPCODE_X1 = 55, + V1ADIFFU_RRR_0_OPCODE_X0 = 85, + V1AVGU_RRR_0_OPCODE_X0 = 86, + V1CMPEQI_IMM8_OPCODE_X0 = 9, + V1CMPEQI_IMM8_OPCODE_X1 = 34, + V1CMPEQ_RRR_0_OPCODE_X0 = 87, + V1CMPEQ_RRR_0_OPCODE_X1 = 56, + V1CMPLES_RRR_0_OPCODE_X0 = 88, + V1CMPLES_RRR_0_OPCODE_X1 = 57, + V1CMPLEU_RRR_0_OPCODE_X0 = 89, + V1CMPLEU_RRR_0_OPCODE_X1 = 58, + V1CMPLTSI_IMM8_OPCODE_X0 = 10, + V1CMPLTSI_IMM8_OPCODE_X1 = 35, + V1CMPLTS_RRR_0_OPCODE_X0 = 90, + V1CMPLTS_RRR_0_OPCODE_X1 = 59, + V1CMPLTUI_IMM8_OPCODE_X0 = 11, + V1CMPLTUI_IMM8_OPCODE_X1 = 36, + V1CMPLTU_RRR_0_OPCODE_X0 = 91, + V1CMPLTU_RRR_0_OPCODE_X1 = 60, + V1CMPNE_RRR_0_OPCODE_X0 = 92, + V1CMPNE_RRR_0_OPCODE_X1 = 61, + V1DDOTPUA_RRR_0_OPCODE_X0 = 161, + V1DDOTPUSA_RRR_0_OPCODE_X0 = 93, + V1DDOTPUS_RRR_0_OPCODE_X0 = 94, + V1DDOTPU_RRR_0_OPCODE_X0 = 162, + V1DOTPA_RRR_0_OPCODE_X0 = 95, + V1DOTPUA_RRR_0_OPCODE_X0 = 163, + V1DOTPUSA_RRR_0_OPCODE_X0 = 96, + V1DOTPUS_RRR_0_OPCODE_X0 = 97, + V1DOTPU_RRR_0_OPCODE_X0 = 164, + V1DOTP_RRR_0_OPCODE_X0 = 98, + V1INT_H_RRR_0_OPCODE_X0 = 99, + V1INT_H_RRR_0_OPCODE_X1 = 62, + V1INT_L_RRR_0_OPCODE_X0 = 100, + V1INT_L_RRR_0_OPCODE_X1 = 63, + V1MAXUI_IMM8_OPCODE_X0 = 12, + V1MAXUI_IMM8_OPCODE_X1 = 37, + V1MAXU_RRR_0_OPCODE_X0 = 101, + V1MAXU_RRR_0_OPCODE_X1 = 64, + V1MINUI_IMM8_OPCODE_X0 = 13, + V1MINUI_IMM8_OPCODE_X1 = 38, + V1MINU_RRR_0_OPCODE_X0 = 102, + V1MINU_RRR_0_OPCODE_X1 = 65, + V1MNZ_RRR_0_OPCODE_X0 = 103, + V1MNZ_RRR_0_OPCODE_X1 = 66, + V1MULTU_RRR_0_OPCODE_X0 = 104, + V1MULUS_RRR_0_OPCODE_X0 = 105, + V1MULU_RRR_0_OPCODE_X0 = 106, + V1MZ_RRR_0_OPCODE_X0 = 107, + V1MZ_RRR_0_OPCODE_X1 = 67, + V1SADAU_RRR_0_OPCODE_X0 = 108, + V1SADU_RRR_0_OPCODE_X0 = 109, + V1SHLI_SHIFT_OPCODE_X0 = 7, + V1SHLI_SHIFT_OPCODE_X1 = 7, + V1SHL_RRR_0_OPCODE_X0 = 110, + V1SHL_RRR_0_OPCODE_X1 = 68, + V1SHRSI_SHIFT_OPCODE_X0 = 8, + V1SHRSI_SHIFT_OPCODE_X1 = 8, + V1SHRS_RRR_0_OPCODE_X0 = 111, + V1SHRS_RRR_0_OPCODE_X1 = 69, + V1SHRUI_SHIFT_OPCODE_X0 = 9, + V1SHRUI_SHIFT_OPCODE_X1 = 9, + V1SHRU_RRR_0_OPCODE_X0 = 112, + V1SHRU_RRR_0_OPCODE_X1 = 70, + V1SUBUC_RRR_0_OPCODE_X0 = 113, + V1SUBUC_RRR_0_OPCODE_X1 = 71, + V1SUB_RRR_0_OPCODE_X0 = 114, + V1SUB_RRR_0_OPCODE_X1 = 72, + V2ADDI_IMM8_OPCODE_X0 = 14, + V2ADDI_IMM8_OPCODE_X1 = 39, + V2ADDSC_RRR_0_OPCODE_X0 = 115, + V2ADDSC_RRR_0_OPCODE_X1 = 73, + V2ADD_RRR_0_OPCODE_X0 = 116, + V2ADD_RRR_0_OPCODE_X1 = 74, + V2ADIFFS_RRR_0_OPCODE_X0 = 117, + V2AVGS_RRR_0_OPCODE_X0 = 118, + V2CMPEQI_IMM8_OPCODE_X0 = 15, + V2CMPEQI_IMM8_OPCODE_X1 = 40, + V2CMPEQ_RRR_0_OPCODE_X0 = 119, + V2CMPEQ_RRR_0_OPCODE_X1 = 75, + V2CMPLES_RRR_0_OPCODE_X0 = 120, + V2CMPLES_RRR_0_OPCODE_X1 = 76, + V2CMPLEU_RRR_0_OPCODE_X0 = 121, + V2CMPLEU_RRR_0_OPCODE_X1 = 77, + V2CMPLTSI_IMM8_OPCODE_X0 = 16, + V2CMPLTSI_IMM8_OPCODE_X1 = 41, + V2CMPLTS_RRR_0_OPCODE_X0 = 122, + V2CMPLTS_RRR_0_OPCODE_X1 = 78, + V2CMPLTUI_IMM8_OPCODE_X0 = 17, + V2CMPLTUI_IMM8_OPCODE_X1 = 42, + V2CMPLTU_RRR_0_OPCODE_X0 = 123, + V2CMPLTU_RRR_0_OPCODE_X1 = 79, + V2CMPNE_RRR_0_OPCODE_X0 = 124, + V2CMPNE_RRR_0_OPCODE_X1 = 80, + V2DOTPA_RRR_0_OPCODE_X0 = 125, + V2DOTP_RRR_0_OPCODE_X0 = 126, + V2INT_H_RRR_0_OPCODE_X0 = 127, + V2INT_H_RRR_0_OPCODE_X1 = 81, + V2INT_L_RRR_0_OPCODE_X0 = 128, + V2INT_L_RRR_0_OPCODE_X1 = 82, + V2MAXSI_IMM8_OPCODE_X0 = 18, + V2MAXSI_IMM8_OPCODE_X1 = 43, + V2MAXS_RRR_0_OPCODE_X0 = 129, + V2MAXS_RRR_0_OPCODE_X1 = 83, + V2MINSI_IMM8_OPCODE_X0 = 19, + V2MINSI_IMM8_OPCODE_X1 = 44, + V2MINS_RRR_0_OPCODE_X0 = 130, + V2MINS_RRR_0_OPCODE_X1 = 84, + V2MNZ_RRR_0_OPCODE_X0 = 131, + V2MNZ_RRR_0_OPCODE_X1 = 85, + V2MULFSC_RRR_0_OPCODE_X0 = 132, + V2MULS_RRR_0_OPCODE_X0 = 133, + V2MULTS_RRR_0_OPCODE_X0 = 134, + V2MZ_RRR_0_OPCODE_X0 = 135, + V2MZ_RRR_0_OPCODE_X1 = 86, + V2PACKH_RRR_0_OPCODE_X0 = 136, + V2PACKH_RRR_0_OPCODE_X1 = 87, + V2PACKL_RRR_0_OPCODE_X0 = 137, + V2PACKL_RRR_0_OPCODE_X1 = 88, + V2PACKUC_RRR_0_OPCODE_X0 = 138, + V2PACKUC_RRR_0_OPCODE_X1 = 89, + V2SADAS_RRR_0_OPCODE_X0 = 139, + V2SADAU_RRR_0_OPCODE_X0 = 140, + V2SADS_RRR_0_OPCODE_X0 = 141, + V2SADU_RRR_0_OPCODE_X0 = 142, + V2SHLI_SHIFT_OPCODE_X0 = 10, + V2SHLI_SHIFT_OPCODE_X1 = 10, + V2SHLSC_RRR_0_OPCODE_X0 = 143, + V2SHLSC_RRR_0_OPCODE_X1 = 90, + V2SHL_RRR_0_OPCODE_X0 = 144, + V2SHL_RRR_0_OPCODE_X1 = 91, + V2SHRSI_SHIFT_OPCODE_X0 = 11, + V2SHRSI_SHIFT_OPCODE_X1 = 11, + V2SHRS_RRR_0_OPCODE_X0 = 145, + V2SHRS_RRR_0_OPCODE_X1 = 92, + V2SHRUI_SHIFT_OPCODE_X0 = 12, + V2SHRUI_SHIFT_OPCODE_X1 = 12, + V2SHRU_RRR_0_OPCODE_X0 = 146, + V2SHRU_RRR_0_OPCODE_X1 = 93, + V2SUBSC_RRR_0_OPCODE_X0 = 147, + V2SUBSC_RRR_0_OPCODE_X1 = 94, + V2SUB_RRR_0_OPCODE_X0 = 148, + V2SUB_RRR_0_OPCODE_X1 = 95, + V4ADDSC_RRR_0_OPCODE_X0 = 149, + V4ADDSC_RRR_0_OPCODE_X1 = 96, + V4ADD_RRR_0_OPCODE_X0 = 150, + V4ADD_RRR_0_OPCODE_X1 = 97, + V4INT_H_RRR_0_OPCODE_X0 = 151, + V4INT_H_RRR_0_OPCODE_X1 = 98, + V4INT_L_RRR_0_OPCODE_X0 = 152, + V4INT_L_RRR_0_OPCODE_X1 = 99, + V4PACKSC_RRR_0_OPCODE_X0 = 153, + V4PACKSC_RRR_0_OPCODE_X1 = 100, + V4SHLSC_RRR_0_OPCODE_X0 = 154, + V4SHLSC_RRR_0_OPCODE_X1 = 101, + V4SHL_RRR_0_OPCODE_X0 = 155, + V4SHL_RRR_0_OPCODE_X1 = 102, + V4SHRS_RRR_0_OPCODE_X0 = 156, + V4SHRS_RRR_0_OPCODE_X1 = 103, + V4SHRU_RRR_0_OPCODE_X0 = 157, + V4SHRU_RRR_0_OPCODE_X1 = 104, + V4SUBSC_RRR_0_OPCODE_X0 = 158, + V4SUBSC_RRR_0_OPCODE_X1 = 105, + V4SUB_RRR_0_OPCODE_X0 = 159, + V4SUB_RRR_0_OPCODE_X1 = 106, + WH64_UNARY_OPCODE_X1 = 38, + XORI_IMM8_OPCODE_X0 = 20, + XORI_IMM8_OPCODE_X1 = 45, + XOR_RRR_0_OPCODE_X0 = 160, + XOR_RRR_0_OPCODE_X1 = 107, + XOR_RRR_5_OPCODE_Y0 = 3, + XOR_RRR_5_OPCODE_Y1 = 3 +}; + + +#endif /* __ASSEMBLER__ */ + +#endif /* __ARCH_OPCODE_H__ */ diff --git a/arch/tile/include/arch/opcode_tilepro.h b/arch/tile/include/arch/opcode_tilepro.h new file mode 100644 index 00000000..71b763b8 --- /dev/null +++ b/arch/tile/include/arch/opcode_tilepro.h @@ -0,0 +1,1471 @@ +/* TILEPro opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * + * + * + */ + +#ifndef __ARCH_OPCODE_H__ +#define __ARCH_OPCODE_H__ + +#ifndef __ASSEMBLER__ + +typedef unsigned long long tilepro_bundle_bits; + +/* This is the bit that determines if a bundle is in the Y encoding. */ +#define TILEPRO_BUNDLE_Y_ENCODING_MASK ((tilepro_bundle_bits)1 << 63) + +enum +{ + /* Maximum number of instructions in a bundle (2 for X, 3 for Y). */ + TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE = 3, + + /* How many different pipeline encodings are there? X0, X1, Y0, Y1, Y2. */ + TILEPRO_NUM_PIPELINE_ENCODINGS = 5, + + /* Log base 2 of TILEPRO_BUNDLE_SIZE_IN_BYTES. */ + TILEPRO_LOG2_BUNDLE_SIZE_IN_BYTES = 3, + + /* Instructions take this many bytes. */ + TILEPRO_BUNDLE_SIZE_IN_BYTES = 1 << TILEPRO_LOG2_BUNDLE_SIZE_IN_BYTES, + + /* Log base 2 of TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES. */ + TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3, + + /* Bundles should be aligned modulo this number of bytes. */ + TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES = + (1 << TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES), + + /* Log base 2 of TILEPRO_SN_INSTRUCTION_SIZE_IN_BYTES. */ + TILEPRO_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES = 1, + + /* Static network instructions take this many bytes. */ + TILEPRO_SN_INSTRUCTION_SIZE_IN_BYTES = + (1 << TILEPRO_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES), + + /* Number of registers (some are magic, such as network I/O). */ + TILEPRO_NUM_REGISTERS = 64, + + /* Number of static network registers. */ + TILEPRO_NUM_SN_REGISTERS = 4 +}; + +/* Make a few "tile_" variables to simplify common code between + architectures. */ + +typedef tilepro_bundle_bits tile_bundle_bits; +#define TILE_BUNDLE_SIZE_IN_BYTES TILEPRO_BUNDLE_SIZE_IN_BYTES +#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES +#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \ + TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES + +/* 64-bit pattern for a { bpt ; nop } bundle. */ +#define TILEPRO_BPT_BUNDLE 0x400b3cae70166000ULL + +static __inline unsigned int +get_BrOff_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3ff); +} + +static __inline unsigned int +get_BrOff_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x00007fff) | + (((unsigned int)(n >> 20)) & 0x00018000); +} + +static __inline unsigned int +get_BrType_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0xf); +} + +static __inline unsigned int +get_Dest_Imm8_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 43)) & 0x000000c0); +} + +static __inline unsigned int +get_Dest_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 2)) & 0x3); +} + +static __inline unsigned int +get_Dest_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3f); +} + +static __inline unsigned int +get_Dest_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x3f); +} + +static __inline unsigned int +get_Dest_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3f); +} + +static __inline unsigned int +get_Dest_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x3f); +} + +static __inline unsigned int +get_Imm16_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xffff); +} + +static __inline unsigned int +get_Imm16_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xffff); +} + +static __inline unsigned int +get_Imm8_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0xff); +} + +static __inline unsigned int +get_Imm8_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xff); +} + +static __inline unsigned int +get_Imm8_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xff); +} + +static __inline unsigned int +get_Imm8_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xff); +} + +static __inline unsigned int +get_Imm8_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xff); +} + +static __inline unsigned int +get_ImmOpcodeExtension_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 20)) & 0x7f); +} + +static __inline unsigned int +get_ImmOpcodeExtension_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 51)) & 0x7f); +} + +static __inline unsigned int +get_ImmRROpcodeExtension_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 8)) & 0x3); +} + +static __inline unsigned int +get_JOffLong_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x00007fff) | + (((unsigned int)(n >> 20)) & 0x00018000) | + (((unsigned int)(n >> 14)) & 0x001e0000) | + (((unsigned int)(n >> 16)) & 0x07e00000) | + (((unsigned int)(n >> 31)) & 0x18000000); +} + +static __inline unsigned int +get_JOff_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x00007fff) | + (((unsigned int)(n >> 20)) & 0x00018000) | + (((unsigned int)(n >> 14)) & 0x001e0000) | + (((unsigned int)(n >> 16)) & 0x07e00000) | + (((unsigned int)(n >> 31)) & 0x08000000); +} + +static __inline unsigned int +get_MF_Imm15_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x00003fff) | + (((unsigned int)(n >> 44)) & 0x00004000); +} + +static __inline unsigned int +get_MMEnd_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x1f); +} + +static __inline unsigned int +get_MMEnd_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x1f); +} + +static __inline unsigned int +get_MMStart_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 23)) & 0x1f); +} + +static __inline unsigned int +get_MMStart_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 54)) & 0x1f); +} + +static __inline unsigned int +get_MT_Imm15_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 37)) & 0x00003fc0) | + (((unsigned int)(n >> 44)) & 0x00004000); +} + +static __inline unsigned int +get_Mode(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 63)) & 0x1); +} + +static __inline unsigned int +get_NoRegOpcodeExtension_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0xf); +} + +static __inline unsigned int +get_Opcode_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 10)) & 0x3f); +} + +static __inline unsigned int +get_Opcode_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 28)) & 0x7); +} + +static __inline unsigned int +get_Opcode_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 59)) & 0xf); +} + +static __inline unsigned int +get_Opcode_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 27)) & 0xf); +} + +static __inline unsigned int +get_Opcode_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 59)) & 0xf); +} + +static __inline unsigned int +get_Opcode_Y2(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 56)) & 0x7); +} + +static __inline unsigned int +get_RROpcodeExtension_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 4)) & 0xf); +} + +static __inline unsigned int +get_RRROpcodeExtension_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x1ff); +} + +static __inline unsigned int +get_RRROpcodeExtension_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x1ff); +} + +static __inline unsigned int +get_RRROpcodeExtension_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3); +} + +static __inline unsigned int +get_RRROpcodeExtension_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3); +} + +static __inline unsigned int +get_RouteOpcodeExtension_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3ff); +} + +static __inline unsigned int +get_S_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 27)) & 0x1); +} + +static __inline unsigned int +get_S_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 58)) & 0x1); +} + +static __inline unsigned int +get_ShAmt_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x1f); +} + +static __inline unsigned int +get_ShAmt_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x1f); +} + +static __inline unsigned int +get_ShAmt_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x1f); +} + +static __inline unsigned int +get_ShAmt_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x1f); +} + +static __inline unsigned int +get_SrcA_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 6)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 6)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y2(tilepro_bundle_bits n) +{ + return (((n >> 26)) & 0x00000001) | + (((unsigned int)(n >> 50)) & 0x0000003e); +} + +static __inline unsigned int +get_SrcBDest_Y2(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 20)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_Src_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3); +} + +static __inline unsigned int +get_UnOpcodeExtension_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x1f); +} + +static __inline unsigned int +get_UnOpcodeExtension_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x1f); +} + +static __inline unsigned int +get_UnOpcodeExtension_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x1f); +} + +static __inline unsigned int +get_UnOpcodeExtension_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x1f); +} + +static __inline unsigned int +get_UnShOpcodeExtension_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 17)) & 0x3ff); +} + +static __inline unsigned int +get_UnShOpcodeExtension_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 48)) & 0x3ff); +} + +static __inline unsigned int +get_UnShOpcodeExtension_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 17)) & 0x7); +} + +static __inline unsigned int +get_UnShOpcodeExtension_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 48)) & 0x7); +} + + +static __inline int +sign_extend(int n, int num_bits) +{ + int shift = (int)(sizeof(int) * 8 - num_bits); + return (n << shift) >> shift; +} + + + +static __inline tilepro_bundle_bits +create_BrOff_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3ff) << 0); +} + +static __inline tilepro_bundle_bits +create_BrOff_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x00007fff)) << 43) | + (((tilepro_bundle_bits)(n & 0x00018000)) << 20); +} + +static __inline tilepro_bundle_bits +create_BrType_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0xf)) << 31); +} + +static __inline tilepro_bundle_bits +create_Dest_Imm8_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilepro_bundle_bits)(n & 0x000000c0)) << 43); +} + +static __inline tilepro_bundle_bits +create_Dest_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 2); +} + +static __inline tilepro_bundle_bits +create_Dest_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 0); +} + +static __inline tilepro_bundle_bits +create_Dest_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3f)) << 31); +} + +static __inline tilepro_bundle_bits +create_Dest_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 0); +} + +static __inline tilepro_bundle_bits +create_Dest_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3f)) << 31); +} + +static __inline tilepro_bundle_bits +create_Imm16_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xffff) << 12); +} + +static __inline tilepro_bundle_bits +create_Imm16_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0xffff)) << 43); +} + +static __inline tilepro_bundle_bits +create_Imm8_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 0); +} + +static __inline tilepro_bundle_bits +create_Imm8_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 12); +} + +static __inline tilepro_bundle_bits +create_Imm8_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0xff)) << 43); +} + +static __inline tilepro_bundle_bits +create_Imm8_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 12); +} + +static __inline tilepro_bundle_bits +create_Imm8_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0xff)) << 43); +} + +static __inline tilepro_bundle_bits +create_ImmOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x7f) << 20); +} + +static __inline tilepro_bundle_bits +create_ImmOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x7f)) << 51); +} + +static __inline tilepro_bundle_bits +create_ImmRROpcodeExtension_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 8); +} + +static __inline tilepro_bundle_bits +create_JOffLong_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x00007fff)) << 43) | + (((tilepro_bundle_bits)(n & 0x00018000)) << 20) | + (((tilepro_bundle_bits)(n & 0x001e0000)) << 14) | + (((tilepro_bundle_bits)(n & 0x07e00000)) << 16) | + (((tilepro_bundle_bits)(n & 0x18000000)) << 31); +} + +static __inline tilepro_bundle_bits +create_JOff_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x00007fff)) << 43) | + (((tilepro_bundle_bits)(n & 0x00018000)) << 20) | + (((tilepro_bundle_bits)(n & 0x001e0000)) << 14) | + (((tilepro_bundle_bits)(n & 0x07e00000)) << 16) | + (((tilepro_bundle_bits)(n & 0x08000000)) << 31); +} + +static __inline tilepro_bundle_bits +create_MF_Imm15_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x00003fff)) << 37) | + (((tilepro_bundle_bits)(n & 0x00004000)) << 44); +} + +static __inline tilepro_bundle_bits +create_MMEnd_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1f) << 18); +} + +static __inline tilepro_bundle_bits +create_MMEnd_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1f)) << 49); +} + +static __inline tilepro_bundle_bits +create_MMStart_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1f) << 23); +} + +static __inline tilepro_bundle_bits +create_MMStart_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1f)) << 54); +} + +static __inline tilepro_bundle_bits +create_MT_Imm15_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilepro_bundle_bits)(n & 0x00003fc0)) << 37) | + (((tilepro_bundle_bits)(n & 0x00004000)) << 44); +} + +static __inline tilepro_bundle_bits +create_Mode(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1)) << 63); +} + +static __inline tilepro_bundle_bits +create_NoRegOpcodeExtension_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xf) << 0); +} + +static __inline tilepro_bundle_bits +create_Opcode_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 10); +} + +static __inline tilepro_bundle_bits +create_Opcode_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x7) << 28); +} + +static __inline tilepro_bundle_bits +create_Opcode_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0xf)) << 59); +} + +static __inline tilepro_bundle_bits +create_Opcode_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xf) << 27); +} + +static __inline tilepro_bundle_bits +create_Opcode_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0xf)) << 59); +} + +static __inline tilepro_bundle_bits +create_Opcode_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x7)) << 56); +} + +static __inline tilepro_bundle_bits +create_RROpcodeExtension_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xf) << 4); +} + +static __inline tilepro_bundle_bits +create_RRROpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1ff) << 18); +} + +static __inline tilepro_bundle_bits +create_RRROpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1ff)) << 49); +} + +static __inline tilepro_bundle_bits +create_RRROpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 18); +} + +static __inline tilepro_bundle_bits +create_RRROpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3)) << 49); +} + +static __inline tilepro_bundle_bits +create_RouteOpcodeExtension_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3ff) << 0); +} + +static __inline tilepro_bundle_bits +create_S_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1) << 27); +} + +static __inline tilepro_bundle_bits +create_S_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1)) << 58); +} + +static __inline tilepro_bundle_bits +create_ShAmt_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1f) << 12); +} + +static __inline tilepro_bundle_bits +create_ShAmt_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1f)) << 43); +} + +static __inline tilepro_bundle_bits +create_ShAmt_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1f) << 12); +} + +static __inline tilepro_bundle_bits +create_ShAmt_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1f)) << 43); +} + +static __inline tilepro_bundle_bits +create_SrcA_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 6); +} + +static __inline tilepro_bundle_bits +create_SrcA_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3f)) << 37); +} + +static __inline tilepro_bundle_bits +create_SrcA_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 6); +} + +static __inline tilepro_bundle_bits +create_SrcA_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3f)) << 37); +} + +static __inline tilepro_bundle_bits +create_SrcA_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x00000001) << 26) | + (((tilepro_bundle_bits)(n & 0x0000003e)) << 50); +} + +static __inline tilepro_bundle_bits +create_SrcBDest_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 20); +} + +static __inline tilepro_bundle_bits +create_SrcB_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilepro_bundle_bits +create_SrcB_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilepro_bundle_bits +create_SrcB_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilepro_bundle_bits +create_SrcB_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilepro_bundle_bits +create_Src_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 0); +} + +static __inline tilepro_bundle_bits +create_UnOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1f) << 12); +} + +static __inline tilepro_bundle_bits +create_UnOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1f)) << 43); +} + +static __inline tilepro_bundle_bits +create_UnOpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1f) << 12); +} + +static __inline tilepro_bundle_bits +create_UnOpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1f)) << 43); +} + +static __inline tilepro_bundle_bits +create_UnShOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3ff) << 17); +} + +static __inline tilepro_bundle_bits +create_UnShOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3ff)) << 48); +} + +static __inline tilepro_bundle_bits +create_UnShOpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x7) << 17); +} + +static __inline tilepro_bundle_bits +create_UnShOpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x7)) << 48); +} + + +enum +{ + ADDBS_U_SPECIAL_0_OPCODE_X0 = 98, + ADDBS_U_SPECIAL_0_OPCODE_X1 = 68, + ADDB_SPECIAL_0_OPCODE_X0 = 1, + ADDB_SPECIAL_0_OPCODE_X1 = 1, + ADDHS_SPECIAL_0_OPCODE_X0 = 99, + ADDHS_SPECIAL_0_OPCODE_X1 = 69, + ADDH_SPECIAL_0_OPCODE_X0 = 2, + ADDH_SPECIAL_0_OPCODE_X1 = 2, + ADDIB_IMM_0_OPCODE_X0 = 1, + ADDIB_IMM_0_OPCODE_X1 = 1, + ADDIH_IMM_0_OPCODE_X0 = 2, + ADDIH_IMM_0_OPCODE_X1 = 2, + ADDI_IMM_0_OPCODE_X0 = 3, + ADDI_IMM_0_OPCODE_X1 = 3, + ADDI_IMM_1_OPCODE_SN = 1, + ADDI_OPCODE_Y0 = 9, + ADDI_OPCODE_Y1 = 7, + ADDLIS_OPCODE_X0 = 1, + ADDLIS_OPCODE_X1 = 2, + ADDLI_OPCODE_X0 = 2, + ADDLI_OPCODE_X1 = 3, + ADDS_SPECIAL_0_OPCODE_X0 = 96, + ADDS_SPECIAL_0_OPCODE_X1 = 66, + ADD_SPECIAL_0_OPCODE_X0 = 3, + ADD_SPECIAL_0_OPCODE_X1 = 3, + ADD_SPECIAL_0_OPCODE_Y0 = 0, + ADD_SPECIAL_0_OPCODE_Y1 = 0, + ADIFFB_U_SPECIAL_0_OPCODE_X0 = 4, + ADIFFH_SPECIAL_0_OPCODE_X0 = 5, + ANDI_IMM_0_OPCODE_X0 = 1, + ANDI_IMM_0_OPCODE_X1 = 4, + ANDI_OPCODE_Y0 = 10, + ANDI_OPCODE_Y1 = 8, + AND_SPECIAL_0_OPCODE_X0 = 6, + AND_SPECIAL_0_OPCODE_X1 = 4, + AND_SPECIAL_2_OPCODE_Y0 = 0, + AND_SPECIAL_2_OPCODE_Y1 = 0, + AULI_OPCODE_X0 = 3, + AULI_OPCODE_X1 = 4, + AVGB_U_SPECIAL_0_OPCODE_X0 = 7, + AVGH_SPECIAL_0_OPCODE_X0 = 8, + BBNST_BRANCH_OPCODE_X1 = 15, + BBNS_BRANCH_OPCODE_X1 = 14, + BBNS_OPCODE_SN = 63, + BBST_BRANCH_OPCODE_X1 = 13, + BBS_BRANCH_OPCODE_X1 = 12, + BBS_OPCODE_SN = 62, + BGEZT_BRANCH_OPCODE_X1 = 7, + BGEZ_BRANCH_OPCODE_X1 = 6, + BGEZ_OPCODE_SN = 61, + BGZT_BRANCH_OPCODE_X1 = 5, + BGZ_BRANCH_OPCODE_X1 = 4, + BGZ_OPCODE_SN = 58, + BITX_UN_0_SHUN_0_OPCODE_X0 = 1, + BITX_UN_0_SHUN_0_OPCODE_Y0 = 1, + BLEZT_BRANCH_OPCODE_X1 = 11, + BLEZ_BRANCH_OPCODE_X1 = 10, + BLEZ_OPCODE_SN = 59, + BLZT_BRANCH_OPCODE_X1 = 9, + BLZ_BRANCH_OPCODE_X1 = 8, + BLZ_OPCODE_SN = 60, + BNZT_BRANCH_OPCODE_X1 = 3, + BNZ_BRANCH_OPCODE_X1 = 2, + BNZ_OPCODE_SN = 57, + BPT_NOREG_RR_IMM_0_OPCODE_SN = 1, + BRANCH_OPCODE_X1 = 5, + BYTEX_UN_0_SHUN_0_OPCODE_X0 = 2, + BYTEX_UN_0_SHUN_0_OPCODE_Y0 = 2, + BZT_BRANCH_OPCODE_X1 = 1, + BZ_BRANCH_OPCODE_X1 = 0, + BZ_OPCODE_SN = 56, + CLZ_UN_0_SHUN_0_OPCODE_X0 = 3, + CLZ_UN_0_SHUN_0_OPCODE_Y0 = 3, + CRC32_32_SPECIAL_0_OPCODE_X0 = 9, + CRC32_8_SPECIAL_0_OPCODE_X0 = 10, + CTZ_UN_0_SHUN_0_OPCODE_X0 = 4, + CTZ_UN_0_SHUN_0_OPCODE_Y0 = 4, + DRAIN_UN_0_SHUN_0_OPCODE_X1 = 1, + DTLBPR_UN_0_SHUN_0_OPCODE_X1 = 2, + DWORD_ALIGN_SPECIAL_0_OPCODE_X0 = 95, + FINV_UN_0_SHUN_0_OPCODE_X1 = 3, + FLUSH_UN_0_SHUN_0_OPCODE_X1 = 4, + FNOP_NOREG_RR_IMM_0_OPCODE_SN = 3, + FNOP_UN_0_SHUN_0_OPCODE_X0 = 5, + FNOP_UN_0_SHUN_0_OPCODE_X1 = 5, + FNOP_UN_0_SHUN_0_OPCODE_Y0 = 5, + FNOP_UN_0_SHUN_0_OPCODE_Y1 = 1, + HALT_NOREG_RR_IMM_0_OPCODE_SN = 0, + ICOH_UN_0_SHUN_0_OPCODE_X1 = 6, + ILL_UN_0_SHUN_0_OPCODE_X1 = 7, + ILL_UN_0_SHUN_0_OPCODE_Y1 = 2, + IMM_0_OPCODE_SN = 0, + IMM_0_OPCODE_X0 = 4, + IMM_0_OPCODE_X1 = 6, + IMM_1_OPCODE_SN = 1, + IMM_OPCODE_0_X0 = 5, + INTHB_SPECIAL_0_OPCODE_X0 = 11, + INTHB_SPECIAL_0_OPCODE_X1 = 5, + INTHH_SPECIAL_0_OPCODE_X0 = 12, + INTHH_SPECIAL_0_OPCODE_X1 = 6, + INTLB_SPECIAL_0_OPCODE_X0 = 13, + INTLB_SPECIAL_0_OPCODE_X1 = 7, + INTLH_SPECIAL_0_OPCODE_X0 = 14, + INTLH_SPECIAL_0_OPCODE_X1 = 8, + INV_UN_0_SHUN_0_OPCODE_X1 = 8, + IRET_UN_0_SHUN_0_OPCODE_X1 = 9, + JALB_OPCODE_X1 = 13, + JALF_OPCODE_X1 = 12, + JALRP_SPECIAL_0_OPCODE_X1 = 9, + JALRR_IMM_1_OPCODE_SN = 3, + JALR_RR_IMM_0_OPCODE_SN = 5, + JALR_SPECIAL_0_OPCODE_X1 = 10, + JB_OPCODE_X1 = 11, + JF_OPCODE_X1 = 10, + JRP_SPECIAL_0_OPCODE_X1 = 11, + JRR_IMM_1_OPCODE_SN = 2, + JR_RR_IMM_0_OPCODE_SN = 4, + JR_SPECIAL_0_OPCODE_X1 = 12, + LBADD_IMM_0_OPCODE_X1 = 22, + LBADD_U_IMM_0_OPCODE_X1 = 23, + LB_OPCODE_Y2 = 0, + LB_UN_0_SHUN_0_OPCODE_X1 = 10, + LB_U_OPCODE_Y2 = 1, + LB_U_UN_0_SHUN_0_OPCODE_X1 = 11, + LHADD_IMM_0_OPCODE_X1 = 24, + LHADD_U_IMM_0_OPCODE_X1 = 25, + LH_OPCODE_Y2 = 2, + LH_UN_0_SHUN_0_OPCODE_X1 = 12, + LH_U_OPCODE_Y2 = 3, + LH_U_UN_0_SHUN_0_OPCODE_X1 = 13, + LNK_SPECIAL_0_OPCODE_X1 = 13, + LWADD_IMM_0_OPCODE_X1 = 26, + LWADD_NA_IMM_0_OPCODE_X1 = 27, + LW_NA_UN_0_SHUN_0_OPCODE_X1 = 24, + LW_OPCODE_Y2 = 4, + LW_UN_0_SHUN_0_OPCODE_X1 = 14, + MAXB_U_SPECIAL_0_OPCODE_X0 = 15, + MAXB_U_SPECIAL_0_OPCODE_X1 = 14, + MAXH_SPECIAL_0_OPCODE_X0 = 16, + MAXH_SPECIAL_0_OPCODE_X1 = 15, + MAXIB_U_IMM_0_OPCODE_X0 = 4, + MAXIB_U_IMM_0_OPCODE_X1 = 5, + MAXIH_IMM_0_OPCODE_X0 = 5, + MAXIH_IMM_0_OPCODE_X1 = 6, + MFSPR_IMM_0_OPCODE_X1 = 7, + MF_UN_0_SHUN_0_OPCODE_X1 = 15, + MINB_U_SPECIAL_0_OPCODE_X0 = 17, + MINB_U_SPECIAL_0_OPCODE_X1 = 16, + MINH_SPECIAL_0_OPCODE_X0 = 18, + MINH_SPECIAL_0_OPCODE_X1 = 17, + MINIB_U_IMM_0_OPCODE_X0 = 6, + MINIB_U_IMM_0_OPCODE_X1 = 8, + MINIH_IMM_0_OPCODE_X0 = 7, + MINIH_IMM_0_OPCODE_X1 = 9, + MM_OPCODE_X0 = 6, + MM_OPCODE_X1 = 7, + MNZB_SPECIAL_0_OPCODE_X0 = 19, + MNZB_SPECIAL_0_OPCODE_X1 = 18, + MNZH_SPECIAL_0_OPCODE_X0 = 20, + MNZH_SPECIAL_0_OPCODE_X1 = 19, + MNZ_SPECIAL_0_OPCODE_X0 = 21, + MNZ_SPECIAL_0_OPCODE_X1 = 20, + MNZ_SPECIAL_1_OPCODE_Y0 = 0, + MNZ_SPECIAL_1_OPCODE_Y1 = 1, + MOVEI_IMM_1_OPCODE_SN = 0, + MOVE_RR_IMM_0_OPCODE_SN = 8, + MTSPR_IMM_0_OPCODE_X1 = 10, + MULHHA_SS_SPECIAL_0_OPCODE_X0 = 22, + MULHHA_SS_SPECIAL_7_OPCODE_Y0 = 0, + MULHHA_SU_SPECIAL_0_OPCODE_X0 = 23, + MULHHA_UU_SPECIAL_0_OPCODE_X0 = 24, + MULHHA_UU_SPECIAL_7_OPCODE_Y0 = 1, + MULHHSA_UU_SPECIAL_0_OPCODE_X0 = 25, + MULHH_SS_SPECIAL_0_OPCODE_X0 = 26, + MULHH_SS_SPECIAL_6_OPCODE_Y0 = 0, + MULHH_SU_SPECIAL_0_OPCODE_X0 = 27, + MULHH_UU_SPECIAL_0_OPCODE_X0 = 28, + MULHH_UU_SPECIAL_6_OPCODE_Y0 = 1, + MULHLA_SS_SPECIAL_0_OPCODE_X0 = 29, + MULHLA_SU_SPECIAL_0_OPCODE_X0 = 30, + MULHLA_US_SPECIAL_0_OPCODE_X0 = 31, + MULHLA_UU_SPECIAL_0_OPCODE_X0 = 32, + MULHLSA_UU_SPECIAL_0_OPCODE_X0 = 33, + MULHLSA_UU_SPECIAL_5_OPCODE_Y0 = 0, + MULHL_SS_SPECIAL_0_OPCODE_X0 = 34, + MULHL_SU_SPECIAL_0_OPCODE_X0 = 35, + MULHL_US_SPECIAL_0_OPCODE_X0 = 36, + MULHL_UU_SPECIAL_0_OPCODE_X0 = 37, + MULLLA_SS_SPECIAL_0_OPCODE_X0 = 38, + MULLLA_SS_SPECIAL_7_OPCODE_Y0 = 2, + MULLLA_SU_SPECIAL_0_OPCODE_X0 = 39, + MULLLA_UU_SPECIAL_0_OPCODE_X0 = 40, + MULLLA_UU_SPECIAL_7_OPCODE_Y0 = 3, + MULLLSA_UU_SPECIAL_0_OPCODE_X0 = 41, + MULLL_SS_SPECIAL_0_OPCODE_X0 = 42, + MULLL_SS_SPECIAL_6_OPCODE_Y0 = 2, + MULLL_SU_SPECIAL_0_OPCODE_X0 = 43, + MULLL_UU_SPECIAL_0_OPCODE_X0 = 44, + MULLL_UU_SPECIAL_6_OPCODE_Y0 = 3, + MVNZ_SPECIAL_0_OPCODE_X0 = 45, + MVNZ_SPECIAL_1_OPCODE_Y0 = 1, + MVZ_SPECIAL_0_OPCODE_X0 = 46, + MVZ_SPECIAL_1_OPCODE_Y0 = 2, + MZB_SPECIAL_0_OPCODE_X0 = 47, + MZB_SPECIAL_0_OPCODE_X1 = 21, + MZH_SPECIAL_0_OPCODE_X0 = 48, + MZH_SPECIAL_0_OPCODE_X1 = 22, + MZ_SPECIAL_0_OPCODE_X0 = 49, + MZ_SPECIAL_0_OPCODE_X1 = 23, + MZ_SPECIAL_1_OPCODE_Y0 = 3, + MZ_SPECIAL_1_OPCODE_Y1 = 2, + NAP_UN_0_SHUN_0_OPCODE_X1 = 16, + NOP_NOREG_RR_IMM_0_OPCODE_SN = 2, + NOP_UN_0_SHUN_0_OPCODE_X0 = 6, + NOP_UN_0_SHUN_0_OPCODE_X1 = 17, + NOP_UN_0_SHUN_0_OPCODE_Y0 = 6, + NOP_UN_0_SHUN_0_OPCODE_Y1 = 3, + NOREG_RR_IMM_0_OPCODE_SN = 0, + NOR_SPECIAL_0_OPCODE_X0 = 50, + NOR_SPECIAL_0_OPCODE_X1 = 24, + NOR_SPECIAL_2_OPCODE_Y0 = 1, + NOR_SPECIAL_2_OPCODE_Y1 = 1, + ORI_IMM_0_OPCODE_X0 = 8, + ORI_IMM_0_OPCODE_X1 = 11, + ORI_OPCODE_Y0 = 11, + ORI_OPCODE_Y1 = 9, + OR_SPECIAL_0_OPCODE_X0 = 51, + OR_SPECIAL_0_OPCODE_X1 = 25, + OR_SPECIAL_2_OPCODE_Y0 = 2, + OR_SPECIAL_2_OPCODE_Y1 = 2, + PACKBS_U_SPECIAL_0_OPCODE_X0 = 103, + PACKBS_U_SPECIAL_0_OPCODE_X1 = 73, + PACKHB_SPECIAL_0_OPCODE_X0 = 52, + PACKHB_SPECIAL_0_OPCODE_X1 = 26, + PACKHS_SPECIAL_0_OPCODE_X0 = 102, + PACKHS_SPECIAL_0_OPCODE_X1 = 72, + PACKLB_SPECIAL_0_OPCODE_X0 = 53, + PACKLB_SPECIAL_0_OPCODE_X1 = 27, + PCNT_UN_0_SHUN_0_OPCODE_X0 = 7, + PCNT_UN_0_SHUN_0_OPCODE_Y0 = 7, + RLI_SHUN_0_OPCODE_X0 = 1, + RLI_SHUN_0_OPCODE_X1 = 1, + RLI_SHUN_0_OPCODE_Y0 = 1, + RLI_SHUN_0_OPCODE_Y1 = 1, + RL_SPECIAL_0_OPCODE_X0 = 54, + RL_SPECIAL_0_OPCODE_X1 = 28, + RL_SPECIAL_3_OPCODE_Y0 = 0, + RL_SPECIAL_3_OPCODE_Y1 = 0, + RR_IMM_0_OPCODE_SN = 0, + S1A_SPECIAL_0_OPCODE_X0 = 55, + S1A_SPECIAL_0_OPCODE_X1 = 29, + S1A_SPECIAL_0_OPCODE_Y0 = 1, + S1A_SPECIAL_0_OPCODE_Y1 = 1, + S2A_SPECIAL_0_OPCODE_X0 = 56, + S2A_SPECIAL_0_OPCODE_X1 = 30, + S2A_SPECIAL_0_OPCODE_Y0 = 2, + S2A_SPECIAL_0_OPCODE_Y1 = 2, + S3A_SPECIAL_0_OPCODE_X0 = 57, + S3A_SPECIAL_0_OPCODE_X1 = 31, + S3A_SPECIAL_5_OPCODE_Y0 = 1, + S3A_SPECIAL_5_OPCODE_Y1 = 1, + SADAB_U_SPECIAL_0_OPCODE_X0 = 58, + SADAH_SPECIAL_0_OPCODE_X0 = 59, + SADAH_U_SPECIAL_0_OPCODE_X0 = 60, + SADB_U_SPECIAL_0_OPCODE_X0 = 61, + SADH_SPECIAL_0_OPCODE_X0 = 62, + SADH_U_SPECIAL_0_OPCODE_X0 = 63, + SBADD_IMM_0_OPCODE_X1 = 28, + SB_OPCODE_Y2 = 5, + SB_SPECIAL_0_OPCODE_X1 = 32, + SEQB_SPECIAL_0_OPCODE_X0 = 64, + SEQB_SPECIAL_0_OPCODE_X1 = 33, + SEQH_SPECIAL_0_OPCODE_X0 = 65, + SEQH_SPECIAL_0_OPCODE_X1 = 34, + SEQIB_IMM_0_OPCODE_X0 = 9, + SEQIB_IMM_0_OPCODE_X1 = 12, + SEQIH_IMM_0_OPCODE_X0 = 10, + SEQIH_IMM_0_OPCODE_X1 = 13, + SEQI_IMM_0_OPCODE_X0 = 11, + SEQI_IMM_0_OPCODE_X1 = 14, + SEQI_OPCODE_Y0 = 12, + SEQI_OPCODE_Y1 = 10, + SEQ_SPECIAL_0_OPCODE_X0 = 66, + SEQ_SPECIAL_0_OPCODE_X1 = 35, + SEQ_SPECIAL_5_OPCODE_Y0 = 2, + SEQ_SPECIAL_5_OPCODE_Y1 = 2, + SHADD_IMM_0_OPCODE_X1 = 29, + SHL8II_IMM_0_OPCODE_SN = 3, + SHLB_SPECIAL_0_OPCODE_X0 = 67, + SHLB_SPECIAL_0_OPCODE_X1 = 36, + SHLH_SPECIAL_0_OPCODE_X0 = 68, + SHLH_SPECIAL_0_OPCODE_X1 = 37, + SHLIB_SHUN_0_OPCODE_X0 = 2, + SHLIB_SHUN_0_OPCODE_X1 = 2, + SHLIH_SHUN_0_OPCODE_X0 = 3, + SHLIH_SHUN_0_OPCODE_X1 = 3, + SHLI_SHUN_0_OPCODE_X0 = 4, + SHLI_SHUN_0_OPCODE_X1 = 4, + SHLI_SHUN_0_OPCODE_Y0 = 2, + SHLI_SHUN_0_OPCODE_Y1 = 2, + SHL_SPECIAL_0_OPCODE_X0 = 69, + SHL_SPECIAL_0_OPCODE_X1 = 38, + SHL_SPECIAL_3_OPCODE_Y0 = 1, + SHL_SPECIAL_3_OPCODE_Y1 = 1, + SHR1_RR_IMM_0_OPCODE_SN = 9, + SHRB_SPECIAL_0_OPCODE_X0 = 70, + SHRB_SPECIAL_0_OPCODE_X1 = 39, + SHRH_SPECIAL_0_OPCODE_X0 = 71, + SHRH_SPECIAL_0_OPCODE_X1 = 40, + SHRIB_SHUN_0_OPCODE_X0 = 5, + SHRIB_SHUN_0_OPCODE_X1 = 5, + SHRIH_SHUN_0_OPCODE_X0 = 6, + SHRIH_SHUN_0_OPCODE_X1 = 6, + SHRI_SHUN_0_OPCODE_X0 = 7, + SHRI_SHUN_0_OPCODE_X1 = 7, + SHRI_SHUN_0_OPCODE_Y0 = 3, + SHRI_SHUN_0_OPCODE_Y1 = 3, + SHR_SPECIAL_0_OPCODE_X0 = 72, + SHR_SPECIAL_0_OPCODE_X1 = 41, + SHR_SPECIAL_3_OPCODE_Y0 = 2, + SHR_SPECIAL_3_OPCODE_Y1 = 2, + SHUN_0_OPCODE_X0 = 7, + SHUN_0_OPCODE_X1 = 8, + SHUN_0_OPCODE_Y0 = 13, + SHUN_0_OPCODE_Y1 = 11, + SH_OPCODE_Y2 = 6, + SH_SPECIAL_0_OPCODE_X1 = 42, + SLTB_SPECIAL_0_OPCODE_X0 = 73, + SLTB_SPECIAL_0_OPCODE_X1 = 43, + SLTB_U_SPECIAL_0_OPCODE_X0 = 74, + SLTB_U_SPECIAL_0_OPCODE_X1 = 44, + SLTEB_SPECIAL_0_OPCODE_X0 = 75, + SLTEB_SPECIAL_0_OPCODE_X1 = 45, + SLTEB_U_SPECIAL_0_OPCODE_X0 = 76, + SLTEB_U_SPECIAL_0_OPCODE_X1 = 46, + SLTEH_SPECIAL_0_OPCODE_X0 = 77, + SLTEH_SPECIAL_0_OPCODE_X1 = 47, + SLTEH_U_SPECIAL_0_OPCODE_X0 = 78, + SLTEH_U_SPECIAL_0_OPCODE_X1 = 48, + SLTE_SPECIAL_0_OPCODE_X0 = 79, + SLTE_SPECIAL_0_OPCODE_X1 = 49, + SLTE_SPECIAL_4_OPCODE_Y0 = 0, + SLTE_SPECIAL_4_OPCODE_Y1 = 0, + SLTE_U_SPECIAL_0_OPCODE_X0 = 80, + SLTE_U_SPECIAL_0_OPCODE_X1 = 50, + SLTE_U_SPECIAL_4_OPCODE_Y0 = 1, + SLTE_U_SPECIAL_4_OPCODE_Y1 = 1, + SLTH_SPECIAL_0_OPCODE_X0 = 81, + SLTH_SPECIAL_0_OPCODE_X1 = 51, + SLTH_U_SPECIAL_0_OPCODE_X0 = 82, + SLTH_U_SPECIAL_0_OPCODE_X1 = 52, + SLTIB_IMM_0_OPCODE_X0 = 12, + SLTIB_IMM_0_OPCODE_X1 = 15, + SLTIB_U_IMM_0_OPCODE_X0 = 13, + SLTIB_U_IMM_0_OPCODE_X1 = 16, + SLTIH_IMM_0_OPCODE_X0 = 14, + SLTIH_IMM_0_OPCODE_X1 = 17, + SLTIH_U_IMM_0_OPCODE_X0 = 15, + SLTIH_U_IMM_0_OPCODE_X1 = 18, + SLTI_IMM_0_OPCODE_X0 = 16, + SLTI_IMM_0_OPCODE_X1 = 19, + SLTI_OPCODE_Y0 = 14, + SLTI_OPCODE_Y1 = 12, + SLTI_U_IMM_0_OPCODE_X0 = 17, + SLTI_U_IMM_0_OPCODE_X1 = 20, + SLTI_U_OPCODE_Y0 = 15, + SLTI_U_OPCODE_Y1 = 13, + SLT_SPECIAL_0_OPCODE_X0 = 83, + SLT_SPECIAL_0_OPCODE_X1 = 53, + SLT_SPECIAL_4_OPCODE_Y0 = 2, + SLT_SPECIAL_4_OPCODE_Y1 = 2, + SLT_U_SPECIAL_0_OPCODE_X0 = 84, + SLT_U_SPECIAL_0_OPCODE_X1 = 54, + SLT_U_SPECIAL_4_OPCODE_Y0 = 3, + SLT_U_SPECIAL_4_OPCODE_Y1 = 3, + SNEB_SPECIAL_0_OPCODE_X0 = 85, + SNEB_SPECIAL_0_OPCODE_X1 = 55, + SNEH_SPECIAL_0_OPCODE_X0 = 86, + SNEH_SPECIAL_0_OPCODE_X1 = 56, + SNE_SPECIAL_0_OPCODE_X0 = 87, + SNE_SPECIAL_0_OPCODE_X1 = 57, + SNE_SPECIAL_5_OPCODE_Y0 = 3, + SNE_SPECIAL_5_OPCODE_Y1 = 3, + SPECIAL_0_OPCODE_X0 = 0, + SPECIAL_0_OPCODE_X1 = 1, + SPECIAL_0_OPCODE_Y0 = 1, + SPECIAL_0_OPCODE_Y1 = 1, + SPECIAL_1_OPCODE_Y0 = 2, + SPECIAL_1_OPCODE_Y1 = 2, + SPECIAL_2_OPCODE_Y0 = 3, + SPECIAL_2_OPCODE_Y1 = 3, + SPECIAL_3_OPCODE_Y0 = 4, + SPECIAL_3_OPCODE_Y1 = 4, + SPECIAL_4_OPCODE_Y0 = 5, + SPECIAL_4_OPCODE_Y1 = 5, + SPECIAL_5_OPCODE_Y0 = 6, + SPECIAL_5_OPCODE_Y1 = 6, + SPECIAL_6_OPCODE_Y0 = 7, + SPECIAL_7_OPCODE_Y0 = 8, + SRAB_SPECIAL_0_OPCODE_X0 = 88, + SRAB_SPECIAL_0_OPCODE_X1 = 58, + SRAH_SPECIAL_0_OPCODE_X0 = 89, + SRAH_SPECIAL_0_OPCODE_X1 = 59, + SRAIB_SHUN_0_OPCODE_X0 = 8, + SRAIB_SHUN_0_OPCODE_X1 = 8, + SRAIH_SHUN_0_OPCODE_X0 = 9, + SRAIH_SHUN_0_OPCODE_X1 = 9, + SRAI_SHUN_0_OPCODE_X0 = 10, + SRAI_SHUN_0_OPCODE_X1 = 10, + SRAI_SHUN_0_OPCODE_Y0 = 4, + SRAI_SHUN_0_OPCODE_Y1 = 4, + SRA_SPECIAL_0_OPCODE_X0 = 90, + SRA_SPECIAL_0_OPCODE_X1 = 60, + SRA_SPECIAL_3_OPCODE_Y0 = 3, + SRA_SPECIAL_3_OPCODE_Y1 = 3, + SUBBS_U_SPECIAL_0_OPCODE_X0 = 100, + SUBBS_U_SPECIAL_0_OPCODE_X1 = 70, + SUBB_SPECIAL_0_OPCODE_X0 = 91, + SUBB_SPECIAL_0_OPCODE_X1 = 61, + SUBHS_SPECIAL_0_OPCODE_X0 = 101, + SUBHS_SPECIAL_0_OPCODE_X1 = 71, + SUBH_SPECIAL_0_OPCODE_X0 = 92, + SUBH_SPECIAL_0_OPCODE_X1 = 62, + SUBS_SPECIAL_0_OPCODE_X0 = 97, + SUBS_SPECIAL_0_OPCODE_X1 = 67, + SUB_SPECIAL_0_OPCODE_X0 = 93, + SUB_SPECIAL_0_OPCODE_X1 = 63, + SUB_SPECIAL_0_OPCODE_Y0 = 3, + SUB_SPECIAL_0_OPCODE_Y1 = 3, + SWADD_IMM_0_OPCODE_X1 = 30, + SWINT0_UN_0_SHUN_0_OPCODE_X1 = 18, + SWINT1_UN_0_SHUN_0_OPCODE_X1 = 19, + SWINT2_UN_0_SHUN_0_OPCODE_X1 = 20, + SWINT3_UN_0_SHUN_0_OPCODE_X1 = 21, + SW_OPCODE_Y2 = 7, + SW_SPECIAL_0_OPCODE_X1 = 64, + TBLIDXB0_UN_0_SHUN_0_OPCODE_X0 = 8, + TBLIDXB0_UN_0_SHUN_0_OPCODE_Y0 = 8, + TBLIDXB1_UN_0_SHUN_0_OPCODE_X0 = 9, + TBLIDXB1_UN_0_SHUN_0_OPCODE_Y0 = 9, + TBLIDXB2_UN_0_SHUN_0_OPCODE_X0 = 10, + TBLIDXB2_UN_0_SHUN_0_OPCODE_Y0 = 10, + TBLIDXB3_UN_0_SHUN_0_OPCODE_X0 = 11, + TBLIDXB3_UN_0_SHUN_0_OPCODE_Y0 = 11, + TNS_UN_0_SHUN_0_OPCODE_X1 = 22, + UN_0_SHUN_0_OPCODE_X0 = 11, + UN_0_SHUN_0_OPCODE_X1 = 11, + UN_0_SHUN_0_OPCODE_Y0 = 5, + UN_0_SHUN_0_OPCODE_Y1 = 5, + WH64_UN_0_SHUN_0_OPCODE_X1 = 23, + XORI_IMM_0_OPCODE_X0 = 2, + XORI_IMM_0_OPCODE_X1 = 21, + XOR_SPECIAL_0_OPCODE_X0 = 94, + XOR_SPECIAL_0_OPCODE_X1 = 65, + XOR_SPECIAL_2_OPCODE_Y0 = 3, + XOR_SPECIAL_2_OPCODE_Y1 = 3 +}; + + +#endif /* __ASSEMBLER__ */ + +#endif /* __ARCH_OPCODE_H__ */ diff --git a/arch/tile/include/arch/sim.h b/arch/tile/include/arch/sim.h new file mode 100644 index 00000000..e54b7b05 --- /dev/null +++ b/arch/tile/include/arch/sim.h @@ -0,0 +1,643 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file + * + * Provides an API for controlling the simulator at runtime. + */ + +/** + * @addtogroup arch_sim + * @{ + * + * An API for controlling the simulator at runtime. + * + * The simulator's behavior can be modified while it is running. + * For example, human-readable trace output can be enabled and disabled + * around code of interest. + * + * There are two ways to modify simulator behavior: + * programmatically, by calling various sim_* functions, and + * interactively, by entering commands like "sim set functional true" + * at the tile-monitor prompt. Typing "sim help" at that prompt provides + * a list of interactive commands. + * + * All interactive commands can also be executed programmatically by + * passing a string to the sim_command function. + */ + +#ifndef __ARCH_SIM_H__ +#define __ARCH_SIM_H__ + +#include <arch/sim_def.h> +#include <arch/abi.h> + +#ifndef __ASSEMBLER__ + +#include <arch/spr_def.h> + + +/** + * Return true if the current program is running under a simulator, + * rather than on real hardware. If running on hardware, other "sim_xxx()" + * calls have no useful effect. + */ +static inline int +sim_is_simulator(void) +{ + return __insn_mfspr(SPR_SIM_CONTROL) != 0; +} + + +/** + * Checkpoint the simulator state to a checkpoint file. + * + * The checkpoint file name is either the default or the name specified + * on the command line with "--checkpoint-file". + */ +static __inline void +sim_checkpoint(void) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_CHECKPOINT); +} + + +/** + * Report whether or not various kinds of simulator tracing are enabled. + * + * @return The bitwise OR of these values: + * + * SIM_TRACE_CYCLES (--trace-cycles), + * SIM_TRACE_ROUTER (--trace-router), + * SIM_TRACE_REGISTER_WRITES (--trace-register-writes), + * SIM_TRACE_DISASM (--trace-disasm), + * SIM_TRACE_STALL_INFO (--trace-stall-info) + * SIM_TRACE_MEMORY_CONTROLLER (--trace-memory-controller) + * SIM_TRACE_L2_CACHE (--trace-l2) + * SIM_TRACE_LINES (--trace-lines) + */ +static __inline unsigned int +sim_get_tracing(void) +{ + return __insn_mfspr(SPR_SIM_CONTROL) & SIM_TRACE_FLAG_MASK; +} + + +/** + * Turn on or off different kinds of simulator tracing. + * + * @param mask Either one of these special values: + * + * SIM_TRACE_NONE (turns off tracing), + * SIM_TRACE_ALL (turns on all possible tracing). + * + * or the bitwise OR of these values: + * + * SIM_TRACE_CYCLES (--trace-cycles), + * SIM_TRACE_ROUTER (--trace-router), + * SIM_TRACE_REGISTER_WRITES (--trace-register-writes), + * SIM_TRACE_DISASM (--trace-disasm), + * SIM_TRACE_STALL_INFO (--trace-stall-info) + * SIM_TRACE_MEMORY_CONTROLLER (--trace-memory-controller) + * SIM_TRACE_L2_CACHE (--trace-l2) + * SIM_TRACE_LINES (--trace-lines) + */ +static __inline void +sim_set_tracing(unsigned int mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_TRACE_SPR_ARG(mask)); +} + + +/** + * Request dumping of different kinds of simulator state. + * + * @param mask Either this special value: + * + * SIM_DUMP_ALL (dump all known state) + * + * or the bitwise OR of these values: + * + * SIM_DUMP_REGS (the register file), + * SIM_DUMP_SPRS (the SPRs), + * SIM_DUMP_ITLB (the iTLB), + * SIM_DUMP_DTLB (the dTLB), + * SIM_DUMP_L1I (the L1 I-cache), + * SIM_DUMP_L1D (the L1 D-cache), + * SIM_DUMP_L2 (the L2 cache), + * SIM_DUMP_SNREGS (the switch register file), + * SIM_DUMP_SNITLB (the switch iTLB), + * SIM_DUMP_SNL1I (the switch L1 I-cache), + * SIM_DUMP_BACKTRACE (the current backtrace) + */ +static __inline void +sim_dump(unsigned int mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_DUMP_SPR_ARG(mask)); +} + + +/** + * Print a string to the simulator stdout. + * + * @param str The string to be written. + */ +static __inline void +sim_print(const char* str) +{ + for ( ; *str != '\0'; str++) + { + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC | + (*str << _SIM_CONTROL_OPERATOR_BITS)); + } + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC | + (SIM_PUTC_FLUSH_BINARY << _SIM_CONTROL_OPERATOR_BITS)); +} + + +/** + * Print a string to the simulator stdout. + * + * @param str The string to be written (a newline is automatically added). + */ +static __inline void +sim_print_string(const char* str) +{ + for ( ; *str != '\0'; str++) + { + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC | + (*str << _SIM_CONTROL_OPERATOR_BITS)); + } + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC | + (SIM_PUTC_FLUSH_STRING << _SIM_CONTROL_OPERATOR_BITS)); +} + + +/** + * Execute a simulator command string. + * + * Type 'sim help' at the tile-monitor prompt to learn what commands + * are available. Note the use of the tile-monitor "sim" command to + * pass commands to the simulator. + * + * The argument to sim_command() does not include the leading "sim" + * prefix used at the tile-monitor prompt; for example, you might call + * sim_command("trace disasm"). + */ +static __inline void +sim_command(const char* str) +{ + int c; + do + { + c = *str++; + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_COMMAND | + (c << _SIM_CONTROL_OPERATOR_BITS)); + } + while (c); +} + + + +#ifndef __DOXYGEN__ + +/** + * The underlying implementation of "_sim_syscall()". + * + * We use extra "and" instructions to ensure that all the values + * we are passing to the simulator are actually valid in the registers + * (i.e. returned from memory) prior to the SIM_CONTROL spr. + */ +static __inline long _sim_syscall0(int val) +{ + long result; + __asm__ __volatile__ ("mtspr SIM_CONTROL, r0" + : "=R00" (result) : "R00" (val)); + return result; +} + +static __inline long _sim_syscall1(int val, long arg1) +{ + long result; + __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }" + : "=R00" (result) : "R00" (val), "R01" (arg1)); + return result; +} + +static __inline long _sim_syscall2(int val, long arg1, long arg2) +{ + long result; + __asm__ __volatile__ ("{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" + : "=R00" (result) + : "R00" (val), "R01" (arg1), "R02" (arg2)); + return result; +} + +/* Note that _sim_syscall3() and higher are technically at risk of + receiving an interrupt right before the mtspr bundle, in which case + the register values for arguments 3 and up may still be in flight + to the core from a stack frame reload. */ + +static __inline long _sim_syscall3(int val, long arg1, long arg2, long arg3) +{ + long result; + __asm__ __volatile__ ("{ and zero, r3, r3 };" + "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" + : "=R00" (result) + : "R00" (val), "R01" (arg1), "R02" (arg2), + "R03" (arg3)); + return result; +} + +static __inline long _sim_syscall4(int val, long arg1, long arg2, long arg3, + long arg4) +{ + long result; + __asm__ __volatile__ ("{ and zero, r3, r4 };" + "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" + : "=R00" (result) + : "R00" (val), "R01" (arg1), "R02" (arg2), + "R03" (arg3), "R04" (arg4)); + return result; +} + +static __inline long _sim_syscall5(int val, long arg1, long arg2, long arg3, + long arg4, long arg5) +{ + long result; + __asm__ __volatile__ ("{ and zero, r3, r4; and zero, r5, r5 };" + "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" + : "=R00" (result) + : "R00" (val), "R01" (arg1), "R02" (arg2), + "R03" (arg3), "R04" (arg4), "R05" (arg5)); + return result; +} + +/** + * Make a special syscall to the simulator itself, if running under + * simulation. This is used as the implementation of other functions + * and should not be used outside this file. + * + * @param syscall_num The simulator syscall number. + * @param nr The number of additional arguments provided. + * + * @return Varies by syscall. + */ +#define _sim_syscall(syscall_num, nr, args...) \ + _sim_syscall##nr( \ + ((syscall_num) << _SIM_CONTROL_OPERATOR_BITS) | SIM_CONTROL_SYSCALL, \ + ##args) + + +/* Values for the "access_mask" parameters below. */ +#define SIM_WATCHPOINT_READ 1 +#define SIM_WATCHPOINT_WRITE 2 +#define SIM_WATCHPOINT_EXECUTE 4 + + +static __inline int +sim_add_watchpoint(unsigned int process_id, + unsigned long address, + unsigned long size, + unsigned int access_mask, + unsigned long user_data) +{ + return _sim_syscall(SIM_SYSCALL_ADD_WATCHPOINT, 5, process_id, + address, size, access_mask, user_data); +} + + +static __inline int +sim_remove_watchpoint(unsigned int process_id, + unsigned long address, + unsigned long size, + unsigned int access_mask, + unsigned long user_data) +{ + return _sim_syscall(SIM_SYSCALL_REMOVE_WATCHPOINT, 5, process_id, + address, size, access_mask, user_data); +} + + +/** + * Return value from sim_query_watchpoint. + */ +struct SimQueryWatchpointStatus +{ + /** + * 0 if a watchpoint fired, 1 if no watchpoint fired, or -1 for + * error (meaning a bad process_id). + */ + int syscall_status; + + /** + * The address of the watchpoint that fired (this is the address + * passed to sim_add_watchpoint, not an address within that range + * that actually triggered the watchpoint). + */ + unsigned long address; + + /** The arbitrary user_data installed by sim_add_watchpoint. */ + unsigned long user_data; +}; + + +static __inline struct SimQueryWatchpointStatus +sim_query_watchpoint(unsigned int process_id) +{ + struct SimQueryWatchpointStatus status; + long val = SIM_CONTROL_SYSCALL | + (SIM_SYSCALL_QUERY_WATCHPOINT << _SIM_CONTROL_OPERATOR_BITS); + __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }" + : "=R00" (status.syscall_status), + "=R01" (status.address), + "=R02" (status.user_data) + : "R00" (val), "R01" (process_id)); + return status; +} + + +/* On the simulator, confirm lines have been evicted everywhere. */ +static __inline void +sim_validate_lines_evicted(unsigned long long pa, unsigned long length) +{ +#ifdef __LP64__ + _sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED, 2, pa, length); +#else + _sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED, 4, + 0 /* dummy */, (long)(pa), (long)(pa >> 32), length); +#endif +} + + +/* Return the current CPU speed in cycles per second. */ +static __inline long +sim_query_cpu_speed(void) +{ + return _sim_syscall(SIM_SYSCALL_QUERY_CPU_SPEED, 0); +} + +#endif /* !__DOXYGEN__ */ + + + + +/** + * Modify the shaping parameters of a shim. + * + * @param shim The shim to modify. One of: + * SIM_CONTROL_SHAPING_GBE_0 + * SIM_CONTROL_SHAPING_GBE_1 + * SIM_CONTROL_SHAPING_GBE_2 + * SIM_CONTROL_SHAPING_GBE_3 + * SIM_CONTROL_SHAPING_XGBE_0 + * SIM_CONTROL_SHAPING_XGBE_1 + * + * @param type The type of shaping. This should be the same type of + * shaping that is already in place on the shim. One of: + * SIM_CONTROL_SHAPING_MULTIPLIER + * SIM_CONTROL_SHAPING_PPS + * SIM_CONTROL_SHAPING_BPS + * + * @param units The magnitude of the rate. One of: + * SIM_CONTROL_SHAPING_UNITS_SINGLE + * SIM_CONTROL_SHAPING_UNITS_KILO + * SIM_CONTROL_SHAPING_UNITS_MEGA + * SIM_CONTROL_SHAPING_UNITS_GIGA + * + * @param rate The rate to which to change it. This must fit in + * SIM_CONTROL_SHAPING_RATE_BITS bits or a warning is issued and + * the shaping is not changed. + * + * @return 0 if no problems were detected in the arguments to sim_set_shaping + * or 1 if problems were detected (for example, rate does not fit in 17 bits). + */ +static __inline int +sim_set_shaping(unsigned shim, + unsigned type, + unsigned units, + unsigned rate) +{ + if ((rate & ~((1 << SIM_CONTROL_SHAPING_RATE_BITS) - 1)) != 0) + return 1; + + __insn_mtspr(SPR_SIM_CONTROL, SIM_SHAPING_SPR_ARG(shim, type, units, rate)); + return 0; +} + +#ifdef __tilegx__ + +/** Enable a set of mPIPE links. Pass a -1 link_mask to enable all links. */ +static __inline void +sim_enable_mpipe_links(unsigned mpipe, unsigned long link_mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, + (SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE | + (mpipe << 8) | (1 << 16) | ((uint_reg_t)link_mask << 32))); +} + +/** Disable a set of mPIPE links. Pass a -1 link_mask to disable all links. */ +static __inline void +sim_disable_mpipe_links(unsigned mpipe, unsigned long link_mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, + (SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE | + (mpipe << 8) | (0 << 16) | ((uint_reg_t)link_mask << 32))); +} + +#endif /* __tilegx__ */ + + +/* + * An API for changing "functional" mode. + */ + +#ifndef __DOXYGEN__ + +#define sim_enable_functional() \ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_ENABLE_FUNCTIONAL) + +#define sim_disable_functional() \ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_DISABLE_FUNCTIONAL) + +#endif /* __DOXYGEN__ */ + + +/* + * Profiler support. + */ + +/** + * Turn profiling on for the current task. + * + * Note that this has no effect if run in an environment without + * profiling support (thus, the proper flags to the simulator must + * be supplied). + */ +static __inline void +sim_profiler_enable(void) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_ENABLE); +} + + +/** Turn profiling off for the current task. */ +static __inline void +sim_profiler_disable(void) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_DISABLE); +} + + +/** + * Turn profiling on or off for the current task. + * + * @param enabled If true, turns on profiling. If false, turns it off. + * + * Note that this has no effect if run in an environment without + * profiling support (thus, the proper flags to the simulator must + * be supplied). + */ +static __inline void +sim_profiler_set_enabled(int enabled) +{ + int val = + enabled ? SIM_CONTROL_PROFILER_ENABLE : SIM_CONTROL_PROFILER_DISABLE; + __insn_mtspr(SPR_SIM_CONTROL, val); +} + + +/** + * Return true if and only if profiling is currently enabled + * for the current task. + * + * This returns false even if sim_profiler_enable() was called + * if the current execution environment does not support profiling. + */ +static __inline int +sim_profiler_is_enabled(void) +{ + return ((__insn_mfspr(SPR_SIM_CONTROL) & SIM_PROFILER_ENABLED_MASK) != 0); +} + + +/** + * Reset profiling counters to zero for the current task. + * + * Resetting can be done while profiling is enabled. It does not affect + * the chip-wide profiling counters. + */ +static __inline void +sim_profiler_clear(void) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_CLEAR); +} + + +/** + * Enable specified chip-level profiling counters. + * + * Does not affect the per-task profiling counters. + * + * @param mask Either this special value: + * + * SIM_CHIP_ALL (enables all chip-level components). + * + * or the bitwise OR of these values: + * + * SIM_CHIP_MEMCTL (enable all memory controllers) + * SIM_CHIP_XAUI (enable all XAUI controllers) + * SIM_CHIP_MPIPE (enable all MPIPE controllers) + */ +static __inline void +sim_profiler_chip_enable(unsigned int mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_ENABLE_SPR_ARG(mask)); +} + + +/** + * Disable specified chip-level profiling counters. + * + * Does not affect the per-task profiling counters. + * + * @param mask Either this special value: + * + * SIM_CHIP_ALL (disables all chip-level components). + * + * or the bitwise OR of these values: + * + * SIM_CHIP_MEMCTL (disable all memory controllers) + * SIM_CHIP_XAUI (disable all XAUI controllers) + * SIM_CHIP_MPIPE (disable all MPIPE controllers) + */ +static __inline void +sim_profiler_chip_disable(unsigned int mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_DISABLE_SPR_ARG(mask)); +} + + +/** + * Reset specified chip-level profiling counters to zero. + * + * Does not affect the per-task profiling counters. + * + * @param mask Either this special value: + * + * SIM_CHIP_ALL (clears all chip-level components). + * + * or the bitwise OR of these values: + * + * SIM_CHIP_MEMCTL (clear all memory controllers) + * SIM_CHIP_XAUI (clear all XAUI controllers) + * SIM_CHIP_MPIPE (clear all MPIPE controllers) + */ +static __inline void +sim_profiler_chip_clear(unsigned int mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_CLEAR_SPR_ARG(mask)); +} + + +/* + * Event support. + */ + +#ifndef __DOXYGEN__ + +static __inline void +sim_event_begin(unsigned int x) +{ +#if defined(__tile__) && !defined(__NO_EVENT_SPR__) + __insn_mtspr(SPR_EVENT_BEGIN, x); +#endif +} + +static __inline void +sim_event_end(unsigned int x) +{ +#if defined(__tile__) && !defined(__NO_EVENT_SPR__) + __insn_mtspr(SPR_EVENT_END, x); +#endif +} + +#endif /* !__DOXYGEN__ */ + +#endif /* !__ASSEMBLER__ */ + +#endif /* !__ARCH_SIM_H__ */ + +/** @} */ diff --git a/arch/tile/include/arch/sim_def.h b/arch/tile/include/arch/sim_def.h new file mode 100644 index 00000000..4b44a2b6 --- /dev/null +++ b/arch/tile/include/arch/sim_def.h @@ -0,0 +1,505 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file + * + * Some low-level simulator definitions. + */ + +#ifndef __ARCH_SIM_DEF_H__ +#define __ARCH_SIM_DEF_H__ + + +/** + * Internal: the low bits of the SIM_CONTROL_* SPR values specify + * the operation to perform, and the remaining bits are + * an operation-specific parameter (often unused). + */ +#define _SIM_CONTROL_OPERATOR_BITS 8 + + +/* + * Values which can be written to SPR_SIM_CONTROL. + */ + +/** If written to SPR_SIM_CONTROL, stops profiling. */ +#define SIM_CONTROL_PROFILER_DISABLE 0 + +/** If written to SPR_SIM_CONTROL, starts profiling. */ +#define SIM_CONTROL_PROFILER_ENABLE 1 + +/** If written to SPR_SIM_CONTROL, clears profiling counters. */ +#define SIM_CONTROL_PROFILER_CLEAR 2 + +/** If written to SPR_SIM_CONTROL, checkpoints the simulator. */ +#define SIM_CONTROL_CHECKPOINT 3 + +/** + * If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8), + * sets the tracing mask to the given mask. See "sim_set_tracing()". + */ +#define SIM_CONTROL_SET_TRACING 4 + +/** + * If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8), + * dumps the requested items of machine state to the log. + */ +#define SIM_CONTROL_DUMP 5 + +/** If written to SPR_SIM_CONTROL, clears chip-level profiling counters. */ +#define SIM_CONTROL_PROFILER_CHIP_CLEAR 6 + +/** If written to SPR_SIM_CONTROL, disables chip-level profiling. */ +#define SIM_CONTROL_PROFILER_CHIP_DISABLE 7 + +/** If written to SPR_SIM_CONTROL, enables chip-level profiling. */ +#define SIM_CONTROL_PROFILER_CHIP_ENABLE 8 + +/** If written to SPR_SIM_CONTROL, enables chip-level functional mode */ +#define SIM_CONTROL_ENABLE_FUNCTIONAL 9 + +/** If written to SPR_SIM_CONTROL, disables chip-level functional mode. */ +#define SIM_CONTROL_DISABLE_FUNCTIONAL 10 + +/** + * If written to SPR_SIM_CONTROL, enables chip-level functional mode. + * All tiles must perform this write for functional mode to be enabled. + * Ignored in naked boot mode unless --functional is specified. + * WARNING: Only the hypervisor startup code should use this! + */ +#define SIM_CONTROL_ENABLE_FUNCTIONAL_BARRIER 11 + +/** + * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), + * writes a string directly to the simulator output. Written to once for + * each character in the string, plus a final NUL. Instead of NUL, + * you can also use "SIM_PUTC_FLUSH_STRING" or "SIM_PUTC_FLUSH_BINARY". + */ +/* ISSUE: Document the meaning of "newline", and the handling of NUL. */ +#define SIM_CONTROL_PUTC 12 + +/** + * If written to SPR_SIM_CONTROL, clears the --grind-coherence state for + * this core. This is intended to be used before a loop that will + * invalidate the cache by loading new data and evicting all current data. + * Generally speaking, this API should only be used by system code. + */ +#define SIM_CONTROL_GRINDER_CLEAR 13 + +/** If written to SPR_SIM_CONTROL, shuts down the simulator. */ +#define SIM_CONTROL_SHUTDOWN 14 + +/** + * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8), + * indicates that a fork syscall just created the given process. + */ +#define SIM_CONTROL_OS_FORK 15 + +/** + * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8), + * indicates that an exit syscall was just executed by the given process. + */ +#define SIM_CONTROL_OS_EXIT 16 + +/** + * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8), + * indicates that the OS just switched to the given process. + */ +#define SIM_CONTROL_OS_SWITCH 17 + +/** + * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), + * indicates that an exec syscall was just executed. Written to once for + * each character in the executable name, plus a final NUL. + */ +#define SIM_CONTROL_OS_EXEC 18 + +/** + * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), + * indicates that an interpreter (PT_INTERP) was loaded. Written to once + * for each character in "ADDR:PATH", plus a final NUL, where "ADDR" is a + * hex load address starting with "0x", and "PATH" is the executable name. + */ +#define SIM_CONTROL_OS_INTERP 19 + +/** + * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), + * indicates that a dll was loaded. Written to once for each character + * in "ADDR:PATH", plus a final NUL, where "ADDR" is a hexadecimal load + * address starting with "0x", and "PATH" is the executable name. + */ +#define SIM_CONTROL_DLOPEN 20 + +/** + * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), + * indicates that a dll was unloaded. Written to once for each character + * in "ADDR", plus a final NUL, where "ADDR" is a hexadecimal load + * address starting with "0x". + */ +#define SIM_CONTROL_DLCLOSE 21 + +/** + * If written to SPR_SIM_CONTROL, combined with a flag (shifted by 8), + * indicates whether to allow data reads to remotely-cached + * dirty cache lines to be cached locally without grinder warnings or + * assertions (used by Linux kernel fast memcpy). + */ +#define SIM_CONTROL_ALLOW_MULTIPLE_CACHING 22 + +/** If written to SPR_SIM_CONTROL, enables memory tracing. */ +#define SIM_CONTROL_ENABLE_MEM_LOGGING 23 + +/** If written to SPR_SIM_CONTROL, disables memory tracing. */ +#define SIM_CONTROL_DISABLE_MEM_LOGGING 24 + +/** + * If written to SPR_SIM_CONTROL, changes the shaping parameters of one of + * the gbe or xgbe shims. Must specify the shim id, the type, the units, and + * the rate, as defined in SIM_SHAPING_SPR_ARG. + */ +#define SIM_CONTROL_SHAPING 25 + +/** + * If written to SPR_SIM_CONTROL, combined with character (shifted by 8), + * requests that a simulator command be executed. Written to once for each + * character in the command, plus a final NUL. + */ +#define SIM_CONTROL_COMMAND 26 + +/** + * If written to SPR_SIM_CONTROL, indicates that the simulated system + * is panicking, to allow debugging via --debug-on-panic. + */ +#define SIM_CONTROL_PANIC 27 + +/** + * If written to SPR_SIM_CONTROL, triggers a simulator syscall. + * See "sim_syscall()" for more info. + */ +#define SIM_CONTROL_SYSCALL 32 + +/** + * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8), + * provides the pid that subsequent SIM_CONTROL_OS_FORK writes should + * use as the pid, rather than the default previous SIM_CONTROL_OS_SWITCH. + */ +#define SIM_CONTROL_OS_FORK_PARENT 33 + +/** + * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number + * (shifted by 8), clears the pending magic data section. The cleared + * pending magic data section and any subsequently appended magic bytes + * will only take effect when the classifier blast programmer is run. + */ +#define SIM_CONTROL_CLEAR_MPIPE_MAGIC_BYTES 34 + +/** + * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number + * (shifted by 8) and a byte of data (shifted by 16), appends that byte + * to the shim's pending magic data section. The pending magic data + * section takes effect when the classifier blast programmer is run. + */ +#define SIM_CONTROL_APPEND_MPIPE_MAGIC_BYTE 35 + +/** + * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number + * (shifted by 8), an enable=1/disable=0 bit (shifted by 16), and a + * mask of links (shifted by 32), enable or disable the corresponding + * mPIPE links. + */ +#define SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE 36 + + +/* + * Syscall numbers for use with "sim_syscall()". + */ + +/** Syscall number for sim_add_watchpoint(). */ +#define SIM_SYSCALL_ADD_WATCHPOINT 2 + +/** Syscall number for sim_remove_watchpoint(). */ +#define SIM_SYSCALL_REMOVE_WATCHPOINT 3 + +/** Syscall number for sim_query_watchpoint(). */ +#define SIM_SYSCALL_QUERY_WATCHPOINT 4 + +/** + * Syscall number that asserts that the cache lines whose 64-bit PA + * is passed as the second argument to sim_syscall(), and over a + * range passed as the third argument, are no longer in cache. + * The simulator raises an error if this is not the case. + */ +#define SIM_SYSCALL_VALIDATE_LINES_EVICTED 5 + +/** Syscall number for sim_query_cpu_speed(). */ +#define SIM_SYSCALL_QUERY_CPU_SPEED 6 + + +/* + * Bit masks which can be shifted by 8, combined with + * SIM_CONTROL_SET_TRACING, and written to SPR_SIM_CONTROL. + */ + +/** + * @addtogroup arch_sim + * @{ + */ + +/** Enable --trace-cycle when passed to simulator_set_tracing(). */ +#define SIM_TRACE_CYCLES 0x01 + +/** Enable --trace-router when passed to simulator_set_tracing(). */ +#define SIM_TRACE_ROUTER 0x02 + +/** Enable --trace-register-writes when passed to simulator_set_tracing(). */ +#define SIM_TRACE_REGISTER_WRITES 0x04 + +/** Enable --trace-disasm when passed to simulator_set_tracing(). */ +#define SIM_TRACE_DISASM 0x08 + +/** Enable --trace-stall-info when passed to simulator_set_tracing(). */ +#define SIM_TRACE_STALL_INFO 0x10 + +/** Enable --trace-memory-controller when passed to simulator_set_tracing(). */ +#define SIM_TRACE_MEMORY_CONTROLLER 0x20 + +/** Enable --trace-l2 when passed to simulator_set_tracing(). */ +#define SIM_TRACE_L2_CACHE 0x40 + +/** Enable --trace-lines when passed to simulator_set_tracing(). */ +#define SIM_TRACE_LINES 0x80 + +/** Turn off all tracing when passed to simulator_set_tracing(). */ +#define SIM_TRACE_NONE 0 + +/** Turn on all tracing when passed to simulator_set_tracing(). */ +#define SIM_TRACE_ALL (-1) + +/** @} */ + +/** Computes the value to write to SPR_SIM_CONTROL to set tracing flags. */ +#define SIM_TRACE_SPR_ARG(mask) \ + (SIM_CONTROL_SET_TRACING | ((mask) << _SIM_CONTROL_OPERATOR_BITS)) + + +/* + * Bit masks which can be shifted by 8, combined with + * SIM_CONTROL_DUMP, and written to SPR_SIM_CONTROL. + */ + +/** + * @addtogroup arch_sim + * @{ + */ + +/** Dump the general-purpose registers. */ +#define SIM_DUMP_REGS 0x001 + +/** Dump the SPRs. */ +#define SIM_DUMP_SPRS 0x002 + +/** Dump the ITLB. */ +#define SIM_DUMP_ITLB 0x004 + +/** Dump the DTLB. */ +#define SIM_DUMP_DTLB 0x008 + +/** Dump the L1 I-cache. */ +#define SIM_DUMP_L1I 0x010 + +/** Dump the L1 D-cache. */ +#define SIM_DUMP_L1D 0x020 + +/** Dump the L2 cache. */ +#define SIM_DUMP_L2 0x040 + +/** Dump the switch registers. */ +#define SIM_DUMP_SNREGS 0x080 + +/** Dump the switch ITLB. */ +#define SIM_DUMP_SNITLB 0x100 + +/** Dump the switch L1 I-cache. */ +#define SIM_DUMP_SNL1I 0x200 + +/** Dump the current backtrace. */ +#define SIM_DUMP_BACKTRACE 0x400 + +/** Only dump valid lines in caches. */ +#define SIM_DUMP_VALID_LINES 0x800 + +/** Dump everything that is dumpable. */ +#define SIM_DUMP_ALL (-1 & ~SIM_DUMP_VALID_LINES) + +/** @} */ + +/** Computes the value to write to SPR_SIM_CONTROL to dump machine state. */ +#define SIM_DUMP_SPR_ARG(mask) \ + (SIM_CONTROL_DUMP | ((mask) << _SIM_CONTROL_OPERATOR_BITS)) + + +/* + * Bit masks which can be shifted by 8, combined with + * SIM_CONTROL_PROFILER_CHIP_xxx, and written to SPR_SIM_CONTROL. + */ + +/** + * @addtogroup arch_sim + * @{ + */ + +/** Use with with SIM_PROFILER_CHIP_xxx to control the memory controllers. */ +#define SIM_CHIP_MEMCTL 0x001 + +/** Use with with SIM_PROFILER_CHIP_xxx to control the XAUI interface. */ +#define SIM_CHIP_XAUI 0x002 + +/** Use with with SIM_PROFILER_CHIP_xxx to control the PCIe interface. */ +#define SIM_CHIP_PCIE 0x004 + +/** Use with with SIM_PROFILER_CHIP_xxx to control the MPIPE interface. */ +#define SIM_CHIP_MPIPE 0x008 + +/** Use with with SIM_PROFILER_CHIP_xxx to control the TRIO interface. */ +#define SIM_CHIP_TRIO 0x010 + +/** Reference all chip devices. */ +#define SIM_CHIP_ALL (-1) + +/** @} */ + +/** Computes the value to write to SPR_SIM_CONTROL to clear chip statistics. */ +#define SIM_PROFILER_CHIP_CLEAR_SPR_ARG(mask) \ + (SIM_CONTROL_PROFILER_CHIP_CLEAR | ((mask) << _SIM_CONTROL_OPERATOR_BITS)) + +/** Computes the value to write to SPR_SIM_CONTROL to disable chip statistics.*/ +#define SIM_PROFILER_CHIP_DISABLE_SPR_ARG(mask) \ + (SIM_CONTROL_PROFILER_CHIP_DISABLE | ((mask) << _SIM_CONTROL_OPERATOR_BITS)) + +/** Computes the value to write to SPR_SIM_CONTROL to enable chip statistics. */ +#define SIM_PROFILER_CHIP_ENABLE_SPR_ARG(mask) \ + (SIM_CONTROL_PROFILER_CHIP_ENABLE | ((mask) << _SIM_CONTROL_OPERATOR_BITS)) + + + +/* Shim bitrate controls. */ + +/** The number of bits used to store the shim id. */ +#define SIM_CONTROL_SHAPING_SHIM_ID_BITS 3 + +/** + * @addtogroup arch_sim + * @{ + */ + +/** Change the gbe 0 bitrate. */ +#define SIM_CONTROL_SHAPING_GBE_0 0x0 + +/** Change the gbe 1 bitrate. */ +#define SIM_CONTROL_SHAPING_GBE_1 0x1 + +/** Change the gbe 2 bitrate. */ +#define SIM_CONTROL_SHAPING_GBE_2 0x2 + +/** Change the gbe 3 bitrate. */ +#define SIM_CONTROL_SHAPING_GBE_3 0x3 + +/** Change the xgbe 0 bitrate. */ +#define SIM_CONTROL_SHAPING_XGBE_0 0x4 + +/** Change the xgbe 1 bitrate. */ +#define SIM_CONTROL_SHAPING_XGBE_1 0x5 + +/** The type of shaping to do. */ +#define SIM_CONTROL_SHAPING_TYPE_BITS 2 + +/** Control the multiplier. */ +#define SIM_CONTROL_SHAPING_MULTIPLIER 0 + +/** Control the PPS. */ +#define SIM_CONTROL_SHAPING_PPS 1 + +/** Control the BPS. */ +#define SIM_CONTROL_SHAPING_BPS 2 + +/** The number of bits for the units for the shaping parameter. */ +#define SIM_CONTROL_SHAPING_UNITS_BITS 2 + +/** Provide a number in single units. */ +#define SIM_CONTROL_SHAPING_UNITS_SINGLE 0 + +/** Provide a number in kilo units. */ +#define SIM_CONTROL_SHAPING_UNITS_KILO 1 + +/** Provide a number in mega units. */ +#define SIM_CONTROL_SHAPING_UNITS_MEGA 2 + +/** Provide a number in giga units. */ +#define SIM_CONTROL_SHAPING_UNITS_GIGA 3 + +/** @} */ + +/** How many bits are available for the rate. */ +#define SIM_CONTROL_SHAPING_RATE_BITS \ + (32 - (_SIM_CONTROL_OPERATOR_BITS + \ + SIM_CONTROL_SHAPING_SHIM_ID_BITS + \ + SIM_CONTROL_SHAPING_TYPE_BITS + \ + SIM_CONTROL_SHAPING_UNITS_BITS)) + +/** Computes the value to write to SPR_SIM_CONTROL to change a bitrate. */ +#define SIM_SHAPING_SPR_ARG(shim, type, units, rate) \ + (SIM_CONTROL_SHAPING | \ + ((shim) | \ + ((type) << (SIM_CONTROL_SHAPING_SHIM_ID_BITS)) | \ + ((units) << (SIM_CONTROL_SHAPING_SHIM_ID_BITS + \ + SIM_CONTROL_SHAPING_TYPE_BITS)) | \ + ((rate) << (SIM_CONTROL_SHAPING_SHIM_ID_BITS + \ + SIM_CONTROL_SHAPING_TYPE_BITS + \ + SIM_CONTROL_SHAPING_UNITS_BITS))) << _SIM_CONTROL_OPERATOR_BITS) + + +/* + * Values returned when reading SPR_SIM_CONTROL. + * ISSUE: These names should share a longer common prefix. + */ + +/** + * When reading SPR_SIM_CONTROL, the mask of simulator tracing bits + * (SIM_TRACE_xxx values). + */ +#define SIM_TRACE_FLAG_MASK 0xFFFF + +/** When reading SPR_SIM_CONTROL, the mask for whether profiling is enabled. */ +#define SIM_PROFILER_ENABLED_MASK 0x10000 + + +/* + * Special arguments for "SIM_CONTROL_PUTC". + */ + +/** + * Flag value for forcing a PUTC string-flush, including + * coordinate/cycle prefix and newline. + */ +#define SIM_PUTC_FLUSH_STRING 0x100 + +/** + * Flag value for forcing a PUTC binary-data-flush, which skips the + * prefix and does not append a newline. + */ +#define SIM_PUTC_FLUSH_BINARY 0x101 + + +#endif /* __ARCH_SIM_DEF_H__ */ diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h new file mode 100644 index 00000000..d6ba449b --- /dev/null +++ b/arch/tile/include/arch/spr_def.h @@ -0,0 +1,113 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Include the proper base SPR definition file. */ +#ifdef __tilegx__ +#include <arch/spr_def_64.h> +#else +#include <arch/spr_def_32.h> +#endif + +#ifdef __KERNEL__ + +/* + * In addition to including the proper base SPR definition file, depending + * on machine architecture, this file defines several macros which allow + * kernel code to use protection-level dependent SPRs without worrying + * about which PL it's running at. In these macros, the PL that the SPR + * or interrupt number applies to is replaced by K. + */ + +#if CONFIG_KERNEL_PL != 1 && CONFIG_KERNEL_PL != 2 +#error CONFIG_KERNEL_PL must be 1 or 2 +#endif + +/* Concatenate 4 strings. */ +#define __concat4(a, b, c, d) a ## b ## c ## d +#define _concat4(a, b, c, d) __concat4(a, b, c, d) + +#ifdef __tilegx__ + +/* TILE-Gx dependent, protection-level dependent SPRs. */ + +#define SPR_INTERRUPT_MASK_K \ + _concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL,,) +#define SPR_INTERRUPT_MASK_SET_K \ + _concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL,,) +#define SPR_INTERRUPT_MASK_RESET_K \ + _concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL,,) +#define SPR_INTERRUPT_VECTOR_BASE_K \ + _concat4(SPR_INTERRUPT_VECTOR_BASE_, CONFIG_KERNEL_PL,,) + +#define SPR_IPI_MASK_K \ + _concat4(SPR_IPI_MASK_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_MASK_RESET_K \ + _concat4(SPR_IPI_MASK_RESET_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_MASK_SET_K \ + _concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_EVENT_K \ + _concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_EVENT_RESET_K \ + _concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_EVENT_SET_K \ + _concat4(SPR_IPI_EVENT_SET_, CONFIG_KERNEL_PL,,) +#define INT_IPI_K \ + _concat4(INT_IPI_, CONFIG_KERNEL_PL,,) + +#define SPR_SINGLE_STEP_CONTROL_K \ + _concat4(SPR_SINGLE_STEP_CONTROL_, CONFIG_KERNEL_PL,,) +#define SPR_SINGLE_STEP_EN_K_K \ + _concat4(SPR_SINGLE_STEP_EN_, CONFIG_KERNEL_PL, _, CONFIG_KERNEL_PL) +#define INT_SINGLE_STEP_K \ + _concat4(INT_SINGLE_STEP_, CONFIG_KERNEL_PL,,) + +#else + +/* TILEPro dependent, protection-level dependent SPRs. */ + +#define SPR_INTERRUPT_MASK_K_0 \ + _concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL, _0,) +#define SPR_INTERRUPT_MASK_K_1 \ + _concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL, _1,) +#define SPR_INTERRUPT_MASK_SET_K_0 \ + _concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL, _0,) +#define SPR_INTERRUPT_MASK_SET_K_1 \ + _concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL, _1,) +#define SPR_INTERRUPT_MASK_RESET_K_0 \ + _concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL, _0,) +#define SPR_INTERRUPT_MASK_RESET_K_1 \ + _concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL, _1,) + +#endif + +/* Generic protection-level dependent SPRs. */ + +#define SPR_SYSTEM_SAVE_K_0 \ + _concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _0,) +#define SPR_SYSTEM_SAVE_K_1 \ + _concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _1,) +#define SPR_SYSTEM_SAVE_K_2 \ + _concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _2,) +#define SPR_SYSTEM_SAVE_K_3 \ + _concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _3,) +#define SPR_EX_CONTEXT_K_0 \ + _concat4(SPR_EX_CONTEXT_, CONFIG_KERNEL_PL, _0,) +#define SPR_EX_CONTEXT_K_1 \ + _concat4(SPR_EX_CONTEXT_, CONFIG_KERNEL_PL, _1,) +#define SPR_INTCTRL_K_STATUS \ + _concat4(SPR_INTCTRL_, CONFIG_KERNEL_PL, _STATUS,) +#define INT_INTCTRL_K \ + _concat4(INT_INTCTRL_, CONFIG_KERNEL_PL,,) + +#endif /* __KERNEL__ */ diff --git a/arch/tile/include/arch/spr_def_32.h b/arch/tile/include/arch/spr_def_32.h new file mode 100644 index 00000000..bbc1f4c9 --- /dev/null +++ b/arch/tile/include/arch/spr_def_32.h @@ -0,0 +1,201 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef __DOXYGEN__ + +#ifndef __ARCH_SPR_DEF_H__ +#define __ARCH_SPR_DEF_H__ + +#define SPR_AUX_PERF_COUNT_0 0x6005 +#define SPR_AUX_PERF_COUNT_1 0x6006 +#define SPR_AUX_PERF_COUNT_CTL 0x6007 +#define SPR_AUX_PERF_COUNT_STS 0x6008 +#define SPR_CYCLE_HIGH 0x4e06 +#define SPR_CYCLE_LOW 0x4e07 +#define SPR_DMA_BYTE 0x3900 +#define SPR_DMA_CHUNK_SIZE 0x3901 +#define SPR_DMA_CTR 0x3902 +#define SPR_DMA_CTR__REQUEST_MASK 0x1 +#define SPR_DMA_CTR__SUSPEND_MASK 0x2 +#define SPR_DMA_DST_ADDR 0x3903 +#define SPR_DMA_DST_CHUNK_ADDR 0x3904 +#define SPR_DMA_SRC_ADDR 0x3905 +#define SPR_DMA_SRC_CHUNK_ADDR 0x3906 +#define SPR_DMA_STATUS__DONE_MASK 0x1 +#define SPR_DMA_STATUS__BUSY_MASK 0x2 +#define SPR_DMA_STATUS__RUNNING_MASK 0x10 +#define SPR_DMA_STRIDE 0x3907 +#define SPR_DMA_USER_STATUS 0x3908 +#define SPR_DONE 0x4e08 +#define SPR_EVENT_BEGIN 0x4e0d +#define SPR_EVENT_END 0x4e0e +#define SPR_EX_CONTEXT_0_0 0x4a05 +#define SPR_EX_CONTEXT_0_1 0x4a06 +#define SPR_EX_CONTEXT_0_1__PL_SHIFT 0 +#define SPR_EX_CONTEXT_0_1__PL_RMASK 0x3 +#define SPR_EX_CONTEXT_0_1__PL_MASK 0x3 +#define SPR_EX_CONTEXT_0_1__ICS_SHIFT 2 +#define SPR_EX_CONTEXT_0_1__ICS_RMASK 0x1 +#define SPR_EX_CONTEXT_0_1__ICS_MASK 0x4 +#define SPR_EX_CONTEXT_1_0 0x4805 +#define SPR_EX_CONTEXT_1_1 0x4806 +#define SPR_EX_CONTEXT_1_1__PL_SHIFT 0 +#define SPR_EX_CONTEXT_1_1__PL_RMASK 0x3 +#define SPR_EX_CONTEXT_1_1__PL_MASK 0x3 +#define SPR_EX_CONTEXT_1_1__ICS_SHIFT 2 +#define SPR_EX_CONTEXT_1_1__ICS_RMASK 0x1 +#define SPR_EX_CONTEXT_1_1__ICS_MASK 0x4 +#define SPR_EX_CONTEXT_2_0 0x4605 +#define SPR_EX_CONTEXT_2_1 0x4606 +#define SPR_EX_CONTEXT_2_1__PL_SHIFT 0 +#define SPR_EX_CONTEXT_2_1__PL_RMASK 0x3 +#define SPR_EX_CONTEXT_2_1__PL_MASK 0x3 +#define SPR_EX_CONTEXT_2_1__ICS_SHIFT 2 +#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1 +#define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4 +#define SPR_FAIL 0x4e09 +#define SPR_INTCTRL_0_STATUS 0x4a07 +#define SPR_INTCTRL_1_STATUS 0x4807 +#define SPR_INTCTRL_2_STATUS 0x4607 +#define SPR_INTERRUPT_CRITICAL_SECTION 0x4e0a +#define SPR_INTERRUPT_MASK_0_0 0x4a08 +#define SPR_INTERRUPT_MASK_0_1 0x4a09 +#define SPR_INTERRUPT_MASK_1_0 0x4809 +#define SPR_INTERRUPT_MASK_1_1 0x480a +#define SPR_INTERRUPT_MASK_2_0 0x4608 +#define SPR_INTERRUPT_MASK_2_1 0x4609 +#define SPR_INTERRUPT_MASK_RESET_0_0 0x4a0a +#define SPR_INTERRUPT_MASK_RESET_0_1 0x4a0b +#define SPR_INTERRUPT_MASK_RESET_1_0 0x480b +#define SPR_INTERRUPT_MASK_RESET_1_1 0x480c +#define SPR_INTERRUPT_MASK_RESET_2_0 0x460a +#define SPR_INTERRUPT_MASK_RESET_2_1 0x460b +#define SPR_INTERRUPT_MASK_SET_0_0 0x4a0c +#define SPR_INTERRUPT_MASK_SET_0_1 0x4a0d +#define SPR_INTERRUPT_MASK_SET_1_0 0x480d +#define SPR_INTERRUPT_MASK_SET_1_1 0x480e +#define SPR_INTERRUPT_MASK_SET_2_0 0x460c +#define SPR_INTERRUPT_MASK_SET_2_1 0x460d +#define SPR_MPL_DMA_CPL_SET_0 0x5800 +#define SPR_MPL_DMA_CPL_SET_1 0x5801 +#define SPR_MPL_DMA_CPL_SET_2 0x5802 +#define SPR_MPL_DMA_NOTIFY_SET_0 0x3800 +#define SPR_MPL_DMA_NOTIFY_SET_1 0x3801 +#define SPR_MPL_DMA_NOTIFY_SET_2 0x3802 +#define SPR_MPL_INTCTRL_0_SET_0 0x4a00 +#define SPR_MPL_INTCTRL_0_SET_1 0x4a01 +#define SPR_MPL_INTCTRL_0_SET_2 0x4a02 +#define SPR_MPL_INTCTRL_1_SET_0 0x4800 +#define SPR_MPL_INTCTRL_1_SET_1 0x4801 +#define SPR_MPL_INTCTRL_1_SET_2 0x4802 +#define SPR_MPL_INTCTRL_2_SET_0 0x4600 +#define SPR_MPL_INTCTRL_2_SET_1 0x4601 +#define SPR_MPL_INTCTRL_2_SET_2 0x4602 +#define SPR_MPL_SN_ACCESS_SET_0 0x0800 +#define SPR_MPL_SN_ACCESS_SET_1 0x0801 +#define SPR_MPL_SN_ACCESS_SET_2 0x0802 +#define SPR_MPL_SN_CPL_SET_0 0x5a00 +#define SPR_MPL_SN_CPL_SET_1 0x5a01 +#define SPR_MPL_SN_CPL_SET_2 0x5a02 +#define SPR_MPL_SN_FIREWALL_SET_0 0x2c00 +#define SPR_MPL_SN_FIREWALL_SET_1 0x2c01 +#define SPR_MPL_SN_FIREWALL_SET_2 0x2c02 +#define SPR_MPL_SN_NOTIFY_SET_0 0x2a00 +#define SPR_MPL_SN_NOTIFY_SET_1 0x2a01 +#define SPR_MPL_SN_NOTIFY_SET_2 0x2a02 +#define SPR_MPL_UDN_ACCESS_SET_0 0x0c00 +#define SPR_MPL_UDN_ACCESS_SET_1 0x0c01 +#define SPR_MPL_UDN_ACCESS_SET_2 0x0c02 +#define SPR_MPL_UDN_AVAIL_SET_0 0x4000 +#define SPR_MPL_UDN_AVAIL_SET_1 0x4001 +#define SPR_MPL_UDN_AVAIL_SET_2 0x4002 +#define SPR_MPL_UDN_CA_SET_0 0x3c00 +#define SPR_MPL_UDN_CA_SET_1 0x3c01 +#define SPR_MPL_UDN_CA_SET_2 0x3c02 +#define SPR_MPL_UDN_COMPLETE_SET_0 0x1400 +#define SPR_MPL_UDN_COMPLETE_SET_1 0x1401 +#define SPR_MPL_UDN_COMPLETE_SET_2 0x1402 +#define SPR_MPL_UDN_FIREWALL_SET_0 0x3000 +#define SPR_MPL_UDN_FIREWALL_SET_1 0x3001 +#define SPR_MPL_UDN_FIREWALL_SET_2 0x3002 +#define SPR_MPL_UDN_REFILL_SET_0 0x1000 +#define SPR_MPL_UDN_REFILL_SET_1 0x1001 +#define SPR_MPL_UDN_REFILL_SET_2 0x1002 +#define SPR_MPL_UDN_TIMER_SET_0 0x3600 +#define SPR_MPL_UDN_TIMER_SET_1 0x3601 +#define SPR_MPL_UDN_TIMER_SET_2 0x3602 +#define SPR_MPL_WORLD_ACCESS_SET_0 0x4e00 +#define SPR_MPL_WORLD_ACCESS_SET_1 0x4e01 +#define SPR_MPL_WORLD_ACCESS_SET_2 0x4e02 +#define SPR_PASS 0x4e0b +#define SPR_PERF_COUNT_0 0x4205 +#define SPR_PERF_COUNT_1 0x4206 +#define SPR_PERF_COUNT_CTL 0x4207 +#define SPR_PERF_COUNT_DN_CTL 0x4210 +#define SPR_PERF_COUNT_STS 0x4208 +#define SPR_PROC_STATUS 0x4f00 +#define SPR_SIM_CONTROL 0x4e0c +#define SPR_SNCTL 0x0805 +#define SPR_SNCTL__FRZFABRIC_MASK 0x1 +#define SPR_SNCTL__FRZPROC_MASK 0x2 +#define SPR_SNPC 0x080b +#define SPR_SNSTATIC 0x080c +#define SPR_SYSTEM_SAVE_0_0 0x4b00 +#define SPR_SYSTEM_SAVE_0_1 0x4b01 +#define SPR_SYSTEM_SAVE_0_2 0x4b02 +#define SPR_SYSTEM_SAVE_0_3 0x4b03 +#define SPR_SYSTEM_SAVE_1_0 0x4900 +#define SPR_SYSTEM_SAVE_1_1 0x4901 +#define SPR_SYSTEM_SAVE_1_2 0x4902 +#define SPR_SYSTEM_SAVE_1_3 0x4903 +#define SPR_SYSTEM_SAVE_2_0 0x4700 +#define SPR_SYSTEM_SAVE_2_1 0x4701 +#define SPR_SYSTEM_SAVE_2_2 0x4702 +#define SPR_SYSTEM_SAVE_2_3 0x4703 +#define SPR_TILE_COORD 0x4c17 +#define SPR_TILE_RTF_HWM 0x4e10 +#define SPR_TILE_TIMER_CONTROL 0x3205 +#define SPR_TILE_WRITE_PENDING 0x4e0f +#define SPR_UDN_AVAIL_EN 0x4005 +#define SPR_UDN_CA_DATA 0x0d00 +#define SPR_UDN_DATA_AVAIL 0x0d03 +#define SPR_UDN_DEADLOCK_TIMEOUT 0x3606 +#define SPR_UDN_DEMUX_CA_COUNT 0x0c05 +#define SPR_UDN_DEMUX_COUNT_0 0x0c06 +#define SPR_UDN_DEMUX_COUNT_1 0x0c07 +#define SPR_UDN_DEMUX_COUNT_2 0x0c08 +#define SPR_UDN_DEMUX_COUNT_3 0x0c09 +#define SPR_UDN_DEMUX_CTL 0x0c0a +#define SPR_UDN_DEMUX_QUEUE_SEL 0x0c0c +#define SPR_UDN_DEMUX_STATUS 0x0c0d +#define SPR_UDN_DEMUX_WRITE_FIFO 0x0c0e +#define SPR_UDN_DIRECTION_PROTECT 0x3005 +#define SPR_UDN_REFILL_EN 0x1005 +#define SPR_UDN_SP_FIFO_DATA 0x0c11 +#define SPR_UDN_SP_FIFO_SEL 0x0c12 +#define SPR_UDN_SP_FREEZE 0x0c13 +#define SPR_UDN_SP_FREEZE__SP_FRZ_MASK 0x1 +#define SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK 0x2 +#define SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK 0x4 +#define SPR_UDN_SP_STATE 0x0c14 +#define SPR_UDN_TAG_0 0x0c15 +#define SPR_UDN_TAG_1 0x0c16 +#define SPR_UDN_TAG_2 0x0c17 +#define SPR_UDN_TAG_3 0x0c18 +#define SPR_UDN_TAG_VALID 0x0c19 +#define SPR_UDN_TILE_COORD 0x0c1a + +#endif /* !defined(__ARCH_SPR_DEF_H__) */ + +#endif /* !defined(__DOXYGEN__) */ diff --git a/arch/tile/include/arch/spr_def_64.h b/arch/tile/include/arch/spr_def_64.h new file mode 100644 index 00000000..cd3e5f95 --- /dev/null +++ b/arch/tile/include/arch/spr_def_64.h @@ -0,0 +1,173 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef __DOXYGEN__ + +#ifndef __ARCH_SPR_DEF_H__ +#define __ARCH_SPR_DEF_H__ + +#define SPR_AUX_PERF_COUNT_0 0x2105 +#define SPR_AUX_PERF_COUNT_1 0x2106 +#define SPR_AUX_PERF_COUNT_CTL 0x2107 +#define SPR_AUX_PERF_COUNT_STS 0x2108 +#define SPR_CMPEXCH_VALUE 0x2780 +#define SPR_CYCLE 0x2781 +#define SPR_DONE 0x2705 +#define SPR_DSTREAM_PF 0x2706 +#define SPR_EVENT_BEGIN 0x2782 +#define SPR_EVENT_END 0x2783 +#define SPR_EX_CONTEXT_0_0 0x2580 +#define SPR_EX_CONTEXT_0_1 0x2581 +#define SPR_EX_CONTEXT_0_1__PL_SHIFT 0 +#define SPR_EX_CONTEXT_0_1__PL_RMASK 0x3 +#define SPR_EX_CONTEXT_0_1__PL_MASK 0x3 +#define SPR_EX_CONTEXT_0_1__ICS_SHIFT 2 +#define SPR_EX_CONTEXT_0_1__ICS_RMASK 0x1 +#define SPR_EX_CONTEXT_0_1__ICS_MASK 0x4 +#define SPR_EX_CONTEXT_1_0 0x2480 +#define SPR_EX_CONTEXT_1_1 0x2481 +#define SPR_EX_CONTEXT_1_1__PL_SHIFT 0 +#define SPR_EX_CONTEXT_1_1__PL_RMASK 0x3 +#define SPR_EX_CONTEXT_1_1__PL_MASK 0x3 +#define SPR_EX_CONTEXT_1_1__ICS_SHIFT 2 +#define SPR_EX_CONTEXT_1_1__ICS_RMASK 0x1 +#define SPR_EX_CONTEXT_1_1__ICS_MASK 0x4 +#define SPR_EX_CONTEXT_2_0 0x2380 +#define SPR_EX_CONTEXT_2_1 0x2381 +#define SPR_EX_CONTEXT_2_1__PL_SHIFT 0 +#define SPR_EX_CONTEXT_2_1__PL_RMASK 0x3 +#define SPR_EX_CONTEXT_2_1__PL_MASK 0x3 +#define SPR_EX_CONTEXT_2_1__ICS_SHIFT 2 +#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1 +#define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4 +#define SPR_FAIL 0x2707 +#define SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK 0x1 +#define SPR_INTCTRL_0_STATUS 0x2505 +#define SPR_INTCTRL_1_STATUS 0x2405 +#define SPR_INTCTRL_2_STATUS 0x2305 +#define SPR_INTERRUPT_CRITICAL_SECTION 0x2708 +#define SPR_INTERRUPT_MASK_0 0x2506 +#define SPR_INTERRUPT_MASK_1 0x2406 +#define SPR_INTERRUPT_MASK_2 0x2306 +#define SPR_INTERRUPT_MASK_RESET_0 0x2507 +#define SPR_INTERRUPT_MASK_RESET_1 0x2407 +#define SPR_INTERRUPT_MASK_RESET_2 0x2307 +#define SPR_INTERRUPT_MASK_SET_0 0x2508 +#define SPR_INTERRUPT_MASK_SET_1 0x2408 +#define SPR_INTERRUPT_MASK_SET_2 0x2308 +#define SPR_INTERRUPT_VECTOR_BASE_0 0x2509 +#define SPR_INTERRUPT_VECTOR_BASE_1 0x2409 +#define SPR_INTERRUPT_VECTOR_BASE_2 0x2309 +#define SPR_INTERRUPT_VECTOR_BASE_3 0x2209 +#define SPR_IPI_EVENT_0 0x1f05 +#define SPR_IPI_EVENT_1 0x1e05 +#define SPR_IPI_EVENT_2 0x1d05 +#define SPR_IPI_EVENT_RESET_0 0x1f06 +#define SPR_IPI_EVENT_RESET_1 0x1e06 +#define SPR_IPI_EVENT_RESET_2 0x1d06 +#define SPR_IPI_EVENT_SET_0 0x1f07 +#define SPR_IPI_EVENT_SET_1 0x1e07 +#define SPR_IPI_EVENT_SET_2 0x1d07 +#define SPR_IPI_MASK_0 0x1f08 +#define SPR_IPI_MASK_1 0x1e08 +#define SPR_IPI_MASK_2 0x1d08 +#define SPR_IPI_MASK_RESET_0 0x1f09 +#define SPR_IPI_MASK_RESET_1 0x1e09 +#define SPR_IPI_MASK_RESET_2 0x1d09 +#define SPR_IPI_MASK_SET_0 0x1f0a +#define SPR_IPI_MASK_SET_1 0x1e0a +#define SPR_IPI_MASK_SET_2 0x1d0a +#define SPR_MPL_AUX_TILE_TIMER_SET_0 0x1700 +#define SPR_MPL_AUX_TILE_TIMER_SET_1 0x1701 +#define SPR_MPL_AUX_TILE_TIMER_SET_2 0x1702 +#define SPR_MPL_INTCTRL_0_SET_0 0x2500 +#define SPR_MPL_INTCTRL_0_SET_1 0x2501 +#define SPR_MPL_INTCTRL_0_SET_2 0x2502 +#define SPR_MPL_INTCTRL_1_SET_0 0x2400 +#define SPR_MPL_INTCTRL_1_SET_1 0x2401 +#define SPR_MPL_INTCTRL_1_SET_2 0x2402 +#define SPR_MPL_INTCTRL_2_SET_0 0x2300 +#define SPR_MPL_INTCTRL_2_SET_1 0x2301 +#define SPR_MPL_INTCTRL_2_SET_2 0x2302 +#define SPR_MPL_UDN_ACCESS_SET_0 0x0b00 +#define SPR_MPL_UDN_ACCESS_SET_1 0x0b01 +#define SPR_MPL_UDN_ACCESS_SET_2 0x0b02 +#define SPR_MPL_UDN_AVAIL_SET_0 0x1b00 +#define SPR_MPL_UDN_AVAIL_SET_1 0x1b01 +#define SPR_MPL_UDN_AVAIL_SET_2 0x1b02 +#define SPR_MPL_UDN_COMPLETE_SET_0 0x0600 +#define SPR_MPL_UDN_COMPLETE_SET_1 0x0601 +#define SPR_MPL_UDN_COMPLETE_SET_2 0x0602 +#define SPR_MPL_UDN_FIREWALL_SET_0 0x1500 +#define SPR_MPL_UDN_FIREWALL_SET_1 0x1501 +#define SPR_MPL_UDN_FIREWALL_SET_2 0x1502 +#define SPR_MPL_UDN_TIMER_SET_0 0x1900 +#define SPR_MPL_UDN_TIMER_SET_1 0x1901 +#define SPR_MPL_UDN_TIMER_SET_2 0x1902 +#define SPR_MPL_WORLD_ACCESS_SET_0 0x2700 +#define SPR_MPL_WORLD_ACCESS_SET_1 0x2701 +#define SPR_MPL_WORLD_ACCESS_SET_2 0x2702 +#define SPR_PASS 0x2709 +#define SPR_PERF_COUNT_0 0x2005 +#define SPR_PERF_COUNT_1 0x2006 +#define SPR_PERF_COUNT_CTL 0x2007 +#define SPR_PERF_COUNT_DN_CTL 0x2008 +#define SPR_PERF_COUNT_STS 0x2009 +#define SPR_PROC_STATUS 0x2784 +#define SPR_SIM_CONTROL 0x2785 +#define SPR_SINGLE_STEP_CONTROL_0 0x0405 +#define SPR_SINGLE_STEP_CONTROL_0__CANCELED_MASK 0x1 +#define SPR_SINGLE_STEP_CONTROL_0__INHIBIT_MASK 0x2 +#define SPR_SINGLE_STEP_CONTROL_1 0x0305 +#define SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK 0x1 +#define SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK 0x2 +#define SPR_SINGLE_STEP_CONTROL_2 0x0205 +#define SPR_SINGLE_STEP_CONTROL_2__CANCELED_MASK 0x1 +#define SPR_SINGLE_STEP_CONTROL_2__INHIBIT_MASK 0x2 +#define SPR_SINGLE_STEP_EN_0_0 0x250a +#define SPR_SINGLE_STEP_EN_0_1 0x240a +#define SPR_SINGLE_STEP_EN_0_2 0x230a +#define SPR_SINGLE_STEP_EN_1_0 0x250b +#define SPR_SINGLE_STEP_EN_1_1 0x240b +#define SPR_SINGLE_STEP_EN_1_2 0x230b +#define SPR_SINGLE_STEP_EN_2_0 0x250c +#define SPR_SINGLE_STEP_EN_2_1 0x240c +#define SPR_SINGLE_STEP_EN_2_2 0x230c +#define SPR_SYSTEM_SAVE_0_0 0x2582 +#define SPR_SYSTEM_SAVE_0_1 0x2583 +#define SPR_SYSTEM_SAVE_0_2 0x2584 +#define SPR_SYSTEM_SAVE_0_3 0x2585 +#define SPR_SYSTEM_SAVE_1_0 0x2482 +#define SPR_SYSTEM_SAVE_1_1 0x2483 +#define SPR_SYSTEM_SAVE_1_2 0x2484 +#define SPR_SYSTEM_SAVE_1_3 0x2485 +#define SPR_SYSTEM_SAVE_2_0 0x2382 +#define SPR_SYSTEM_SAVE_2_1 0x2383 +#define SPR_SYSTEM_SAVE_2_2 0x2384 +#define SPR_SYSTEM_SAVE_2_3 0x2385 +#define SPR_TILE_COORD 0x270b +#define SPR_TILE_RTF_HWM 0x270c +#define SPR_TILE_TIMER_CONTROL 0x1605 +#define SPR_UDN_AVAIL_EN 0x1b05 +#define SPR_UDN_DATA_AVAIL 0x0b80 +#define SPR_UDN_DEADLOCK_TIMEOUT 0x1906 +#define SPR_UDN_DEMUX_COUNT_0 0x0b05 +#define SPR_UDN_DEMUX_COUNT_1 0x0b06 +#define SPR_UDN_DEMUX_COUNT_2 0x0b07 +#define SPR_UDN_DEMUX_COUNT_3 0x0b08 +#define SPR_UDN_DIRECTION_PROTECT 0x1505 + +#endif /* !defined(__ARCH_SPR_DEF_H__) */ + +#endif /* !defined(__DOXYGEN__) */ diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild new file mode 100644 index 00000000..0bb42642 --- /dev/null +++ b/arch/tile/include/asm/Kbuild @@ -0,0 +1,44 @@ +include include/asm-generic/Kbuild.asm + +header-y += ../arch/ + +header-y += ucontext.h +header-y += hardwall.h + +generic-y += bug.h +generic-y += bugs.h +generic-y += cputime.h +generic-y += device.h +generic-y += div64.h +generic-y += emergency-restart.h +generic-y += errno.h +generic-y += fb.h +generic-y += fcntl.h +generic-y += ioctl.h +generic-y += ioctls.h +generic-y += ipc.h +generic-y += ipcbuf.h +generic-y += irq_regs.h +generic-y += kdebug.h +generic-y += local.h +generic-y += module.h +generic-y += msgbuf.h +generic-y += mutex.h +generic-y += param.h +generic-y += parport.h +generic-y += poll.h +generic-y += posix_types.h +generic-y += resource.h +generic-y += scatterlist.h +generic-y += sembuf.h +generic-y += serial.h +generic-y += shmbuf.h +generic-y += shmparam.h +generic-y += socket.h +generic-y += sockios.h +generic-y += statfs.h +generic-y += termbits.h +generic-y += termios.h +generic-y += types.h +generic-y += ucontext.h +generic-y += xor.h diff --git a/arch/tile/include/asm/asm-offsets.h b/arch/tile/include/asm/asm-offsets.h new file mode 100644 index 00000000..d370ee36 --- /dev/null +++ b/arch/tile/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include <generated/asm-offsets.h> diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h new file mode 100644 index 00000000..f2461429 --- /dev/null +++ b/arch/tile/include/asm/atomic.h @@ -0,0 +1,134 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Atomic primitives. + */ + +#ifndef _ASM_TILE_ATOMIC_H +#define _ASM_TILE_ATOMIC_H + +#include <asm/cmpxchg.h> + +#ifndef __ASSEMBLY__ + +#include <linux/compiler.h> +#include <linux/types.h> + +#define ATOMIC_INIT(i) { (i) } + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +static inline int atomic_read(const atomic_t *v) +{ + return ACCESS_ONCE(v->counter); +} + +/** + * atomic_sub_return - subtract integer and return + * @v: pointer of type atomic_t + * @i: integer value to subtract + * + * Atomically subtracts @i from @v and returns @v - @i + */ +#define atomic_sub_return(i, v) atomic_add_return((int)(-(i)), (v)) + +/** + * atomic_sub - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +#define atomic_sub(i, v) atomic_add((int)(-(i)), (v)) + +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns true if the result is + * zero, or false for all other cases. + */ +#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) + +/** + * atomic_inc_return - increment memory and return + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 and returns the new value. + */ +#define atomic_inc_return(v) atomic_add_return(1, (v)) + +/** + * atomic_dec_return - decrement memory and return + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and returns the new value. + */ +#define atomic_dec_return(v) atomic_sub_return(1, (v)) + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +#define atomic_inc(v) atomic_add(1, (v)) + +/** + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. + */ +#define atomic_dec(v) atomic_sub(1, (v)) + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and returns true if the result is 0. + */ +#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0) + +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 and returns true if the result is 0. + */ +#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) + +/** + * atomic_add_negative - add and test if negative + * @v: pointer of type atomic_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns true if the result is + * negative, or false when result is greater than or equal to zero. + */ +#define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0) + +#endif /* __ASSEMBLY__ */ + +#ifndef __tilegx__ +#include <asm/atomic_32.h> +#else +#include <asm/atomic_64.h> +#endif + +#endif /* _ASM_TILE_ATOMIC_H */ diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h new file mode 100644 index 00000000..54d1da82 --- /dev/null +++ b/arch/tile/include/asm/atomic_32.h @@ -0,0 +1,324 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Do not include directly; use <linux/atomic.h>. + */ + +#ifndef _ASM_TILE_ATOMIC_32_H +#define _ASM_TILE_ATOMIC_32_H + +#include <asm/barrier.h> +#include <arch/chip.h> + +#ifndef __ASSEMBLY__ + +/* Tile-specific routines to support <linux/atomic.h>. */ +int _atomic_xchg(atomic_t *v, int n); +int _atomic_xchg_add(atomic_t *v, int i); +int _atomic_xchg_add_unless(atomic_t *v, int a, int u); +int _atomic_cmpxchg(atomic_t *v, int o, int n); + +/** + * atomic_xchg - atomically exchange contents of memory with a new value + * @v: pointer of type atomic_t + * @i: integer value to store in memory + * + * Atomically sets @v to @i and returns old @v + */ +static inline int atomic_xchg(atomic_t *v, int n) +{ + smp_mb(); /* barrier for proper semantics */ + return _atomic_xchg(v, n); +} + +/** + * atomic_cmpxchg - atomically exchange contents of memory if it matches + * @v: pointer of type atomic_t + * @o: old value that memory should have + * @n: new value to write to memory if it matches + * + * Atomically checks if @v holds @o and replaces it with @n if so. + * Returns the old value at @v. + */ +static inline int atomic_cmpxchg(atomic_t *v, int o, int n) +{ + smp_mb(); /* barrier for proper semantics */ + return _atomic_cmpxchg(v, o, n); +} + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. + */ +static inline void atomic_add(int i, atomic_t *v) +{ + _atomic_xchg_add(v, i); +} + +/** + * atomic_add_return - add integer and return + * @v: pointer of type atomic_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline int atomic_add_return(int i, atomic_t *v) +{ + smp_mb(); /* barrier for proper semantics */ + return _atomic_xchg_add(v, i) + i; +} + +/** + * __atomic_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as @v was not already @u. + * Returns the old value of @v. + */ +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + smp_mb(); /* barrier for proper semantics */ + return _atomic_xchg_add_unless(v, a, u); +} + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + * + * atomic_set() can't be just a raw store, since it would be lost if it + * fell between the load and store of one of the other atomic ops. + */ +static inline void atomic_set(atomic_t *v, int n) +{ + _atomic_xchg(v, n); +} + +/* A 64bit atomic type */ + +typedef struct { + u64 __aligned(8) counter; +} atomic64_t; + +#define ATOMIC64_INIT(val) { (val) } + +u64 _atomic64_xchg(atomic64_t *v, u64 n); +u64 _atomic64_xchg_add(atomic64_t *v, u64 i); +u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u); +u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n); + +/** + * atomic64_read - read atomic variable + * @v: pointer of type atomic64_t + * + * Atomically reads the value of @v. + */ +static inline u64 atomic64_read(const atomic64_t *v) +{ + /* + * Requires an atomic op to read both 32-bit parts consistently. + * Casting away const is safe since the atomic support routines + * do not write to memory if the value has not been modified. + */ + return _atomic64_xchg_add((atomic64_t *)v, 0); +} + +/** + * atomic64_xchg - atomically exchange contents of memory with a new value + * @v: pointer of type atomic64_t + * @i: integer value to store in memory + * + * Atomically sets @v to @i and returns old @v + */ +static inline u64 atomic64_xchg(atomic64_t *v, u64 n) +{ + smp_mb(); /* barrier for proper semantics */ + return _atomic64_xchg(v, n); +} + +/** + * atomic64_cmpxchg - atomically exchange contents of memory if it matches + * @v: pointer of type atomic64_t + * @o: old value that memory should have + * @n: new value to write to memory if it matches + * + * Atomically checks if @v holds @o and replaces it with @n if so. + * Returns the old value at @v. + */ +static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) +{ + smp_mb(); /* barrier for proper semantics */ + return _atomic64_cmpxchg(v, o, n); +} + +/** + * atomic64_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic64_t + * + * Atomically adds @i to @v. + */ +static inline void atomic64_add(u64 i, atomic64_t *v) +{ + _atomic64_xchg_add(v, i); +} + +/** + * atomic64_add_return - add integer and return + * @v: pointer of type atomic64_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline u64 atomic64_add_return(u64 i, atomic64_t *v) +{ + smp_mb(); /* barrier for proper semantics */ + return _atomic64_xchg_add(v, i) + i; +} + +/** + * atomic64_add_unless - add unless the number is already a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as @v was not already @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) +{ + smp_mb(); /* barrier for proper semantics */ + return _atomic64_xchg_add_unless(v, a, u) != u; +} + +/** + * atomic64_set - set atomic variable + * @v: pointer of type atomic64_t + * @i: required value + * + * Atomically sets the value of @v to @i. + * + * atomic64_set() can't be just a raw store, since it would be lost if it + * fell between the load and store of one of the other atomic ops. + */ +static inline void atomic64_set(atomic64_t *v, u64 n) +{ + _atomic64_xchg(v, n); +} + +#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) +#define atomic64_inc(v) atomic64_add(1LL, (v)) +#define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) +#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) +#define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) +#define atomic64_sub(i, v) atomic64_add(-(i), (v)) +#define atomic64_dec(v) atomic64_sub(1LL, (v)) +#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) +#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) + +/* + * We need to barrier before modifying the word, since the _atomic_xxx() + * routines just tns the lock and then read/modify/write of the word. + * But after the word is updated, the routine issues an "mf" before returning, + * and since it's a function call, we don't even need a compiler barrier. + */ +#define smp_mb__before_atomic_dec() smp_mb() +#define smp_mb__before_atomic_inc() smp_mb() +#define smp_mb__after_atomic_dec() do { } while (0) +#define smp_mb__after_atomic_inc() do { } while (0) + +#endif /* !__ASSEMBLY__ */ + +/* + * Internal definitions only beyond this point. + */ + +#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \ + (!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP)) + +#if ATOMIC_LOCKS_FOUND_VIA_TABLE() + +/* Number of entries in atomic_lock_ptr[]. */ +#define ATOMIC_HASH_L1_SHIFT 6 +#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT) + +/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */ +#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2) +#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT) + +#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ + +/* + * Number of atomic locks in atomic_locks[]. Must be a power of two. + * There is no reason for more than PAGE_SIZE / 8 entries, since that + * is the maximum number of pointer bits we can use to index this. + * And we cannot have more than PAGE_SIZE / 4, since this has to + * fit on a single page and each entry takes 4 bytes. + */ +#define ATOMIC_HASH_SHIFT (PAGE_SHIFT - 3) +#define ATOMIC_HASH_SIZE (1 << ATOMIC_HASH_SHIFT) + +#ifndef __ASSEMBLY__ +extern int atomic_locks[]; +#endif + +#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ + +/* + * All the code that may fault while holding an atomic lock must + * place the pointer to the lock in ATOMIC_LOCK_REG so the fault code + * can correctly release and reacquire the lock. Note that we + * mention the register number in a comment in "lib/atomic_asm.S" to help + * assembly coders from using this register by mistake, so if it + * is changed here, change that comment as well. + */ +#define ATOMIC_LOCK_REG 20 +#define ATOMIC_LOCK_REG_NAME r20 + +#ifndef __ASSEMBLY__ +/* Called from setup to initialize a hash table to point to per_cpu locks. */ +void __init_atomic_per_cpu(void); + +#ifdef CONFIG_SMP +/* Support releasing the atomic lock in do_page_fault_ics(). */ +void __atomic_fault_unlock(int *lock_ptr); +#endif + +/* Private helper routines in lib/atomic_asm_32.S */ +extern struct __get_user __atomic_cmpxchg(volatile int *p, + int *lock, int o, int n); +extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_xchg_add_unless(volatile int *p, + int *lock, int o, int n); +extern struct __get_user __atomic_or(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n); +extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n); +extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n); +extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n); +extern u64 __atomic64_xchg_add_unless(volatile u64 *p, + int *lock, u64 o, u64 n); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_TILE_ATOMIC_32_H */ diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h new file mode 100644 index 00000000..f4500c68 --- /dev/null +++ b/arch/tile/include/asm/atomic_64.h @@ -0,0 +1,157 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Do not include directly; use <linux/atomic.h>. + */ + +#ifndef _ASM_TILE_ATOMIC_64_H +#define _ASM_TILE_ATOMIC_64_H + +#ifndef __ASSEMBLY__ + +#include <asm/barrier.h> +#include <arch/spr_def.h> + +/* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */ + +#define atomic_set(v, i) ((v)->counter = (i)) + +/* + * The smp_mb() operations throughout are to support the fact that + * Linux requires memory barriers before and after the operation, + * on any routine which updates memory and returns a value. + */ + +static inline int atomic_cmpxchg(atomic_t *v, int o, int n) +{ + int val; + __insn_mtspr(SPR_CMPEXCH_VALUE, o); + smp_mb(); /* barrier for proper semantics */ + val = __insn_cmpexch4((void *)&v->counter, n); + smp_mb(); /* barrier for proper semantics */ + return val; +} + +static inline int atomic_xchg(atomic_t *v, int n) +{ + int val; + smp_mb(); /* barrier for proper semantics */ + val = __insn_exch4((void *)&v->counter, n); + smp_mb(); /* barrier for proper semantics */ + return val; +} + +static inline void atomic_add(int i, atomic_t *v) +{ + __insn_fetchadd4((void *)&v->counter, i); +} + +static inline int atomic_add_return(int i, atomic_t *v) +{ + int val; + smp_mb(); /* barrier for proper semantics */ + val = __insn_fetchadd4((void *)&v->counter, i) + i; + barrier(); /* the "+ i" above will wait on memory */ + return val; +} + +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = atomic_cmpxchg(v, guess, guess + a); + } while (guess != oldval); + return oldval; +} + +/* Now the true 64-bit operations. */ + +#define ATOMIC64_INIT(i) { (i) } + +#define atomic64_read(v) ((v)->counter) +#define atomic64_set(v, i) ((v)->counter = (i)) + +static inline long atomic64_cmpxchg(atomic64_t *v, long o, long n) +{ + long val; + smp_mb(); /* barrier for proper semantics */ + __insn_mtspr(SPR_CMPEXCH_VALUE, o); + val = __insn_cmpexch((void *)&v->counter, n); + smp_mb(); /* barrier for proper semantics */ + return val; +} + +static inline long atomic64_xchg(atomic64_t *v, long n) +{ + long val; + smp_mb(); /* barrier for proper semantics */ + val = __insn_exch((void *)&v->counter, n); + smp_mb(); /* barrier for proper semantics */ + return val; +} + +static inline void atomic64_add(long i, atomic64_t *v) +{ + __insn_fetchadd((void *)&v->counter, i); +} + +static inline long atomic64_add_return(long i, atomic64_t *v) +{ + int val; + smp_mb(); /* barrier for proper semantics */ + val = __insn_fetchadd((void *)&v->counter, i) + i; + barrier(); /* the "+ i" above will wait on memory */ + return val; +} + +static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = atomic64_cmpxchg(v, guess, guess + a); + } while (guess != oldval); + return oldval != u; +} + +#define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_sub(i, v) atomic64_add(-(i), (v)) +#define atomic64_inc_return(v) atomic64_add_return(1, (v)) +#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_inc(v) atomic64_add(1, (v)) +#define atomic64_dec(v) atomic64_sub(1, (v)) + +#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) +#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0) +#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0) +#define atomic64_add_negative(i, v) (atomic64_add_return((i), (v)) < 0) + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + +/* Atomic dec and inc don't implement barrier, so provide them if needed. */ +#define smp_mb__before_atomic_dec() smp_mb() +#define smp_mb__after_atomic_dec() smp_mb() +#define smp_mb__before_atomic_inc() smp_mb() +#define smp_mb__after_atomic_inc() smp_mb() + +/* Define this to indicate that cmpxchg is an efficient operation. */ +#define __HAVE_ARCH_CMPXCHG + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_TILE_ATOMIC_64_H */ diff --git a/arch/tile/include/asm/auxvec.h b/arch/tile/include/asm/auxvec.h new file mode 100644 index 00000000..1d393edb --- /dev/null +++ b/arch/tile/include/asm/auxvec.h @@ -0,0 +1,20 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_AUXVEC_H +#define _ASM_TILE_AUXVEC_H + +/* No extensions to auxvec */ + +#endif /* _ASM_TILE_AUXVEC_H */ diff --git a/arch/tile/include/asm/backtrace.h b/arch/tile/include/asm/backtrace.h new file mode 100644 index 00000000..bd5399a6 --- /dev/null +++ b/arch/tile/include/asm/backtrace.h @@ -0,0 +1,162 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_BACKTRACE_H +#define _ASM_TILE_BACKTRACE_H + +#include <linux/types.h> + +/* Reads 'size' bytes from 'address' and writes the data to 'result'. + * Returns true if successful, else false (e.g. memory not readable). + */ +typedef bool (*BacktraceMemoryReader)(void *result, + unsigned long address, + unsigned int size, + void *extra); + +typedef struct { + /* Current PC. */ + unsigned long pc; + + /* Current stack pointer value. */ + unsigned long sp; + + /* Current frame pointer value (i.e. caller's stack pointer) */ + unsigned long fp; + + /* Internal use only: caller's PC for first frame. */ + unsigned long initial_frame_caller_pc; + + /* Internal use only: callback to read memory. */ + BacktraceMemoryReader read_memory_func; + + /* Internal use only: arbitrary argument to read_memory_func. */ + void *read_memory_func_extra; + +} BacktraceIterator; + + +typedef enum { + + /* We have no idea what the caller's pc is. */ + PC_LOC_UNKNOWN, + + /* The caller's pc is currently in lr. */ + PC_LOC_IN_LR, + + /* The caller's pc can be found by dereferencing the caller's sp. */ + PC_LOC_ON_STACK + +} CallerPCLocation; + + +typedef enum { + + /* We have no idea what the caller's sp is. */ + SP_LOC_UNKNOWN, + + /* The caller's sp is currently in r52. */ + SP_LOC_IN_R52, + + /* The caller's sp can be found by adding a certain constant + * to the current value of sp. + */ + SP_LOC_OFFSET + +} CallerSPLocation; + + +/* Bit values ORed into CALLER_* values for info ops. */ +enum { + /* Setting the low bit on any of these values means the info op + * applies only to one bundle ago. + */ + ONE_BUNDLE_AGO_FLAG = 1, + + /* Setting this bit on a CALLER_SP_* value means the PC is in LR. + * If not set, PC is on the stack. + */ + PC_IN_LR_FLAG = 2, + + /* This many of the low bits of a CALLER_SP_* value are for the + * flag bits above. + */ + NUM_INFO_OP_FLAGS = 2, + + /* We cannot have one in the memory pipe so this is the maximum. */ + MAX_INFO_OPS_PER_BUNDLE = 2 +}; + + +/* Internal constants used to define 'info' operands. */ +enum { + /* 0 and 1 are reserved, as are all negative numbers. */ + + CALLER_UNKNOWN_BASE = 2, + + CALLER_SP_IN_R52_BASE = 4, + + CALLER_SP_OFFSET_BASE = 8, +}; + + +/* Current backtracer state describing where it thinks the caller is. */ +typedef struct { + /* + * Public fields + */ + + /* How do we find the caller's PC? */ + CallerPCLocation pc_location : 8; + + /* How do we find the caller's SP? */ + CallerSPLocation sp_location : 8; + + /* If sp_location == SP_LOC_OFFSET, then caller_sp == sp + + * loc->sp_offset. Else this field is undefined. + */ + uint16_t sp_offset; + + /* In the most recently visited bundle a terminating bundle? */ + bool at_terminating_bundle; + + /* + * Private fields + */ + + /* Will the forward scanner see someone clobbering sp + * (i.e. changing it with something other than addi sp, sp, N?) + */ + bool sp_clobber_follows; + + /* Operand to next "visible" info op (no more than one bundle past + * the next terminating bundle), or -32768 if none. + */ + int16_t next_info_operand; + + /* Is the info of in next_info_op in the very next bundle? */ + bool is_next_info_operand_adjacent; + +} CallerLocation; + +extern void backtrace_init(BacktraceIterator *state, + BacktraceMemoryReader read_memory_func, + void *read_memory_func_extra, + unsigned long pc, unsigned long lr, + unsigned long sp, unsigned long r52); + + +extern bool backtrace_next(BacktraceIterator *state); + +#endif /* _ASM_TILE_BACKTRACE_H */ diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h new file mode 100644 index 00000000..990a217a --- /dev/null +++ b/arch/tile/include/asm/barrier.h @@ -0,0 +1,148 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_BARRIER_H +#define _ASM_TILE_BARRIER_H + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> +#include <arch/chip.h> +#include <arch/spr_def.h> +#include <asm/timex.h> + +/* + * read_barrier_depends - Flush all pending reads that subsequents reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data return by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than is + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. + * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * <programlisting> + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * </programlisting> + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends(). However, + * the following code, with the same initial values for "a" and "b": + * + * <programlisting> + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * </programlisting> + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + */ +#define read_barrier_depends() do { } while (0) + +#define __sync() __insn_mf() + +#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() +#include <hv/syscall_public.h> +/* + * Issue an uncacheable load to each memory controller, then + * wait until those loads have completed. + */ +static inline void __mb_incoherent(void) +{ + long clobber_r10; + asm volatile("swint2" + : "=R10" (clobber_r10) + : "R10" (HV_SYS_fence_incoherent) + : "r0", "r1", "r2", "r3", "r4", + "r5", "r6", "r7", "r8", "r9", + "r11", "r12", "r13", "r14", + "r15", "r16", "r17", "r18", "r19", + "r20", "r21", "r22", "r23", "r24", + "r25", "r26", "r27", "r28", "r29"); +} +#endif + +/* Fence to guarantee visibility of stores to incoherent memory. */ +static inline void +mb_incoherent(void) +{ + __insn_mf(); + +#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() + { +#if CHIP_HAS_TILE_WRITE_PENDING() + const unsigned long WRITE_TIMEOUT_CYCLES = 400; + unsigned long start = get_cycles_low(); + do { + if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0) + return; + } while ((get_cycles_low() - start) < WRITE_TIMEOUT_CYCLES); +#endif /* CHIP_HAS_TILE_WRITE_PENDING() */ + (void) __mb_incoherent(); + } +#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */ +} + +#define fast_wmb() __sync() +#define fast_rmb() __sync() +#define fast_mb() __sync() +#define fast_iob() mb_incoherent() + +#define wmb() fast_wmb() +#define rmb() fast_rmb() +#define mb() fast_mb() +#define iob() fast_iob() + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_read_barrier_depends() read_barrier_depends() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#endif + +#define set_mb(var, value) \ + do { var = value; mb(); } while (0) + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_TILE_BARRIER_H */ diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h new file mode 100644 index 00000000..bd186c4e --- /dev/null +++ b/arch/tile/include/asm/bitops.h @@ -0,0 +1,128 @@ +/* + * Copyright 1992, Linus Torvalds. + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_BITOPS_H +#define _ASM_TILE_BITOPS_H + +#include <linux/types.h> + +#ifndef _LINUX_BITOPS_H +#error only <linux/bitops.h> can be included directly +#endif + +#ifdef __tilegx__ +#include <asm/bitops_64.h> +#else +#include <asm/bitops_32.h> +#endif + +/** + * __ffs - find first set bit in word + * @word: The word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +static inline unsigned long __ffs(unsigned long word) +{ + return __builtin_ctzl(word); +} + +/** + * ffz - find first zero bit in word + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. + */ +static inline unsigned long ffz(unsigned long word) +{ + return __builtin_ctzl(~word); +} + +/** + * __fls - find last set bit in word + * @word: The word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +static inline unsigned long __fls(unsigned long word) +{ + return (sizeof(word) * 8) - 1 - __builtin_clzl(word); +} + +/** + * ffs - find first set bit in word + * @x: the word to search + * + * This is defined the same way as the libc and compiler builtin ffs + * routines, therefore differs in spirit from the other bitops. + * + * ffs(value) returns 0 if value is 0 or the position of the first + * set bit if value is nonzero. The first (least significant) bit + * is at position 1. + */ +static inline int ffs(int x) +{ + return __builtin_ffs(x); +} + +static inline int fls64(__u64 w) +{ + return (sizeof(__u64) * 8) - __builtin_clzll(w); +} + +/** + * fls - find last set bit in word + * @x: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffs, but returns the position of the most significant set bit. + * + * fls(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 32. + */ +static inline int fls(int x) +{ + return fls64((unsigned int) x); +} + +static inline unsigned int __arch_hweight32(unsigned int w) +{ + return __builtin_popcount(w); +} + +static inline unsigned int __arch_hweight16(unsigned int w) +{ + return __builtin_popcount(w & 0xffff); +} + +static inline unsigned int __arch_hweight8(unsigned int w) +{ + return __builtin_popcount(w & 0xff); +} + +static inline unsigned long __arch_hweight64(__u64 w) +{ + return __builtin_popcountll(w); +} + +#include <asm-generic/bitops/const_hweight.h> +#include <asm-generic/bitops/lock.h> +#include <asm-generic/bitops/find.h> +#include <asm-generic/bitops/sched.h> +#include <asm-generic/bitops/non-atomic.h> +#include <asm-generic/bitops/le.h> + +#endif /* _ASM_TILE_BITOPS_H */ diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h new file mode 100644 index 00000000..ddc4c1ef --- /dev/null +++ b/arch/tile/include/asm/bitops_32.h @@ -0,0 +1,130 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_BITOPS_32_H +#define _ASM_TILE_BITOPS_32_H + +#include <linux/compiler.h> +#include <linux/atomic.h> + +/* Tile-specific routines to support <asm/bitops.h>. */ +unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. + * See __set_bit() if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void set_bit(unsigned nr, volatile unsigned long *addr) +{ + _atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. + * See __clear_bit() if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + * + * clear_bit() may not contain a memory barrier, so if it is used for + * locking purposes, you should call smp_mb__before_clear_bit() and/or + * smp_mb__after_clear_bit() to ensure changes are visible on other cpus. + */ +static inline void clear_bit(unsigned nr, volatile unsigned long *addr) +{ + _atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); +} + +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * See __change_bit() if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void change_bit(unsigned nr, volatile unsigned long *addr) +{ + _atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + addr += BIT_WORD(nr); + smp_mb(); /* barrier for proper semantics */ + return (_atomic_or(addr, mask) & mask) != 0; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + addr += BIT_WORD(nr); + smp_mb(); /* barrier for proper semantics */ + return (_atomic_andn(addr, mask) & mask) != 0; +} + +/** + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_change_bit(unsigned nr, + volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + addr += BIT_WORD(nr); + smp_mb(); /* barrier for proper semantics */ + return (_atomic_xor(addr, mask) & mask) != 0; +} + +/* See discussion at smp_mb__before_atomic_dec() in <asm/atomic_32.h>. */ +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() do {} while (0) + +#include <asm-generic/bitops/ext2-atomic.h> + +#endif /* _ASM_TILE_BITOPS_32_H */ diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h new file mode 100644 index 00000000..60b87ee5 --- /dev/null +++ b/arch/tile/include/asm/bitops_64.h @@ -0,0 +1,101 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_BITOPS_64_H +#define _ASM_TILE_BITOPS_64_H + +#include <linux/compiler.h> +#include <linux/atomic.h> + +/* See <asm/bitops.h> for API comments. */ + +static inline void set_bit(unsigned nr, volatile unsigned long *addr) +{ + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + __insn_fetchor((void *)(addr + nr / BITS_PER_LONG), mask); +} + +static inline void clear_bit(unsigned nr, volatile unsigned long *addr) +{ + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + __insn_fetchand((void *)(addr + nr / BITS_PER_LONG), ~mask); +} + +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() + + +static inline void change_bit(unsigned nr, volatile unsigned long *addr) +{ + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + unsigned long guess, oldval; + addr += nr / BITS_PER_LONG; + oldval = *addr; + do { + guess = oldval; + oldval = atomic64_cmpxchg((atomic64_t *)addr, + guess, guess ^ mask); + } while (guess != oldval); +} + + +/* + * The test_and_xxx_bit() routines require a memory fence before we + * start the operation, and after the operation completes. We use + * smp_mb() before, and rely on the "!= 0" comparison, plus a compiler + * barrier(), to block until the atomic op is complete. + */ + +static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr) +{ + int val; + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + smp_mb(); /* barrier for proper semantics */ + val = (__insn_fetchor((void *)(addr + nr / BITS_PER_LONG), mask) + & mask) != 0; + barrier(); + return val; +} + + +static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr) +{ + int val; + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + smp_mb(); /* barrier for proper semantics */ + val = (__insn_fetchand((void *)(addr + nr / BITS_PER_LONG), ~mask) + & mask) != 0; + barrier(); + return val; +} + + +static inline int test_and_change_bit(unsigned nr, + volatile unsigned long *addr) +{ + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + unsigned long guess, oldval; + addr += nr / BITS_PER_LONG; + oldval = *addr; + do { + guess = oldval; + oldval = atomic64_cmpxchg((atomic64_t *)addr, + guess, guess ^ mask); + } while (guess != oldval); + return (oldval & mask) != 0; +} + +#include <asm-generic/bitops/ext2-atomic-setbit.h> + +#endif /* _ASM_TILE_BITOPS_64_H */ diff --git a/arch/tile/include/asm/bitsperlong.h b/arch/tile/include/asm/bitsperlong.h new file mode 100644 index 00000000..58c771f2 --- /dev/null +++ b/arch/tile/include/asm/bitsperlong.h @@ -0,0 +1,26 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_BITSPERLONG_H +#define _ASM_TILE_BITSPERLONG_H + +#ifdef __LP64__ +# define __BITS_PER_LONG 64 +#else +# define __BITS_PER_LONG 32 +#endif + +#include <asm-generic/bitsperlong.h> + +#endif /* _ASM_TILE_BITSPERLONG_H */ diff --git a/arch/tile/include/asm/byteorder.h b/arch/tile/include/asm/byteorder.h new file mode 100644 index 00000000..9558416d --- /dev/null +++ b/arch/tile/include/asm/byteorder.h @@ -0,0 +1 @@ +#include <linux/byteorder/little_endian.h> diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h new file mode 100644 index 00000000..392e5333 --- /dev/null +++ b/arch/tile/include/asm/cache.h @@ -0,0 +1,51 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_CACHE_H +#define _ASM_TILE_CACHE_H + +#include <arch/chip.h> + +/* bytes per L1 data cache line */ +#define L1_CACHE_SHIFT CHIP_L1D_LOG_LINE_SIZE() +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +/* bytes per L2 cache line */ +#define L2_CACHE_SHIFT CHIP_L2_LOG_LINE_SIZE() +#define L2_CACHE_BYTES (1 << L2_CACHE_SHIFT) +#define L2_CACHE_ALIGN(x) (((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES) + +/* + * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN. + */ +#ifndef __tilegx__ +#define ARCH_DMA_MINALIGN L2_CACHE_BYTES +#endif + +/* use the cache line size for the L2, which is where it counts */ +#define SMP_CACHE_BYTES_SHIFT L2_CACHE_SHIFT +#define SMP_CACHE_BYTES L2_CACHE_BYTES +#define INTERNODE_CACHE_SHIFT L2_CACHE_SHIFT +#define INTERNODE_CACHE_BYTES L2_CACHE_BYTES + +/* Group together read-mostly things to avoid cache false sharing */ +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) + +/* + * Attribute for data that is kept read/write coherent until the end of + * initialization, then bumped to read/only incoherent for performance. + */ +#define __write_once __attribute__((__section__(".w1data"))) + +#endif /* _ASM_TILE_CACHE_H */ diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h new file mode 100644 index 00000000..0fc63c48 --- /dev/null +++ b/arch/tile/include/asm/cacheflush.h @@ -0,0 +1,164 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_CACHEFLUSH_H +#define _ASM_TILE_CACHEFLUSH_H + +#include <arch/chip.h> + +/* Keep includes the same across arches. */ +#include <linux/mm.h> +#include <linux/cache.h> +#include <arch/icache.h> + +/* Caches are physically-indexed and so don't need special treatment */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) +#define flush_icache_page(vma, pg) do { } while (0) +#define flush_icache_user_range(vma, pg, adr, len) do { } while (0) + +/* Flush the icache just on this cpu */ +extern void __flush_icache_range(unsigned long start, unsigned long end); + +/* Flush the entire icache on this cpu. */ +#define __flush_icache() __flush_icache_range(0, CHIP_L1I_CACHE_SIZE()) + +#ifdef CONFIG_SMP +/* + * When the kernel writes to its own text we need to do an SMP + * broadcast to make the L1I coherent everywhere. This includes + * module load and single step. + */ +extern void flush_icache_range(unsigned long start, unsigned long end); +#else +#define flush_icache_range __flush_icache_range +#endif + +/* + * An update to an executable user page requires icache flushing. + * We could carefully update only tiles that are running this process, + * and rely on the fact that we flush the icache on every context + * switch to avoid doing extra work here. But for now, I'll be + * conservative and just do a global icache flush. + */ +static inline void copy_to_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, + void *dst, void *src, int len) +{ + memcpy(dst, src, len); + if (vma->vm_flags & VM_EXEC) { + flush_icache_range((unsigned long) dst, + (unsigned long) dst + len); + } +} + +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy((dst), (src), (len)) + +/* + * Invalidate a VA range; pads to L2 cacheline boundaries. + * + * Note that on TILE64, __inv_buffer() actually flushes modified + * cache lines in addition to invalidating them, i.e., it's the + * same as __finv_buffer(). + */ +static inline void __inv_buffer(void *buffer, size_t size) +{ + char *next = (char *)((long)buffer & -L2_CACHE_BYTES); + char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size); + while (next < finish) { + __insn_inv(next); + next += CHIP_INV_STRIDE(); + } +} + +/* Flush a VA range; pads to L2 cacheline boundaries. */ +static inline void __flush_buffer(void *buffer, size_t size) +{ + char *next = (char *)((long)buffer & -L2_CACHE_BYTES); + char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size); + while (next < finish) { + __insn_flush(next); + next += CHIP_FLUSH_STRIDE(); + } +} + +/* Flush & invalidate a VA range; pads to L2 cacheline boundaries. */ +static inline void __finv_buffer(void *buffer, size_t size) +{ + char *next = (char *)((long)buffer & -L2_CACHE_BYTES); + char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size); + while (next < finish) { + __insn_finv(next); + next += CHIP_FINV_STRIDE(); + } +} + + +/* Invalidate a VA range and wait for it to be complete. */ +static inline void inv_buffer(void *buffer, size_t size) +{ + __inv_buffer(buffer, size); + mb(); +} + +/* + * Flush a locally-homecached VA range and wait for the evicted + * cachelines to hit memory. + */ +static inline void flush_buffer_local(void *buffer, size_t size) +{ + __flush_buffer(buffer, size); + mb_incoherent(); +} + +/* + * Flush and invalidate a locally-homecached VA range and wait for the + * evicted cachelines to hit memory. + */ +static inline void finv_buffer_local(void *buffer, size_t size) +{ + __finv_buffer(buffer, size); + mb_incoherent(); +} + +/* + * Flush and invalidate a VA range that is homed remotely, waiting + * until the memory controller holds the flushed values. If "hfh" is + * true, we will do a more expensive flush involving additional loads + * to make sure we have touched all the possible home cpus of a buffer + * that is homed with "hash for home". + */ +void finv_buffer_remote(void *buffer, size_t size, int hfh); + +/* + * On SMP systems, when the scheduler does migration-cost autodetection, + * it needs a way to flush as much of the CPU's caches as possible: + * + * TODO: fill this in! + */ +static inline void sched_cacheflush(void) +{ +} + +#endif /* _ASM_TILE_CACHEFLUSH_H */ diff --git a/arch/tile/include/asm/checksum.h b/arch/tile/include/asm/checksum.h new file mode 100644 index 00000000..a120766c --- /dev/null +++ b/arch/tile/include/asm/checksum.h @@ -0,0 +1,24 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_CHECKSUM_H +#define _ASM_TILE_CHECKSUM_H + +#include <asm-generic/checksum.h> + +/* Allow us to provide a more optimized do_csum(). */ +__wsum do_csum(const unsigned char *buff, int len); +#define do_csum do_csum + +#endif /* _ASM_TILE_CHECKSUM_H */ diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h new file mode 100644 index 00000000..276f067e --- /dev/null +++ b/arch/tile/include/asm/cmpxchg.h @@ -0,0 +1,73 @@ +/* + * cmpxchg.h -- forked from asm/atomic.h with this copyright: + * + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +#ifndef _ASM_TILE_CMPXCHG_H +#define _ASM_TILE_CMPXCHG_H + +#ifndef __ASSEMBLY__ + +/* Nonexistent functions intended to cause link errors. */ +extern unsigned long __xchg_called_with_bad_pointer(void); +extern unsigned long __cmpxchg_called_with_bad_pointer(void); + +#define xchg(ptr, x) \ + ({ \ + typeof(*(ptr)) __x; \ + switch (sizeof(*(ptr))) { \ + case 4: \ + __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \ + (atomic_t *)(ptr), \ + (u32)(typeof((x)-(x)))(x)); \ + break; \ + case 8: \ + __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \ + (atomic64_t *)(ptr), \ + (u64)(typeof((x)-(x)))(x)); \ + break; \ + default: \ + __xchg_called_with_bad_pointer(); \ + } \ + __x; \ + }) + +#define cmpxchg(ptr, o, n) \ + ({ \ + typeof(*(ptr)) __x; \ + switch (sizeof(*(ptr))) { \ + case 4: \ + __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \ + (atomic_t *)(ptr), \ + (u32)(typeof((o)-(o)))(o), \ + (u32)(typeof((n)-(n)))(n)); \ + break; \ + case 8: \ + __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \ + (atomic64_t *)(ptr), \ + (u64)(typeof((o)-(o)))(o), \ + (u64)(typeof((n)-(n)))(n)); \ + break; \ + default: \ + __cmpxchg_called_with_bad_pointer(); \ + } \ + __x; \ + }) + +#define tas(ptr) (xchg((ptr), 1)) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_TILE_CMPXCHG_H */ diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h new file mode 100644 index 00000000..4b4b2896 --- /dev/null +++ b/arch/tile/include/asm/compat.h @@ -0,0 +1,256 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_COMPAT_H +#define _ASM_TILE_COMPAT_H + +/* + * Architecture specific compatibility types + */ +#include <linux/types.h> +#include <linux/sched.h> + +#define COMPAT_USER_HZ 100 + +/* "long" and pointer-based types are different. */ +typedef s32 compat_long_t; +typedef u32 compat_ulong_t; +typedef u32 compat_size_t; +typedef s32 compat_ssize_t; +typedef s32 compat_off_t; +typedef s32 compat_time_t; +typedef s32 compat_clock_t; +typedef u32 compat_ino_t; +typedef u32 compat_caddr_t; +typedef u32 compat_uptr_t; + +/* Many types are "int" or otherwise the same. */ +typedef __kernel_pid_t compat_pid_t; +typedef __kernel_uid_t __compat_uid_t; +typedef __kernel_gid_t __compat_gid_t; +typedef __kernel_uid32_t __compat_uid32_t; +typedef __kernel_uid32_t __compat_gid32_t; +typedef __kernel_mode_t compat_mode_t; +typedef __kernel_dev_t compat_dev_t; +typedef __kernel_loff_t compat_loff_t; +typedef __kernel_nlink_t compat_nlink_t; +typedef __kernel_ipc_pid_t compat_ipc_pid_t; +typedef __kernel_daddr_t compat_daddr_t; +typedef __kernel_fsid_t compat_fsid_t; +typedef __kernel_timer_t compat_timer_t; +typedef __kernel_key_t compat_key_t; +typedef int compat_int_t; +typedef s64 compat_s64; +typedef uint compat_uint_t; +typedef u64 compat_u64; + +/* We use the same register dump format in 32-bit images. */ +typedef unsigned long compat_elf_greg_t; +#define COMPAT_ELF_NGREG (sizeof(struct pt_regs) / sizeof(compat_elf_greg_t)) +typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; + +struct compat_timespec { + compat_time_t tv_sec; + s32 tv_nsec; +}; + +struct compat_timeval { + compat_time_t tv_sec; + s32 tv_usec; +}; + +#define compat_stat stat +#define compat_statfs statfs + +struct compat_sysctl { + unsigned int name; + int nlen; + unsigned int oldval; + unsigned int oldlenp; + unsigned int newval; + unsigned int newlen; + unsigned int __unused[4]; +}; + + +struct compat_flock { + short l_type; + short l_whence; + compat_off_t l_start; + compat_off_t l_len; + compat_pid_t l_pid; +}; + +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + +struct compat_flock64 { + short l_type; + short l_whence; + compat_loff_t l_start; + compat_loff_t l_len; + compat_pid_t l_pid; +}; + +#define COMPAT_RLIM_INFINITY 0xffffffff + +#define _COMPAT_NSIG 64 +#define _COMPAT_NSIG_BPW 32 + +typedef u32 compat_sigset_word; + +#define COMPAT_OFF_T_MAX 0x7fffffff +#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL + +struct compat_ipc64_perm { + compat_key_t key; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; + unsigned short mode; + unsigned short __pad1; + unsigned short seq; + unsigned short __pad2; + compat_ulong_t unused1; + compat_ulong_t unused2; +}; + +struct compat_semid64_ds { + struct compat_ipc64_perm sem_perm; + compat_time_t sem_otime; + compat_ulong_t __unused1; + compat_time_t sem_ctime; + compat_ulong_t __unused2; + compat_ulong_t sem_nsems; + compat_ulong_t __unused3; + compat_ulong_t __unused4; +}; + +struct compat_msqid64_ds { + struct compat_ipc64_perm msg_perm; + compat_time_t msg_stime; + compat_ulong_t __unused1; + compat_time_t msg_rtime; + compat_ulong_t __unused2; + compat_time_t msg_ctime; + compat_ulong_t __unused3; + compat_ulong_t msg_cbytes; + compat_ulong_t msg_qnum; + compat_ulong_t msg_qbytes; + compat_pid_t msg_lspid; + compat_pid_t msg_lrpid; + compat_ulong_t __unused4; + compat_ulong_t __unused5; +}; + +struct compat_shmid64_ds { + struct compat_ipc64_perm shm_perm; + compat_size_t shm_segsz; + compat_time_t shm_atime; + compat_ulong_t __unused1; + compat_time_t shm_dtime; + compat_ulong_t __unused2; + compat_time_t shm_ctime; + compat_ulong_t __unused3; + compat_pid_t shm_cpid; + compat_pid_t shm_lpid; + compat_ulong_t shm_nattch; + compat_ulong_t __unused4; + compat_ulong_t __unused5; +}; + +/* + * A pointer passed in from user mode. This should not + * be used for syscall parameters, just declare them + * as pointers because the syscall entry code will have + * appropriately converted them already. + */ + +static inline void __user *compat_ptr(compat_uptr_t uptr) +{ + return (void __user *)(long)(s32)uptr; +} + +static inline compat_uptr_t ptr_to_compat(void __user *uptr) +{ + return (u32)(unsigned long)uptr; +} + +/* Sign-extend when storing a kernel pointer to a user's ptregs. */ +static inline unsigned long ptr_to_compat_reg(void __user *uptr) +{ + return (long)(int)(long __force)uptr; +} + +static inline void __user *arch_compat_alloc_user_space(long len) +{ + struct pt_regs *regs = task_pt_regs(current); + return (void __user *)regs->sp - len; +} + +static inline int is_compat_task(void) +{ + return current_thread_info()->status & TS_COMPAT; +} + +extern int compat_setup_rt_frame(int sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *set, + struct pt_regs *regs); + +/* Compat syscalls. */ +struct compat_sigaction; +struct compat_siginfo; +struct compat_sigaltstack; +long compat_sys_execve(const char __user *path, + compat_uptr_t __user *argv, + compat_uptr_t __user *envp, struct pt_regs *); +long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act, + struct compat_sigaction __user *oact, + size_t sigsetsize); +long compat_sys_rt_sigqueueinfo(int pid, int sig, + struct compat_siginfo __user *uinfo); +long compat_sys_rt_sigreturn(struct pt_regs *); +long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, + struct compat_sigaltstack __user *uoss_ptr, + struct pt_regs *); +long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high); +long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high); +long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count, + u32 dummy, u32 low, u32 high); +long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count, + u32 dummy, u32 low, u32 high); +long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len); +long compat_sys_sync_file_range2(int fd, unsigned int flags, + u32 offset_lo, u32 offset_hi, + u32 nbytes_lo, u32 nbytes_hi); +long compat_sys_fallocate(int fd, int mode, + u32 offset_lo, u32 offset_hi, + u32 len_lo, u32 len_hi); +long compat_sys_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval); + +/* Tilera Linux syscalls that don't have "compat" versions. */ +#define compat_sys_flush_cache sys_flush_cache + +/* These are the intvec_64.S trampolines. */ +long _compat_sys_execve(const char __user *path, + const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp); +long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, + struct compat_sigaltstack __user *uoss_ptr); +long _compat_sys_rt_sigreturn(void); + +#endif /* _ASM_TILE_COMPAT_H */ diff --git a/arch/tile/include/asm/current.h b/arch/tile/include/asm/current.h new file mode 100644 index 00000000..da21acf0 --- /dev/null +++ b/arch/tile/include/asm/current.h @@ -0,0 +1,31 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_CURRENT_H +#define _ASM_TILE_CURRENT_H + +#include <linux/thread_info.h> + +struct task_struct; + +static inline struct task_struct *get_current(void) +{ + return current_thread_info()->task; +} +#define current get_current() + +/* Return a usable "task_struct" pointer even if the real one is corrupt. */ +struct task_struct *validate_current(void); + +#endif /* _ASM_TILE_CURRENT_H */ diff --git a/arch/tile/include/asm/delay.h b/arch/tile/include/asm/delay.h new file mode 100644 index 00000000..97b0e69e --- /dev/null +++ b/arch/tile/include/asm/delay.h @@ -0,0 +1,34 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_DELAY_H +#define _ASM_TILE_DELAY_H + +/* Undefined functions to get compile-time errors. */ +extern void __bad_udelay(void); +extern void __bad_ndelay(void); + +extern void __udelay(unsigned long usecs); +extern void __ndelay(unsigned long nsecs); +extern void __delay(unsigned long loops); + +#define udelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? __bad_udelay() : __ndelay((n) * 1000)) : \ + __udelay(n)) + +#define ndelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? __bad_ndelay() : __ndelay(n)) : \ + __ndelay(n)) + +#endif /* _ASM_TILE_DELAY_H */ diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h new file mode 100644 index 00000000..eaa06d17 --- /dev/null +++ b/arch/tile/include/asm/dma-mapping.h @@ -0,0 +1,94 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_DMA_MAPPING_H +#define _ASM_TILE_DMA_MAPPING_H + +#include <linux/mm.h> +#include <linux/scatterlist.h> +#include <linux/cache.h> +#include <linux/io.h> + +/* + * Note that on x86 and powerpc, there is a "struct dma_mapping_ops" + * that is used for all the DMA operations. For now, we don't have an + * equivalent on tile, because we only have a single way of doing DMA. + * (Tilera bug 7994 to use dma_mapping_ops.) + */ + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction); +extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction); +extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction); +extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nhwentries, enum dma_data_direction); +extern dma_addr_t dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction); +extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address, + size_t size, enum dma_data_direction); +extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction); +extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction); + + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag); + +void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle); + +extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t, + enum dma_data_direction); +extern void dma_sync_single_for_device(struct device *, dma_addr_t, + size_t, enum dma_data_direction); +extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t, + unsigned long offset, size_t, + enum dma_data_direction); +extern void dma_sync_single_range_for_device(struct device *, dma_addr_t, + unsigned long offset, size_t, + enum dma_data_direction); +extern void dma_cache_sync(struct device *dev, void *vaddr, size_t, + enum dma_data_direction); + +static inline int +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +static inline int +dma_supported(struct device *dev, u64 mask) +{ + return 1; +} + +static inline int +dma_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = mask; + + return 0; +} + +#endif /* _ASM_TILE_DMA_MAPPING_H */ diff --git a/arch/tile/include/asm/dma.h b/arch/tile/include/asm/dma.h new file mode 100644 index 00000000..12a7ca16 --- /dev/null +++ b/arch/tile/include/asm/dma.h @@ -0,0 +1,25 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_DMA_H +#define _ASM_TILE_DMA_H + +#include <asm-generic/dma.h> + +/* Needed by drivers/pci/quirks.c */ +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#endif + +#endif /* _ASM_TILE_DMA_H */ diff --git a/arch/tile/include/asm/edac.h b/arch/tile/include/asm/edac.h new file mode 100644 index 00000000..87fc83ee --- /dev/null +++ b/arch/tile/include/asm/edac.h @@ -0,0 +1,29 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_EDAC_H +#define _ASM_TILE_EDAC_H + +/* ECC atomic, DMA, SMP and interrupt safe scrub function */ + +static inline void atomic_scrub(void *va, u32 size) +{ + /* + * These is nothing to be done here because CE is + * corrected by the mshim. + */ + return; +} + +#endif /* _ASM_TILE_EDAC_H */ diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h new file mode 100644 index 00000000..623a6bb7 --- /dev/null +++ b/arch/tile/include/asm/elf.h @@ -0,0 +1,167 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_ELF_H +#define _ASM_TILE_ELF_H + +/* + * ELF register definitions. + */ + +#include <arch/chip.h> + +#include <linux/ptrace.h> +#include <asm/byteorder.h> +#include <asm/page.h> + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +#define EM_TILE64 187 +#define EM_TILEPRO 188 +#define EM_TILEGX 191 + +/* Provide a nominal data structure. */ +#define ELF_NFPREG 0 +typedef double elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + +#ifdef __tilegx__ +#define ELF_CLASS ELFCLASS64 +#else +#define ELF_CLASS ELFCLASS32 +#endif +#define ELF_DATA ELFDATA2LSB + +/* + * There seems to be a bug in how compat_binfmt_elf.c works: it + * #undefs ELF_ARCH, but it is then used in binfmt_elf.c for fill_note_info(). + * Hack around this by providing an enum value of ELF_ARCH. + */ +enum { ELF_ARCH = CHIP_ELF_TYPE() }; +#define ELF_ARCH ELF_ARCH + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + ((x)->e_ident[EI_CLASS] == ELF_CLASS && \ + (x)->e_machine == CHIP_ELF_TYPE()) + +/* The module loader only handles a few relocation types. */ +#ifndef __tilegx__ +#define R_TILE_32 1 +#define R_TILE_JOFFLONG_X1 15 +#define R_TILE_IMM16_X0_LO 25 +#define R_TILE_IMM16_X1_LO 26 +#define R_TILE_IMM16_X0_HA 29 +#define R_TILE_IMM16_X1_HA 30 +#else +#define R_TILEGX_64 1 +#define R_TILEGX_JUMPOFF_X1 21 +#define R_TILEGX_IMM16_X0_HW0 36 +#define R_TILEGX_IMM16_X1_HW0 37 +#define R_TILEGX_IMM16_X0_HW1 38 +#define R_TILEGX_IMM16_X1_HW1 39 +#define R_TILEGX_IMM16_X0_HW2_LAST 48 +#define R_TILEGX_IMM16_X1_HW2_LAST 49 +#endif + +/* Use standard page size for core dumps. */ +#define ELF_EXEC_PAGESIZE PAGE_SIZE + +/* + * This is the location that an ET_DYN program is loaded if exec'ed. Typical + * use of this is to invoke "./ld.so someprog" to test out a new version of + * the loader. We need to make sure that it is out of the way of the program + * that it will "exec", and that there is sufficient room for the brk. + */ +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + +#define ELF_CORE_COPY_REGS(_dest, _regs) \ + memcpy((char *) &_dest, (char *) _regs, \ + sizeof(struct pt_regs)); + +/* No additional FP registers to copy. */ +#define ELF_CORE_COPY_FPREGS(t, fpu) 0 + +/* + * This yields a mask that user programs can use to figure out what + * instruction set this CPU supports. This could be done in user space, + * but it's not easy, and we've already done it here. + */ +#define ELF_HWCAP (0) + +/* + * This yields a string that ld.so will use to load implementation + * specific libraries for optimization. This is more specific in + * intent than poking at uname or /proc/cpuinfo. + */ +#define ELF_PLATFORM (NULL) + +extern void elf_plat_init(struct pt_regs *regs, unsigned long load_addr); + +#define ELF_PLAT_INIT(_r, load_addr) elf_plat_init(_r, load_addr) + +extern int dump_task_regs(struct task_struct *, elf_gregset_t *); +#define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs) + +/* Tilera Linux has no personalities currently, so no need to do anything. */ +#define SET_PERSONALITY(ex) do { } while (0) + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES +/* Support auto-mapping of the user interrupt vectors. */ +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack); +#ifdef CONFIG_COMPAT + +#define COMPAT_ELF_PLATFORM "tilegx-m32" + +/* + * "Compat" binaries have the same machine type, but 32-bit class, + * since they're not a separate machine type, but just a 32-bit + * variant of the standard 64-bit architecture. + */ +#define compat_elf_check_arch(x) \ + ((x)->e_ident[EI_CLASS] == ELFCLASS32 && \ + (x)->e_machine == CHIP_ELF_TYPE()) + +#define compat_start_thread(regs, ip, usp) do { \ + regs->pc = ptr_to_compat_reg((void *)(ip)); \ + regs->sp = ptr_to_compat_reg((void *)(usp)); \ + } while (0) + +/* + * Use SET_PERSONALITY to indicate compatibility via TS_COMPAT. + */ +#undef SET_PERSONALITY +#define SET_PERSONALITY(ex) \ +do { \ + current->personality = PER_LINUX; \ + current_thread_info()->status &= ~TS_COMPAT; \ +} while (0) +#define COMPAT_SET_PERSONALITY(ex) \ +do { \ + current->personality = PER_LINUX_32BIT; \ + current_thread_info()->status |= TS_COMPAT; \ +} while (0) + +#define COMPAT_ELF_ET_DYN_BASE (0xffffffff / 3 * 2) + +#endif /* CONFIG_COMPAT */ + +#endif /* _ASM_TILE_ELF_H */ diff --git a/arch/tile/include/asm/exec.h b/arch/tile/include/asm/exec.h new file mode 100644 index 00000000..a714e195 --- /dev/null +++ b/arch/tile/include/asm/exec.h @@ -0,0 +1,20 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_EXEC_H +#define _ASM_TILE_EXEC_H + +#define arch_align_stack(x) (x) + +#endif /* _ASM_TILE_EXEC_H */ diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h new file mode 100644 index 00000000..c66f7933 --- /dev/null +++ b/arch/tile/include/asm/fixmap.h @@ -0,0 +1,118 @@ +/* + * Copyright (C) 1998 Ingo Molnar + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_FIXMAP_H +#define _ASM_TILE_FIXMAP_H + +#include <asm/page.h> + +#ifndef __ASSEMBLY__ +#include <linux/kernel.h> +#ifdef CONFIG_HIGHMEM +#include <linux/threads.h> +#include <asm/kmap_types.h> +#endif + +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) +#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. We allocate these special addresses + * from the end of supervisor virtual memory backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. + * + * these 'compile-time allocated' memory buffers are + * fixed-size 4k pages. (or larger if used with an increment + * higher than 1) use fixmap_set(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + * + * We don't bother with a FIX_HOLE since above the fixmaps + * is unmapped memory in any case. + */ +enum fixed_addresses { +#ifdef CONFIG_HIGHMEM + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#endif + __end_of_permanent_fixed_addresses, + + /* + * Temporary boot-time mappings, used before ioremap() is functional. + * Not currently needed by the Tile architecture. + */ +#define NR_FIX_BTMAPS 0 +#if NR_FIX_BTMAPS + FIX_BTMAP_END = __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1, + __end_of_fixed_addresses +#else + __end_of_fixed_addresses = __end_of_permanent_fixed_addresses +#endif +}; + +extern void __set_fixmap(enum fixed_addresses idx, + unsigned long phys, pgprot_t flags); + +#define set_fixmap(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL) +#define clear_fixmap(idx) \ + __set_fixmap(idx, 0, __pgprot(0)) + +#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE) +#define FIXADDR_BOOT_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_BOOT_SIZE) + +extern void __this_fixmap_does_not_exist(void); + +/* + * 'index to address' translation. If anyone tries to use the idx + * directly without tranlation, we catch the bug with a NULL-deference + * kernel oops. Illegal ranges of incoming indices are caught too. + */ +static __always_inline unsigned long fix_to_virt(const unsigned int idx) +{ + /* + * this branch gets completely eliminated after inlining, + * except when someone tries to use fixaddr indices in an + * illegal way. (such as mixing up address types or using + * out-of-range indices). + * + * If it doesn't get removed, the linker will complain + * loudly with a reasonably clear error message.. + */ + if (idx >= __end_of_fixed_addresses) + __this_fixmap_does_not_exist(); + + return __fix_to_virt(idx); +} + +static inline unsigned long virt_to_fix(const unsigned long vaddr) +{ + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_TILE_FIXMAP_H */ diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h new file mode 100644 index 00000000..461459b0 --- /dev/null +++ b/arch/tile/include/asm/ftrace.h @@ -0,0 +1,20 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_FTRACE_H +#define _ASM_TILE_FTRACE_H + +/* empty */ + +#endif /* _ASM_TILE_FTRACE_H */ diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h new file mode 100644 index 00000000..d03ec124 --- /dev/null +++ b/arch/tile/include/asm/futex.h @@ -0,0 +1,142 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * These routines make two important assumptions: + * + * 1. atomic_t is really an int and can be freely cast back and forth + * (validated in __init_atomic_per_cpu). + * + * 2. userspace uses sys_cmpxchg() for all atomic operations, thus using + * the same locking convention that all the kernel atomic routines use. + */ + +#ifndef _ASM_TILE_FUTEX_H +#define _ASM_TILE_FUTEX_H + +#ifndef __ASSEMBLY__ + +#include <linux/futex.h> +#include <linux/uaccess.h> +#include <linux/errno.h> + +extern struct __get_user futex_set(u32 __user *v, int i); +extern struct __get_user futex_add(u32 __user *v, int n); +extern struct __get_user futex_or(u32 __user *v, int n); +extern struct __get_user futex_andn(u32 __user *v, int n); +extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n); + +#ifndef __tilegx__ +extern struct __get_user futex_xor(u32 __user *v, int n); +#else +static inline struct __get_user futex_xor(u32 __user *uaddr, int n) +{ + struct __get_user asm_ret = __get_user_4(uaddr); + if (!asm_ret.err) { + int oldval, newval; + do { + oldval = asm_ret.val; + newval = oldval ^ n; + asm_ret = futex_cmpxchg(uaddr, oldval, newval); + } while (asm_ret.err == 0 && oldval != asm_ret.val); + } + return asm_ret; +} +#endif + +static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int ret; + struct __get_user asm_ret; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + pagefault_disable(); + switch (op) { + case FUTEX_OP_SET: + asm_ret = futex_set(uaddr, oparg); + break; + case FUTEX_OP_ADD: + asm_ret = futex_add(uaddr, oparg); + break; + case FUTEX_OP_OR: + asm_ret = futex_or(uaddr, oparg); + break; + case FUTEX_OP_ANDN: + asm_ret = futex_andn(uaddr, oparg); + break; + case FUTEX_OP_XOR: + asm_ret = futex_xor(uaddr, oparg); + break; + default: + asm_ret.err = -ENOSYS; + } + pagefault_enable(); + + ret = asm_ret.err; + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: + ret = (asm_ret.val == cmparg); + break; + case FUTEX_OP_CMP_NE: + ret = (asm_ret.val != cmparg); + break; + case FUTEX_OP_CMP_LT: + ret = (asm_ret.val < cmparg); + break; + case FUTEX_OP_CMP_GE: + ret = (asm_ret.val >= cmparg); + break; + case FUTEX_OP_CMP_LE: + ret = (asm_ret.val <= cmparg); + break; + case FUTEX_OP_CMP_GT: + ret = (asm_ret.val > cmparg); + break; + default: + ret = -ENOSYS; + } + } + return ret; +} + +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + struct __get_user asm_ret; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + asm_ret = futex_cmpxchg(uaddr, oldval, newval); + *uval = asm_ret.val; + return asm_ret.err; +} + +#ifndef __tilegx__ +/* Return failure from the atomic wrappers. */ +struct __get_user __atomic_bad_address(int __user *addr); +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_TILE_FUTEX_H */ diff --git a/arch/tile/include/asm/hardirq.h b/arch/tile/include/asm/hardirq.h new file mode 100644 index 00000000..822390f9 --- /dev/null +++ b/arch/tile/include/asm/hardirq.h @@ -0,0 +1,47 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_HARDIRQ_H +#define _ASM_TILE_HARDIRQ_H + +#include <linux/threads.h> +#include <linux/cache.h> + +#include <asm/irq.h> + +typedef struct { + unsigned int __softirq_pending; + long idle_timestamp; + + /* Hard interrupt statistics. */ + unsigned int irq_timer_count; + unsigned int irq_syscall_count; + unsigned int irq_resched_count; + unsigned int irq_hv_flush_count; + unsigned int irq_call_count; + unsigned int irq_hv_msg_count; + unsigned int irq_dev_intr_count; + +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); + +#define __ARCH_IRQ_STAT +#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) + +#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ + +#define HARDIRQ_BITS 8 + +#endif /* _ASM_TILE_HARDIRQ_H */ diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h new file mode 100644 index 00000000..2ac42284 --- /dev/null +++ b/arch/tile/include/asm/hardwall.h @@ -0,0 +1,65 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Provide methods for the HARDWALL_FILE for accessing the UDN. + */ + +#ifndef _ASM_TILE_HARDWALL_H +#define _ASM_TILE_HARDWALL_H + +#include <linux/ioctl.h> + +#define HARDWALL_IOCTL_BASE 0xa2 + +/* + * The HARDWALL_CREATE() ioctl is a macro with a "size" argument. + * The resulting ioctl value is passed to the kernel in conjunction + * with a pointer to a little-endian bitmask of cpus, which must be + * physically in a rectangular configuration on the chip. + * The "size" is the number of bytes of cpu mask data. + */ +#define _HARDWALL_CREATE 1 +#define HARDWALL_CREATE(size) \ + _IOC(_IOC_READ, HARDWALL_IOCTL_BASE, _HARDWALL_CREATE, (size)) + +#define _HARDWALL_ACTIVATE 2 +#define HARDWALL_ACTIVATE \ + _IO(HARDWALL_IOCTL_BASE, _HARDWALL_ACTIVATE) + +#define _HARDWALL_DEACTIVATE 3 +#define HARDWALL_DEACTIVATE \ + _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE) + +#define _HARDWALL_GET_ID 4 +#define HARDWALL_GET_ID \ + _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID) + +#ifndef __KERNEL__ + +/* This is the canonical name expected by userspace. */ +#define HARDWALL_FILE "/dev/hardwall" + +#else + +/* /proc hooks for hardwall. */ +struct proc_dir_entry; +#ifdef CONFIG_HARDWALL +void proc_tile_hardwall_init(struct proc_dir_entry *root); +int proc_pid_hardwall(struct task_struct *task, char *buffer); +#else +static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {} +#endif + +#endif + +#endif /* _ASM_TILE_HARDWALL_H */ diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h new file mode 100644 index 00000000..fc8429a3 --- /dev/null +++ b/arch/tile/include/asm/highmem.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 1999 Gerhard Wichert, Siemens AG + * Gerhard.Wichert@pdb.siemens.de + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Used in CONFIG_HIGHMEM systems for memory pages which + * are not addressable by direct kernel virtual addresses. + * + */ + +#ifndef _ASM_TILE_HIGHMEM_H +#define _ASM_TILE_HIGHMEM_H + +#include <linux/interrupt.h> +#include <linux/threads.h> +#include <asm/tlbflush.h> +#include <asm/homecache.h> + +/* declarations for highmem.c */ +extern unsigned long highstart_pfn, highend_pfn; + +extern pte_t *pkmap_page_table; + +/* + * Ordering is: + * + * FIXADDR_TOP + * fixed_addresses + * FIXADDR_START + * temp fixed addresses + * FIXADDR_BOOT_START + * Persistent kmap area + * PKMAP_BASE + * VMALLOC_END + * Vmalloc area + * VMALLOC_START + * high_memory + */ +#define LAST_PKMAP_MASK (LAST_PKMAP-1) +#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) + +void *kmap_high(struct page *page); +void kunmap_high(struct page *page); +void *kmap(struct page *page); +void kunmap(struct page *page); +void *kmap_fix_kpte(struct page *page, int finished); + +/* This macro is used only in map_new_virtual() to map "page". */ +#define kmap_prot page_to_kpgprot(page) + +void *kmap_atomic(struct page *page); +void __kunmap_atomic(void *kvaddr); +void *kmap_atomic_pfn(unsigned long pfn); +void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); +struct page *kmap_atomic_to_page(void *ptr); +void *kmap_atomic_prot(struct page *page, pgprot_t prot); +void kmap_atomic_fix_kpte(struct page *page, int finished); + +#define flush_cache_kmaps() do { } while (0) + +#endif /* _ASM_TILE_HIGHMEM_H */ diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h new file mode 100644 index 00000000..a8243865 --- /dev/null +++ b/arch/tile/include/asm/homecache.h @@ -0,0 +1,125 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Handle issues around the Tile "home cache" model of coherence. + */ + +#ifndef _ASM_TILE_HOMECACHE_H +#define _ASM_TILE_HOMECACHE_H + +#include <asm/page.h> +#include <linux/cpumask.h> + +struct page; +struct task_struct; +struct vm_area_struct; +struct zone; + +/* + * Coherence point for the page is its memory controller. + * It is not present in any cache (L1 or L2). + */ +#define PAGE_HOME_UNCACHED -1 + +/* + * Is this page immutable (unwritable) and thus able to be cached more + * widely than would otherwise be possible? On tile64 this means we + * mark the PTE to cache locally; on tilepro it means we have "nc" set. + */ +#define PAGE_HOME_IMMUTABLE -2 + +/* + * Each cpu considers its own cache to be the home for the page, + * which makes it incoherent. + */ +#define PAGE_HOME_INCOHERENT -3 + +#if CHIP_HAS_CBOX_HOME_MAP() +/* Home for the page is distributed via hash-for-home. */ +#define PAGE_HOME_HASH -4 +#endif + +/* Homing is unknown or unspecified. Not valid for page_home(). */ +#define PAGE_HOME_UNKNOWN -5 + +/* Home on the current cpu. Not valid for page_home(). */ +#define PAGE_HOME_HERE -6 + +/* Support wrapper to use instead of explicit hv_flush_remote(). */ +extern void flush_remote(unsigned long cache_pfn, unsigned long cache_length, + const struct cpumask *cache_cpumask, + HV_VirtAddr tlb_va, unsigned long tlb_length, + unsigned long tlb_pgsize, + const struct cpumask *tlb_cpumask, + HV_Remote_ASID *asids, int asidcount); + +/* Set homing-related bits in a PTE (can also pass a pgprot_t). */ +extern pte_t pte_set_home(pte_t pte, int home); + +/* Do a cache eviction on the specified cpus. */ +extern void homecache_evict(const struct cpumask *mask); + +/* + * Change a kernel page's homecache. It must not be mapped in user space. + * If !CONFIG_HOMECACHE, only usable on LOWMEM, and can only be called when + * no other cpu can reference the page, and causes a full-chip cache/TLB flush. + */ +extern void homecache_change_page_home(struct page *, int order, int home); + +/* + * Flush a page out of whatever cache(s) it is in. + * This is more than just finv, since it properly handles waiting + * for the data to reach memory on tilepro, but it can be quite + * heavyweight, particularly on hash-for-home memory. + */ +extern void homecache_flush_cache(struct page *, int order); + +/* + * Allocate a page with the given GFP flags, home, and optionally + * node. These routines are actually just wrappers around the normal + * alloc_pages() / alloc_pages_node() functions, which set and clear + * a per-cpu variable to communicate with homecache_new_kernel_page(). + * If !CONFIG_HOMECACHE, uses homecache_change_page_home(). + */ +extern struct page *homecache_alloc_pages(gfp_t gfp_mask, + unsigned int order, int home); +extern struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask, + unsigned int order, int home); +#define homecache_alloc_page(gfp_mask, home) \ + homecache_alloc_pages(gfp_mask, 0, home) + +/* + * These routines are just pass-throughs to free_pages() when + * we support full homecaching. If !CONFIG_HOMECACHE, then these + * routines use homecache_change_page_home() to reset the home + * back to the default before returning the page to the allocator. + */ +void homecache_free_pages(unsigned long addr, unsigned int order); +#define homecache_free_page(page) \ + homecache_free_pages((page), 0) + + + +/* + * Report the page home for LOWMEM pages by examining their kernel PTE, + * or for highmem pages as the default home. + */ +extern int page_home(struct page *); + +#define homecache_migrate_kthread() do {} while (0) + +#define homecache_kpte_lock() 0 +#define homecache_kpte_unlock(flags) do {} while (0) + + +#endif /* _ASM_TILE_HOMECACHE_H */ diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h new file mode 100644 index 00000000..d396d180 --- /dev/null +++ b/arch/tile/include/asm/hugetlb.h @@ -0,0 +1,109 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_HUGETLB_H +#define _ASM_TILE_HUGETLB_H + +#include <asm/page.h> + + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) { + return 0; +} + +/* + * If the arch doesn't supply something else, assume that hugepage + * size aligned regions are ok without further preparation. + */ +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte(ptep, pte); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + ptep_clear_flush(vma, addr, ptep); +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +#endif /* _ASM_TILE_HUGETLB_H */ diff --git a/arch/tile/include/asm/hv_driver.h b/arch/tile/include/asm/hv_driver.h new file mode 100644 index 00000000..ad614de8 --- /dev/null +++ b/arch/tile/include/asm/hv_driver.h @@ -0,0 +1,60 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * This header defines a wrapper interface for managing hypervisor + * device calls that will result in an interrupt at some later time. + * In particular, this provides wrappers for hv_preada() and + * hv_pwritea(). + */ + +#ifndef _ASM_TILE_HV_DRIVER_H +#define _ASM_TILE_HV_DRIVER_H + +#include <hv/hypervisor.h> + +struct hv_driver_cb; + +/* A callback to be invoked when an operation completes. */ +typedef void hv_driver_callback_t(struct hv_driver_cb *cb, __hv32 result); + +/* + * A structure to hold information about an outstanding call. + * The driver must allocate a separate structure for each call. + */ +struct hv_driver_cb { + hv_driver_callback_t *callback; /* Function to call on interrupt. */ + void *dev; /* Driver-specific state variable. */ +}; + +/* Wrapper for invoking hv_dev_preada(). */ +static inline int +tile_hv_dev_preada(int devhdl, __hv32 flags, __hv32 sgl_len, + HV_SGL sgl[/* sgl_len */], __hv64 offset, + struct hv_driver_cb *callback) +{ + return hv_dev_preada(devhdl, flags, sgl_len, sgl, + offset, (HV_IntArg)callback); +} + +/* Wrapper for invoking hv_dev_pwritea(). */ +static inline int +tile_hv_dev_pwritea(int devhdl, __hv32 flags, __hv32 sgl_len, + HV_SGL sgl[/* sgl_len */], __hv64 offset, + struct hv_driver_cb *callback) +{ + return hv_dev_pwritea(devhdl, flags, sgl_len, sgl, + offset, (HV_IntArg)callback); +} + + +#endif /* _ASM_TILE_HV_DRIVER_H */ diff --git a/arch/tile/include/asm/hw_irq.h b/arch/tile/include/asm/hw_irq.h new file mode 100644 index 00000000..4fac5fbf --- /dev/null +++ b/arch/tile/include/asm/hw_irq.h @@ -0,0 +1,18 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_HW_IRQ_H +#define _ASM_TILE_HW_IRQ_H + +#endif /* _ASM_TILE_HW_IRQ_H */ diff --git a/arch/tile/include/asm/ide.h b/arch/tile/include/asm/ide.h new file mode 100644 index 00000000..3c6f2ed8 --- /dev/null +++ b/arch/tile/include/asm/ide.h @@ -0,0 +1,25 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_IDE_H +#define _ASM_TILE_IDE_H + +/* For IDE on PCI */ +#define MAX_HWIFS 10 + +#define ide_default_io_ctl(base) (0) + +#include <asm-generic/ide_iops.h> + +#endif /* _ASM_TILE_IDE_H */ diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h new file mode 100644 index 00000000..d2152deb --- /dev/null +++ b/arch/tile/include/asm/io.h @@ -0,0 +1,305 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_IO_H +#define _ASM_TILE_IO_H + +#include <linux/kernel.h> +#include <linux/bug.h> +#include <asm/page.h> + +#define IO_SPACE_LIMIT 0xfffffffful + +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access. + */ +#define xlate_dev_mem_ptr(p) __va(p) + +/* + * Convert a virtual cached pointer to an uncached pointer. + */ +#define xlate_dev_kmem_ptr(p) p + +/* + * Change "struct page" to physical address. + */ +#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) + +/* + * Some places try to pass in an loff_t for PHYSADDR (?!), so we cast it to + * long before casting it to a pointer to avoid compiler warnings. + */ +#if CHIP_HAS_MMIO() +extern void __iomem *ioremap(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, + pgprot_t pgprot); +extern void iounmap(volatile void __iomem *addr); +#else +#define ioremap(physaddr, size) ((void __iomem *)(unsigned long)(physaddr)) +#define iounmap(addr) ((void)0) +#endif + +#define ioremap_nocache(physaddr, size) ioremap(physaddr, size) +#define ioremap_wc(physaddr, size) ioremap(physaddr, size) +#define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) +#define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) + +#define mmiowb() + +/* Conversion between virtual and physical mappings. */ +#define mm_ptov(addr) ((void *)phys_to_virt(addr)) +#define mm_vtop(addr) ((unsigned long)virt_to_phys(addr)) + +#ifdef CONFIG_PCI + +extern u8 _tile_readb(unsigned long addr); +extern u16 _tile_readw(unsigned long addr); +extern u32 _tile_readl(unsigned long addr); +extern u64 _tile_readq(unsigned long addr); +extern void _tile_writeb(u8 val, unsigned long addr); +extern void _tile_writew(u16 val, unsigned long addr); +extern void _tile_writel(u32 val, unsigned long addr); +extern void _tile_writeq(u64 val, unsigned long addr); + +#else + +/* + * The Tile architecture does not support IOMEM unless PCI is enabled. + * Unfortunately we can't yet simply not declare these methods, + * since some generic code that compiles into the kernel, but + * we never run, uses them unconditionally. + */ + +static inline int iomem_panic(void) +{ + panic("readb/writeb and friends do not exist on tile without PCI"); + return 0; +} + +static inline u8 _tile_readb(unsigned long addr) +{ + return iomem_panic(); +} + +static inline u16 _tile_readw(unsigned long addr) +{ + return iomem_panic(); +} + +static inline u32 _tile_readl(unsigned long addr) +{ + return iomem_panic(); +} + +static inline u64 _tile_readq(unsigned long addr) +{ + return iomem_panic(); +} + +static inline void _tile_writeb(u8 val, unsigned long addr) +{ + iomem_panic(); +} + +static inline void _tile_writew(u16 val, unsigned long addr) +{ + iomem_panic(); +} + +static inline void _tile_writel(u32 val, unsigned long addr) +{ + iomem_panic(); +} + +static inline void _tile_writeq(u64 val, unsigned long addr) +{ + iomem_panic(); +} + +#endif + +#define readb(addr) _tile_readb((unsigned long)addr) +#define readw(addr) _tile_readw((unsigned long)addr) +#define readl(addr) _tile_readl((unsigned long)addr) +#define readq(addr) _tile_readq((unsigned long)addr) +#define writeb(val, addr) _tile_writeb(val, (unsigned long)addr) +#define writew(val, addr) _tile_writew(val, (unsigned long)addr) +#define writel(val, addr) _tile_writel(val, (unsigned long)addr) +#define writeq(val, addr) _tile_writeq(val, (unsigned long)addr) + +#define __raw_readb readb +#define __raw_readw readw +#define __raw_readl readl +#define __raw_readq readq +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel +#define __raw_writeq writeq + +#define readb_relaxed readb +#define readw_relaxed readw +#define readl_relaxed readl +#define readq_relaxed readq + +#define ioread8 readb +#define ioread16 readw +#define ioread32 readl +#define ioread64 readq +#define iowrite8 writeb +#define iowrite16 writew +#define iowrite32 writel +#define iowrite64 writeq + +static inline void memset_io(void *dst, int val, size_t len) +{ + int x; + BUG_ON((unsigned long)dst & 0x3); + val = (val & 0xff) * 0x01010101; + for (x = 0; x < len; x += 4) + writel(val, dst + x); +} + +static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, + size_t len) +{ + int x; + BUG_ON((unsigned long)src & 0x3); + for (x = 0; x < len; x += 4) + *(u32 *)(dst + x) = readl(src + x); +} + +static inline void memcpy_toio(volatile void __iomem *dst, const void *src, + size_t len) +{ + int x; + BUG_ON((unsigned long)dst & 0x3); + for (x = 0; x < len; x += 4) + writel(*(u32 *)(src + x), dst + x); +} + +/* + * The Tile architecture does not support IOPORT, even with PCI. + * Unfortunately we can't yet simply not declare these methods, + * since some generic code that compiles into the kernel, but + * we never run, uses them unconditionally. + */ + +static inline long ioport_panic(void) +{ + panic("inb/outb and friends do not exist on tile"); + return 0; +} + +static inline void __iomem *ioport_map(unsigned long port, unsigned int len) +{ + pr_info("ioport_map: mapping IO resources is unsupported on tile.\n"); + return NULL; +} + +static inline void ioport_unmap(void __iomem *addr) +{ + ioport_panic(); +} + +static inline u8 inb(unsigned long addr) +{ + return ioport_panic(); +} + +static inline u16 inw(unsigned long addr) +{ + return ioport_panic(); +} + +static inline u32 inl(unsigned long addr) +{ + return ioport_panic(); +} + +static inline void outb(u8 b, unsigned long addr) +{ + ioport_panic(); +} + +static inline void outw(u16 b, unsigned long addr) +{ + ioport_panic(); +} + +static inline void outl(u32 b, unsigned long addr) +{ + ioport_panic(); +} + +#define inb_p(addr) inb(addr) +#define inw_p(addr) inw(addr) +#define inl_p(addr) inl(addr) +#define outb_p(x, addr) outb((x), (addr)) +#define outw_p(x, addr) outw((x), (addr)) +#define outl_p(x, addr) outl((x), (addr)) + +static inline void insb(unsigned long addr, void *buffer, int count) +{ + ioport_panic(); +} + +static inline void insw(unsigned long addr, void *buffer, int count) +{ + ioport_panic(); +} + +static inline void insl(unsigned long addr, void *buffer, int count) +{ + ioport_panic(); +} + +static inline void outsb(unsigned long addr, const void *buffer, int count) +{ + ioport_panic(); +} + +static inline void outsw(unsigned long addr, const void *buffer, int count) +{ + ioport_panic(); +} + +static inline void outsl(unsigned long addr, const void *buffer, int count) +{ + ioport_panic(); +} + +#define ioread16be(addr) be16_to_cpu(ioread16(addr)) +#define ioread32be(addr) be32_to_cpu(ioread32(addr)) +#define iowrite16be(v, addr) iowrite16(be16_to_cpu(v), (addr)) +#define iowrite32be(v, addr) iowrite32(be32_to_cpu(v), (addr)) + +#define ioread8_rep(p, dst, count) \ + insb((unsigned long) (p), (dst), (count)) +#define ioread16_rep(p, dst, count) \ + insw((unsigned long) (p), (dst), (count)) +#define ioread32_rep(p, dst, count) \ + insl((unsigned long) (p), (dst), (count)) + +#define iowrite8_rep(p, src, count) \ + outsb((unsigned long) (p), (src), (count)) +#define iowrite16_rep(p, src, count) \ + outsw((unsigned long) (p), (src), (count)) +#define iowrite32_rep(p, src, count) \ + outsl((unsigned long) (p), (src), (count)) + +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt + +#endif /* _ASM_TILE_IO_H */ diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h new file mode 100644 index 00000000..33cff9a3 --- /dev/null +++ b/arch/tile/include/asm/irq.h @@ -0,0 +1,79 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_IRQ_H +#define _ASM_TILE_IRQ_H + +#include <linux/hardirq.h> + +/* The hypervisor interface provides 32 IRQs. */ +#define NR_IRQS 32 + +/* IRQ numbers used for linux IPIs. */ +#define IRQ_RESCHEDULE 0 + +#define irq_canonicalize(irq) (irq) + +void ack_bad_irq(unsigned int irq); + +/* + * Different ways of handling interrupts. Tile interrupts are always + * per-cpu; there is no global interrupt controller to implement + * enable/disable. Most onboard devices can send their interrupts to + * many tiles at the same time, and Tile-specific drivers know how to + * deal with this. + * + * However, generic devices (usually PCIE based, sometimes GPIO) + * expect that interrupts will fire on a single core at a time and + * that the irq can be enabled or disabled from any core at any time. + * We implement this by directing such interrupts to a single core. + * + * One added wrinkle is that PCI interrupts can be either + * hardware-cleared (legacy interrupts) or software cleared (MSI). + * Other generic device systems (GPIO) are always software-cleared. + * + * The enums below are used by drivers for onboard devices, including + * the internals of PCI root complex and GPIO. They allow the driver + * to tell the generic irq code what kind of interrupt is mapped to a + * particular IRQ number. + */ +enum { + /* per-cpu interrupt; use enable/disable_percpu_irq() to mask */ + TILE_IRQ_PERCPU, + /* global interrupt, hardware responsible for clearing. */ + TILE_IRQ_HW_CLEAR, + /* global interrupt, software responsible for clearing. */ + TILE_IRQ_SW_CLEAR, +}; + + +/* + * Paravirtualized drivers should call this when they dynamically + * allocate a new IRQ or discover an IRQ that was pre-allocated by the + * hypervisor for use with their particular device. This gives the + * IRQ subsystem an opportunity to do interrupt-type-specific + * initialization. + * + * ISSUE: We should modify this API so that registering anything + * except percpu interrupts also requires providing callback methods + * for enabling and disabling the interrupt. This would allow the + * generic IRQ code to proxy enable/disable_irq() calls back into the + * PCI subsystem, which in turn could enable or disable the interrupt + * at the PCI shim. + */ +void tile_irq_activate(unsigned int irq, int tile_irq_type); + +void setup_irq_regs(void); + +#endif /* _ASM_TILE_IRQ_H */ diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h new file mode 100644 index 00000000..5db0ce54 --- /dev/null +++ b/arch/tile/include/asm/irqflags.h @@ -0,0 +1,282 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_IRQFLAGS_H +#define _ASM_TILE_IRQFLAGS_H + +#include <arch/interrupts.h> +#include <arch/chip.h> + +#if !defined(__tilegx__) && defined(__ASSEMBLY__) + +/* + * The set of interrupts we want to allow when interrupts are nominally + * disabled. The remainder are effectively "NMI" interrupts from + * the point of view of the generic Linux code. Note that synchronous + * interrupts (aka "non-queued") are not blocked by the mask in any case. + */ +#if CHIP_HAS_AUX_PERF_COUNTERS() +#define LINUX_MASKABLE_INTERRUPTS_HI \ + (~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT))) +#else +#define LINUX_MASKABLE_INTERRUPTS_HI \ + (~(INT_MASK_HI(INT_PERF_COUNT))) +#endif + +#else + +#if CHIP_HAS_AUX_PERF_COUNTERS() +#define LINUX_MASKABLE_INTERRUPTS \ + (~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT))) +#else +#define LINUX_MASKABLE_INTERRUPTS \ + (~(INT_MASK(INT_PERF_COUNT))) +#endif + +#endif + +#ifndef __ASSEMBLY__ + +/* NOTE: we can't include <linux/percpu.h> due to #include dependencies. */ +#include <asm/percpu.h> +#include <arch/spr_def.h> + +/* Set and clear kernel interrupt masks. */ +#if CHIP_HAS_SPLIT_INTR_MASK() +#if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 || INT_MEM_ERROR >= 32 +# error Fix assumptions about which word various interrupts are in +#endif +#define interrupt_mask_set(n) do { \ + int __n = (n); \ + int __mask = 1 << (__n & 0x1f); \ + if (__n < 32) \ + __insn_mtspr(SPR_INTERRUPT_MASK_SET_K_0, __mask); \ + else \ + __insn_mtspr(SPR_INTERRUPT_MASK_SET_K_1, __mask); \ +} while (0) +#define interrupt_mask_reset(n) do { \ + int __n = (n); \ + int __mask = 1 << (__n & 0x1f); \ + if (__n < 32) \ + __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, __mask); \ + else \ + __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, __mask); \ +} while (0) +#define interrupt_mask_check(n) ({ \ + int __n = (n); \ + (((__n < 32) ? \ + __insn_mfspr(SPR_INTERRUPT_MASK_K_0) : \ + __insn_mfspr(SPR_INTERRUPT_MASK_K_1)) \ + >> (__n & 0x1f)) & 1; \ +}) +#define interrupt_mask_set_mask(mask) do { \ + unsigned long long __m = (mask); \ + __insn_mtspr(SPR_INTERRUPT_MASK_SET_K_0, (unsigned long)(__m)); \ + __insn_mtspr(SPR_INTERRUPT_MASK_SET_K_1, (unsigned long)(__m>>32)); \ +} while (0) +#define interrupt_mask_reset_mask(mask) do { \ + unsigned long long __m = (mask); \ + __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, (unsigned long)(__m)); \ + __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, (unsigned long)(__m>>32)); \ +} while (0) +#else +#define interrupt_mask_set(n) \ + __insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (1UL << (n))) +#define interrupt_mask_reset(n) \ + __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (1UL << (n))) +#define interrupt_mask_check(n) \ + ((__insn_mfspr(SPR_INTERRUPT_MASK_K) >> (n)) & 1) +#define interrupt_mask_set_mask(mask) \ + __insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (mask)) +#define interrupt_mask_reset_mask(mask) \ + __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (mask)) +#endif + +/* + * The set of interrupts we want active if irqs are enabled. + * Note that in particular, the tile timer interrupt comes and goes + * from this set, since we have no other way to turn off the timer. + * Likewise, INTCTRL_K is removed and re-added during device + * interrupts, as is the the hardwall UDN_FIREWALL interrupt. + * We use a low bit (MEM_ERROR) as our sentinel value and make sure it + * is always claimed as an "active interrupt" so we can query that bit + * to know our current state. + */ +DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); +#define INITIAL_INTERRUPTS_ENABLED INT_MASK(INT_MEM_ERROR) + +/* Disable interrupts. */ +#define arch_local_irq_disable() \ + interrupt_mask_set_mask(LINUX_MASKABLE_INTERRUPTS) + +/* Disable all interrupts, including NMIs. */ +#define arch_local_irq_disable_all() \ + interrupt_mask_set_mask(-1UL) + +/* Re-enable all maskable interrupts. */ +#define arch_local_irq_enable() \ + interrupt_mask_reset_mask(__get_cpu_var(interrupts_enabled_mask)) + +/* Disable or enable interrupts based on flag argument. */ +#define arch_local_irq_restore(disabled) do { \ + if (disabled) \ + arch_local_irq_disable(); \ + else \ + arch_local_irq_enable(); \ +} while (0) + +/* Return true if "flags" argument means interrupts are disabled. */ +#define arch_irqs_disabled_flags(flags) ((flags) != 0) + +/* Return true if interrupts are currently disabled. */ +#define arch_irqs_disabled() interrupt_mask_check(INT_MEM_ERROR) + +/* Save whether interrupts are currently disabled. */ +#define arch_local_save_flags() arch_irqs_disabled() + +/* Save whether interrupts are currently disabled, then disable them. */ +#define arch_local_irq_save() ({ \ + unsigned long __flags = arch_local_save_flags(); \ + arch_local_irq_disable(); \ + __flags; }) + +/* Prevent the given interrupt from being enabled next time we enable irqs. */ +#define arch_local_irq_mask(interrupt) \ + (__get_cpu_var(interrupts_enabled_mask) &= ~INT_MASK(interrupt)) + +/* Prevent the given interrupt from being enabled immediately. */ +#define arch_local_irq_mask_now(interrupt) do { \ + arch_local_irq_mask(interrupt); \ + interrupt_mask_set(interrupt); \ +} while (0) + +/* Allow the given interrupt to be enabled next time we enable irqs. */ +#define arch_local_irq_unmask(interrupt) \ + (__get_cpu_var(interrupts_enabled_mask) |= INT_MASK(interrupt)) + +/* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */ +#define arch_local_irq_unmask_now(interrupt) do { \ + arch_local_irq_unmask(interrupt); \ + if (!irqs_disabled()) \ + interrupt_mask_reset(interrupt); \ +} while (0) + +#else /* __ASSEMBLY__ */ + +/* We provide a somewhat more restricted set for assembly. */ + +#ifdef __tilegx__ + +#if INT_MEM_ERROR != 0 +# error Fix IRQ_DISABLED() macro +#endif + +/* Return 0 or 1 to indicate whether interrupts are currently disabled. */ +#define IRQS_DISABLED(tmp) \ + mfspr tmp, SPR_INTERRUPT_MASK_K; \ + andi tmp, tmp, 1 + +/* Load up a pointer to &interrupts_enabled_mask. */ +#define GET_INTERRUPTS_ENABLED_MASK_PTR(reg) \ + moveli reg, hw2_last(interrupts_enabled_mask); \ + shl16insli reg, reg, hw1(interrupts_enabled_mask); \ + shl16insli reg, reg, hw0(interrupts_enabled_mask); \ + add reg, reg, tp + +/* Disable interrupts. */ +#define IRQ_DISABLE(tmp0, tmp1) \ + moveli tmp0, hw2_last(LINUX_MASKABLE_INTERRUPTS); \ + shl16insli tmp0, tmp0, hw1(LINUX_MASKABLE_INTERRUPTS); \ + shl16insli tmp0, tmp0, hw0(LINUX_MASKABLE_INTERRUPTS); \ + mtspr SPR_INTERRUPT_MASK_SET_K, tmp0 + +/* Disable ALL synchronous interrupts (used by NMI entry). */ +#define IRQ_DISABLE_ALL(tmp) \ + movei tmp, -1; \ + mtspr SPR_INTERRUPT_MASK_SET_K, tmp + +/* Enable interrupts. */ +#define IRQ_ENABLE(tmp0, tmp1) \ + GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \ + ld tmp0, tmp0; \ + mtspr SPR_INTERRUPT_MASK_RESET_K, tmp0 + +#else /* !__tilegx__ */ + +/* + * Return 0 or 1 to indicate whether interrupts are currently disabled. + * Note that it's important that we use a bit from the "low" mask word, + * since when we are enabling, that is the word we write first, so if we + * are interrupted after only writing half of the mask, the interrupt + * handler will correctly observe that we have interrupts enabled, and + * will enable interrupts itself on return from the interrupt handler + * (making the original code's write of the "high" mask word idempotent). + */ +#define IRQS_DISABLED(tmp) \ + mfspr tmp, SPR_INTERRUPT_MASK_K_0; \ + shri tmp, tmp, INT_MEM_ERROR; \ + andi tmp, tmp, 1 + +/* Load up a pointer to &interrupts_enabled_mask. */ +#define GET_INTERRUPTS_ENABLED_MASK_PTR(reg) \ + moveli reg, lo16(interrupts_enabled_mask); \ + auli reg, reg, ha16(interrupts_enabled_mask); \ + add reg, reg, tp + +/* Disable interrupts. */ +#define IRQ_DISABLE(tmp0, tmp1) \ + { \ + movei tmp0, -1; \ + moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI) \ + }; \ + { \ + mtspr SPR_INTERRUPT_MASK_SET_K_0, tmp0; \ + auli tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS_HI) \ + }; \ + mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp1 + +/* Disable ALL synchronous interrupts (used by NMI entry). */ +#define IRQ_DISABLE_ALL(tmp) \ + movei tmp, -1; \ + mtspr SPR_INTERRUPT_MASK_SET_K_0, tmp; \ + mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp + +/* Enable interrupts. */ +#define IRQ_ENABLE(tmp0, tmp1) \ + GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \ + { \ + lw tmp0, tmp0; \ + addi tmp1, tmp0, 4 \ + }; \ + lw tmp1, tmp1; \ + mtspr SPR_INTERRUPT_MASK_RESET_K_0, tmp0; \ + mtspr SPR_INTERRUPT_MASK_RESET_K_1, tmp1 +#endif + +/* + * Do the CPU's IRQ-state tracing from assembly code. We call a + * C function, but almost everywhere we do, we don't mind clobbering + * all the caller-saved registers. + */ +#ifdef CONFIG_TRACE_IRQFLAGS +# define TRACE_IRQS_ON jal trace_hardirqs_on +# define TRACE_IRQS_OFF jal trace_hardirqs_off +#else +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_TILE_IRQFLAGS_H */ diff --git a/arch/tile/include/asm/kexec.h b/arch/tile/include/asm/kexec.h new file mode 100644 index 00000000..c11a6cc7 --- /dev/null +++ b/arch/tile/include/asm/kexec.h @@ -0,0 +1,53 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * based on kexec.h from other architectures in linux-2.6.18 + */ + +#ifndef _ASM_TILE_KEXEC_H +#define _ASM_TILE_KEXEC_H + +#include <asm/page.h> + +/* Maximum physical address we can use pages from. */ +#define KEXEC_SOURCE_MEMORY_LIMIT TASK_SIZE +/* Maximum address we can reach in physical address mode. */ +#define KEXEC_DESTINATION_MEMORY_LIMIT TASK_SIZE +/* Maximum address we can use for the control code buffer. */ +#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE + +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE + +/* + * We don't bother to provide a unique identifier, since we can only + * reboot with a single type of kernel image anyway. + */ +#define KEXEC_ARCH KEXEC_ARCH_DEFAULT + +/* Use the tile override for the page allocator. */ +struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order); +#define kimage_alloc_pages_arch kimage_alloc_pages_arch + +#define MAX_NOTE_BYTES 1024 + +/* Defined in arch/tile/kernel/relocate_kernel.S */ +extern const unsigned char relocate_new_kernel[]; +extern const unsigned long relocate_new_kernel_size; +extern void relocate_new_kernel_end(void); + +/* Provide a dummy definition to avoid build failures. */ +static inline void crash_setup_regs(struct pt_regs *n, struct pt_regs *o) +{ +} + +#endif /* _ASM_TILE_KEXEC_H */ diff --git a/arch/tile/include/asm/kmap_types.h b/arch/tile/include/asm/kmap_types.h new file mode 100644 index 00000000..3d0f2024 --- /dev/null +++ b/arch/tile/include/asm/kmap_types.h @@ -0,0 +1,57 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_KMAP_TYPES_H +#define _ASM_TILE_KMAP_TYPES_H + +/* + * In 32-bit TILE Linux we have to balance the desire to have a lot of + * nested atomic mappings with the fact that large page sizes and many + * processors chew up address space quickly. In a typical + * 64-processor, 64KB-page layout build, making KM_TYPE_NR one larger + * adds 4MB of required address-space. For now we leave KM_TYPE_NR + * set to depth 8. + */ +enum km_type { + KM_TYPE_NR = 8 +}; + +/* + * We provide dummy definitions of all the stray values that used to be + * required for kmap_atomic() and no longer are. + */ +enum { + KM_BOUNCE_READ, + KM_SKB_SUNRPC_DATA, + KM_SKB_DATA_SOFTIRQ, + KM_USER0, + KM_USER1, + KM_BIO_SRC_IRQ, + KM_BIO_DST_IRQ, + KM_PTE0, + KM_PTE1, + KM_IRQ0, + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, + KM_SYNC_ICACHE, + KM_SYNC_DCACHE, + KM_UML_USERCOPY, + KM_IRQ_PTE, + KM_NMI, + KM_NMI_PTE, + KM_KDB +}; + +#endif /* _ASM_TILE_KMAP_TYPES_H */ diff --git a/arch/tile/include/asm/linkage.h b/arch/tile/include/asm/linkage.h new file mode 100644 index 00000000..e121c397 --- /dev/null +++ b/arch/tile/include/asm/linkage.h @@ -0,0 +1,51 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_LINKAGE_H +#define _ASM_TILE_LINKAGE_H + +#include <feedback.h> + +#define __ALIGN .align 8 + +/* + * The STD_ENTRY and STD_ENDPROC macros put the function in a + * self-named .text.foo section, and if linker feedback collection + * is enabled, add a suitable call to the feedback collection code. + * STD_ENTRY_SECTION lets you specify a non-standard section name. + */ + +#define STD_ENTRY(name) \ + .pushsection .text.##name, "ax"; \ + ENTRY(name); \ + FEEDBACK_ENTER(name) + +#define STD_ENTRY_SECTION(name, section) \ + .pushsection section, "ax"; \ + ENTRY(name); \ + FEEDBACK_ENTER_EXPLICIT(name, section, .Lend_##name - name) + +#define STD_ENDPROC(name) \ + ENDPROC(name); \ + .Lend_##name:; \ + .popsection + +/* Create a file-static function entry set up for feedback gathering. */ +#define STD_ENTRY_LOCAL(name) \ + .pushsection .text.##name, "ax"; \ + ALIGN; \ + name:; \ + FEEDBACK_ENTER(name) + +#endif /* _ASM_TILE_LINKAGE_H */ diff --git a/arch/tile/include/asm/memprof.h b/arch/tile/include/asm/memprof.h new file mode 100644 index 00000000..359949be --- /dev/null +++ b/arch/tile/include/asm/memprof.h @@ -0,0 +1,33 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * The hypervisor's memory controller profiling infrastructure allows + * the programmer to find out what fraction of the available memory + * bandwidth is being consumed at each memory controller. The + * profiler provides start, stop, and clear operations to allows + * profiling over a specific time window, as well as an interface for + * reading the most recent profile values. + * + * This header declares IOCTL codes necessary to control memprof. + */ +#ifndef _ASM_TILE_MEMPROF_H +#define _ASM_TILE_MEMPROF_H + +#include <linux/ioctl.h> + +#define MEMPROF_IOCTL_TYPE 0xB4 +#define MEMPROF_IOCTL_START _IO(MEMPROF_IOCTL_TYPE, 0) +#define MEMPROF_IOCTL_STOP _IO(MEMPROF_IOCTL_TYPE, 1) +#define MEMPROF_IOCTL_CLEAR _IO(MEMPROF_IOCTL_TYPE, 2) + +#endif /* _ASM_TILE_MEMPROF_H */ diff --git a/arch/tile/include/asm/mman.h b/arch/tile/include/asm/mman.h new file mode 100644 index 00000000..81b8fc34 --- /dev/null +++ b/arch/tile/include/asm/mman.h @@ -0,0 +1,41 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_MMAN_H +#define _ASM_TILE_MMAN_H + +#include <asm-generic/mman-common.h> +#include <arch/chip.h> + +/* Standard Linux flags */ + +#define MAP_POPULATE 0x0040 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x0080 /* do not block on IO */ +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_STACK MAP_GROWSDOWN /* provide convenience alias */ +#define MAP_LOCKED 0x0200 /* pages are locked */ +#define MAP_NORESERVE 0x0400 /* don't check for reservations */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_HUGETLB 0x4000 /* create a huge page mapping */ + + +/* + * Flags for mlockall + */ +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ + + +#endif /* _ASM_TILE_MMAN_H */ diff --git a/arch/tile/include/asm/mmu.h b/arch/tile/include/asm/mmu.h new file mode 100644 index 00000000..92f94c77 --- /dev/null +++ b/arch/tile/include/asm/mmu.h @@ -0,0 +1,31 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_MMU_H +#define _ASM_TILE_MMU_H + +/* Capture any arch- and mm-specific information. */ +struct mm_context { + /* + * Written under the mmap_sem semaphore; read without the + * semaphore but atomically, but it is conservatively set. + */ + unsigned int priority_cached; +}; + +typedef struct mm_context mm_context_t; + +void leave_mm(int cpu); + +#endif /* _ASM_TILE_MMU_H */ diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h new file mode 100644 index 00000000..15fb2464 --- /dev/null +++ b/arch/tile/include/asm/mmu_context.h @@ -0,0 +1,131 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_MMU_CONTEXT_H +#define _ASM_TILE_MMU_CONTEXT_H + +#include <linux/smp.h> +#include <asm/setup.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/homecache.h> +#include <asm-generic/mm_hooks.h> + +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + return 0; +} + +/* Note that arch/tile/kernel/head.S also calls hv_install_context() */ +static inline void __install_page_table(pgd_t *pgdir, int asid, pgprot_t prot) +{ + /* FIXME: DIRECTIO should not always be set. FIXME. */ + int rc = hv_install_context(__pa(pgdir), prot, asid, HV_CTX_DIRECTIO); + if (rc < 0) + panic("hv_install_context failed: %d", rc); +} + +static inline void install_page_table(pgd_t *pgdir, int asid) +{ + pte_t *ptep = virt_to_pte(NULL, (unsigned long)pgdir); + __install_page_table(pgdir, asid, *ptep); +} + +/* + * "Lazy" TLB mode is entered when we are switching to a kernel task, + * which borrows the mm of the previous task. The goal of this + * optimization is to avoid having to install a new page table. On + * early x86 machines (where the concept originated) you couldn't do + * anything short of a full page table install for invalidation, so + * handling a remote TLB invalidate required doing a page table + * re-install. Someone clearly decided that it was silly to keep + * doing this while in "lazy" TLB mode, so the optimization involves + * installing the swapper page table instead the first time one + * occurs, and clearing the cpu out of cpu_vm_mask, so the cpu running + * the kernel task doesn't need to take any more interrupts. At that + * point it's then necessary to explicitly reinstall it when context + * switching back to the original mm. + * + * On Tile, we have to do a page-table install whenever DMA is enabled, + * so in that case lazy mode doesn't help anyway. And more generally, + * we have efficient per-page TLB shootdown, and don't expect to spend + * that much time in kernel tasks in general, so just leaving the + * kernel task borrowing the old page table, but handling TLB + * shootdowns, is a reasonable thing to do. And importantly, this + * lets us use the hypervisor's internal APIs for TLB shootdown, which + * means we don't have to worry about having TLB shootdowns blocked + * when Linux is disabling interrupts; see the page migration code for + * an example of where it's important for TLB shootdowns to complete + * even when interrupts are disabled at the Linux level. + */ +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *t) +{ +#if CHIP_HAS_TILE_DMA() + /* + * We have to do an "identity" page table switch in order to + * clear any pending DMA interrupts. + */ + if (current->thread.tile_dma_state.enabled) + install_page_table(mm->pgd, __get_cpu_var(current_asid)); +#endif +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + if (likely(prev != next)) { + + int cpu = smp_processor_id(); + + /* Pick new ASID. */ + int asid = __get_cpu_var(current_asid) + 1; + if (asid > max_asid) { + asid = min_asid; + local_flush_tlb(); + } + __get_cpu_var(current_asid) = asid; + + /* Clear cpu from the old mm, and set it in the new one. */ + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); + + /* Re-load page tables */ + install_page_table(next->pgd, asid); + + /* See how we should set the red/black cache info */ + check_mm_caching(prev, next); + + /* + * Since we're changing to a new mm, we have to flush + * the icache in case some physical page now being mapped + * has subsequently been repurposed and has new code. + */ + __flush_icache(); + + } +} + +static inline void activate_mm(struct mm_struct *prev_mm, + struct mm_struct *next_mm) +{ + switch_mm(prev_mm, next_mm, NULL); +} + +#define destroy_context(mm) do { } while (0) +#define deactivate_mm(tsk, mm) do { } while (0) + +#endif /* _ASM_TILE_MMU_CONTEXT_H */ diff --git a/arch/tile/include/asm/mmzone.h b/arch/tile/include/asm/mmzone.h new file mode 100644 index 00000000..9d3dbce8 --- /dev/null +++ b/arch/tile/include/asm/mmzone.h @@ -0,0 +1,70 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_MMZONE_H +#define _ASM_TILE_MMZONE_H + +extern struct pglist_data node_data[]; +#define NODE_DATA(nid) (&node_data[nid]) + +extern void get_memcfg_numa(void); + +#ifdef CONFIG_DISCONTIGMEM + +#include <asm/page.h> + +/* + * Generally, memory ranges are always doled out by the hypervisor in + * fixed-size, power-of-two increments. That would make computing the node + * very easy. We could just take a couple high bits of the PA, which + * denote the memory shim, and we'd be done. However, when we're doing + * memory striping, this may not be true; PAs with different high bit + * values might be in the same node. Thus, we keep a lookup table to + * translate the high bits of the PFN to the node number. + */ +extern int highbits_to_node[]; + +static inline int pfn_to_nid(unsigned long pfn) +{ + return highbits_to_node[__pfn_to_highbits(pfn)]; +} + +#define kern_addr_valid(kaddr) virt_addr_valid((void *)kaddr) + +static inline int pfn_valid(int pfn) +{ + int nid = pfn_to_nid(pfn); + + if (nid >= 0) + return (pfn < node_end_pfn(nid)); + return 0; +} + +/* Information on the NUMA nodes that we compute early */ +extern unsigned long node_start_pfn[]; +extern unsigned long node_end_pfn[]; +extern unsigned long node_memmap_pfn[]; +extern unsigned long node_percpu_pfn[]; +extern unsigned long node_free_pfn[]; +#ifdef CONFIG_HIGHMEM +extern unsigned long node_lowmem_end_pfn[]; +#endif +#ifdef CONFIG_PCI +extern unsigned long pci_reserve_start_pfn; +extern unsigned long pci_reserve_end_pfn; +#endif + +#endif /* CONFIG_DISCONTIGMEM */ + +#endif /* _ASM_TILE_MMZONE_H */ diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h new file mode 100644 index 00000000..db93518f --- /dev/null +++ b/arch/tile/include/asm/page.h @@ -0,0 +1,336 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_PAGE_H +#define _ASM_TILE_PAGE_H + +#include <linux/const.h> +#include <hv/hypervisor.h> +#include <arch/chip.h> + +/* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */ +#define PAGE_SHIFT HV_LOG2_PAGE_SIZE_SMALL +#define HPAGE_SHIFT HV_LOG2_PAGE_SIZE_LARGE + +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) + +#define PAGE_MASK (~(PAGE_SIZE - 1)) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) + +/* + * If the Kconfig doesn't specify, set a maximum zone order that + * is enough so that we can create huge pages from small pages given + * the respective sizes of the two page types. See <linux/mmzone.h>. + */ +#ifndef CONFIG_FORCE_MAX_ZONEORDER +#define CONFIG_FORCE_MAX_ZONEORDER (HPAGE_SHIFT - PAGE_SHIFT + 1) +#endif + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> +#include <linux/string.h> + +struct page; + +static inline void clear_page(void *page) +{ + memset(page, 0, PAGE_SIZE); +} + +static inline void copy_page(void *to, void *from) +{ + memcpy(to, from, PAGE_SIZE); +} + +static inline void clear_user_page(void *page, unsigned long vaddr, + struct page *pg) +{ + clear_page(page); +} + +static inline void copy_user_page(void *to, void *from, unsigned long vaddr, + struct page *topage) +{ + copy_page(to, from); +} + +/* + * Hypervisor page tables are made of the same basic structure. + */ + +typedef HV_PTE pte_t; +typedef HV_PTE pgd_t; +typedef HV_PTE pgprot_t; + +/* + * User L2 page tables are managed as one L2 page table per page, + * because we use the page allocator for them. This keeps the allocation + * simple and makes it potentially useful to implement HIGHPTE at some point. + * However, it's also inefficient, since L2 page tables are much smaller + * than pages (currently 2KB vs 64KB). So we should revisit this. + */ +typedef struct page *pgtable_t; + +/* Must be a macro since it is used to create constants. */ +#define __pgprot(val) hv_pte(val) + +/* Rarely-used initializers, typically with a "zero" value. */ +#define __pte(x) hv_pte(x) +#define __pgd(x) hv_pte(x) + +static inline u64 pgprot_val(pgprot_t pgprot) +{ + return hv_pte_val(pgprot); +} + +static inline u64 pte_val(pte_t pte) +{ + return hv_pte_val(pte); +} + +static inline u64 pgd_val(pgd_t pgd) +{ + return hv_pte_val(pgd); +} + +#ifdef __tilegx__ + +typedef HV_PTE pmd_t; + +#define __pmd(x) hv_pte(x) + +static inline u64 pmd_val(pmd_t pmd) +{ + return hv_pte_val(pmd); +} + +#endif + +static inline __attribute_const__ int get_order(unsigned long size) +{ + return BITS_PER_LONG - __builtin_clzl((size - 1) >> PAGE_SHIFT); +} + +#endif /* !__ASSEMBLY__ */ + +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +#define HUGE_MAX_HSTATE 2 + +#ifdef CONFIG_HUGETLB_PAGE +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#endif + +/* Each memory controller has PAs distinct in their high bits. */ +#define NR_PA_HIGHBIT_SHIFT (CHIP_PA_WIDTH() - CHIP_LOG_NUM_MSHIMS()) +#define NR_PA_HIGHBIT_VALUES (1 << CHIP_LOG_NUM_MSHIMS()) +#define __pa_to_highbits(pa) ((phys_addr_t)(pa) >> NR_PA_HIGHBIT_SHIFT) +#define __pfn_to_highbits(pfn) ((pfn) >> (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT)) + +#ifdef __tilegx__ + +/* + * We reserve the lower half of memory for user-space programs, and the + * upper half for system code. We re-map all of physical memory in the + * upper half, which takes a quarter of our VA space. Then we have + * the vmalloc regions. The supervisor code lives at 0xfffffff700000000, + * with the hypervisor above that. + * + * Loadable kernel modules are placed immediately after the static + * supervisor code, with each being allocated a 256MB region of + * address space, so we don't have to worry about the range of "jal" + * and other branch instructions. + * + * For now we keep life simple and just allocate one pmd (4GB) for vmalloc. + * Similarly, for now we don't play any struct page mapping games. + */ + +#if CHIP_PA_WIDTH() + 2 > CHIP_VA_WIDTH() +# error Too much PA to map with the VA available! +#endif +#define HALF_VA_SPACE (_AC(1, UL) << (CHIP_VA_WIDTH() - 1)) + +#define MEM_LOW_END (HALF_VA_SPACE - 1) /* low half */ +#define MEM_HIGH_START (-HALF_VA_SPACE) /* high half */ +#define PAGE_OFFSET MEM_HIGH_START +#define _VMALLOC_START _AC(0xfffffff500000000, UL) /* 4 GB */ +#define HUGE_VMAP_BASE _AC(0xfffffff600000000, UL) /* 4 GB */ +#define MEM_SV_START _AC(0xfffffff700000000, UL) /* 256 MB */ +#define MEM_SV_INTRPT MEM_SV_START +#define MEM_MODULE_START _AC(0xfffffff710000000, UL) /* 256 MB */ +#define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024)) +#define MEM_HV_START _AC(0xfffffff800000000, UL) /* 32 GB */ + +/* Highest DTLB address we will use */ +#define KERNEL_HIGH_VADDR MEM_SV_START + +/* Since we don't currently provide any fixmaps, we use an impossible VA. */ +#define FIXADDR_TOP MEM_HV_START + +#else /* !__tilegx__ */ + +/* + * A PAGE_OFFSET of 0xC0000000 means that the kernel has + * a virtual address space of one gigabyte, which limits the + * amount of physical memory you can use to about 768MB. + * If you want more physical memory than this then see the CONFIG_HIGHMEM + * option in the kernel configuration. + * + * The top 16MB chunk in the table below is unavailable to Linux. Since + * the kernel interrupt vectors must live at ether 0xfe000000 or 0xfd000000 + * (depending on whether the kernel is at PL2 or Pl1), we map all of the + * bottom of RAM at this address with a huge page table entry to minimize + * its ITLB footprint (as well as at PAGE_OFFSET). The last architected + * requirement is that user interrupt vectors live at 0xfc000000, so we + * make that range of memory available to user processes. The remaining + * regions are sized as shown; the first four addresses use the PL 1 + * values, and after that, we show "typical" values, since the actual + * addresses depend on kernel #defines. + * + * MEM_HV_INTRPT 0xfe000000 + * MEM_SV_INTRPT (kernel code) 0xfd000000 + * MEM_USER_INTRPT (user vector) 0xfc000000 + * FIX_KMAP_xxx 0xf8000000 (via NR_CPUS * KM_TYPE_NR) + * PKMAP_BASE 0xf7000000 (via LAST_PKMAP) + * HUGE_VMAP 0xf3000000 (via CONFIG_NR_HUGE_VMAPS) + * VMALLOC_START 0xf0000000 (via __VMALLOC_RESERVE) + * mapped LOWMEM 0xc0000000 + */ + +#define MEM_USER_INTRPT _AC(0xfc000000, UL) +#if CONFIG_KERNEL_PL == 1 +#define MEM_SV_INTRPT _AC(0xfd000000, UL) +#define MEM_HV_INTRPT _AC(0xfe000000, UL) +#else +#define MEM_GUEST_INTRPT _AC(0xfd000000, UL) +#define MEM_SV_INTRPT _AC(0xfe000000, UL) +#define MEM_HV_INTRPT _AC(0xff000000, UL) +#endif + +#define INTRPT_SIZE 0x4000 + +/* Tolerate page size larger than the architecture interrupt region size. */ +#if PAGE_SIZE > INTRPT_SIZE +#undef INTRPT_SIZE +#define INTRPT_SIZE PAGE_SIZE +#endif + +#define KERNEL_HIGH_VADDR MEM_USER_INTRPT +#define FIXADDR_TOP (KERNEL_HIGH_VADDR - PAGE_SIZE) + +#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) + +/* On 32-bit architectures we mix kernel modules in with other vmaps. */ +#define MEM_MODULE_START VMALLOC_START +#define MEM_MODULE_END VMALLOC_END + +#endif /* __tilegx__ */ + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_HIGHMEM + +/* Map kernel virtual addresses to page frames, in HPAGE_SIZE chunks. */ +extern unsigned long pbase_map[]; +extern void *vbase_map[]; + +static inline unsigned long kaddr_to_pfn(const volatile void *_kaddr) +{ + unsigned long kaddr = (unsigned long)_kaddr; + return pbase_map[kaddr >> HPAGE_SHIFT] + + ((kaddr & (HPAGE_SIZE - 1)) >> PAGE_SHIFT); +} + +static inline void *pfn_to_kaddr(unsigned long pfn) +{ + return vbase_map[__pfn_to_highbits(pfn)] + (pfn << PAGE_SHIFT); +} + +static inline phys_addr_t virt_to_phys(const volatile void *kaddr) +{ + unsigned long pfn = kaddr_to_pfn(kaddr); + return ((phys_addr_t)pfn << PAGE_SHIFT) + + ((unsigned long)kaddr & (PAGE_SIZE-1)); +} + +static inline void *phys_to_virt(phys_addr_t paddr) +{ + return pfn_to_kaddr(paddr >> PAGE_SHIFT) + (paddr & (PAGE_SIZE-1)); +} + +/* With HIGHMEM, we pack PAGE_OFFSET through high_memory with all valid VAs. */ +static inline int virt_addr_valid(const volatile void *kaddr) +{ + extern void *high_memory; /* copied from <linux/mm.h> */ + return ((unsigned long)kaddr >= PAGE_OFFSET && kaddr < high_memory); +} + +#else /* !CONFIG_HIGHMEM */ + +static inline unsigned long kaddr_to_pfn(const volatile void *kaddr) +{ + return ((unsigned long)kaddr - PAGE_OFFSET) >> PAGE_SHIFT; +} + +static inline void *pfn_to_kaddr(unsigned long pfn) +{ + return (void *)((pfn << PAGE_SHIFT) + PAGE_OFFSET); +} + +static inline phys_addr_t virt_to_phys(const volatile void *kaddr) +{ + return (phys_addr_t)((unsigned long)kaddr - PAGE_OFFSET); +} + +static inline void *phys_to_virt(phys_addr_t paddr) +{ + return (void *)((unsigned long)paddr + PAGE_OFFSET); +} + +/* Check that the given address is within some mapped range of PAs. */ +#define virt_addr_valid(kaddr) pfn_valid(kaddr_to_pfn(kaddr)) + +#endif /* !CONFIG_HIGHMEM */ + +/* All callers are not consistent in how they call these functions. */ +#define __pa(kaddr) virt_to_phys((void *)(unsigned long)(kaddr)) +#define __va(paddr) phys_to_virt((phys_addr_t)(paddr)) + +extern int devmem_is_allowed(unsigned long pagenr); + +#ifdef CONFIG_FLATMEM +static inline int pfn_valid(unsigned long pfn) +{ + return pfn < max_mapnr; +} +#endif + +/* Provide as macros since these require some other headers included. */ +#define page_to_pa(page) ((phys_addr_t)(page_to_pfn(page)) << PAGE_SHIFT) +#define virt_to_page(kaddr) pfn_to_page(kaddr_to_pfn((void *)(kaddr))) +#define page_to_virt(page) pfn_to_kaddr(page_to_pfn(page)) + +struct mm_struct; +extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); + +#endif /* !__ASSEMBLY__ */ + +#define VM_DATA_DEFAULT_FLAGS \ + (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#include <asm-generic/memory_model.h> + +#endif /* _ASM_TILE_PAGE_H */ diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h new file mode 100644 index 00000000..32e6cbe8 --- /dev/null +++ b/arch/tile/include/asm/pci.h @@ -0,0 +1,97 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_PCI_H +#define _ASM_TILE_PCI_H + +#include <linux/pci.h> +#include <asm-generic/pci_iomap.h> + +/* + * Structure of a PCI controller (host bridge) + */ +struct pci_controller { + int index; /* PCI domain number */ + struct pci_bus *root_bus; + + int first_busno; + int last_busno; + + int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */ + int hv_mem_fd; /* fd to Hypervisor for MMIO operations */ + + struct pci_ops *ops; + + int irq_base; /* Base IRQ from the Hypervisor */ + int plx_gen1; /* flag for PLX Gen 1 configuration */ + + /* Address ranges that are routed to this controller/bridge. */ + struct resource mem_resources[3]; +}; + +/* + * The hypervisor maps the entirety of CPA-space as bus addresses, so + * bus addresses are physical addresses. The networking and block + * device layers use this boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS 1 + +int __init tile_pci_init(void); +int __init pcibios_init(void); + +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} + +void __devinit pcibios_fixup_bus(struct pci_bus *bus); + +#define TILE_NUM_PCIE 2 + +#define pci_domain_nr(bus) (((struct pci_controller *)(bus)->sysdata)->index) + +/* + * This decides whether to display the domain number in /proc. + */ +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return 1; +} + +/* + * pcibios_assign_all_busses() tells whether or not the bus numbers + * should be reassigned, in case the BIOS didn't do it correctly, or + * in case we don't have a BIOS and we want to let Linux do it. + */ +static inline int pcibios_assign_all_busses(void) +{ + return 1; +} + +#define PCIBIOS_MIN_MEM 0 +#define PCIBIOS_MIN_IO 0 + +/* + * This flag tells if the platform is TILEmpower that needs + * special configuration for the PLX switch chip. + */ +extern int tile_plx_gen1; + +/* Use any cpu for PCI. */ +#define cpumask_of_pcibus(bus) cpu_online_mask + +/* implement the pci_ DMA API in terms of the generic device dma_ one */ +#include <asm-generic/pci-dma-compat.h> + +/* generic pci stuff */ +#include <asm-generic/pci.h> + +#endif /* _ASM_TILE_PCI_H */ diff --git a/arch/tile/include/asm/percpu.h b/arch/tile/include/asm/percpu.h new file mode 100644 index 00000000..63294f5a --- /dev/null +++ b/arch/tile/include/asm/percpu.h @@ -0,0 +1,24 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_PERCPU_H +#define _ASM_TILE_PERCPU_H + +register unsigned long __my_cpu_offset __asm__("tp"); +#define __my_cpu_offset __my_cpu_offset +#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp)) + +#include <asm-generic/percpu.h> + +#endif /* _ASM_TILE_PERCPU_H */ diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h new file mode 100644 index 00000000..e919c0bd --- /dev/null +++ b/arch/tile/include/asm/pgalloc.h @@ -0,0 +1,122 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_PGALLOC_H +#define _ASM_TILE_PGALLOC_H + +#include <linux/threads.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <asm/fixmap.h> +#include <hv/hypervisor.h> + +/* Bits for the size of the second-level page table. */ +#define L2_KERNEL_PGTABLE_SHIFT \ + (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL + HV_LOG2_PTE_SIZE) + +/* We currently allocate user L2 page tables by page (unlike kernel L2s). */ +#if L2_KERNEL_PGTABLE_SHIFT < HV_LOG2_PAGE_SIZE_SMALL +#define L2_USER_PGTABLE_SHIFT HV_LOG2_PAGE_SIZE_SMALL +#else +#define L2_USER_PGTABLE_SHIFT L2_KERNEL_PGTABLE_SHIFT +#endif + +/* How many pages do we need, as an "order", for a user L2 page table? */ +#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - HV_LOG2_PAGE_SIZE_SMALL) + +/* How big is a kernel L2 page table? */ +#define L2_KERNEL_PGTABLE_SIZE (1 << L2_KERNEL_PGTABLE_SHIFT) + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) +{ +#ifdef CONFIG_64BIT + set_pte(pmdp, pmd); +#else + set_pte(&pmdp->pud.pgd, pmd.pud.pgd); +#endif +} + +static inline void pmd_populate_kernel(struct mm_struct *mm, + pmd_t *pmd, pte_t *ptep) +{ + set_pmd(pmd, ptfn_pmd(__pa(ptep) >> HV_LOG2_PAGE_TABLE_ALIGN, + __pgprot(_PAGE_PRESENT))); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, + pgtable_t page) +{ + set_pmd(pmd, ptfn_pmd(HV_PFN_TO_PTFN(page_to_pfn(page)), + __pgprot(_PAGE_PRESENT))); +} + +/* + * Allocate and free page tables. + */ + +extern pgd_t *pgd_alloc(struct mm_struct *mm); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); + +extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address); +extern void pte_free(struct mm_struct *mm, struct page *pte); + +#define pmd_pgtable(pmd) pmd_page(pmd) + +static inline pte_t * +pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +{ + return pfn_to_kaddr(page_to_pfn(pte_alloc_one(mm, address))); +} + +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); + pte_free(mm, virt_to_page(pte)); +} + +extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, + unsigned long address); + +#define check_pgt_cache() do { } while (0) + +/* + * Get the small-page pte_t lowmem entry for a given pfn. + * This may or may not be in use, depending on whether the initial + * huge-page entry for the page has already been shattered. + */ +pte_t *get_prealloc_pte(unsigned long pfn); + +/* During init, we can shatter kernel huge pages if needed. */ +void shatter_pmd(pmd_t *pmd); + +/* After init, a more complex technique is required. */ +void shatter_huge_page(unsigned long addr); + +#ifdef __tilegx__ +/* We share a single page allocator for both L1 and L2 page tables. */ +#if HV_L1_SIZE != HV_L2_SIZE +# error Rework assumption that L1 and L2 page tables are same size. +#endif +#define L1_USER_PGTABLE_ORDER L2_USER_PGTABLE_ORDER +#define pud_populate(mm, pud, pmd) \ + pmd_populate_kernel((mm), (pmd_t *)(pud), (pte_t *)(pmd)) +#define pmd_alloc_one(mm, addr) \ + ((pmd_t *)page_to_virt(pte_alloc_one((mm), (addr)))) +#define pmd_free(mm, pmdp) \ + pte_free((mm), virt_to_page(pmdp)) +#define __pmd_free_tlb(tlb, pmdp, address) \ + __pte_free_tlb((tlb), virt_to_page(pmdp), (address)) +#endif + +#endif /* _ASM_TILE_PGALLOC_H */ diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h new file mode 100644 index 00000000..67490910 --- /dev/null +++ b/arch/tile/include/asm/pgtable.h @@ -0,0 +1,465 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * This file contains the functions and defines necessary to modify and use + * the TILE page table tree. + */ + +#ifndef _ASM_TILE_PGTABLE_H +#define _ASM_TILE_PGTABLE_H + +#include <hv/hypervisor.h> + +#ifndef __ASSEMBLY__ + +#include <linux/bitops.h> +#include <linux/threads.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <asm/processor.h> +#include <asm/fixmap.h> + +struct mm_struct; +struct vm_area_struct; + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +extern pgd_t swapper_pg_dir[]; +extern pgprot_t swapper_pgprot; +extern struct kmem_cache *pgd_cache; +extern spinlock_t pgd_lock; +extern struct list_head pgd_list; + +/* + * The very last slots in the pgd_t are for addresses unusable by Linux + * (pgd_addr_invalid() returns true). So we use them for the list structure. + * The x86 code we are modelled on uses the page->private/index fields + * (older 2.6 kernels) or the lru list (newer 2.6 kernels), but since + * our pgds are so much smaller than a page, it seems a waste to + * spend a whole page on each pgd. + */ +#define PGD_LIST_OFFSET \ + ((PTRS_PER_PGD * sizeof(pgd_t)) - sizeof(struct list_head)) +#define pgd_to_list(pgd) \ + ((struct list_head *)((char *)(pgd) + PGD_LIST_OFFSET)) +#define list_to_pgd(list) \ + ((pgd_t *)((char *)(list) - PGD_LIST_OFFSET)) + +extern void pgtable_cache_init(void); +extern void paging_init(void); +extern void set_page_homes(void); + +#define FIRST_USER_ADDRESS 0 + +#define _PAGE_PRESENT HV_PTE_PRESENT +#define _PAGE_HUGE_PAGE HV_PTE_PAGE +#define _PAGE_READABLE HV_PTE_READABLE +#define _PAGE_WRITABLE HV_PTE_WRITABLE +#define _PAGE_EXECUTABLE HV_PTE_EXECUTABLE +#define _PAGE_ACCESSED HV_PTE_ACCESSED +#define _PAGE_DIRTY HV_PTE_DIRTY +#define _PAGE_GLOBAL HV_PTE_GLOBAL +#define _PAGE_USER HV_PTE_USER + +/* + * All the "standard" bits. Cache-control bits are managed elsewhere. + * This is used to test for valid level-2 page table pointers by checking + * all the bits, and to mask away the cache control bits for mprotect. + */ +#define _PAGE_ALL (\ + _PAGE_PRESENT | \ + _PAGE_HUGE_PAGE | \ + _PAGE_READABLE | \ + _PAGE_WRITABLE | \ + _PAGE_EXECUTABLE | \ + _PAGE_ACCESSED | \ + _PAGE_DIRTY | \ + _PAGE_GLOBAL | \ + _PAGE_USER \ +) + +#define PAGE_NONE \ + __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED) +#define PAGE_SHARED \ + __pgprot(_PAGE_PRESENT | _PAGE_READABLE | _PAGE_WRITABLE | \ + _PAGE_USER | _PAGE_ACCESSED) + +#define PAGE_SHARED_EXEC \ + __pgprot(_PAGE_PRESENT | _PAGE_READABLE | _PAGE_WRITABLE | \ + _PAGE_EXECUTABLE | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY_NOEXEC \ + __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_READABLE) +#define PAGE_COPY_EXEC \ + __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | \ + _PAGE_READABLE | _PAGE_EXECUTABLE) +#define PAGE_COPY \ + PAGE_COPY_NOEXEC +#define PAGE_READONLY \ + __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_READABLE) +#define PAGE_READONLY_EXEC \ + __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | \ + _PAGE_READABLE | _PAGE_EXECUTABLE) + +#define _PAGE_KERNEL_RO \ + (_PAGE_PRESENT | _PAGE_GLOBAL | _PAGE_READABLE | _PAGE_ACCESSED) +#define _PAGE_KERNEL \ + (_PAGE_KERNEL_RO | _PAGE_WRITABLE | _PAGE_DIRTY) +#define _PAGE_KERNEL_EXEC (_PAGE_KERNEL_RO | _PAGE_EXECUTABLE) + +#define PAGE_KERNEL __pgprot(_PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO) +#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC) + +#define page_to_kpgprot(p) PAGE_KERNEL + +/* + * We could tighten these up, but for now writable or executable + * implies readable. + */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY /* this is write-only, which we won't support */ +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY_EXEC +#define __P101 PAGE_READONLY_EXEC +#define __P110 PAGE_COPY_EXEC +#define __P111 PAGE_COPY_EXEC + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY_EXEC +#define __S101 PAGE_READONLY_EXEC +#define __S110 PAGE_SHARED_EXEC +#define __S111 PAGE_SHARED_EXEC + +/* + * All the normal _PAGE_ALL bits are ignored for PMDs, except PAGE_PRESENT + * and PAGE_HUGE_PAGE, which must be one and zero, respectively. + * We set the ignored bits to zero. + */ +#define _PAGE_TABLE _PAGE_PRESENT + +/* Inherit the caching flags from the old protection bits. */ +#define pgprot_modify(oldprot, newprot) \ + (pgprot_t) { ((oldprot).val & ~_PAGE_ALL) | (newprot).val } + +/* Just setting the PFN to zero suffices. */ +#define pte_pgprot(x) hv_pte_set_pfn((x), 0) + +/* + * For PTEs and PDEs, we must clear the Present bit first when + * clearing a page table entry, so clear the bottom half first and + * enforce ordering with a barrier. + */ +static inline void __pte_clear(pte_t *ptep) +{ +#ifdef __tilegx__ + ptep->val = 0; +#else + u32 *tmp = (u32 *)ptep; + tmp[0] = 0; + barrier(); + tmp[1] = 0; +#endif +} +#define pte_clear(mm, addr, ptep) __pte_clear(ptep) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +#define pte_present hv_pte_get_present +#define pte_user hv_pte_get_user +#define pte_read hv_pte_get_readable +#define pte_dirty hv_pte_get_dirty +#define pte_young hv_pte_get_accessed +#define pte_write hv_pte_get_writable +#define pte_exec hv_pte_get_executable +#define pte_huge hv_pte_get_page +#define pte_rdprotect hv_pte_clear_readable +#define pte_exprotect hv_pte_clear_executable +#define pte_mkclean hv_pte_clear_dirty +#define pte_mkold hv_pte_clear_accessed +#define pte_wrprotect hv_pte_clear_writable +#define pte_mksmall hv_pte_clear_page +#define pte_mkread hv_pte_set_readable +#define pte_mkexec hv_pte_set_executable +#define pte_mkdirty hv_pte_set_dirty +#define pte_mkyoung hv_pte_set_accessed +#define pte_mkwrite hv_pte_set_writable +#define pte_mkhuge hv_pte_set_page + +#define pte_special(pte) 0 +#define pte_mkspecial(pte) (pte) + +/* + * Use some spare bits in the PTE for user-caching tags. + */ +#define pte_set_forcecache hv_pte_set_client0 +#define pte_get_forcecache hv_pte_get_client0 +#define pte_clear_forcecache hv_pte_clear_client0 +#define pte_set_anyhome hv_pte_set_client1 +#define pte_get_anyhome hv_pte_get_client1 +#define pte_clear_anyhome hv_pte_clear_client1 + +/* + * A migrating PTE has PAGE_PRESENT clear but all the other bits preserved. + */ +#define pte_migrating hv_pte_get_migrating +#define pte_mkmigrate(x) hv_pte_set_migrating(hv_pte_clear_present(x)) +#define pte_donemigrate(x) hv_pte_set_present(hv_pte_clear_migrating(x)) + +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte 0x%016llx.\n", __FILE__, __LINE__, pte_val(e)) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd 0x%016llx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* Return PA and protection info for a given kernel VA. */ +int va_to_cpa_and_pte(void *va, phys_addr_t *cpa, pte_t *pte); + +/* + * __set_pte() ensures we write the 64-bit PTE with 32-bit words in + * the right order on 32-bit platforms and also allows us to write + * hooks to check valid PTEs, etc., if we want. + */ +void __set_pte(pte_t *ptep, pte_t pte); + +/* + * set_pte() sets the given PTE and also sanity-checks the + * requested PTE against the page homecaching. Unspecified parts + * of the PTE are filled in when it is written to memory, i.e. all + * caching attributes if "!forcecache", or the home cpu if "anyhome". + */ +extern void set_pte(pte_t *ptep, pte_t pte); +#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval) +#define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval) + +#define pte_page(x) pfn_to_page(pte_pfn(x)) + +static inline int pte_none(pte_t pte) +{ + return !pte.val; +} + +static inline unsigned long pte_pfn(pte_t pte) +{ + return hv_pte_get_pfn(pte); +} + +/* Set or get the remote cache cpu in a pgprot with remote caching. */ +extern pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu); +extern int get_remote_cache_cpu(pgprot_t prot); + +static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) +{ + return hv_pte_set_pfn(prot, pfn); +} + +/* Support for priority mappings. */ +extern void start_mm_caching(struct mm_struct *mm); +extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next); + +/* + * Support non-linear file mappings (see sys_remap_file_pages). + * This is defined by CLIENT1 set but CLIENT0 and _PAGE_PRESENT clear, and the + * file offset in the 32 high bits. + */ +#define _PAGE_FILE HV_PTE_CLIENT1 +#define PTE_FILE_MAX_BITS 32 +#define pte_file(pte) (hv_pte_get_client1(pte) && !hv_pte_get_client0(pte)) +#define pte_to_pgoff(pte) ((pte).val >> 32) +#define pgoff_to_pte(off) ((pte_t) { (((long long)(off)) << 32) | _PAGE_FILE }) + +/* + * Encode and de-code a swap entry (see <linux/swapops.h>). + * We put the swap file type+offset in the 32 high bits; + * I believe we can just leave the low bits clear. + */ +#define __swp_type(swp) ((swp).val & 0x1f) +#define __swp_offset(swp) ((swp).val >> 5) +#define __swp_entry(type, off) ((swp_entry_t) { (type) | ((off) << 5) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).val >> 32 }) +#define __swp_entry_to_pte(swp) ((pte_t) { (((long long) ((swp).val)) << 32) }) + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +/* + * If we are doing an mprotect(), just accept the new vma->vm_page_prot + * value and combine it with the PFN from the old PTE to get a new PTE. + */ +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + return pfn_pte(hv_pte_get_pfn(pte), newprot); +} + +/* + * The pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] + * + * This macro returns the index of the entry in the pgd page which would + * control the given virtual address. + */ +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) + +/* + * pgd_offset() returns a (pgd_t *) + * pgd_index() is used get the offset into the pgd page's array of pgd_t's. + */ +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) + +/* + * A shortcut which implies the use of the kernel's pgd, instead + * of a process's. + */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +#if defined(CONFIG_HIGHPTE) +extern pte_t *pte_offset_map(pmd_t *, unsigned long address); +#define pte_unmap(pte) kunmap_atomic(pte) +#else +#define pte_offset_map(dir, address) pte_offset_kernel(dir, address) +#define pte_unmap(pte) do { } while (0) +#endif + +/* Clear a non-executable kernel PTE and flush it from the TLB. */ +#define kpte_clear_flush(ptep, vaddr) \ +do { \ + pte_clear(&init_mm, (vaddr), (ptep)); \ + local_flush_tlb_page(FLUSH_NONEXEC, (vaddr), PAGE_SIZE); \ +} while (0) + +/* + * The kernel page tables contain what we need, and we flush when we + * change specific page table entries. + */ +#define update_mmu_cache(vma, address, pte) do { } while (0) + +#ifdef CONFIG_FLATMEM +#define kern_addr_valid(addr) (1) +#endif /* CONFIG_FLATMEM */ + +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + +extern void vmalloc_sync_all(void); + +#endif /* !__ASSEMBLY__ */ + +#ifdef __tilegx__ +#include <asm/pgtable_64.h> +#else +#include <asm/pgtable_32.h> +#endif + +#ifndef __ASSEMBLY__ + +static inline int pmd_none(pmd_t pmd) +{ + /* + * Only check low word on 32-bit platforms, since it might be + * out of sync with upper half. + */ + return (unsigned long)pmd_val(pmd) == 0; +} + +static inline int pmd_present(pmd_t pmd) +{ + return pmd_val(pmd) & _PAGE_PRESENT; +} + +static inline int pmd_bad(pmd_t pmd) +{ + return ((pmd_val(pmd) & _PAGE_ALL) != _PAGE_TABLE); +} + +static inline unsigned long pages_to_mb(unsigned long npg) +{ + return npg >> (20 - PAGE_SHIFT); +} + +/* + * The pmd can be thought of an array like this: pmd_t[PTRS_PER_PMD] + * + * This function returns the index of the entry in the pmd which would + * control the given virtual address. + */ +static inline unsigned long pmd_index(unsigned long address) +{ + return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); +} + +/* + * A given kernel pmd_t maps to a specific virtual address (either a + * kernel huge page or a kernel pte_t table). Since kernel pte_t + * tables can be aligned at sub-page granularity, this function can + * return non-page-aligned pointers, despite its name. + */ +static inline unsigned long pmd_page_vaddr(pmd_t pmd) +{ + phys_addr_t pa = + (phys_addr_t)pmd_ptfn(pmd) << HV_LOG2_PAGE_TABLE_ALIGN; + return (unsigned long)__va(pa); +} + +/* + * A pmd_t points to the base of a huge page or to a pte_t array. + * If a pte_t array, since we can have multiple per page, we don't + * have a one-to-one mapping of pmd_t's to pages. However, this is + * OK for pte_lockptr(), since we just end up with potentially one + * lock being used for several pte_t arrays. + */ +#define pmd_page(pmd) pfn_to_page(HV_PTFN_TO_PFN(pmd_ptfn(pmd))) + +/* + * The pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] + * + * This macro returns the index of the entry in the pte page which would + * control the given virtual address. + */ +static inline unsigned long pte_index(unsigned long address) +{ + return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); +} + +static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) +{ + return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); +} + +static inline int pmd_huge_page(pmd_t pmd) +{ + return pmd_val(pmd) & _PAGE_HUGE_PAGE; +} + +#include <asm-generic/pgtable.h> + +/* Support /proc/NN/pgtable API. */ +struct seq_file; +int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm, + unsigned long vaddr, pte_t *ptep, void **datap); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_TILE_PGTABLE_H */ diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h new file mode 100644 index 00000000..9f985297 --- /dev/null +++ b/arch/tile/include/asm/pgtable_32.h @@ -0,0 +1,135 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +#ifndef _ASM_TILE_PGTABLE_32_H +#define _ASM_TILE_PGTABLE_32_H + +/* + * The level-1 index is defined by the huge page size. A PGD is composed + * of PTRS_PER_PGD pgd_t's and is the top level of the page table. + */ +#define PGDIR_SHIFT HV_LOG2_PAGE_SIZE_LARGE +#define PGDIR_SIZE HV_PAGE_SIZE_LARGE +#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) +#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t)) + +/* + * The level-2 index is defined by the difference between the huge + * page size and the normal page size. A PTE is composed of + * PTRS_PER_PTE pte_t's and is the bottom level of the page table. + * Note that the hypervisor docs use PTE for what we call pte_t, so + * this nomenclature is somewhat confusing. + */ +#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)) +#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t)) + +#ifndef __ASSEMBLY__ + +/* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. + * + * HOWEVER, if we are using an allocation scheme with slop after the + * end of the page table (e.g. where our L2 page tables are 2KB but + * our pages are 64KB and we are allocating via the page allocator) + * we can't extend it easily. + */ +#define LAST_PKMAP PTRS_PER_PTE + +#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK) + +#ifdef CONFIG_HIGHMEM +# define __VMAPPING_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) +#else +# define __VMAPPING_END (FIXADDR_START & ~(HPAGE_SIZE-1)) +#endif + +#ifdef CONFIG_HUGEVMAP +#define HUGE_VMAP_END __VMAPPING_END +#define HUGE_VMAP_BASE (HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE) +#define _VMALLOC_END HUGE_VMAP_BASE +#else +#define _VMALLOC_END __VMAPPING_END +#endif + +/* + * Align the vmalloc area to an L2 page table, and leave a guard page + * at the beginning and end. The vmalloc code also puts in an internal + * guard page between each allocation. + */ +#define VMALLOC_END (_VMALLOC_END - PAGE_SIZE) +extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */; +#define _VMALLOC_START (_VMALLOC_END - VMALLOC_RESERVE) +#define VMALLOC_START (_VMALLOC_START + PAGE_SIZE) + +/* This is the maximum possible amount of lowmem. */ +#define MAXMEM (_VMALLOC_START - PAGE_OFFSET) + +/* We have no pmd or pud since we are strictly a two-level page table */ +#include <asm-generic/pgtable-nopmd.h> + +/* We don't define any pgds for these addresses. */ +static inline int pgd_addr_invalid(unsigned long addr) +{ + return addr >= MEM_HV_INTRPT; +} + +/* + * Provide versions of these routines that can be used safely when + * the hypervisor may be asynchronously modifying dirty/accessed bits. + * ptep_get_and_clear() matches the generic one but we provide it to + * be parallel with the 64-bit code. + */ +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define __HAVE_ARCH_PTEP_SET_WRPROTECT + +extern int ptep_test_and_clear_young(struct vm_area_struct *, + unsigned long addr, pte_t *); +extern void ptep_set_wrprotect(struct mm_struct *, + unsigned long addr, pte_t *); + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte = *ptep; + pte_clear(_mm, addr, ptep); + return pte; +} + +static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + set_pte(&pmdp->pud.pgd, pmdval.pud.pgd); +} + +/* Create a pmd from a PTFN. */ +static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot) +{ + return (pmd_t){ { hv_pte_set_ptfn(prot, ptfn) } }; +} + +/* Return the page-table frame number (ptfn) that a pmd_t points at. */ +#define pmd_ptfn(pmd) hv_pte_get_ptfn((pmd).pud.pgd) + +static inline void pmd_clear(pmd_t *pmdp) +{ + __pte_clear(&pmdp->pud.pgd); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_TILE_PGTABLE_32_H */ diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h new file mode 100644 index 00000000..fd803285 --- /dev/null +++ b/arch/tile/include/asm/pgtable_64.h @@ -0,0 +1,175 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +#ifndef _ASM_TILE_PGTABLE_64_H +#define _ASM_TILE_PGTABLE_64_H + +/* The level-0 page table breaks the address space into 32-bit chunks. */ +#define PGDIR_SHIFT HV_LOG2_L1_SPAN +#define PGDIR_SIZE HV_L1_SPAN +#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#define PTRS_PER_PGD HV_L0_ENTRIES +#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t)) + +/* + * The level-1 index is defined by the huge page size. A PMD is composed + * of PTRS_PER_PMD pgd_t's and is the middle level of the page table. + */ +#define PMD_SHIFT HV_LOG2_PAGE_SIZE_LARGE +#define PMD_SIZE HV_PAGE_SIZE_LARGE +#define PMD_MASK (~(PMD_SIZE-1)) +#define PTRS_PER_PMD (1 << (PGDIR_SHIFT - PMD_SHIFT)) +#define SIZEOF_PMD (PTRS_PER_PMD * sizeof(pmd_t)) + +/* + * The level-2 index is defined by the difference between the huge + * page size and the normal page size. A PTE is composed of + * PTRS_PER_PTE pte_t's and is the bottom level of the page table. + * Note that the hypervisor docs use PTE for what we call pte_t, so + * this nomenclature is somewhat confusing. + */ +#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)) +#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t)) + +/* + * Align the vmalloc area to an L2 page table, and leave a guard page + * at the beginning and end. The vmalloc code also puts in an internal + * guard page between each allocation. + */ +#define _VMALLOC_END HUGE_VMAP_BASE +#define VMALLOC_END (_VMALLOC_END - PAGE_SIZE) +#define VMALLOC_START (_VMALLOC_START + PAGE_SIZE) + +#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE) + +#ifndef __ASSEMBLY__ + +/* We have no pud since we are a three-level page table. */ +#include <asm-generic/pgtable-nopud.h> + +static inline int pud_none(pud_t pud) +{ + return pud_val(pud) == 0; +} + +static inline int pud_present(pud_t pud) +{ + return pud_val(pud) & _PAGE_PRESENT; +} + +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e)) + +static inline void pud_clear(pud_t *pudp) +{ + __pte_clear(&pudp->pgd); +} + +static inline int pud_bad(pud_t pud) +{ + return ((pud_val(pud) & _PAGE_ALL) != _PAGE_TABLE); +} + +/* Return the page-table frame number (ptfn) that a pud_t points at. */ +#define pud_ptfn(pud) hv_pte_get_ptfn((pud).pgd) + +/* + * A given kernel pud_t maps to a kernel pmd_t table at a specific + * virtual address. Since kernel pmd_t tables can be aligned at + * sub-page granularity, this macro can return non-page-aligned + * pointers, despite its name. + */ +#define pud_page_vaddr(pud) \ + (__va((phys_addr_t)pud_ptfn(pud) << HV_LOG2_PAGE_TABLE_ALIGN)) + +/* + * A pud_t points to a pmd_t array. Since we can have multiple per + * page, we don't have a one-to-one mapping of pud_t's to pages. + */ +#define pud_page(pud) pfn_to_page(HV_PTFN_TO_PFN(pud_ptfn(pud))) + +static inline unsigned long pud_index(unsigned long address) +{ + return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); +} + +#define pmd_offset(pud, address) \ + ((pmd_t *)pud_page_vaddr(*(pud)) + pmd_index(address)) + +static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + set_pte(pmdp, pmdval); +} + +/* Create a pmd from a PTFN and pgprot. */ +static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot) +{ + return hv_pte_set_ptfn(prot, ptfn); +} + +/* Return the page-table frame number (ptfn) that a pmd_t points at. */ +static inline unsigned long pmd_ptfn(pmd_t pmd) +{ + return hv_pte_get_ptfn(pmd); +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + __pte_clear(pmdp); +} + +/* Normalize an address to having the correct high bits set. */ +#define pgd_addr_normalize pgd_addr_normalize +static inline unsigned long pgd_addr_normalize(unsigned long addr) +{ + return ((long)addr << (CHIP_WORD_SIZE() - CHIP_VA_WIDTH())) >> + (CHIP_WORD_SIZE() - CHIP_VA_WIDTH()); +} + +/* We don't define any pgds for these addresses. */ +static inline int pgd_addr_invalid(unsigned long addr) +{ + return addr >= MEM_HV_START || + (addr > MEM_LOW_END && addr < MEM_HIGH_START); +} + +/* + * Use atomic instructions to provide atomicity against the hypervisor. + */ +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + return (__insn_fetchand(&ptep->val, ~HV_PTE_ACCESSED) >> + HV_PTE_INDEX_ACCESSED) & 0x1; +} + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + __insn_fetchand(&ptep->val, ~HV_PTE_WRITABLE); +} + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return hv_pte(__insn_exch(&ptep->val, 0UL)); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_TILE_PGTABLE_64_H */ diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h new file mode 100644 index 00000000..34c1e01f --- /dev/null +++ b/arch/tile/include/asm/processor.h @@ -0,0 +1,357 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_PROCESSOR_H +#define _ASM_TILE_PROCESSOR_H + +#ifndef __ASSEMBLY__ + +/* + * NOTE: we don't include <linux/ptrace.h> or <linux/percpu.h> as one + * normally would, due to #include dependencies. + */ +#include <linux/types.h> +#include <asm/ptrace.h> +#include <asm/percpu.h> + +#include <arch/chip.h> +#include <arch/spr_def.h> + +struct task_struct; +struct thread_struct; + +typedef struct { + unsigned long seg; +} mm_segment_t; + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +void *current_text_addr(void); + +#if CHIP_HAS_TILE_DMA() +/* Capture the state of a suspended DMA. */ +struct tile_dma_state { + int enabled; + unsigned long src; + unsigned long dest; + unsigned long strides; + unsigned long chunk_size; + unsigned long src_chunk; + unsigned long dest_chunk; + unsigned long byte; + unsigned long status; +}; + +/* + * A mask of the DMA status register for selecting only the 'running' + * and 'done' bits. + */ +#define DMA_STATUS_MASK \ + (SPR_DMA_STATUS__RUNNING_MASK | SPR_DMA_STATUS__DONE_MASK) +#endif + +/* + * Track asynchronous TLB events (faults and access violations) + * that occur while we are in kernel mode from DMA or the SN processor. + */ +struct async_tlb { + short fault_num; /* original fault number; 0 if none */ + char is_fault; /* was it a fault (vs an access violation) */ + char is_write; /* for fault: was it caused by a write? */ + unsigned long address; /* what address faulted? */ +}; + +#ifdef CONFIG_HARDWALL +struct hardwall_info; +#endif + +struct thread_struct { + /* kernel stack pointer */ + unsigned long ksp; + /* kernel PC */ + unsigned long pc; + /* starting user stack pointer (for page migration) */ + unsigned long usp0; + /* pid of process that created this one */ + pid_t creator_pid; +#if CHIP_HAS_TILE_DMA() + /* DMA info for suspended threads (byte == 0 means no DMA state) */ + struct tile_dma_state tile_dma_state; +#endif + /* User EX_CONTEXT registers */ + unsigned long ex_context[2]; + /* User SYSTEM_SAVE registers */ + unsigned long system_save[4]; + /* User interrupt mask */ + unsigned long long interrupt_mask; + /* User interrupt-control 0 state */ + unsigned long intctrl_0; +#if CHIP_HAS_PROC_STATUS_SPR() + /* Any other miscellaneous processor state bits */ + unsigned long proc_status; +#endif +#if !CHIP_HAS_FIXED_INTVEC_BASE() + /* Interrupt base for PL0 interrupts */ + unsigned long interrupt_vector_base; +#endif +#if CHIP_HAS_TILE_RTF_HWM() + /* Tile cache retry fifo high-water mark */ + unsigned long tile_rtf_hwm; +#endif +#if CHIP_HAS_DSTREAM_PF() + /* Data stream prefetch control */ + unsigned long dstream_pf; +#endif +#ifdef CONFIG_HARDWALL + /* Is this task tied to an activated hardwall? */ + struct hardwall_info *hardwall; + /* Chains this task into the list at hardwall->list. */ + struct list_head hardwall_list; +#endif +#if CHIP_HAS_TILE_DMA() + /* Async DMA TLB fault information */ + struct async_tlb dma_async_tlb; +#endif +#if CHIP_HAS_SN_PROC() + /* Was static network processor when we were switched out? */ + int sn_proc_running; + /* Async SNI TLB fault information */ + struct async_tlb sn_async_tlb; +#endif +}; + +#endif /* !__ASSEMBLY__ */ + +/* + * Start with "sp" this many bytes below the top of the kernel stack. + * This preserves the invariant that a called function may write to *sp. + */ +#define STACK_TOP_DELTA 8 + +/* + * When entering the kernel via a fault, start with the top of the + * pt_regs structure this many bytes below the top of the page. + * This aligns the pt_regs structure optimally for cache-line access. + */ +#ifdef __tilegx__ +#define KSTK_PTREGS_GAP 48 +#else +#define KSTK_PTREGS_GAP 56 +#endif + +#ifndef __ASSEMBLY__ + +#ifdef __tilegx__ +#define TASK_SIZE_MAX (MEM_LOW_END + 1) +#else +#define TASK_SIZE_MAX PAGE_OFFSET +#endif + +/* TASK_SIZE and related variables are always checked in "current" context. */ +#ifdef CONFIG_COMPAT +#define COMPAT_TASK_SIZE (1UL << 31) +#define TASK_SIZE ((current_thread_info()->status & TS_COMPAT) ?\ + COMPAT_TASK_SIZE : TASK_SIZE_MAX) +#else +#define TASK_SIZE TASK_SIZE_MAX +#endif + +/* We provide a minimal "vdso" a la x86; just the sigreturn code for now. */ +#define VDSO_BASE (TASK_SIZE - PAGE_SIZE) + +#define STACK_TOP VDSO_BASE + +/* STACK_TOP_MAX is used temporarily in execve and should not check COMPAT. */ +#define STACK_TOP_MAX TASK_SIZE_MAX + +/* + * This decides where the kernel will search for a free chunk of vm + * space during mmap's, if it is using bottom-up mapping. + */ +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) + +#define HAVE_ARCH_PICK_MMAP_LAYOUT + +#define INIT_THREAD { \ + .ksp = (unsigned long)init_stack + THREAD_SIZE - STACK_TOP_DELTA, \ + .interrupt_mask = -1ULL \ +} + +/* Kernel stack top for the task that first boots on this cpu. */ +DECLARE_PER_CPU(unsigned long, boot_sp); + +/* PC to boot from on this cpu. */ +DECLARE_PER_CPU(unsigned long, boot_pc); + +/* Do necessary setup to start up a newly executed thread. */ +static inline void start_thread(struct pt_regs *regs, + unsigned long pc, unsigned long usp) +{ + regs->pc = pc; + regs->sp = usp; +} + +/* Free all resources held by a thread. */ +static inline void release_thread(struct task_struct *dead_task) +{ + /* Nothing for now */ +} + +/* Prepare to copy thread state - unlazy all lazy status. */ +#define prepare_to_copy(tsk) do { } while (0) + +extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +extern int do_work_pending(struct pt_regs *regs, u32 flags); + + +/* + * Return saved (kernel) PC of a blocked thread. + * Only used in a printk() in kernel/sched.c, so don't work too hard. + */ +#define thread_saved_pc(t) ((t)->thread.pc) + +unsigned long get_wchan(struct task_struct *p); + +/* Return initial ksp value for given task. */ +#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE) + +/* Return some info about the user process TASK. */ +#define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA) +#define task_pt_regs(task) \ + ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) +#define task_sp(task) (task_pt_regs(task)->sp) +#define task_pc(task) (task_pt_regs(task)->pc) +/* Aliases for pc and sp (used in fs/proc/array.c) */ +#define KSTK_EIP(task) task_pc(task) +#define KSTK_ESP(task) task_sp(task) + +/* Standard format for printing registers and other word-size data. */ +#ifdef __tilegx__ +# define REGFMT "0x%016lx" +#else +# define REGFMT "0x%08lx" +#endif + +/* + * Do some slow action (e.g. read a slow SPR). + * Note that this must also have compiler-barrier semantics since + * it may be used in a busy loop reading memory. + */ +static inline void cpu_relax(void) +{ + __insn_mfspr(SPR_PASS); + barrier(); +} + +/* Info on this processor (see fs/proc/cpuinfo.c) */ +struct seq_operations; +extern const struct seq_operations cpuinfo_op; + +/* Provide information about the chip model. */ +extern char chip_model[64]; + +/* Data on which physical memory controller corresponds to which NUMA node. */ +extern int node_controller[]; + +#if CHIP_HAS_CBOX_HOME_MAP() +/* Does the heap allocator return hash-for-home pages by default? */ +extern int hash_default; + +/* Should kernel stack pages be hash-for-home? */ +extern int kstack_hash; + +/* Does MAP_ANONYMOUS return hash-for-home pages by default? */ +#define uheap_hash hash_default + +#else +#define hash_default 0 +#define kstack_hash 0 +#define uheap_hash 0 +#endif + +/* Are we using huge pages in the TLB for kernel data? */ +extern int kdata_huge; + +/* Support standard Linux prefetching. */ +#define ARCH_HAS_PREFETCH +#define prefetch(x) __builtin_prefetch(x) +#define PREFETCH_STRIDE CHIP_L2_LINE_SIZE() + +/* Bring a value into the L1D, faulting the TLB if necessary. */ +#ifdef __tilegx__ +#define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x)) +#else +#define prefetch_L1(x) __insn_prefetch_L1((void *)(x)) +#endif + +#else /* __ASSEMBLY__ */ + +/* Do some slow action (e.g. read a slow SPR). */ +#define CPU_RELAX mfspr zero, SPR_PASS + +#endif /* !__ASSEMBLY__ */ + +/* Assembly code assumes that the PL is in the low bits. */ +#if SPR_EX_CONTEXT_1_1__PL_SHIFT != 0 +# error Fix assembly assumptions about PL +#endif + +/* We sometimes use these macros for EX_CONTEXT_0_1 as well. */ +#if SPR_EX_CONTEXT_1_1__PL_SHIFT != SPR_EX_CONTEXT_0_1__PL_SHIFT || \ + SPR_EX_CONTEXT_1_1__PL_RMASK != SPR_EX_CONTEXT_0_1__PL_RMASK || \ + SPR_EX_CONTEXT_1_1__ICS_SHIFT != SPR_EX_CONTEXT_0_1__ICS_SHIFT || \ + SPR_EX_CONTEXT_1_1__ICS_RMASK != SPR_EX_CONTEXT_0_1__ICS_RMASK +# error Fix assumptions that EX1 macros work for both PL0 and PL1 +#endif + +/* Allow pulling apart and recombining the PL and ICS bits in EX_CONTEXT. */ +#define EX1_PL(ex1) \ + (((ex1) >> SPR_EX_CONTEXT_1_1__PL_SHIFT) & SPR_EX_CONTEXT_1_1__PL_RMASK) +#define EX1_ICS(ex1) \ + (((ex1) >> SPR_EX_CONTEXT_1_1__ICS_SHIFT) & SPR_EX_CONTEXT_1_1__ICS_RMASK) +#define PL_ICS_EX1(pl, ics) \ + (((pl) << SPR_EX_CONTEXT_1_1__PL_SHIFT) | \ + ((ics) << SPR_EX_CONTEXT_1_1__ICS_SHIFT)) + +/* + * Provide symbolic constants for PLs. + * Note that assembly code assumes that USER_PL is zero. + */ +#define USER_PL 0 +#if CONFIG_KERNEL_PL == 2 +#define GUEST_PL 1 +#endif +#define KERNEL_PL CONFIG_KERNEL_PL + +/* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */ +#define CPU_LOG_MASK_VALUE 12 +#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1) +#if CONFIG_NR_CPUS > CPU_MASK_VALUE +# error Too many cpus! +#endif +#define raw_smp_processor_id() \ + ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE) +#define get_current_ksp0() \ + (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE) +#define next_current_ksp0(task) ({ \ + unsigned long __ksp0 = task_ksp0(task); \ + int __cpu = raw_smp_processor_id(); \ + BUG_ON(__ksp0 & CPU_MASK_VALUE); \ + __ksp0 | __cpu; \ +}) + +#endif /* _ASM_TILE_PROCESSOR_H */ diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h new file mode 100644 index 00000000..c6cddd7e --- /dev/null +++ b/arch/tile/include/asm/ptrace.h @@ -0,0 +1,164 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_PTRACE_H +#define _ASM_TILE_PTRACE_H + +#include <arch/chip.h> +#include <arch/abi.h> + +/* These must match struct pt_regs, below. */ +#if CHIP_WORD_SIZE() == 32 +#define PTREGS_OFFSET_REG(n) ((n)*4) +#else +#define PTREGS_OFFSET_REG(n) ((n)*8) +#endif +#define PTREGS_OFFSET_BASE 0 +#define PTREGS_OFFSET_TP PTREGS_OFFSET_REG(53) +#define PTREGS_OFFSET_SP PTREGS_OFFSET_REG(54) +#define PTREGS_OFFSET_LR PTREGS_OFFSET_REG(55) +#define PTREGS_NR_GPRS 56 +#define PTREGS_OFFSET_PC PTREGS_OFFSET_REG(56) +#define PTREGS_OFFSET_EX1 PTREGS_OFFSET_REG(57) +#define PTREGS_OFFSET_FAULTNUM PTREGS_OFFSET_REG(58) +#define PTREGS_OFFSET_ORIG_R0 PTREGS_OFFSET_REG(59) +#define PTREGS_OFFSET_FLAGS PTREGS_OFFSET_REG(60) +#if CHIP_HAS_CMPEXCH() +#define PTREGS_OFFSET_CMPEXCH PTREGS_OFFSET_REG(61) +#endif +#define PTREGS_SIZE PTREGS_OFFSET_REG(64) + +#ifndef __ASSEMBLY__ + +#ifdef __KERNEL__ +/* Benefit from consistent use of "long" on all chips. */ +typedef unsigned long pt_reg_t; +#else +/* Provide appropriate length type to userspace regardless of -m32/-m64. */ +typedef uint_reg_t pt_reg_t; +#endif + +/* + * This struct defines the way the registers are stored on the stack during a + * system call or exception. "struct sigcontext" has the same shape. + */ +struct pt_regs { + /* Saved main processor registers; 56..63 are special. */ + /* tp, sp, and lr must immediately follow regs[] for aliasing. */ + pt_reg_t regs[53]; + pt_reg_t tp; /* aliases regs[TREG_TP] */ + pt_reg_t sp; /* aliases regs[TREG_SP] */ + pt_reg_t lr; /* aliases regs[TREG_LR] */ + + /* Saved special registers. */ + pt_reg_t pc; /* stored in EX_CONTEXT_K_0 */ + pt_reg_t ex1; /* stored in EX_CONTEXT_K_1 (PL and ICS bit) */ + pt_reg_t faultnum; /* fault number (INT_SWINT_1 for syscall) */ + pt_reg_t orig_r0; /* r0 at syscall entry, else zero */ + pt_reg_t flags; /* flags (see below) */ +#if !CHIP_HAS_CMPEXCH() + pt_reg_t pad[3]; +#else + pt_reg_t cmpexch; /* value of CMPEXCH_VALUE SPR at interrupt */ + pt_reg_t pad[2]; +#endif +}; + +#endif /* __ASSEMBLY__ */ + +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 + +/* Support TILE-specific ptrace options, with events starting at 16. */ +#define PTRACE_O_TRACEMIGRATE 0x00010000 +#define PTRACE_EVENT_MIGRATE 16 +#ifdef __KERNEL__ +#define PTRACE_O_MASK_TILE (PTRACE_O_TRACEMIGRATE) +#define PT_TRACE_MIGRATE 0x00080000 +#define PT_TRACE_MASK_TILE (PT_TRACE_MIGRATE) +#endif + +#ifdef __KERNEL__ + +/* Flag bits in pt_regs.flags */ +#define PT_FLAGS_DISABLE_IRQ 1 /* on return to kernel, disable irqs */ +#define PT_FLAGS_CALLER_SAVES 2 /* caller-save registers are valid */ +#define PT_FLAGS_RESTORE_REGS 4 /* restore callee-save regs on return */ + +#ifndef __ASSEMBLY__ + +#define instruction_pointer(regs) ((regs)->pc) +#define profile_pc(regs) instruction_pointer(regs) + +/* Does the process account for user or for system time? */ +#define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL) + +/* Fill in a struct pt_regs with the current kernel registers. */ +struct pt_regs *get_pt_regs(struct pt_regs *); + +/* Trace the current syscall. */ +extern void do_syscall_trace(void); + +#define arch_has_single_step() (1) + +/* + * A structure for all single-stepper state. + * + * Also update defines in assembler section if it changes + */ +struct single_step_state { + /* the page to which we will write hacked-up bundles */ + void __user *buffer; + + union { + int flags; + struct { + unsigned long is_enabled:1, update:1, update_reg:6; + }; + }; + + unsigned long orig_pc; /* the original PC */ + unsigned long next_pc; /* return PC if no branch (PC + 1) */ + unsigned long branch_next_pc; /* return PC if we did branch/jump */ + unsigned long update_value; /* value to restore to update_target */ +}; + +/* Single-step the instruction at regs->pc */ +extern void single_step_once(struct pt_regs *regs); + +/* Clean up after execve(). */ +extern void single_step_execve(void); + +struct task_struct; + +extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, + int error_code); + +#ifdef __tilegx__ +/* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */ +#define __ARCH_WANT_COMPAT_SYS_PTRACE +#endif + +#endif /* !__ASSEMBLY__ */ + +#define SINGLESTEP_STATE_MASK_IS_ENABLED 0x1 +#define SINGLESTEP_STATE_MASK_UPDATE 0x2 +#define SINGLESTEP_STATE_TARGET_LB 2 +#define SINGLESTEP_STATE_TARGET_UB 7 + +#endif /* !__KERNEL__ */ + +#endif /* _ASM_TILE_PTRACE_H */ diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h new file mode 100644 index 00000000..d062d463 --- /dev/null +++ b/arch/tile/include/asm/sections.h @@ -0,0 +1,44 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SECTIONS_H +#define _ASM_TILE_SECTIONS_H + +#define arch_is_kernel_data arch_is_kernel_data + +#include <asm-generic/sections.h> + +/* Text and data are at different areas in the kernel VA space. */ +extern char _sinitdata[], _einitdata[]; + +/* Write-once data is writable only till the end of initialization. */ +extern char __w1data_begin[], __w1data_end[]; + + +/* Not exactly sections, but PC comparison points in the code. */ +extern char __rt_sigreturn[], __rt_sigreturn_end[]; +#ifndef __tilegx__ +extern char sys_cmpxchg[], __sys_cmpxchg_end[]; +extern char __sys_cmpxchg_grab_lock[]; +extern char __start_atomic_asm_code[], __end_atomic_asm_code[]; +#endif + +/* Handle the discontiguity between _sdata and _stext. */ +static inline int arch_is_kernel_data(unsigned long addr) +{ + return addr >= (unsigned long)_sdata && + addr < (unsigned long)_end; +} + +#endif /* _ASM_TILE_SECTIONS_H */ diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h new file mode 100644 index 00000000..e58613e0 --- /dev/null +++ b/arch/tile/include/asm/setup.h @@ -0,0 +1,58 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SETUP_H +#define _ASM_TILE_SETUP_H + +#define COMMAND_LINE_SIZE 2048 + +#ifdef __KERNEL__ + +#include <linux/pfn.h> +#include <linux/init.h> + +/* + * Reserved space for vmalloc and iomap - defined in asm/page.h + */ +#define MAXMEM_PFN PFN_DOWN(MAXMEM) + +void early_panic(const char *fmt, ...); +void warn_early_printk(void); +void __init disable_early_printk(void); + +/* Init-time routine to do tile-specific per-cpu setup. */ +void setup_cpu(int boot); + +/* User-level DMA management functions */ +void grant_dma_mpls(void); +void restrict_dma_mpls(void); + +#ifdef CONFIG_HARDWALL +/* User-level network management functions */ +void reset_network_state(void); +void grant_network_mpls(void); +void restrict_network_mpls(void); +struct task_struct; +int hardwall_deactivate(struct task_struct *task); + +/* Hook hardwall code into changes in affinity. */ +#define arch_set_cpus_allowed(p, new_mask) do { \ + if (p->thread.hardwall && !cpumask_equal(&p->cpus_allowed, new_mask)) \ + hardwall_deactivate(p); \ +} while (0) +#endif + +#endif /* __KERNEL__ */ + +#endif /* _ASM_TILE_SETUP_H */ diff --git a/arch/tile/include/asm/sigcontext.h b/arch/tile/include/asm/sigcontext.h new file mode 100644 index 00000000..6348e59d --- /dev/null +++ b/arch/tile/include/asm/sigcontext.h @@ -0,0 +1,37 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SIGCONTEXT_H +#define _ASM_TILE_SIGCONTEXT_H + +/* Don't pollute the namespace since <signal.h> includes this file. */ +#define __need_int_reg_t +#include <arch/abi.h> + +/* + * struct sigcontext has the same shape as struct pt_regs, + * but is simplified since we know the fault is from userspace. + */ +struct sigcontext { + __uint_reg_t gregs[53]; /* General-purpose registers. */ + __uint_reg_t tp; /* Aliases gregs[TREG_TP]. */ + __uint_reg_t sp; /* Aliases gregs[TREG_SP]. */ + __uint_reg_t lr; /* Aliases gregs[TREG_LR]. */ + __uint_reg_t pc; /* Program counter. */ + __uint_reg_t ics; /* In Interrupt Critical Section? */ + __uint_reg_t faultnum; /* Fault number. */ + __uint_reg_t pad[5]; +}; + +#endif /* _ASM_TILE_SIGCONTEXT_H */ diff --git a/arch/tile/include/asm/sigframe.h b/arch/tile/include/asm/sigframe.h new file mode 100644 index 00000000..994d3d30 --- /dev/null +++ b/arch/tile/include/asm/sigframe.h @@ -0,0 +1,33 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SIGFRAME_H +#define _ASM_TILE_SIGFRAME_H + +/* Indicate that syscall return should not examine r0 */ +#define INT_SWINT_1_SIGRETURN (~0) + +#ifndef __ASSEMBLY__ + +#include <arch/abi.h> + +struct rt_sigframe { + unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */ + struct siginfo info; + struct ucontext uc; +}; + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_TILE_SIGFRAME_H */ diff --git a/arch/tile/include/asm/siginfo.h b/arch/tile/include/asm/siginfo.h new file mode 100644 index 00000000..56d661bb --- /dev/null +++ b/arch/tile/include/asm/siginfo.h @@ -0,0 +1,34 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SIGINFO_H +#define _ASM_TILE_SIGINFO_H + +#define __ARCH_SI_TRAPNO + +#ifdef __LP64__ +# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +#endif + +#include <asm-generic/siginfo.h> + +/* + * Additional Tile-specific SIGILL si_codes + */ +#define ILL_DBLFLT (__SI_FAULT|9) /* double fault */ +#define ILL_HARDWALL (__SI_FAULT|10) /* user networks hardwall violation */ +#undef NSIGILL +#define NSIGILL 10 + +#endif /* _ASM_TILE_SIGINFO_H */ diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h new file mode 100644 index 00000000..1e5e49aa --- /dev/null +++ b/arch/tile/include/asm/signal.h @@ -0,0 +1,39 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SIGNAL_H +#define _ASM_TILE_SIGNAL_H + +/* Do not notify a ptracer when this signal is handled. */ +#define SA_NOPTRACE 0x02000000u + +/* Used in earlier Tilera releases, so keeping for binary compatibility. */ +#define SA_RESTORER 0x04000000u + +#include <asm-generic/signal.h> + +#if defined(__KERNEL__) +#if !defined(__ASSEMBLY__) +struct pt_regs; +int restore_sigcontext(struct pt_regs *, struct sigcontext __user *); +int setup_sigcontext(struct sigcontext __user *, struct pt_regs *); +void do_signal(struct pt_regs *regs); +void signal_fault(const char *type, struct pt_regs *, + void __user *frame, int sig); +void trace_unhandled_signal(const char *type, struct pt_regs *regs, + unsigned long address, int signo); +#endif +#endif + +#endif /* _ASM_TILE_SIGNAL_H */ diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h new file mode 100644 index 00000000..1aa759ae --- /dev/null +++ b/arch/tile/include/asm/smp.h @@ -0,0 +1,140 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SMP_H +#define _ASM_TILE_SMP_H + +#ifdef CONFIG_SMP + +#include <asm/processor.h> +#include <linux/cpumask.h> +#include <linux/irqreturn.h> +#include <hv/hypervisor.h> + +/* Set up this tile to support receiving hypervisor messages */ +void init_messaging(void); + +/* Set up this tile to support receiving device interrupts and IPIs. */ +void init_per_tile_IRQs(void); + +/* Send a message to processors specified in mask */ +void send_IPI_many(const struct cpumask *mask, int tag); + +/* Send a message to all but the sending processor */ +void send_IPI_allbutself(int tag); + +/* Send a message to a specific processor */ +void send_IPI_single(int dest, int tag); + +/* Process an IPI message */ +void evaluate_message(int tag); + +/* Boot a secondary cpu */ +void online_secondary(void); + +/* Topology of the supervisor tile grid, and coordinates of boot processor */ +extern HV_Topology smp_topology; + +/* Accessors for grid size */ +#define smp_height (smp_topology.height) +#define smp_width (smp_topology.width) + +/* Convenience functions for converting cpu <-> coords. */ +static inline int cpu_x(int cpu) +{ + return cpu % smp_width; +} +static inline int cpu_y(int cpu) +{ + return cpu / smp_width; +} +static inline int xy_to_cpu(int x, int y) +{ + return y * smp_width + x; +} + +/* Hypervisor message tags sent via the tile send_IPI*() routines. */ +#define MSG_TAG_START_CPU 1 +#define MSG_TAG_STOP_CPU 2 +#define MSG_TAG_CALL_FUNCTION_MANY 3 +#define MSG_TAG_CALL_FUNCTION_SINGLE 4 + +/* Hook for the generic smp_call_function_many() routine. */ +static inline void arch_send_call_function_ipi_mask(struct cpumask *mask) +{ + send_IPI_many(mask, MSG_TAG_CALL_FUNCTION_MANY); +} + +/* Hook for the generic smp_call_function_single() routine. */ +static inline void arch_send_call_function_single_ipi(int cpu) +{ + send_IPI_single(cpu, MSG_TAG_CALL_FUNCTION_SINGLE); +} + +/* Print out the boot string describing which cpus were disabled. */ +void print_disabled_cpus(void); + +#else /* !CONFIG_SMP */ + +#define smp_master_cpu 0 +#define smp_height 1 +#define smp_width 1 +#define cpu_x(cpu) 0 +#define cpu_y(cpu) 0 +#define xy_to_cpu(x, y) 0 + +#endif /* !CONFIG_SMP */ + + +/* Which cpus may be used as the lotar in a page table entry. */ +extern struct cpumask cpu_lotar_map; +#define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map) + +#if CHIP_HAS_CBOX_HOME_MAP() +/* Which processors are used for hash-for-home mapping */ +extern struct cpumask hash_for_home_map; +#endif + +/* Which cpus can have their cache flushed by hv_flush_remote(). */ +extern struct cpumask cpu_cacheable_map; +#define cpu_cacheable(cpu) cpumask_test_cpu((cpu), &cpu_cacheable_map) + +/* Convert an HV_LOTAR value into a cpu. */ +static inline int hv_lotar_to_cpu(HV_LOTAR lotar) +{ + return HV_LOTAR_X(lotar) + (HV_LOTAR_Y(lotar) * smp_width); +} + +/* + * Extension of <linux/cpumask.h> functionality when you just want + * to express a mask or suppression or inclusion region without + * being too concerned about exactly which cpus are valid in that region. + */ +int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits); + +#define cpulist_parse_crop(buf, dst) \ + __cpulist_parse_crop((buf), (dst), NR_CPUS) +static inline int __cpulist_parse_crop(const char *buf, struct cpumask *dstp, + int nbits) +{ + return bitmap_parselist_crop(buf, cpumask_bits(dstp), nbits); +} + +/* Initialize the IPI subsystem. */ +void ipi_init(void); + +/* Function for start-cpu message to cause us to jump to. */ +extern unsigned long start_cpu_function_addr; + +#endif /* _ASM_TILE_SMP_H */ diff --git a/arch/tile/include/asm/spinlock.h b/arch/tile/include/asm/spinlock.h new file mode 100644 index 00000000..1a8bd474 --- /dev/null +++ b/arch/tile/include/asm/spinlock.h @@ -0,0 +1,24 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SPINLOCK_H +#define _ASM_TILE_SPINLOCK_H + +#ifdef __tilegx__ +#include <asm/spinlock_64.h> +#else +#include <asm/spinlock_32.h> +#endif + +#endif /* _ASM_TILE_SPINLOCK_H */ diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h new file mode 100644 index 00000000..c0a77b38 --- /dev/null +++ b/arch/tile/include/asm/spinlock_32.h @@ -0,0 +1,129 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * 32-bit SMP spinlocks. + */ + +#ifndef _ASM_TILE_SPINLOCK_32_H +#define _ASM_TILE_SPINLOCK_32_H + +#include <linux/atomic.h> +#include <asm/page.h> +#include <linux/compiler.h> + +/* + * We only use even ticket numbers so the '1' inserted by a tns is + * an unambiguous "ticket is busy" flag. + */ +#define TICKET_QUANTUM 2 + + +/* + * SMP ticket spinlocks, allowing only a single CPU anywhere + * + * (the type definitions are in asm/spinlock_types.h) + */ +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + /* + * Note that even if a new ticket is in the process of being + * acquired, so lock->next_ticket is 1, it's still reasonable + * to claim the lock is held, since it will be momentarily + * if not already. There's no need to wait for a "valid" + * lock->next_ticket to become available. + */ + return lock->next_ticket != lock->current_ticket; +} + +void arch_spin_lock(arch_spinlock_t *lock); + +/* We cannot take an interrupt after getting a ticket, so don't enable them. */ +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) + +int arch_spin_trylock(arch_spinlock_t *lock); + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + /* For efficiency, overlap fetching the old ticket with the wmb(). */ + int old_ticket = lock->current_ticket; + wmb(); /* guarantee anything modified under the lock is visible */ + lock->current_ticket = old_ticket + TICKET_QUANTUM; +} + +void arch_spin_unlock_wait(arch_spinlock_t *lock); + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * We use a "tns/store-back" technique on a single word to manage + * the lock state, looping around to retry if the tns returns 1. + */ + +/* Internal layout of the word; do not use. */ +#define _WR_NEXT_SHIFT 8 +#define _WR_CURR_SHIFT 16 +#define _WR_WIDTH 8 +#define _RD_COUNT_SHIFT 24 +#define _RD_COUNT_WIDTH 8 + +/** + * arch_read_can_lock() - would read_trylock() succeed? + */ +static inline int arch_read_can_lock(arch_rwlock_t *rwlock) +{ + return (rwlock->lock << _RD_COUNT_WIDTH) == 0; +} + +/** + * arch_write_can_lock() - would write_trylock() succeed? + */ +static inline int arch_write_can_lock(arch_rwlock_t *rwlock) +{ + return rwlock->lock == 0; +} + +/** + * arch_read_lock() - acquire a read lock. + */ +void arch_read_lock(arch_rwlock_t *rwlock); + +/** + * arch_write_lock() - acquire a write lock. + */ +void arch_write_lock(arch_rwlock_t *rwlock); + +/** + * arch_read_trylock() - try to acquire a read lock. + */ +int arch_read_trylock(arch_rwlock_t *rwlock); + +/** + * arch_write_trylock() - try to acquire a write lock. + */ +int arch_write_trylock(arch_rwlock_t *rwlock); + +/** + * arch_read_unlock() - release a read lock. + */ +void arch_read_unlock(arch_rwlock_t *rwlock); + +/** + * arch_write_unlock() - release a write lock. + */ +void arch_write_unlock(arch_rwlock_t *rwlock); + +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + +#endif /* _ASM_TILE_SPINLOCK_32_H */ diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h new file mode 100644 index 00000000..5f8b6a09 --- /dev/null +++ b/arch/tile/include/asm/spinlock_64.h @@ -0,0 +1,161 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * 64-bit SMP ticket spinlocks, allowing only a single CPU anywhere + * (the type definitions are in asm/spinlock_types.h) + */ + +#ifndef _ASM_TILE_SPINLOCK_64_H +#define _ASM_TILE_SPINLOCK_64_H + +/* Shifts and masks for the various fields in "lock". */ +#define __ARCH_SPIN_CURRENT_SHIFT 17 +#define __ARCH_SPIN_NEXT_MASK 0x7fff +#define __ARCH_SPIN_NEXT_OVERFLOW 0x8000 + +/* + * Return the "current" portion of a ticket lock value, + * i.e. the number that currently owns the lock. + */ +static inline int arch_spin_current(u32 val) +{ + return val >> __ARCH_SPIN_CURRENT_SHIFT; +} + +/* + * Return the "next" portion of a ticket lock value, + * i.e. the number that the next task to try to acquire the lock will get. + */ +static inline int arch_spin_next(u32 val) +{ + return val & __ARCH_SPIN_NEXT_MASK; +} + +/* The lock is locked if a task would have to wait to get it. */ +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + u32 val = lock->lock; + return arch_spin_current(val) != arch_spin_next(val); +} + +/* Bump the current ticket so the next task owns the lock. */ +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + wmb(); /* guarantee anything modified under the lock is visible */ + __insn_fetchadd4(&lock->lock, 1U << __ARCH_SPIN_CURRENT_SHIFT); +} + +void arch_spin_unlock_wait(arch_spinlock_t *lock); + +void arch_spin_lock_slow(arch_spinlock_t *lock, u32 val); + +/* Grab the "next" ticket number and bump it atomically. + * If the current ticket is not ours, go to the slow path. + * We also take the slow path if the "next" value overflows. + */ +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + u32 val = __insn_fetchadd4(&lock->lock, 1); + u32 ticket = val & (__ARCH_SPIN_NEXT_MASK | __ARCH_SPIN_NEXT_OVERFLOW); + if (unlikely(arch_spin_current(val) != ticket)) + arch_spin_lock_slow(lock, ticket); +} + +/* Try to get the lock, and return whether we succeeded. */ +int arch_spin_trylock(arch_spinlock_t *lock); + +/* We cannot take an interrupt after getting a ticket, so don't enable them. */ +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * We use fetchadd() for readers, and fetchor() with the sign bit + * for writers. + */ + +#define __WRITE_LOCK_BIT (1 << 31) + +static inline int arch_write_val_locked(int val) +{ + return val < 0; /* Optimize "val & __WRITE_LOCK_BIT". */ +} + +/** + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +static inline int arch_read_can_lock(arch_rwlock_t *rw) +{ + return !arch_write_val_locked(rw->lock); +} + +/** + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. + */ +static inline int arch_write_can_lock(arch_rwlock_t *rw) +{ + return rw->lock == 0; +} + +extern void __read_lock_failed(arch_rwlock_t *rw); + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + u32 val = __insn_fetchaddgez4(&rw->lock, 1); + if (unlikely(arch_write_val_locked(val))) + __read_lock_failed(rw); +} + +extern void __write_lock_failed(arch_rwlock_t *rw, u32 val); + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + u32 val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT); + if (unlikely(val != 0)) + __write_lock_failed(rw, val); +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + __insn_mf(); + __insn_fetchadd4(&rw->lock, -1); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + __insn_mf(); + __insn_exch4(&rw->lock, 0); /* Avoid waiting in the write buffer. */ +} + +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + return !arch_write_val_locked(__insn_fetchaddgez4(&rw->lock, 1)); +} + +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + u32 val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT); + if (likely(val == 0)) + return 1; + if (!arch_write_val_locked(val)) + __insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT); + return 0; +} + +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + +#endif /* _ASM_TILE_SPINLOCK_64_H */ diff --git a/arch/tile/include/asm/spinlock_types.h b/arch/tile/include/asm/spinlock_types.h new file mode 100644 index 00000000..a71f59b4 --- /dev/null +++ b/arch/tile/include/asm/spinlock_types.h @@ -0,0 +1,60 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SPINLOCK_TYPES_H +#define _ASM_TILE_SPINLOCK_TYPES_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +# error "please don't include this file directly" +#endif + +#ifdef __tilegx__ + +/* Low 15 bits are "next"; high 15 bits are "current". */ +typedef struct arch_spinlock { + unsigned int lock; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } + +/* High bit is "writer owns"; low 31 bits are a count of readers. */ +typedef struct arch_rwlock { + unsigned int lock; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0 } + +#else + +typedef struct arch_spinlock { + /* Next ticket number to hand out. */ + int next_ticket; + /* The ticket number that currently owns this lock. */ + int current_ticket; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { 0, 0 } + +/* + * Byte 0 for tns (only the low bit is used), byte 1 for ticket-lock "next", + * byte 2 for ticket-lock "current", byte 3 for reader count. + */ +typedef struct arch_rwlock { + unsigned int lock; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0 } + +#endif +#endif /* _ASM_TILE_SPINLOCK_TYPES_H */ diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h new file mode 100644 index 00000000..0e9d382a --- /dev/null +++ b/arch/tile/include/asm/stack.h @@ -0,0 +1,74 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_STACK_H +#define _ASM_TILE_STACK_H + +#include <linux/types.h> +#include <linux/sched.h> +#include <asm/backtrace.h> +#include <asm/page.h> +#include <hv/hypervisor.h> + +/* Everything we need to keep track of a backtrace iteration */ +struct KBacktraceIterator { + BacktraceIterator it; + struct task_struct *task; /* task we are backtracing */ + int end; /* iteration complete. */ + int new_context; /* new context is starting */ + int profile; /* profiling, so stop on async intrpt */ + int verbose; /* printk extra info (don't want to + * do this for profiling) */ + int is_current; /* backtracing current task */ +}; + +/* Iteration methods for kernel backtraces */ + +/* + * Initialize a KBacktraceIterator from a task_struct, and optionally from + * a set of registers. If the registers are omitted, the process is + * assumed to be descheduled, and registers are read from the process's + * thread_struct and stack. "verbose" means to printk some additional + * information about fault handlers as we pass them on the stack. + */ +extern void KBacktraceIterator_init(struct KBacktraceIterator *kbt, + struct task_struct *, struct pt_regs *); + +/* Initialize iterator based on current stack. */ +extern void KBacktraceIterator_init_current(struct KBacktraceIterator *kbt); + +/* Helper method for above. */ +extern void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, + ulong pc, ulong lr, ulong sp, ulong r52); + +/* No more frames? */ +extern int KBacktraceIterator_end(struct KBacktraceIterator *kbt); + +/* Advance to the next frame. */ +extern void KBacktraceIterator_next(struct KBacktraceIterator *kbt); + +/* + * Dump stack given complete register info. Use only from the + * architecture-specific code; show_stack() + * and dump_stack() (in entry.S) are architecture-independent entry points. + */ +extern void tile_show_stack(struct KBacktraceIterator *, int headers); + +/* Dump stack of current process, with registers to seed the backtrace. */ +extern void dump_stack_regs(struct pt_regs *); + +/* Helper method for assembly dump_stack(). */ +extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52); + +#endif /* _ASM_TILE_STACK_H */ diff --git a/arch/tile/include/asm/stat.h b/arch/tile/include/asm/stat.h new file mode 100644 index 00000000..c0db34d5 --- /dev/null +++ b/arch/tile/include/asm/stat.h @@ -0,0 +1,4 @@ +#if defined(__KERNEL__) && defined(CONFIG_COMPAT) +#define __ARCH_WANT_STAT64 /* Used for compat_sys_stat64() etc. */ +#endif +#include <asm-generic/stat.h> diff --git a/arch/tile/include/asm/string.h b/arch/tile/include/asm/string.h new file mode 100644 index 00000000..7535cf1a --- /dev/null +++ b/arch/tile/include/asm/string.h @@ -0,0 +1,32 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_STRING_H +#define _ASM_TILE_STRING_H + +#define __HAVE_ARCH_MEMCHR +#define __HAVE_ARCH_MEMSET +#define __HAVE_ARCH_MEMCPY +#define __HAVE_ARCH_MEMMOVE +#define __HAVE_ARCH_STRCHR +#define __HAVE_ARCH_STRLEN + +extern __kernel_size_t strlen(const char *); +extern char *strchr(const char *s, int c); +extern void *memchr(const void *s, int c, size_t n); +extern void *memset(void *, int, __kernel_size_t); +extern void *memcpy(void *, const void *, __kernel_size_t); +extern void *memmove(void *, const void *, __kernel_size_t); + +#endif /* _ASM_TILE_STRING_H */ diff --git a/arch/tile/include/asm/swab.h b/arch/tile/include/asm/swab.h new file mode 100644 index 00000000..7c37b38f --- /dev/null +++ b/arch/tile/include/asm/swab.h @@ -0,0 +1,23 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SWAB_H +#define _ASM_TILE_SWAB_H + +/* Tile gcc is always >= 4.3.0, so we use __builtin_bswap. */ +#define __arch_swab32(x) __builtin_bswap32(x) +#define __arch_swab64(x) __builtin_bswap64(x) +#define __arch_swab16(x) (__builtin_bswap32(x) >> 16) + +#endif /* _ASM_TILE_SWAB_H */ diff --git a/arch/tile/include/asm/switch_to.h b/arch/tile/include/asm/switch_to.h new file mode 100644 index 00000000..1d48c5fe --- /dev/null +++ b/arch/tile/include/asm/switch_to.h @@ -0,0 +1,76 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SWITCH_TO_H +#define _ASM_TILE_SWITCH_TO_H + +#include <arch/sim_def.h> + +/* + * switch_to(n) should switch tasks to task nr n, first + * checking that n isn't the current task, in which case it does nothing. + * The number of callee-saved registers saved on the kernel stack + * is defined here for use in copy_thread() and must agree with __switch_to(). + */ +#define CALLEE_SAVED_FIRST_REG 30 +#define CALLEE_SAVED_REGS_COUNT 24 /* r30 to r52, plus an empty to align */ + +#ifndef __ASSEMBLY__ + +struct task_struct; + +/* + * Pause the DMA engine and static network before task switching. + */ +#define prepare_arch_switch(next) _prepare_arch_switch(next) +void _prepare_arch_switch(struct task_struct *next); + +struct task_struct; +#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next))) +extern struct task_struct *_switch_to(struct task_struct *prev, + struct task_struct *next); + +/* Helper function for _switch_to(). */ +extern struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next, + unsigned long new_system_save_k_0); + +/* Address that switched-away from tasks are at. */ +extern unsigned long get_switch_to_pc(void); + +/* + * Kernel threads can check to see if they need to migrate their + * stack whenever they return from a context switch; for user + * threads, we defer until they are returning to user-space. + */ +#define finish_arch_switch(prev) do { \ + if (unlikely((prev)->state == TASK_DEAD)) \ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \ + ((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \ + (current->pid << _SIM_CONTROL_OPERATOR_BITS)); \ + if (current->mm == NULL && !kstack_hash && \ + current_thread_info()->homecache_cpu != smp_processor_id()) \ + homecache_migrate_kthread(); \ +} while (0) + +/* Support function for forking a new task. */ +void ret_from_fork(void); + +/* Called from ret_from_fork() when a new process starts up. */ +struct task_struct *sim_notify_fork(struct task_struct *prev); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_TILE_SWITCH_TO_H */ diff --git a/arch/tile/include/asm/syscall.h b/arch/tile/include/asm/syscall.h new file mode 100644 index 00000000..d35e0dcb --- /dev/null +++ b/arch/tile/include/asm/syscall.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * See asm-generic/syscall.h for descriptions of what we must do here. + */ + +#ifndef _ASM_TILE_SYSCALL_H +#define _ASM_TILE_SYSCALL_H + +#include <linux/sched.h> +#include <linux/err.h> +#include <arch/abi.h> + +/* + * Only the low 32 bits of orig_r0 are meaningful, so we return int. + * This importantly ignores the high bits on 64-bit, so comparisons + * sign-extend the low 32 bits. + */ +static inline int syscall_get_nr(struct task_struct *t, struct pt_regs *regs) +{ + return regs->regs[TREG_SYSCALL_NR]; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->regs[0] = regs->orig_r0; +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long error = regs->regs[0]; + return IS_ERR_VALUE(error) ? error : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->regs[0]; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->regs[0] = (long) error ?: val; +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + BUG_ON(i + n > 6); + memcpy(args, ®s[i], n * sizeof(args[0])); +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + BUG_ON(i + n > 6); + memcpy(®s[i], args, n * sizeof(args[0])); +} + +#endif /* _ASM_TILE_SYSCALL_H */ diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h new file mode 100644 index 00000000..3b5507c3 --- /dev/null +++ b/arch/tile/include/asm/syscalls.h @@ -0,0 +1,76 @@ +/* + * syscalls.h - Linux syscall interfaces (arch-specific) + * + * Copyright (c) 2008 Jaswinder Singh Rajput + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SYSCALLS_H +#define _ASM_TILE_SYSCALLS_H + +#include <linux/compiler.h> +#include <linux/linkage.h> +#include <linux/signal.h> +#include <linux/types.h> +#include <linux/compat.h> + +/* The array of function pointers for syscalls. */ +extern void *sys_call_table[]; +#ifdef CONFIG_COMPAT +extern void *compat_sys_call_table[]; +#endif + +/* + * Note that by convention, any syscall which requires the current + * register set takes an additional "struct pt_regs *" pointer; a + * _sys_xxx() trampoline in intvec*.S just sets up the pointer and + * jumps to sys_xxx(). + */ + +/* kernel/sys.c */ +ssize_t sys32_readahead(int fd, u32 offset_lo, u32 offset_hi, u32 count); +long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi, + u32 len, int advice); +int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi, + u32 len_lo, u32 len_hi, int advice); +long sys_flush_cache(void); +#ifndef __tilegx__ /* No mmap() in the 32-bit kernel. */ +#define sys_mmap sys_mmap +#endif + +#ifndef __tilegx__ +/* mm/fault.c */ +long sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *); +long _sys_cmpxchg_badaddr(unsigned long address); +#endif + +#ifdef CONFIG_COMPAT +/* These four are not defined for 64-bit, but serve as "compat" syscalls. */ +long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg); +long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf); +long sys_truncate64(const char __user *path, loff_t length); +long sys_ftruncate64(unsigned int fd, loff_t length); +#endif + +/* These are the intvec*.S trampolines. */ +long _sys_sigaltstack(const stack_t __user *, stack_t __user *); +long _sys_rt_sigreturn(void); +long _sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid); +long _sys_execve(const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp); + +#include <asm-generic/syscalls.h> + +#endif /* _ASM_TILE_SYSCALLS_H */ diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h new file mode 100644 index 00000000..7594764d --- /dev/null +++ b/arch/tile/include/asm/thread_info.h @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2002 David Howells (dhowells@redhat.com) + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_THREAD_INFO_H +#define _ASM_TILE_THREAD_INFO_H + +#include <asm/processor.h> +#include <asm/page.h> +#ifndef __ASSEMBLY__ + +/* + * Low level task data that assembly code needs immediate access to. + * The structure is placed at the bottom of the supervisor stack. + */ +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + unsigned long flags; /* low level flags */ + unsigned long status; /* thread-synchronous flags */ + __u32 homecache_cpu; /* CPU we are homecached on */ + __u32 cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, + <0 => BUG */ + + mm_segment_t addr_limit; /* thread address space + (KERNEL_DS or USER_DS) */ + struct restart_block restart_block; + struct single_step_state *step_state; /* single step state + (if non-zero) */ +}; + +/* + * macros/functions for gaining access to the thread information structure. + */ +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ + .step_state = NULL, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +#endif /* !__ASSEMBLY__ */ + +#if PAGE_SIZE < 8192 +#define THREAD_SIZE_ORDER (13 - PAGE_SHIFT) +#else +#define THREAD_SIZE_ORDER (0) +#endif +#define THREAD_SIZE_PAGES (1 << THREAD_SIZE_ORDER) + +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#define LOG2_THREAD_SIZE (PAGE_SHIFT + THREAD_SIZE_ORDER) + +#define STACK_WARN (THREAD_SIZE/8) + +#ifndef __ASSEMBLY__ + +/* How to get the thread information struct from C. */ +register unsigned long stack_pointer __asm__("sp"); + +#define current_thread_info() \ + ((struct thread_info *)(stack_pointer & -THREAD_SIZE)) + +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR +extern struct thread_info *alloc_thread_info_node(struct task_struct *task, int node); +extern void free_thread_info(struct thread_info *info); + +/* Sit on a nap instruction until interrupted. */ +extern void smp_nap(void); + +/* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */ +extern void _cpu_idle(void); + +/* Switch boot idle thread to a freshly-allocated stack and free old stack. */ +extern void cpu_idle_on_new_stack(struct thread_info *old_ti, + unsigned long new_sp, + unsigned long new_ss10); + +#else /* __ASSEMBLY__ */ + +/* + * How to get the thread information struct from assembly. + * Note that we use different macros since different architectures + * have different semantics in their "mm" instruction and we would + * like to guarantee that the macro expands to exactly one instruction. + */ +#ifdef __tilegx__ +#define EXTRACT_THREAD_INFO(reg) mm reg, zero, LOG2_THREAD_SIZE, 63 +#else +#define GET_THREAD_INFO(reg) mm reg, sp, zero, LOG2_THREAD_SIZE, 31 +#endif + +#endif /* !__ASSEMBLY__ */ + +#define PREEMPT_ACTIVE 0x10000000 + +/* + * Thread information flags that various assembly files may need to access. + * Keep flags accessed frequently in low bits, particular since it makes + * it easier to build constants in assembly. + */ +#define TIF_SIGPENDING 0 /* signal pending */ +#define TIF_NEED_RESCHED 1 /* rescheduling necessary */ +#define TIF_SINGLESTEP 2 /* restore singlestep on return to + user mode */ +#define TIF_ASYNC_TLB 3 /* got an async TLB fault in kernel */ +#define TIF_SYSCALL_TRACE 4 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ +#define TIF_SECCOMP 6 /* secure computing */ +#define TIF_MEMDIE 7 /* OOM killer at work */ +#define TIF_NOTIFY_RESUME 8 /* callback before returning to user */ + +#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) +#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) +#define _TIF_ASYNC_TLB (1<<TIF_ASYNC_TLB) +#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) +#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1<<TIF_SECCOMP) +#define _TIF_MEMDIE (1<<TIF_MEMDIE) +#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) + +/* Work to do on any return to user space. */ +#define _TIF_ALLWORK_MASK \ + (_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SINGLESTEP|\ + _TIF_ASYNC_TLB|_TIF_NOTIFY_RESUME) + +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#ifdef __tilegx__ +#define TS_COMPAT 0x0001 /* 32-bit compatibility mode */ +#endif +#define TS_POLLING 0x0004 /* in idle loop but not sleeping */ +#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal */ + +#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) + +#ifndef __ASSEMBLY__ +#define HAVE_SET_RESTORE_SIGMASK 1 +static inline void set_restore_sigmask(void) +{ + struct thread_info *ti = current_thread_info(); + ti->status |= TS_RESTORE_SIGMASK; + set_bit(TIF_SIGPENDING, &ti->flags); +} +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_TILE_THREAD_INFO_H */ diff --git a/arch/tile/include/asm/tile-desc.h b/arch/tile/include/asm/tile-desc.h new file mode 100644 index 00000000..43849bf7 --- /dev/null +++ b/arch/tile/include/asm/tile-desc.h @@ -0,0 +1,19 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef __tilegx__ +#include <asm/tile-desc_32.h> +#else +#include <asm/tile-desc_64.h> +#endif diff --git a/arch/tile/include/asm/tile-desc_32.h b/arch/tile/include/asm/tile-desc_32.h new file mode 100644 index 00000000..f09c5c43 --- /dev/null +++ b/arch/tile/include/asm/tile-desc_32.h @@ -0,0 +1,553 @@ +/* TILEPro opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * + * + * + */ + +#ifndef opcode_tilepro_h +#define opcode_tilepro_h + +#include <arch/opcode.h> + + +enum +{ + TILEPRO_MAX_OPERANDS = 5 /* mm */ +}; + +typedef enum +{ + TILEPRO_OPC_BPT, + TILEPRO_OPC_INFO, + TILEPRO_OPC_INFOL, + TILEPRO_OPC_J, + TILEPRO_OPC_JAL, + TILEPRO_OPC_MOVE, + TILEPRO_OPC_MOVE_SN, + TILEPRO_OPC_MOVEI, + TILEPRO_OPC_MOVEI_SN, + TILEPRO_OPC_MOVELI, + TILEPRO_OPC_MOVELI_SN, + TILEPRO_OPC_MOVELIS, + TILEPRO_OPC_PREFETCH, + TILEPRO_OPC_RAISE, + TILEPRO_OPC_ADD, + TILEPRO_OPC_ADD_SN, + TILEPRO_OPC_ADDB, + TILEPRO_OPC_ADDB_SN, + TILEPRO_OPC_ADDBS_U, + TILEPRO_OPC_ADDBS_U_SN, + TILEPRO_OPC_ADDH, + TILEPRO_OPC_ADDH_SN, + TILEPRO_OPC_ADDHS, + TILEPRO_OPC_ADDHS_SN, + TILEPRO_OPC_ADDI, + TILEPRO_OPC_ADDI_SN, + TILEPRO_OPC_ADDIB, + TILEPRO_OPC_ADDIB_SN, + TILEPRO_OPC_ADDIH, + TILEPRO_OPC_ADDIH_SN, + TILEPRO_OPC_ADDLI, + TILEPRO_OPC_ADDLI_SN, + TILEPRO_OPC_ADDLIS, + TILEPRO_OPC_ADDS, + TILEPRO_OPC_ADDS_SN, + TILEPRO_OPC_ADIFFB_U, + TILEPRO_OPC_ADIFFB_U_SN, + TILEPRO_OPC_ADIFFH, + TILEPRO_OPC_ADIFFH_SN, + TILEPRO_OPC_AND, + TILEPRO_OPC_AND_SN, + TILEPRO_OPC_ANDI, + TILEPRO_OPC_ANDI_SN, + TILEPRO_OPC_AULI, + TILEPRO_OPC_AVGB_U, + TILEPRO_OPC_AVGB_U_SN, + TILEPRO_OPC_AVGH, + TILEPRO_OPC_AVGH_SN, + TILEPRO_OPC_BBNS, + TILEPRO_OPC_BBNS_SN, + TILEPRO_OPC_BBNST, + TILEPRO_OPC_BBNST_SN, + TILEPRO_OPC_BBS, + TILEPRO_OPC_BBS_SN, + TILEPRO_OPC_BBST, + TILEPRO_OPC_BBST_SN, + TILEPRO_OPC_BGEZ, + TILEPRO_OPC_BGEZ_SN, + TILEPRO_OPC_BGEZT, + TILEPRO_OPC_BGEZT_SN, + TILEPRO_OPC_BGZ, + TILEPRO_OPC_BGZ_SN, + TILEPRO_OPC_BGZT, + TILEPRO_OPC_BGZT_SN, + TILEPRO_OPC_BITX, + TILEPRO_OPC_BITX_SN, + TILEPRO_OPC_BLEZ, + TILEPRO_OPC_BLEZ_SN, + TILEPRO_OPC_BLEZT, + TILEPRO_OPC_BLEZT_SN, + TILEPRO_OPC_BLZ, + TILEPRO_OPC_BLZ_SN, + TILEPRO_OPC_BLZT, + TILEPRO_OPC_BLZT_SN, + TILEPRO_OPC_BNZ, + TILEPRO_OPC_BNZ_SN, + TILEPRO_OPC_BNZT, + TILEPRO_OPC_BNZT_SN, + TILEPRO_OPC_BYTEX, + TILEPRO_OPC_BYTEX_SN, + TILEPRO_OPC_BZ, + TILEPRO_OPC_BZ_SN, + TILEPRO_OPC_BZT, + TILEPRO_OPC_BZT_SN, + TILEPRO_OPC_CLZ, + TILEPRO_OPC_CLZ_SN, + TILEPRO_OPC_CRC32_32, + TILEPRO_OPC_CRC32_32_SN, + TILEPRO_OPC_CRC32_8, + TILEPRO_OPC_CRC32_8_SN, + TILEPRO_OPC_CTZ, + TILEPRO_OPC_CTZ_SN, + TILEPRO_OPC_DRAIN, + TILEPRO_OPC_DTLBPR, + TILEPRO_OPC_DWORD_ALIGN, + TILEPRO_OPC_DWORD_ALIGN_SN, + TILEPRO_OPC_FINV, + TILEPRO_OPC_FLUSH, + TILEPRO_OPC_FNOP, + TILEPRO_OPC_ICOH, + TILEPRO_OPC_ILL, + TILEPRO_OPC_INTHB, + TILEPRO_OPC_INTHB_SN, + TILEPRO_OPC_INTHH, + TILEPRO_OPC_INTHH_SN, + TILEPRO_OPC_INTLB, + TILEPRO_OPC_INTLB_SN, + TILEPRO_OPC_INTLH, + TILEPRO_OPC_INTLH_SN, + TILEPRO_OPC_INV, + TILEPRO_OPC_IRET, + TILEPRO_OPC_JALB, + TILEPRO_OPC_JALF, + TILEPRO_OPC_JALR, + TILEPRO_OPC_JALRP, + TILEPRO_OPC_JB, + TILEPRO_OPC_JF, + TILEPRO_OPC_JR, + TILEPRO_OPC_JRP, + TILEPRO_OPC_LB, + TILEPRO_OPC_LB_SN, + TILEPRO_OPC_LB_U, + TILEPRO_OPC_LB_U_SN, + TILEPRO_OPC_LBADD, + TILEPRO_OPC_LBADD_SN, + TILEPRO_OPC_LBADD_U, + TILEPRO_OPC_LBADD_U_SN, + TILEPRO_OPC_LH, + TILEPRO_OPC_LH_SN, + TILEPRO_OPC_LH_U, + TILEPRO_OPC_LH_U_SN, + TILEPRO_OPC_LHADD, + TILEPRO_OPC_LHADD_SN, + TILEPRO_OPC_LHADD_U, + TILEPRO_OPC_LHADD_U_SN, + TILEPRO_OPC_LNK, + TILEPRO_OPC_LNK_SN, + TILEPRO_OPC_LW, + TILEPRO_OPC_LW_SN, + TILEPRO_OPC_LW_NA, + TILEPRO_OPC_LW_NA_SN, + TILEPRO_OPC_LWADD, + TILEPRO_OPC_LWADD_SN, + TILEPRO_OPC_LWADD_NA, + TILEPRO_OPC_LWADD_NA_SN, + TILEPRO_OPC_MAXB_U, + TILEPRO_OPC_MAXB_U_SN, + TILEPRO_OPC_MAXH, + TILEPRO_OPC_MAXH_SN, + TILEPRO_OPC_MAXIB_U, + TILEPRO_OPC_MAXIB_U_SN, + TILEPRO_OPC_MAXIH, + TILEPRO_OPC_MAXIH_SN, + TILEPRO_OPC_MF, + TILEPRO_OPC_MFSPR, + TILEPRO_OPC_MINB_U, + TILEPRO_OPC_MINB_U_SN, + TILEPRO_OPC_MINH, + TILEPRO_OPC_MINH_SN, + TILEPRO_OPC_MINIB_U, + TILEPRO_OPC_MINIB_U_SN, + TILEPRO_OPC_MINIH, + TILEPRO_OPC_MINIH_SN, + TILEPRO_OPC_MM, + TILEPRO_OPC_MNZ, + TILEPRO_OPC_MNZ_SN, + TILEPRO_OPC_MNZB, + TILEPRO_OPC_MNZB_SN, + TILEPRO_OPC_MNZH, + TILEPRO_OPC_MNZH_SN, + TILEPRO_OPC_MTSPR, + TILEPRO_OPC_MULHH_SS, + TILEPRO_OPC_MULHH_SS_SN, + TILEPRO_OPC_MULHH_SU, + TILEPRO_OPC_MULHH_SU_SN, + TILEPRO_OPC_MULHH_UU, + TILEPRO_OPC_MULHH_UU_SN, + TILEPRO_OPC_MULHHA_SS, + TILEPRO_OPC_MULHHA_SS_SN, + TILEPRO_OPC_MULHHA_SU, + TILEPRO_OPC_MULHHA_SU_SN, + TILEPRO_OPC_MULHHA_UU, + TILEPRO_OPC_MULHHA_UU_SN, + TILEPRO_OPC_MULHHSA_UU, + TILEPRO_OPC_MULHHSA_UU_SN, + TILEPRO_OPC_MULHL_SS, + TILEPRO_OPC_MULHL_SS_SN, + TILEPRO_OPC_MULHL_SU, + TILEPRO_OPC_MULHL_SU_SN, + TILEPRO_OPC_MULHL_US, + TILEPRO_OPC_MULHL_US_SN, + TILEPRO_OPC_MULHL_UU, + TILEPRO_OPC_MULHL_UU_SN, + TILEPRO_OPC_MULHLA_SS, + TILEPRO_OPC_MULHLA_SS_SN, + TILEPRO_OPC_MULHLA_SU, + TILEPRO_OPC_MULHLA_SU_SN, + TILEPRO_OPC_MULHLA_US, + TILEPRO_OPC_MULHLA_US_SN, + TILEPRO_OPC_MULHLA_UU, + TILEPRO_OPC_MULHLA_UU_SN, + TILEPRO_OPC_MULHLSA_UU, + TILEPRO_OPC_MULHLSA_UU_SN, + TILEPRO_OPC_MULLL_SS, + TILEPRO_OPC_MULLL_SS_SN, + TILEPRO_OPC_MULLL_SU, + TILEPRO_OPC_MULLL_SU_SN, + TILEPRO_OPC_MULLL_UU, + TILEPRO_OPC_MULLL_UU_SN, + TILEPRO_OPC_MULLLA_SS, + TILEPRO_OPC_MULLLA_SS_SN, + TILEPRO_OPC_MULLLA_SU, + TILEPRO_OPC_MULLLA_SU_SN, + TILEPRO_OPC_MULLLA_UU, + TILEPRO_OPC_MULLLA_UU_SN, + TILEPRO_OPC_MULLLSA_UU, + TILEPRO_OPC_MULLLSA_UU_SN, + TILEPRO_OPC_MVNZ, + TILEPRO_OPC_MVNZ_SN, + TILEPRO_OPC_MVZ, + TILEPRO_OPC_MVZ_SN, + TILEPRO_OPC_MZ, + TILEPRO_OPC_MZ_SN, + TILEPRO_OPC_MZB, + TILEPRO_OPC_MZB_SN, + TILEPRO_OPC_MZH, + TILEPRO_OPC_MZH_SN, + TILEPRO_OPC_NAP, + TILEPRO_OPC_NOP, + TILEPRO_OPC_NOR, + TILEPRO_OPC_NOR_SN, + TILEPRO_OPC_OR, + TILEPRO_OPC_OR_SN, + TILEPRO_OPC_ORI, + TILEPRO_OPC_ORI_SN, + TILEPRO_OPC_PACKBS_U, + TILEPRO_OPC_PACKBS_U_SN, + TILEPRO_OPC_PACKHB, + TILEPRO_OPC_PACKHB_SN, + TILEPRO_OPC_PACKHS, + TILEPRO_OPC_PACKHS_SN, + TILEPRO_OPC_PACKLB, + TILEPRO_OPC_PACKLB_SN, + TILEPRO_OPC_PCNT, + TILEPRO_OPC_PCNT_SN, + TILEPRO_OPC_RL, + TILEPRO_OPC_RL_SN, + TILEPRO_OPC_RLI, + TILEPRO_OPC_RLI_SN, + TILEPRO_OPC_S1A, + TILEPRO_OPC_S1A_SN, + TILEPRO_OPC_S2A, + TILEPRO_OPC_S2A_SN, + TILEPRO_OPC_S3A, + TILEPRO_OPC_S3A_SN, + TILEPRO_OPC_SADAB_U, + TILEPRO_OPC_SADAB_U_SN, + TILEPRO_OPC_SADAH, + TILEPRO_OPC_SADAH_SN, + TILEPRO_OPC_SADAH_U, + TILEPRO_OPC_SADAH_U_SN, + TILEPRO_OPC_SADB_U, + TILEPRO_OPC_SADB_U_SN, + TILEPRO_OPC_SADH, + TILEPRO_OPC_SADH_SN, + TILEPRO_OPC_SADH_U, + TILEPRO_OPC_SADH_U_SN, + TILEPRO_OPC_SB, + TILEPRO_OPC_SBADD, + TILEPRO_OPC_SEQ, + TILEPRO_OPC_SEQ_SN, + TILEPRO_OPC_SEQB, + TILEPRO_OPC_SEQB_SN, + TILEPRO_OPC_SEQH, + TILEPRO_OPC_SEQH_SN, + TILEPRO_OPC_SEQI, + TILEPRO_OPC_SEQI_SN, + TILEPRO_OPC_SEQIB, + TILEPRO_OPC_SEQIB_SN, + TILEPRO_OPC_SEQIH, + TILEPRO_OPC_SEQIH_SN, + TILEPRO_OPC_SH, + TILEPRO_OPC_SHADD, + TILEPRO_OPC_SHL, + TILEPRO_OPC_SHL_SN, + TILEPRO_OPC_SHLB, + TILEPRO_OPC_SHLB_SN, + TILEPRO_OPC_SHLH, + TILEPRO_OPC_SHLH_SN, + TILEPRO_OPC_SHLI, + TILEPRO_OPC_SHLI_SN, + TILEPRO_OPC_SHLIB, + TILEPRO_OPC_SHLIB_SN, + TILEPRO_OPC_SHLIH, + TILEPRO_OPC_SHLIH_SN, + TILEPRO_OPC_SHR, + TILEPRO_OPC_SHR_SN, + TILEPRO_OPC_SHRB, + TILEPRO_OPC_SHRB_SN, + TILEPRO_OPC_SHRH, + TILEPRO_OPC_SHRH_SN, + TILEPRO_OPC_SHRI, + TILEPRO_OPC_SHRI_SN, + TILEPRO_OPC_SHRIB, + TILEPRO_OPC_SHRIB_SN, + TILEPRO_OPC_SHRIH, + TILEPRO_OPC_SHRIH_SN, + TILEPRO_OPC_SLT, + TILEPRO_OPC_SLT_SN, + TILEPRO_OPC_SLT_U, + TILEPRO_OPC_SLT_U_SN, + TILEPRO_OPC_SLTB, + TILEPRO_OPC_SLTB_SN, + TILEPRO_OPC_SLTB_U, + TILEPRO_OPC_SLTB_U_SN, + TILEPRO_OPC_SLTE, + TILEPRO_OPC_SLTE_SN, + TILEPRO_OPC_SLTE_U, + TILEPRO_OPC_SLTE_U_SN, + TILEPRO_OPC_SLTEB, + TILEPRO_OPC_SLTEB_SN, + TILEPRO_OPC_SLTEB_U, + TILEPRO_OPC_SLTEB_U_SN, + TILEPRO_OPC_SLTEH, + TILEPRO_OPC_SLTEH_SN, + TILEPRO_OPC_SLTEH_U, + TILEPRO_OPC_SLTEH_U_SN, + TILEPRO_OPC_SLTH, + TILEPRO_OPC_SLTH_SN, + TILEPRO_OPC_SLTH_U, + TILEPRO_OPC_SLTH_U_SN, + TILEPRO_OPC_SLTI, + TILEPRO_OPC_SLTI_SN, + TILEPRO_OPC_SLTI_U, + TILEPRO_OPC_SLTI_U_SN, + TILEPRO_OPC_SLTIB, + TILEPRO_OPC_SLTIB_SN, + TILEPRO_OPC_SLTIB_U, + TILEPRO_OPC_SLTIB_U_SN, + TILEPRO_OPC_SLTIH, + TILEPRO_OPC_SLTIH_SN, + TILEPRO_OPC_SLTIH_U, + TILEPRO_OPC_SLTIH_U_SN, + TILEPRO_OPC_SNE, + TILEPRO_OPC_SNE_SN, + TILEPRO_OPC_SNEB, + TILEPRO_OPC_SNEB_SN, + TILEPRO_OPC_SNEH, + TILEPRO_OPC_SNEH_SN, + TILEPRO_OPC_SRA, + TILEPRO_OPC_SRA_SN, + TILEPRO_OPC_SRAB, + TILEPRO_OPC_SRAB_SN, + TILEPRO_OPC_SRAH, + TILEPRO_OPC_SRAH_SN, + TILEPRO_OPC_SRAI, + TILEPRO_OPC_SRAI_SN, + TILEPRO_OPC_SRAIB, + TILEPRO_OPC_SRAIB_SN, + TILEPRO_OPC_SRAIH, + TILEPRO_OPC_SRAIH_SN, + TILEPRO_OPC_SUB, + TILEPRO_OPC_SUB_SN, + TILEPRO_OPC_SUBB, + TILEPRO_OPC_SUBB_SN, + TILEPRO_OPC_SUBBS_U, + TILEPRO_OPC_SUBBS_U_SN, + TILEPRO_OPC_SUBH, + TILEPRO_OPC_SUBH_SN, + TILEPRO_OPC_SUBHS, + TILEPRO_OPC_SUBHS_SN, + TILEPRO_OPC_SUBS, + TILEPRO_OPC_SUBS_SN, + TILEPRO_OPC_SW, + TILEPRO_OPC_SWADD, + TILEPRO_OPC_SWINT0, + TILEPRO_OPC_SWINT1, + TILEPRO_OPC_SWINT2, + TILEPRO_OPC_SWINT3, + TILEPRO_OPC_TBLIDXB0, + TILEPRO_OPC_TBLIDXB0_SN, + TILEPRO_OPC_TBLIDXB1, + TILEPRO_OPC_TBLIDXB1_SN, + TILEPRO_OPC_TBLIDXB2, + TILEPRO_OPC_TBLIDXB2_SN, + TILEPRO_OPC_TBLIDXB3, + TILEPRO_OPC_TBLIDXB3_SN, + TILEPRO_OPC_TNS, + TILEPRO_OPC_TNS_SN, + TILEPRO_OPC_WH64, + TILEPRO_OPC_XOR, + TILEPRO_OPC_XOR_SN, + TILEPRO_OPC_XORI, + TILEPRO_OPC_XORI_SN, + TILEPRO_OPC_NONE +} tilepro_mnemonic; + + + + +typedef enum +{ + TILEPRO_PIPELINE_X0, + TILEPRO_PIPELINE_X1, + TILEPRO_PIPELINE_Y0, + TILEPRO_PIPELINE_Y1, + TILEPRO_PIPELINE_Y2, +} tilepro_pipeline; + +#define tilepro_is_x_pipeline(p) ((int)(p) <= (int)TILEPRO_PIPELINE_X1) + +typedef enum +{ + TILEPRO_OP_TYPE_REGISTER, + TILEPRO_OP_TYPE_IMMEDIATE, + TILEPRO_OP_TYPE_ADDRESS, + TILEPRO_OP_TYPE_SPR +} tilepro_operand_type; + +struct tilepro_operand +{ + /* Is this operand a register, immediate or address? */ + tilepro_operand_type type; + + /* The default relocation type for this operand. */ + signed int default_reloc : 16; + + /* How many bits is this value? (used for range checking) */ + unsigned int num_bits : 5; + + /* Is the value signed? (used for range checking) */ + unsigned int is_signed : 1; + + /* Is this operand a source register? */ + unsigned int is_src_reg : 1; + + /* Is this operand written? (i.e. is it a destination register) */ + unsigned int is_dest_reg : 1; + + /* Is this operand PC-relative? */ + unsigned int is_pc_relative : 1; + + /* By how many bits do we right shift the value before inserting? */ + unsigned int rightshift : 2; + + /* Return the bits for this operand to be ORed into an existing bundle. */ + tilepro_bundle_bits (*insert) (int op); + + /* Extract this operand and return it. */ + unsigned int (*extract) (tilepro_bundle_bits bundle); +}; + + +extern const struct tilepro_operand tilepro_operands[]; + +/* One finite-state machine per pipe for rapid instruction decoding. */ +extern const unsigned short * const +tilepro_bundle_decoder_fsms[TILEPRO_NUM_PIPELINE_ENCODINGS]; + + +struct tilepro_opcode +{ + /* The opcode mnemonic, e.g. "add" */ + const char *name; + + /* The enum value for this mnemonic. */ + tilepro_mnemonic mnemonic; + + /* A bit mask of which of the five pipes this instruction + is compatible with: + X0 0x01 + X1 0x02 + Y0 0x04 + Y1 0x08 + Y2 0x10 */ + unsigned char pipes; + + /* How many operands are there? */ + unsigned char num_operands; + + /* Which register does this write implicitly, or TREG_ZERO if none? */ + unsigned char implicitly_written_register; + + /* Can this be bundled with other instructions (almost always true). */ + unsigned char can_bundle; + + /* The description of the operands. Each of these is an + * index into the tilepro_operands[] table. */ + unsigned char operands[TILEPRO_NUM_PIPELINE_ENCODINGS][TILEPRO_MAX_OPERANDS]; + +}; + +extern const struct tilepro_opcode tilepro_opcodes[]; + + +/* Used for non-textual disassembly into structs. */ +struct tilepro_decoded_instruction +{ + const struct tilepro_opcode *opcode; + const struct tilepro_operand *operands[TILEPRO_MAX_OPERANDS]; + int operand_values[TILEPRO_MAX_OPERANDS]; +}; + + +/* Disassemble a bundle into a struct for machine processing. */ +extern int parse_insn_tilepro(tilepro_bundle_bits bits, + unsigned int pc, + struct tilepro_decoded_instruction + decoded[TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE]); + + +/* Given a set of bundle bits and a specific pipe, returns which + * instruction the bundle contains in that pipe. + */ +extern const struct tilepro_opcode * +find_opcode(tilepro_bundle_bits bits, tilepro_pipeline pipe); + + + +#endif /* opcode_tilepro_h */ diff --git a/arch/tile/include/asm/tile-desc_64.h b/arch/tile/include/asm/tile-desc_64.h new file mode 100644 index 00000000..1819efcb --- /dev/null +++ b/arch/tile/include/asm/tile-desc_64.h @@ -0,0 +1,483 @@ +/* TILE-Gx opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * + * + * + */ + +#ifndef opcode_tile_h +#define opcode_tile_h + +#include <arch/opcode.h> + + +enum +{ + TILEGX_MAX_OPERANDS = 4 /* bfexts */ +}; + +typedef enum +{ + TILEGX_OPC_BPT, + TILEGX_OPC_INFO, + TILEGX_OPC_INFOL, + TILEGX_OPC_MOVE, + TILEGX_OPC_MOVEI, + TILEGX_OPC_MOVELI, + TILEGX_OPC_PREFETCH, + TILEGX_OPC_PREFETCH_ADD_L1, + TILEGX_OPC_PREFETCH_ADD_L1_FAULT, + TILEGX_OPC_PREFETCH_ADD_L2, + TILEGX_OPC_PREFETCH_ADD_L2_FAULT, + TILEGX_OPC_PREFETCH_ADD_L3, + TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + TILEGX_OPC_PREFETCH_L1, + TILEGX_OPC_PREFETCH_L1_FAULT, + TILEGX_OPC_PREFETCH_L2, + TILEGX_OPC_PREFETCH_L2_FAULT, + TILEGX_OPC_PREFETCH_L3, + TILEGX_OPC_PREFETCH_L3_FAULT, + TILEGX_OPC_RAISE, + TILEGX_OPC_ADD, + TILEGX_OPC_ADDI, + TILEGX_OPC_ADDLI, + TILEGX_OPC_ADDX, + TILEGX_OPC_ADDXI, + TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXSC, + TILEGX_OPC_AND, + TILEGX_OPC_ANDI, + TILEGX_OPC_BEQZ, + TILEGX_OPC_BEQZT, + TILEGX_OPC_BFEXTS, + TILEGX_OPC_BFEXTU, + TILEGX_OPC_BFINS, + TILEGX_OPC_BGEZ, + TILEGX_OPC_BGEZT, + TILEGX_OPC_BGTZ, + TILEGX_OPC_BGTZT, + TILEGX_OPC_BLBC, + TILEGX_OPC_BLBCT, + TILEGX_OPC_BLBS, + TILEGX_OPC_BLBST, + TILEGX_OPC_BLEZ, + TILEGX_OPC_BLEZT, + TILEGX_OPC_BLTZ, + TILEGX_OPC_BLTZT, + TILEGX_OPC_BNEZ, + TILEGX_OPC_BNEZT, + TILEGX_OPC_CLZ, + TILEGX_OPC_CMOVEQZ, + TILEGX_OPC_CMOVNEZ, + TILEGX_OPC_CMPEQ, + TILEGX_OPC_CMPEQI, + TILEGX_OPC_CMPEXCH, + TILEGX_OPC_CMPEXCH4, + TILEGX_OPC_CMPLES, + TILEGX_OPC_CMPLEU, + TILEGX_OPC_CMPLTS, + TILEGX_OPC_CMPLTSI, + TILEGX_OPC_CMPLTU, + TILEGX_OPC_CMPLTUI, + TILEGX_OPC_CMPNE, + TILEGX_OPC_CMUL, + TILEGX_OPC_CMULA, + TILEGX_OPC_CMULAF, + TILEGX_OPC_CMULF, + TILEGX_OPC_CMULFR, + TILEGX_OPC_CMULH, + TILEGX_OPC_CMULHR, + TILEGX_OPC_CRC32_32, + TILEGX_OPC_CRC32_8, + TILEGX_OPC_CTZ, + TILEGX_OPC_DBLALIGN, + TILEGX_OPC_DBLALIGN2, + TILEGX_OPC_DBLALIGN4, + TILEGX_OPC_DBLALIGN6, + TILEGX_OPC_DRAIN, + TILEGX_OPC_DTLBPR, + TILEGX_OPC_EXCH, + TILEGX_OPC_EXCH4, + TILEGX_OPC_FDOUBLE_ADD_FLAGS, + TILEGX_OPC_FDOUBLE_ADDSUB, + TILEGX_OPC_FDOUBLE_MUL_FLAGS, + TILEGX_OPC_FDOUBLE_PACK1, + TILEGX_OPC_FDOUBLE_PACK2, + TILEGX_OPC_FDOUBLE_SUB_FLAGS, + TILEGX_OPC_FDOUBLE_UNPACK_MAX, + TILEGX_OPC_FDOUBLE_UNPACK_MIN, + TILEGX_OPC_FETCHADD, + TILEGX_OPC_FETCHADD4, + TILEGX_OPC_FETCHADDGEZ, + TILEGX_OPC_FETCHADDGEZ4, + TILEGX_OPC_FETCHAND, + TILEGX_OPC_FETCHAND4, + TILEGX_OPC_FETCHOR, + TILEGX_OPC_FETCHOR4, + TILEGX_OPC_FINV, + TILEGX_OPC_FLUSH, + TILEGX_OPC_FLUSHWB, + TILEGX_OPC_FNOP, + TILEGX_OPC_FSINGLE_ADD1, + TILEGX_OPC_FSINGLE_ADDSUB2, + TILEGX_OPC_FSINGLE_MUL1, + TILEGX_OPC_FSINGLE_MUL2, + TILEGX_OPC_FSINGLE_PACK1, + TILEGX_OPC_FSINGLE_PACK2, + TILEGX_OPC_FSINGLE_SUB1, + TILEGX_OPC_ICOH, + TILEGX_OPC_ILL, + TILEGX_OPC_INV, + TILEGX_OPC_IRET, + TILEGX_OPC_J, + TILEGX_OPC_JAL, + TILEGX_OPC_JALR, + TILEGX_OPC_JALRP, + TILEGX_OPC_JR, + TILEGX_OPC_JRP, + TILEGX_OPC_LD, + TILEGX_OPC_LD1S, + TILEGX_OPC_LD1S_ADD, + TILEGX_OPC_LD1U, + TILEGX_OPC_LD1U_ADD, + TILEGX_OPC_LD2S, + TILEGX_OPC_LD2S_ADD, + TILEGX_OPC_LD2U, + TILEGX_OPC_LD2U_ADD, + TILEGX_OPC_LD4S, + TILEGX_OPC_LD4S_ADD, + TILEGX_OPC_LD4U, + TILEGX_OPC_LD4U_ADD, + TILEGX_OPC_LD_ADD, + TILEGX_OPC_LDNA, + TILEGX_OPC_LDNA_ADD, + TILEGX_OPC_LDNT, + TILEGX_OPC_LDNT1S, + TILEGX_OPC_LDNT1S_ADD, + TILEGX_OPC_LDNT1U, + TILEGX_OPC_LDNT1U_ADD, + TILEGX_OPC_LDNT2S, + TILEGX_OPC_LDNT2S_ADD, + TILEGX_OPC_LDNT2U, + TILEGX_OPC_LDNT2U_ADD, + TILEGX_OPC_LDNT4S, + TILEGX_OPC_LDNT4S_ADD, + TILEGX_OPC_LDNT4U, + TILEGX_OPC_LDNT4U_ADD, + TILEGX_OPC_LDNT_ADD, + TILEGX_OPC_LNK, + TILEGX_OPC_MF, + TILEGX_OPC_MFSPR, + TILEGX_OPC_MM, + TILEGX_OPC_MNZ, + TILEGX_OPC_MTSPR, + TILEGX_OPC_MUL_HS_HS, + TILEGX_OPC_MUL_HS_HU, + TILEGX_OPC_MUL_HS_LS, + TILEGX_OPC_MUL_HS_LU, + TILEGX_OPC_MUL_HU_HU, + TILEGX_OPC_MUL_HU_LS, + TILEGX_OPC_MUL_HU_LU, + TILEGX_OPC_MUL_LS_LS, + TILEGX_OPC_MUL_LS_LU, + TILEGX_OPC_MUL_LU_LU, + TILEGX_OPC_MULA_HS_HS, + TILEGX_OPC_MULA_HS_HU, + TILEGX_OPC_MULA_HS_LS, + TILEGX_OPC_MULA_HS_LU, + TILEGX_OPC_MULA_HU_HU, + TILEGX_OPC_MULA_HU_LS, + TILEGX_OPC_MULA_HU_LU, + TILEGX_OPC_MULA_LS_LS, + TILEGX_OPC_MULA_LS_LU, + TILEGX_OPC_MULA_LU_LU, + TILEGX_OPC_MULAX, + TILEGX_OPC_MULX, + TILEGX_OPC_MZ, + TILEGX_OPC_NAP, + TILEGX_OPC_NOP, + TILEGX_OPC_NOR, + TILEGX_OPC_OR, + TILEGX_OPC_ORI, + TILEGX_OPC_PCNT, + TILEGX_OPC_REVBITS, + TILEGX_OPC_REVBYTES, + TILEGX_OPC_ROTL, + TILEGX_OPC_ROTLI, + TILEGX_OPC_SHL, + TILEGX_OPC_SHL16INSLI, + TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADDX, + TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADDX, + TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADDX, + TILEGX_OPC_SHLI, + TILEGX_OPC_SHLX, + TILEGX_OPC_SHLXI, + TILEGX_OPC_SHRS, + TILEGX_OPC_SHRSI, + TILEGX_OPC_SHRU, + TILEGX_OPC_SHRUI, + TILEGX_OPC_SHRUX, + TILEGX_OPC_SHRUXI, + TILEGX_OPC_SHUFFLEBYTES, + TILEGX_OPC_ST, + TILEGX_OPC_ST1, + TILEGX_OPC_ST1_ADD, + TILEGX_OPC_ST2, + TILEGX_OPC_ST2_ADD, + TILEGX_OPC_ST4, + TILEGX_OPC_ST4_ADD, + TILEGX_OPC_ST_ADD, + TILEGX_OPC_STNT, + TILEGX_OPC_STNT1, + TILEGX_OPC_STNT1_ADD, + TILEGX_OPC_STNT2, + TILEGX_OPC_STNT2_ADD, + TILEGX_OPC_STNT4, + TILEGX_OPC_STNT4_ADD, + TILEGX_OPC_STNT_ADD, + TILEGX_OPC_SUB, + TILEGX_OPC_SUBX, + TILEGX_OPC_SUBXSC, + TILEGX_OPC_SWINT0, + TILEGX_OPC_SWINT1, + TILEGX_OPC_SWINT2, + TILEGX_OPC_SWINT3, + TILEGX_OPC_TBLIDXB0, + TILEGX_OPC_TBLIDXB1, + TILEGX_OPC_TBLIDXB2, + TILEGX_OPC_TBLIDXB3, + TILEGX_OPC_V1ADD, + TILEGX_OPC_V1ADDI, + TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADIFFU, + TILEGX_OPC_V1AVGU, + TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPEQI, + TILEGX_OPC_V1CMPLES, + TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTSI, + TILEGX_OPC_V1CMPLTU, + TILEGX_OPC_V1CMPLTUI, + TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1DDOTPU, + TILEGX_OPC_V1DDOTPUA, + TILEGX_OPC_V1DDOTPUS, + TILEGX_OPC_V1DDOTPUSA, + TILEGX_OPC_V1DOTP, + TILEGX_OPC_V1DOTPA, + TILEGX_OPC_V1DOTPU, + TILEGX_OPC_V1DOTPUA, + TILEGX_OPC_V1DOTPUS, + TILEGX_OPC_V1DOTPUSA, + TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_L, + TILEGX_OPC_V1MAXU, + TILEGX_OPC_V1MAXUI, + TILEGX_OPC_V1MINU, + TILEGX_OPC_V1MINUI, + TILEGX_OPC_V1MNZ, + TILEGX_OPC_V1MULTU, + TILEGX_OPC_V1MULU, + TILEGX_OPC_V1MULUS, + TILEGX_OPC_V1MZ, + TILEGX_OPC_V1SADAU, + TILEGX_OPC_V1SADU, + TILEGX_OPC_V1SHL, + TILEGX_OPC_V1SHLI, + TILEGX_OPC_V1SHRS, + TILEGX_OPC_V1SHRSI, + TILEGX_OPC_V1SHRU, + TILEGX_OPC_V1SHRUI, + TILEGX_OPC_V1SUB, + TILEGX_OPC_V1SUBUC, + TILEGX_OPC_V2ADD, + TILEGX_OPC_V2ADDI, + TILEGX_OPC_V2ADDSC, + TILEGX_OPC_V2ADIFFS, + TILEGX_OPC_V2AVGS, + TILEGX_OPC_V2CMPEQ, + TILEGX_OPC_V2CMPEQI, + TILEGX_OPC_V2CMPLES, + TILEGX_OPC_V2CMPLEU, + TILEGX_OPC_V2CMPLTS, + TILEGX_OPC_V2CMPLTSI, + TILEGX_OPC_V2CMPLTU, + TILEGX_OPC_V2CMPLTUI, + TILEGX_OPC_V2CMPNE, + TILEGX_OPC_V2DOTP, + TILEGX_OPC_V2DOTPA, + TILEGX_OPC_V2INT_H, + TILEGX_OPC_V2INT_L, + TILEGX_OPC_V2MAXS, + TILEGX_OPC_V2MAXSI, + TILEGX_OPC_V2MINS, + TILEGX_OPC_V2MINSI, + TILEGX_OPC_V2MNZ, + TILEGX_OPC_V2MULFSC, + TILEGX_OPC_V2MULS, + TILEGX_OPC_V2MULTS, + TILEGX_OPC_V2MZ, + TILEGX_OPC_V2PACKH, + TILEGX_OPC_V2PACKL, + TILEGX_OPC_V2PACKUC, + TILEGX_OPC_V2SADAS, + TILEGX_OPC_V2SADAU, + TILEGX_OPC_V2SADS, + TILEGX_OPC_V2SADU, + TILEGX_OPC_V2SHL, + TILEGX_OPC_V2SHLI, + TILEGX_OPC_V2SHLSC, + TILEGX_OPC_V2SHRS, + TILEGX_OPC_V2SHRSI, + TILEGX_OPC_V2SHRU, + TILEGX_OPC_V2SHRUI, + TILEGX_OPC_V2SUB, + TILEGX_OPC_V2SUBSC, + TILEGX_OPC_V4ADD, + TILEGX_OPC_V4ADDSC, + TILEGX_OPC_V4INT_H, + TILEGX_OPC_V4INT_L, + TILEGX_OPC_V4PACKSC, + TILEGX_OPC_V4SHL, + TILEGX_OPC_V4SHLSC, + TILEGX_OPC_V4SHRS, + TILEGX_OPC_V4SHRU, + TILEGX_OPC_V4SUB, + TILEGX_OPC_V4SUBSC, + TILEGX_OPC_WH64, + TILEGX_OPC_XOR, + TILEGX_OPC_XORI, + TILEGX_OPC_NONE +} tilegx_mnemonic; + + + +typedef enum +{ + TILEGX_PIPELINE_X0, + TILEGX_PIPELINE_X1, + TILEGX_PIPELINE_Y0, + TILEGX_PIPELINE_Y1, + TILEGX_PIPELINE_Y2, +} tilegx_pipeline; + +#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1) + +typedef enum +{ + TILEGX_OP_TYPE_REGISTER, + TILEGX_OP_TYPE_IMMEDIATE, + TILEGX_OP_TYPE_ADDRESS, + TILEGX_OP_TYPE_SPR +} tilegx_operand_type; + +struct tilegx_operand +{ + /* Is this operand a register, immediate or address? */ + tilegx_operand_type type; + + /* The default relocation type for this operand. */ + signed int default_reloc : 16; + + /* How many bits is this value? (used for range checking) */ + unsigned int num_bits : 5; + + /* Is the value signed? (used for range checking) */ + unsigned int is_signed : 1; + + /* Is this operand a source register? */ + unsigned int is_src_reg : 1; + + /* Is this operand written? (i.e. is it a destination register) */ + unsigned int is_dest_reg : 1; + + /* Is this operand PC-relative? */ + unsigned int is_pc_relative : 1; + + /* By how many bits do we right shift the value before inserting? */ + unsigned int rightshift : 2; + + /* Return the bits for this operand to be ORed into an existing bundle. */ + tilegx_bundle_bits (*insert) (int op); + + /* Extract this operand and return it. */ + unsigned int (*extract) (tilegx_bundle_bits bundle); +}; + + +extern const struct tilegx_operand tilegx_operands[]; + +/* One finite-state machine per pipe for rapid instruction decoding. */ +extern const unsigned short * const +tilegx_bundle_decoder_fsms[TILEGX_NUM_PIPELINE_ENCODINGS]; + + +struct tilegx_opcode +{ + /* The opcode mnemonic, e.g. "add" */ + const char *name; + + /* The enum value for this mnemonic. */ + tilegx_mnemonic mnemonic; + + /* A bit mask of which of the five pipes this instruction + is compatible with: + X0 0x01 + X1 0x02 + Y0 0x04 + Y1 0x08 + Y2 0x10 */ + unsigned char pipes; + + /* How many operands are there? */ + unsigned char num_operands; + + /* Which register does this write implicitly, or TREG_ZERO if none? */ + unsigned char implicitly_written_register; + + /* Can this be bundled with other instructions (almost always true). */ + unsigned char can_bundle; + + /* The description of the operands. Each of these is an + * index into the tilegx_operands[] table. */ + unsigned char operands[TILEGX_NUM_PIPELINE_ENCODINGS][TILEGX_MAX_OPERANDS]; + +}; + +extern const struct tilegx_opcode tilegx_opcodes[]; + +/* Used for non-textual disassembly into structs. */ +struct tilegx_decoded_instruction +{ + const struct tilegx_opcode *opcode; + const struct tilegx_operand *operands[TILEGX_MAX_OPERANDS]; + long long operand_values[TILEGX_MAX_OPERANDS]; +}; + + +/* Disassemble a bundle into a struct for machine processing. */ +extern int parse_insn_tilegx(tilegx_bundle_bits bits, + unsigned long long pc, + struct tilegx_decoded_instruction + decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]); + + + +#endif /* opcode_tilegx_h */ diff --git a/arch/tile/include/asm/timex.h b/arch/tile/include/asm/timex.h new file mode 100644 index 00000000..dc987d53 --- /dev/null +++ b/arch/tile/include/asm/timex.h @@ -0,0 +1,52 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_TIMEX_H +#define _ASM_TILE_TIMEX_H + +/* + * This rate should be a multiple of the possible HZ values (100, 250, 1000) + * and a fraction of the possible hardware timer frequencies. Our timer + * frequency is highly tunable but also quite precise, so for the primary use + * of this value (setting ACT_HZ from HZ) we just pick a value that causes + * ACT_HZ to be set to HZ. We make the value somewhat large just to be + * more robust in case someone tries out a new value of HZ. + */ +#define CLOCK_TICK_RATE 1000000 + +typedef unsigned long long cycles_t; + +#if CHIP_HAS_SPLIT_CYCLE() +cycles_t get_cycles(void); +#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW) +#else +static inline cycles_t get_cycles(void) +{ + return __insn_mfspr(SPR_CYCLE); +} +#define get_cycles_low() __insn_mfspr(SPR_CYCLE) /* just get all 64 bits */ +#endif + +cycles_t get_clock_rate(void); + +/* Convert nanoseconds to core clock cycles. */ +cycles_t ns2cycles(unsigned long nsecs); + +/* Called at cpu initialization to set some low-level constants. */ +void setup_clock(void); + +/* Called at cpu initialization to start the tile-timer clock device. */ +void setup_tile_timer(void); + +#endif /* _ASM_TILE_TIMEX_H */ diff --git a/arch/tile/include/asm/tlb.h b/arch/tile/include/asm/tlb.h new file mode 100644 index 00000000..4a891a1a --- /dev/null +++ b/arch/tile/include/asm/tlb.h @@ -0,0 +1,25 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_TLB_H +#define _ASM_TILE_TLB_H + +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) +#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) + +#include <asm-generic/tlb.h> + +#endif /* _ASM_TILE_TLB_H */ diff --git a/arch/tile/include/asm/tlbflush.h b/arch/tile/include/asm/tlbflush.h new file mode 100644 index 00000000..96199d21 --- /dev/null +++ b/arch/tile/include/asm/tlbflush.h @@ -0,0 +1,128 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_TLBFLUSH_H +#define _ASM_TILE_TLBFLUSH_H + +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <asm/cacheflush.h> +#include <asm/page.h> +#include <hv/hypervisor.h> + +/* + * Rather than associating each mm with its own ASID, we just use + * ASIDs to allow us to lazily flush the TLB when we switch mms. + * This way we only have to do an actual TLB flush on mm switch + * every time we wrap ASIDs, not every single time we switch. + * + * FIXME: We might improve performance by keeping ASIDs around + * properly, though since the hypervisor direct-maps VAs to TSB + * entries, we're likely to have lost at least the executable page + * mappings by the time we switch back to the original mm. + */ +DECLARE_PER_CPU(int, current_asid); + +/* The hypervisor tells us what ASIDs are available to us. */ +extern int min_asid, max_asid; + +static inline unsigned long hv_page_size(const struct vm_area_struct *vma) +{ + return (vma->vm_flags & VM_HUGETLB) ? HPAGE_SIZE : PAGE_SIZE; +} + +/* Pass as vma pointer for non-executable mapping, if no vma available. */ +#define FLUSH_NONEXEC ((const struct vm_area_struct *)-1UL) + +/* Flush a single user page on this cpu. */ +static inline void local_flush_tlb_page(const struct vm_area_struct *vma, + unsigned long addr, + unsigned long page_size) +{ + int rc = hv_flush_page(addr, page_size); + if (rc < 0) + panic("hv_flush_page(%#lx,%#lx) failed: %d", + addr, page_size, rc); + if (!vma || (vma != FLUSH_NONEXEC && (vma->vm_flags & VM_EXEC))) + __flush_icache(); +} + +/* Flush range of user pages on this cpu. */ +static inline void local_flush_tlb_pages(const struct vm_area_struct *vma, + unsigned long addr, + unsigned long page_size, + unsigned long len) +{ + int rc = hv_flush_pages(addr, page_size, len); + if (rc < 0) + panic("hv_flush_pages(%#lx,%#lx,%#lx) failed: %d", + addr, page_size, len, rc); + if (!vma || (vma != FLUSH_NONEXEC && (vma->vm_flags & VM_EXEC))) + __flush_icache(); +} + +/* Flush all user pages on this cpu. */ +static inline void local_flush_tlb(void) +{ + int rc = hv_flush_all(1); /* preserve global mappings */ + if (rc < 0) + panic("hv_flush_all(1) failed: %d", rc); + __flush_icache(); +} + +/* + * Global pages have to be flushed a bit differently. Not a real + * performance problem because this does not happen often. + */ +static inline void local_flush_tlb_all(void) +{ + int i; + for (i = 0; ; ++i) { + HV_VirtAddrRange r = hv_inquire_virtual(i); + if (r.size == 0) + break; + local_flush_tlb_pages(NULL, r.start, PAGE_SIZE, r.size); + local_flush_tlb_pages(NULL, r.start, HPAGE_SIZE, r.size); + } +} + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages + * - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus + * + * Here (as in vm_area_struct), "end" means the first byte after + * our end address. + */ + +extern void flush_tlb_all(void); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void flush_tlb_current_task(void); +extern void flush_tlb_mm(struct mm_struct *); +extern void flush_tlb_page(const struct vm_area_struct *, unsigned long); +extern void flush_tlb_page_mm(const struct vm_area_struct *, + struct mm_struct *, unsigned long); +extern void flush_tlb_range(const struct vm_area_struct *, + unsigned long start, unsigned long end); + +#define flush_tlb() flush_tlb_current_task() + +#endif /* _ASM_TILE_TLBFLUSH_H */ diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h new file mode 100644 index 00000000..6fdd0c86 --- /dev/null +++ b/arch/tile/include/asm/topology.h @@ -0,0 +1,124 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_TOPOLOGY_H +#define _ASM_TILE_TOPOLOGY_H + +#ifdef CONFIG_NUMA + +#include <linux/cpumask.h> + +/* Mappings between logical cpu number and node number. */ +extern struct cpumask node_2_cpu_mask[]; +extern char cpu_2_node[]; + +/* Returns the number of the node containing CPU 'cpu'. */ +static inline int cpu_to_node(int cpu) +{ + return cpu_2_node[cpu]; +} + +/* + * Returns the number of the node containing Node 'node'. + * This architecture is flat, so it is a pretty simple function! + */ +#define parent_node(node) (node) + +/* Returns a bitmask of CPUs on Node 'node'. */ +static inline const struct cpumask *cpumask_of_node(int node) +{ + return &node_2_cpu_mask[node]; +} + +/* For now, use numa node -1 for global allocation. */ +#define pcibus_to_node(bus) ((void)(bus), -1) + +/* + * TILE architecture has many cores integrated in one processor, so we need + * setup bigger balance_interval for both CPU/NODE scheduling domains to + * reduce process scheduling costs. + */ + +/* sched_domains SD_CPU_INIT for TILE architecture */ +#define SD_CPU_INIT (struct sched_domain) { \ + .min_interval = 4, \ + .max_interval = 128, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 1, \ + .busy_idx = 2, \ + .idle_idx = 1, \ + .newidle_idx = 0, \ + .wake_idx = 0, \ + .forkexec_idx = 0, \ + \ + .flags = 1*SD_LOAD_BALANCE \ + | 1*SD_BALANCE_NEWIDLE \ + | 1*SD_BALANCE_EXEC \ + | 1*SD_BALANCE_FORK \ + | 0*SD_BALANCE_WAKE \ + | 0*SD_WAKE_AFFINE \ + | 0*SD_PREFER_LOCAL \ + | 0*SD_SHARE_CPUPOWER \ + | 0*SD_SHARE_PKG_RESOURCES \ + | 0*SD_SERIALIZE \ + , \ + .last_balance = jiffies, \ + .balance_interval = 32, \ +} + +/* sched_domains SD_NODE_INIT for TILE architecture */ +#define SD_NODE_INIT (struct sched_domain) { \ + .min_interval = 16, \ + .max_interval = 512, \ + .busy_factor = 32, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 1, \ + .busy_idx = 3, \ + .idle_idx = 1, \ + .newidle_idx = 2, \ + .wake_idx = 1, \ + .flags = 1*SD_LOAD_BALANCE \ + | 1*SD_BALANCE_NEWIDLE \ + | 1*SD_BALANCE_EXEC \ + | 1*SD_BALANCE_FORK \ + | 0*SD_BALANCE_WAKE \ + | 0*SD_WAKE_AFFINE \ + | 0*SD_PREFER_LOCAL \ + | 0*SD_SHARE_CPUPOWER \ + | 0*SD_SHARE_PKG_RESOURCES \ + | 1*SD_SERIALIZE \ + , \ + .last_balance = jiffies, \ + .balance_interval = 128, \ +} + +/* By definition, we create nodes based on online memory. */ +#define node_has_online_mem(nid) 1 + +#endif /* CONFIG_NUMA */ + +#include <asm-generic/topology.h> + +#ifdef CONFIG_SMP +#define topology_physical_package_id(cpu) ((void)(cpu), 0) +#define topology_core_id(cpu) (cpu) +#define topology_core_cpumask(cpu) ((void)(cpu), cpu_online_mask) +#define topology_thread_cpumask(cpu) cpumask_of(cpu) + +/* indicates that pointers to the topology struct cpumask maps are valid */ +#define arch_provides_topology_pointers yes +#endif + +#endif /* _ASM_TILE_TOPOLOGY_H */ diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h new file mode 100644 index 00000000..e28c3df4 --- /dev/null +++ b/arch/tile/include/asm/traps.h @@ -0,0 +1,74 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_TRAPS_H +#define _ASM_TILE_TRAPS_H + +#include <arch/chip.h> + +/* mm/fault.c */ +void do_page_fault(struct pt_regs *, int fault_num, + unsigned long address, unsigned long write); +#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() +void do_async_page_fault(struct pt_regs *); +#endif + +#ifndef __tilegx__ +/* + * We return this structure in registers to avoid having to write + * additional save/restore code in the intvec.S caller. + */ +struct intvec_state { + void *handler; + unsigned long vecnum; + unsigned long fault_num; + unsigned long info; + unsigned long retval; +}; +struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, + unsigned long address, + unsigned long info); +#endif + +/* kernel/traps.c */ +void do_trap(struct pt_regs *, int fault_num, unsigned long reason); +void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52); + +/* kernel/time.c */ +void do_timer_interrupt(struct pt_regs *, int fault_num); + +/* kernel/messaging.c */ +void hv_message_intr(struct pt_regs *, int intnum); + +/* kernel/irq.c */ +void tile_dev_intr(struct pt_regs *, int intnum); + +#ifdef CONFIG_HARDWALL +/* kernel/hardwall.c */ +void do_hardwall_trap(struct pt_regs *, int fault_num); +#endif + +/* kernel/ptrace.c */ +void do_breakpoint(struct pt_regs *, int fault_num); + + +#ifdef __tilegx__ +/* kernel/single_step.c */ +void gx_singlestep_handle(struct pt_regs *, int fault_num); + +/* kernel/intvec_64.S */ +void fill_ra_stack(void); +#endif + +#endif /* _ASM_TILE_TRAPS_H */ diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h new file mode 100644 index 00000000..ef34d2ca --- /dev/null +++ b/arch/tile/include/asm/uaccess.h @@ -0,0 +1,580 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_UACCESS_H +#define _ASM_TILE_UACCESS_H + +/* + * User space memory access functions + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <asm-generic/uaccess-unaligned.h> +#include <asm/processor.h> +#include <asm/page.h> + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons, these macros are grossly misnamed. + */ +#define MAKE_MM_SEG(a) ((mm_segment_t) { (a) }) + +#define KERNEL_DS MAKE_MM_SEG(-1UL) +#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current_thread_info()->addr_limit) +#define set_fs(x) (current_thread_info()->addr_limit = (x)) + +#define segment_eq(a, b) ((a).seg == (b).seg) + +#ifndef __tilegx__ +/* + * We could allow mapping all 16 MB at 0xfc000000, but we set up a + * special hack in arch_setup_additional_pages() to auto-create a mapping + * for the first 16 KB, and it would seem strange to have different + * user-accessible semantics for memory at 0xfc000000 and above 0xfc004000. + */ +static inline int is_arch_mappable_range(unsigned long addr, + unsigned long size) +{ + return (addr >= MEM_USER_INTRPT && + addr < (MEM_USER_INTRPT + INTRPT_SIZE) && + size <= (MEM_USER_INTRPT + INTRPT_SIZE) - addr); +} +#define is_arch_mappable_range is_arch_mappable_range +#else +#define is_arch_mappable_range(addr, size) 0 +#endif + +/* + * Test whether a block of memory is a valid user space address. + * Returns 0 if the range is valid, nonzero otherwise. + */ +int __range_ok(unsigned long addr, unsigned long size); + +/** + * access_ok: - Checks if a user space pointer is valid + * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that + * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe + * to write to a block, it is always safe to read from it. + * @addr: User space pointer to start of block to check + * @size: Size of block to check + * + * Context: User context only. This function may sleep. + * + * Checks if a pointer to a block of memory in user space is valid. + * + * Returns true (nonzero) if the memory block may be valid, false (zero) + * if it is definitely invalid. + * + * Note that, depending on architecture, this function probably just + * checks that the pointer is in the user space range - after calling + * this function, memory access functions may still return -EFAULT. + */ +#define access_ok(type, addr, size) ({ \ + __chk_user_ptr(addr); \ + likely(__range_ok((unsigned long)(addr), (size)) == 0); \ +}) + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry { + unsigned long insn, fixup; +}; + +extern int fixup_exception(struct pt_regs *regs); + +/* + * We return the __get_user_N function results in a structure, + * thus in r0 and r1. If "err" is zero, "val" is the result + * of the read; otherwise, "err" is -EFAULT. + * + * We rarely need 8-byte values on a 32-bit architecture, but + * we size the structure to accommodate. In practice, for the + * the smaller reads, we can zero the high word for free, and + * the caller will ignore it by virtue of casting anyway. + */ +struct __get_user { + unsigned long long val; + int err; +}; + +/* + * FIXME: we should express these as inline extended assembler, since + * they're fundamentally just a variable dereference and some + * supporting exception_table gunk. Note that (a la i386) we can + * extend the copy_to_user and copy_from_user routines to call into + * such extended assembler routines, though we will have to use a + * different return code in that case (1, 2, or 4, rather than -EFAULT). + */ +extern struct __get_user __get_user_1(const void __user *); +extern struct __get_user __get_user_2(const void __user *); +extern struct __get_user __get_user_4(const void __user *); +extern struct __get_user __get_user_8(const void __user *); +extern int __put_user_1(long, void __user *); +extern int __put_user_2(long, void __user *); +extern int __put_user_4(long, void __user *); +extern int __put_user_8(long long, void __user *); + +/* Unimplemented routines to cause linker failures */ +extern struct __get_user __get_user_bad(void); +extern int __put_user_bad(void); + +/* + * Careful: we have to cast the result to the type of the pointer + * for sign reasons. + */ +/** + * __get_user: - Get a simple variable from user space, with less checking. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + * + * Caller must check the pointer with access_ok() before calling this + * function. + */ +#define __get_user(x, ptr) \ +({ struct __get_user __ret; \ + __typeof__(*(ptr)) const __user *__gu_addr = (ptr); \ + __chk_user_ptr(__gu_addr); \ + switch (sizeof(*(__gu_addr))) { \ + case 1: \ + __ret = __get_user_1(__gu_addr); \ + break; \ + case 2: \ + __ret = __get_user_2(__gu_addr); \ + break; \ + case 4: \ + __ret = __get_user_4(__gu_addr); \ + break; \ + case 8: \ + __ret = __get_user_8(__gu_addr); \ + break; \ + default: \ + __ret = __get_user_bad(); \ + break; \ + } \ + (x) = (__typeof__(*__gu_addr)) (__typeof__(*__gu_addr - *__gu_addr)) \ + __ret.val; \ + __ret.err; \ +}) + +/** + * __put_user: - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + * + * Implementation note: The "case 8" logic of casting to the type of + * the result of subtracting the value from itself is basically a way + * of keeping all integer types the same, but casting any pointers to + * ptrdiff_t, i.e. also an integer type. This way there are no + * questionable casts seen by the compiler on an ILP32 platform. + */ +#define __put_user(x, ptr) \ +({ \ + int __pu_err = 0; \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + typeof(*__pu_addr) __pu_val = (x); \ + __chk_user_ptr(__pu_addr); \ + switch (sizeof(__pu_val)) { \ + case 1: \ + __pu_err = __put_user_1((long)__pu_val, __pu_addr); \ + break; \ + case 2: \ + __pu_err = __put_user_2((long)__pu_val, __pu_addr); \ + break; \ + case 4: \ + __pu_err = __put_user_4((long)__pu_val, __pu_addr); \ + break; \ + case 8: \ + __pu_err = \ + __put_user_8((__typeof__(__pu_val - __pu_val))__pu_val,\ + __pu_addr); \ + break; \ + default: \ + __pu_err = __put_user_bad(); \ + break; \ + } \ + __pu_err; \ +}) + +/* + * The versions of get_user and put_user without initial underscores + * check the address of their arguments to make sure they are not + * in kernel space. + */ +#define put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __user *__Pu_addr = (ptr); \ + access_ok(VERIFY_WRITE, (__Pu_addr), sizeof(*(__Pu_addr))) ? \ + __put_user((x), (__Pu_addr)) : \ + -EFAULT; \ +}) + +#define get_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) const __user *__Gu_addr = (ptr); \ + access_ok(VERIFY_READ, (__Gu_addr), sizeof(*(__Gu_addr))) ? \ + __get_user((x), (__Gu_addr)) : \ + ((x) = 0, -EFAULT); \ +}) + +/** + * __copy_to_user() - copy data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * An alternate version - __copy_to_user_inatomic() - is designed + * to be called from atomic context, typically bracketed by calls + * to pagefault_disable() and pagefault_enable(). + */ +extern unsigned long __must_check __copy_to_user_inatomic( + void __user *to, const void *from, unsigned long n); + +static inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + might_fault(); + return __copy_to_user_inatomic(to, from, n); +} + +static inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + n = __copy_to_user(to, from, n); + return n; +} + +/** + * __copy_from_user() - copy data from user space, with less checking. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + * + * An alternate version - __copy_from_user_inatomic() - is designed + * to be called from atomic context, typically bracketed by calls + * to pagefault_disable() and pagefault_enable(). This version + * does *NOT* pad with zeros. + */ +extern unsigned long __must_check __copy_from_user_inatomic( + void *to, const void __user *from, unsigned long n); +extern unsigned long __must_check __copy_from_user_zeroing( + void *to, const void __user *from, unsigned long n); + +static inline unsigned long __must_check +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + might_fault(); + return __copy_from_user_zeroing(to, from, n); +} + +static inline unsigned long __must_check +_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + n = __copy_from_user(to, from, n); + else + memset(to, 0, n); + return n; +} + +#ifdef CONFIG_DEBUG_COPY_FROM_USER +extern void copy_from_user_overflow(void) + __compiletime_warning("copy_from_user() size is not provably correct"); + +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + + if (likely(sz == -1 || sz >= n)) + n = _copy_from_user(to, from, n); + else + copy_from_user_overflow(); + + return n; +} +#else +#define copy_from_user _copy_from_user +#endif + +#ifdef __tilegx__ +/** + * __copy_in_user() - copy data within user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to user space. Caller must check + * the specified blocks with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +extern unsigned long __copy_in_user_inatomic( + void __user *to, const void __user *from, unsigned long n); + +static inline unsigned long __must_check +__copy_in_user(void __user *to, const void __user *from, unsigned long n) +{ + might_sleep(); + return __copy_in_user_inatomic(to, from, n); +} + +static inline unsigned long __must_check +copy_in_user(void __user *to, const void __user *from, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n) && access_ok(VERIFY_READ, from, n)) + n = __copy_in_user(to, from, n); + return n; +} +#endif + + +/** + * strlen_user: - Get the size of a string in user space. + * @str: The string to measure. + * + * Context: User context only. This function may sleep. + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * On exception, returns 0. + * + * If there is a limit on the length of a valid string, you may wish to + * consider using strnlen_user() instead. + */ +extern long strnlen_user_asm(const char __user *str, long n); +static inline long __must_check strnlen_user(const char __user *str, long n) +{ + might_fault(); + return strnlen_user_asm(str, n); +} +#define strlen_user(str) strnlen_user(str, LONG_MAX) + +/** + * strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * Caller must check the specified block with access_ok() before calling + * this function. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ +extern long strncpy_from_user_asm(char *dst, const char __user *src, long); +static inline long __must_check __strncpy_from_user( + char *dst, const char __user *src, long count) +{ + might_fault(); + return strncpy_from_user_asm(dst, src, count); +} +static inline long __must_check strncpy_from_user( + char *dst, const char __user *src, long count) +{ + if (access_ok(VERIFY_READ, src, 1)) + return __strncpy_from_user(dst, src, count); + return -EFAULT; +} + +/** + * clear_user: - Zero a block of memory in user space. + * @mem: Destination address, in user space. + * @len: Number of bytes to zero. + * + * Zero a block of memory in user space. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ +extern unsigned long clear_user_asm(void __user *mem, unsigned long len); +static inline unsigned long __must_check __clear_user( + void __user *mem, unsigned long len) +{ + might_fault(); + return clear_user_asm(mem, len); +} +static inline unsigned long __must_check clear_user( + void __user *mem, unsigned long len) +{ + if (access_ok(VERIFY_WRITE, mem, len)) + return __clear_user(mem, len); + return len; +} + +/** + * flush_user: - Flush a block of memory in user space from cache. + * @mem: Destination address, in user space. + * @len: Number of bytes to flush. + * + * Returns number of bytes that could not be flushed. + * On success, this will be zero. + */ +extern unsigned long flush_user_asm(void __user *mem, unsigned long len); +static inline unsigned long __must_check __flush_user( + void __user *mem, unsigned long len) +{ + int retval; + + might_fault(); + retval = flush_user_asm(mem, len); + mb_incoherent(); + return retval; +} + +static inline unsigned long __must_check flush_user( + void __user *mem, unsigned long len) +{ + if (access_ok(VERIFY_WRITE, mem, len)) + return __flush_user(mem, len); + return len; +} + +/** + * inv_user: - Invalidate a block of memory in user space from cache. + * @mem: Destination address, in user space. + * @len: Number of bytes to invalidate. + * + * Returns number of bytes that could not be invalidated. + * On success, this will be zero. + * + * Note that on Tile64, the "inv" operation is in fact a + * "flush and invalidate", so cache write-backs will occur prior + * to the cache being marked invalid. + */ +extern unsigned long inv_user_asm(void __user *mem, unsigned long len); +static inline unsigned long __must_check __inv_user( + void __user *mem, unsigned long len) +{ + int retval; + + might_fault(); + retval = inv_user_asm(mem, len); + mb_incoherent(); + return retval; +} +static inline unsigned long __must_check inv_user( + void __user *mem, unsigned long len) +{ + if (access_ok(VERIFY_WRITE, mem, len)) + return __inv_user(mem, len); + return len; +} + +/** + * finv_user: - Flush-inval a block of memory in user space from cache. + * @mem: Destination address, in user space. + * @len: Number of bytes to invalidate. + * + * Returns number of bytes that could not be flush-invalidated. + * On success, this will be zero. + */ +extern unsigned long finv_user_asm(void __user *mem, unsigned long len); +static inline unsigned long __must_check __finv_user( + void __user *mem, unsigned long len) +{ + int retval; + + might_fault(); + retval = finv_user_asm(mem, len); + mb_incoherent(); + return retval; +} +static inline unsigned long __must_check finv_user( + void __user *mem, unsigned long len) +{ + if (access_ok(VERIFY_WRITE, mem, len)) + return __finv_user(mem, len); + return len; +} + +#endif /* _ASM_TILE_UACCESS_H */ diff --git a/arch/tile/include/asm/unaligned.h b/arch/tile/include/asm/unaligned.h new file mode 100644 index 00000000..37dfbe59 --- /dev/null +++ b/arch/tile/include/asm/unaligned.h @@ -0,0 +1,39 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_UNALIGNED_H +#define _ASM_TILE_UNALIGNED_H + +#include <linux/unaligned/le_struct.h> +#include <linux/unaligned/be_byteshift.h> +#include <linux/unaligned/generic.h> +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le + +/* + * Is the kernel doing fixups of unaligned accesses? If <0, no kernel + * intervention occurs and SIGBUS is delivered with no data address + * info. If 0, the kernel single-steps the instruction to discover + * the data address to provide with the SIGBUS. If 1, the kernel does + * a fixup. + */ +extern int unaligned_fixup; + +/* Is the kernel printing on each unaligned fixup? */ +extern int unaligned_printk; + +/* Number of unaligned fixups performed */ +extern unsigned int unaligned_fixup_count; + +#endif /* _ASM_TILE_UNALIGNED_H */ diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h new file mode 100644 index 00000000..f70bf1c5 --- /dev/null +++ b/arch/tile/include/asm/unistd.h @@ -0,0 +1,47 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#if !defined(_ASM_TILE_UNISTD_H) || defined(__SYSCALL) +#define _ASM_TILE_UNISTD_H + +#if !defined(__LP64__) || defined(__SYSCALL_COMPAT) +/* Use the flavor of this syscall that matches the 32-bit API better. */ +#define __ARCH_WANT_SYNC_FILE_RANGE2 +#endif + +/* Use the standard ABI for syscalls. */ +#include <asm-generic/unistd.h> + +/* Additional Tilera-specific syscalls. */ +#define __NR_flush_cache (__NR_arch_specific_syscall + 1) +__SYSCALL(__NR_flush_cache, sys_flush_cache) + +#ifndef __tilegx__ +/* "Fast" syscalls provide atomic support for 32-bit chips. */ +#define __NR_FAST_cmpxchg -1 +#define __NR_FAST_atomic_update -2 +#define __NR_FAST_cmpxchg64 -3 +#define __NR_cmpxchg_badaddr (__NR_arch_specific_syscall + 0) +__SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr) +#endif + +#ifdef __KERNEL__ +/* In compat mode, we use sys_llseek() for compat_sys_llseek(). */ +#ifdef CONFIG_COMPAT +#define __ARCH_WANT_SYS_LLSEEK +#endif +#define __ARCH_WANT_SYS_NEWFSTATAT +#endif + +#endif /* _ASM_TILE_UNISTD_H */ diff --git a/arch/tile/include/asm/user.h b/arch/tile/include/asm/user.h new file mode 100644 index 00000000..cbc8b4d5 --- /dev/null +++ b/arch/tile/include/asm/user.h @@ -0,0 +1,21 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +#ifndef _ASM_TILE_USER_H +#define _ASM_TILE_USER_H + +/* This header is for a.out file formats, which TILE does not support. */ + +#endif /* _ASM_TILE_USER_H */ diff --git a/arch/tile/include/asm/vga.h b/arch/tile/include/asm/vga.h new file mode 100644 index 00000000..7b46e754 --- /dev/null +++ b/arch/tile/include/asm/vga.h @@ -0,0 +1,39 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Access to VGA videoram. + */ + +#ifndef _ASM_TILE_VGA_H +#define _ASM_TILE_VGA_H + +#include <asm/io.h> + +#define VT_BUF_HAVE_RW + +static inline void scr_writew(u16 val, volatile u16 *addr) +{ + __raw_writew(val, (volatile u16 __iomem *) addr); +} + +static inline u16 scr_readw(volatile const u16 *addr) +{ + return __raw_readw((volatile const u16 __iomem *) addr); +} + +#define vga_readb(a) readb((u8 __iomem *)(a)) +#define vga_writeb(v,a) writeb(v, (u8 __iomem *)(a)) + +#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap(x, s)) + +#endif diff --git a/arch/tile/include/hv/drv_mshim_intf.h b/arch/tile/include/hv/drv_mshim_intf.h new file mode 100644 index 00000000..c6ef3bdc --- /dev/null +++ b/arch/tile/include/hv/drv_mshim_intf.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_mshim_intf.h + * Interface definitions for the Linux EDAC memory controller driver. + */ + +#ifndef _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H +#define _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H + +/** Number of memory controllers in the public API. */ +#define TILE_MAX_MSHIMS 4 + +/** Memory info under each memory controller. */ +struct mshim_mem_info +{ + uint64_t mem_size; /**< Total memory size in bytes. */ + uint8_t mem_type; /**< Memory type, DDR2 or DDR3. */ + uint8_t mem_ecc; /**< Memory supports ECC. */ +}; + +/** + * DIMM error structure. + * For now, only correctable errors are counted and the mshim doesn't record + * the error PA. HV takes panic upon uncorrectable errors. + */ +struct mshim_mem_error +{ + uint32_t sbe_count; /**< Number of single-bit errors. */ +}; + +/** Read this offset to get the memory info per mshim. */ +#define MSHIM_MEM_INFO_OFF 0x100 + +/** Read this offset to check DIMM error. */ +#define MSHIM_MEM_ERROR_OFF 0x200 + +#endif /* _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H */ diff --git a/arch/tile/include/hv/drv_pcie_rc_intf.h b/arch/tile/include/hv/drv_pcie_rc_intf.h new file mode 100644 index 00000000..9bd2243b --- /dev/null +++ b/arch/tile/include/hv/drv_pcie_rc_intf.h @@ -0,0 +1,38 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_pcie_rc_intf.h + * Interface definitions for the PCIE Root Complex. + */ + +#ifndef _SYS_HV_DRV_PCIE_RC_INTF_H +#define _SYS_HV_DRV_PCIE_RC_INTF_H + +/** File offset for reading the interrupt base number used for PCIE legacy + interrupts and PLX Gen 1 requirement flag */ +#define PCIE_RC_CONFIG_MASK_OFF 0 + + +/** + * Structure used for obtaining PCIe config information, read from the PCIE + * subsystem /ctl file at initialization + */ +typedef struct pcie_rc_config +{ + int intr; /**< interrupt number used for downcall */ + int plx_gen1; /**< flag for PLX Gen 1 configuration */ +} pcie_rc_config_t; + +#endif /* _SYS_HV_DRV_PCIE_RC_INTF_H */ diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h new file mode 100644 index 00000000..6395faa6 --- /dev/null +++ b/arch/tile/include/hv/drv_srom_intf.h @@ -0,0 +1,41 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_srom_intf.h + * Interface definitions for the SPI Flash ROM driver. + */ + +#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H +#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H + +/** Read this offset to get the total device size. */ +#define SROM_TOTAL_SIZE_OFF 0xF0000000 + +/** Read this offset to get the device sector size. */ +#define SROM_SECTOR_SIZE_OFF 0xF0000004 + +/** Read this offset to get the device page size. */ +#define SROM_PAGE_SIZE_OFF 0xF0000008 + +/** Write this offset to flush any pending writes. */ +#define SROM_FLUSH_OFF 0xF1000000 + +/** Write this offset, plus the byte offset of the start of a sector, to + * erase a sector. Any write data is ignored, but there must be at least + * one byte of write data. Only applies when the driver is in MTD mode. + */ +#define SROM_ERASE_OFF 0xF2000000 + +#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */ diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h new file mode 100644 index 00000000..3a73b2b4 --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_impl.h @@ -0,0 +1,300 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drivers/xgbe/impl.h + * Implementation details for the NetIO library. + */ + +#ifndef __DRV_XGBE_IMPL_H__ +#define __DRV_XGBE_IMPL_H__ + +#include <hv/netio_errors.h> +#include <hv/netio_intf.h> +#include <hv/drv_xgbe_intf.h> + + +/** How many groups we have (log2). */ +#define LOG2_NUM_GROUPS (12) +/** How many groups we have. */ +#define NUM_GROUPS (1 << LOG2_NUM_GROUPS) + +/** Number of output requests we'll buffer per tile. */ +#define EPP_REQS_PER_TILE (32) + +/** Words used in an eDMA command without checksum acceleration. */ +#define EDMA_WDS_NO_CSUM 8 +/** Words used in an eDMA command with checksum acceleration. */ +#define EDMA_WDS_CSUM 10 +/** Total available words in the eDMA command FIFO. */ +#define EDMA_WDS_TOTAL 128 + + +/* + * FIXME: These definitions are internal and should have underscores! + * NOTE: The actual numeric values here are intentional and allow us to + * optimize the concept "if small ... else if large ... else ...", by + * checking for the low bit being set, and then for non-zero. + * These are used as array indices, so they must have the values (0, 1, 2) + * in some order. + */ +#define SIZE_SMALL (1) /**< Small packet queue. */ +#define SIZE_LARGE (2) /**< Large packet queue. */ +#define SIZE_JUMBO (0) /**< Jumbo packet queue. */ + +/** The number of "SIZE_xxx" values. */ +#define NETIO_NUM_SIZES 3 + + +/* + * Default numbers of packets for IPP drivers. These values are chosen + * such that CIPP1 will not overflow its L2 cache. + */ + +/** The default number of small packets. */ +#define NETIO_DEFAULT_SMALL_PACKETS 2750 +/** The default number of large packets. */ +#define NETIO_DEFAULT_LARGE_PACKETS 2500 +/** The default number of jumbo packets. */ +#define NETIO_DEFAULT_JUMBO_PACKETS 250 + + +/** Log2 of the size of a memory arena. */ +#define NETIO_ARENA_SHIFT 24 /* 16 MB */ +/** Size of a memory arena. */ +#define NETIO_ARENA_SIZE (1 << NETIO_ARENA_SHIFT) + + +/** A queue of packets. + * + * This structure partially defines a queue of packets waiting to be + * processed. The queue as a whole is written to by an interrupt handler and + * read by non-interrupt code; this data structure is what's touched by the + * interrupt handler. The other part of the queue state, the read offset, is + * kept in user space, not in hypervisor space, so it is in a separate data + * structure. + * + * The read offset (__packet_receive_read in the user part of the queue + * structure) points to the next packet to be read. When the read offset is + * equal to the write offset, the queue is empty; therefore the queue must + * contain one more slot than the required maximum queue size. + * + * Here's an example of all 3 state variables and what they mean. All + * pointers move left to right. + * + * @code + * I I V V V V I I I I + * 0 1 2 3 4 5 6 7 8 9 10 + * ^ ^ ^ ^ + * | | | + * | | __last_packet_plus_one + * | __buffer_write + * __packet_receive_read + * @endcode + * + * This queue has 10 slots, and thus can hold 9 packets (_last_packet_plus_one + * = 10). The read pointer is at 2, and the write pointer is at 6; thus, + * there are valid, unread packets in slots 2, 3, 4, and 5. The remaining + * slots are invalid (do not contain a packet). + */ +typedef struct { + /** Byte offset of the next notify packet to be written: zero for the first + * packet on the queue, sizeof (netio_pkt_t) for the second packet on the + * queue, etc. */ + volatile uint32_t __packet_write; + + /** Offset of the packet after the last valid packet (i.e., when any + * pointer is incremented to this value, it wraps back to zero). */ + uint32_t __last_packet_plus_one; +} +__netio_packet_queue_t; + + +/** A queue of buffers. + * + * This structure partially defines a queue of empty buffers which have been + * obtained via requests to the IPP. (The elements of the queue are packet + * handles, which are transformed into a full netio_pkt_t when the buffer is + * retrieved.) The queue as a whole is written to by an interrupt handler and + * read by non-interrupt code; this data structure is what's touched by the + * interrupt handler. The other parts of the queue state, the read offset and + * requested write offset, are kept in user space, not in hypervisor space, so + * they are in a separate data structure. + * + * The read offset (__buffer_read in the user part of the queue structure) + * points to the next buffer to be read. When the read offset is equal to the + * write offset, the queue is empty; therefore the queue must contain one more + * slot than the required maximum queue size. + * + * The requested write offset (__buffer_requested_write in the user part of + * the queue structure) points to the slot which will hold the next buffer we + * request from the IPP, once we get around to sending such a request. When + * the requested write offset is equal to the write offset, no requests for + * new buffers are outstanding; when the requested write offset is one greater + * than the read offset, no more requests may be sent. + * + * Note that, unlike the packet_queue, the buffer_queue places incoming + * buffers at decreasing addresses. This makes the check for "is it time to + * wrap the buffer pointer" cheaper in the assembly code which receives new + * buffers, and means that the value which defines the queue size, + * __last_buffer, is different than in the packet queue. Also, the offset + * used in the packet_queue is already scaled by the size of a packet; here we + * use unscaled slot indices for the offsets. (These differences are + * historical, and in the future it's possible that the packet_queue will look + * more like this queue.) + * + * @code + * Here's an example of all 4 state variables and what they mean. Remember: + * all pointers move right to left. + * + * V V V I I R R V V V + * 0 1 2 3 4 5 6 7 8 9 + * ^ ^ ^ ^ + * | | | | + * | | | __last_buffer + * | | __buffer_write + * | __buffer_requested_write + * __buffer_read + * @endcode + * + * This queue has 10 slots, and thus can hold 9 buffers (_last_buffer = 9). + * The read pointer is at 2, and the write pointer is at 6; thus, there are + * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7. The requested write + * pointer is at 4; thus, requests have been made to the IPP for buffers which + * will be placed in slots 6 and 5 when they arrive. Finally, the remaining + * slots are invalid (do not contain a buffer). + */ +typedef struct +{ + /** Ordinal number of the next buffer to be written: 0 for the first slot in + * the queue, 1 for the second slot in the queue, etc. */ + volatile uint32_t __buffer_write; + + /** Ordinal number of the last buffer (i.e., when any pointer is decremented + * below zero, it is reloaded with this value). */ + uint32_t __last_buffer; +} +__netio_buffer_queue_t; + + +/** + * An object for providing Ethernet packets to a process. + */ +typedef struct __netio_queue_impl_t +{ + /** The queue of packets waiting to be received. */ + __netio_packet_queue_t __packet_receive_queue; + /** The intr bit mask that IDs this device. */ + unsigned int __intr_id; + /** Offset to queues of empty buffers, one per size. */ + uint32_t __buffer_queue[NETIO_NUM_SIZES]; + /** The address of the first EPP tile, or -1 if no EPP. */ + /* ISSUE: Actually this is always "0" or "~0". */ + uint32_t __epp_location; + /** The queue ID that this queue represents. */ + unsigned int __queue_id; + /** Number of acknowledgements received. */ + volatile uint32_t __acks_received; + /** Last completion number received for packet_sendv. */ + volatile uint32_t __last_completion_rcv; + /** Number of packets allowed to be outstanding. */ + uint32_t __max_outstanding; + /** First VA available for packets. */ + void* __va_0; + /** First VA in second range available for packets. */ + void* __va_1; + /** Padding to align the "__packets" field to the size of a netio_pkt_t. */ + uint32_t __padding[3]; + /** The packets themselves. */ + netio_pkt_t __packets[0]; +} +netio_queue_impl_t; + + +/** + * An object for managing the user end of a NetIO queue. + */ +typedef struct __netio_queue_user_impl_t +{ + /** The next incoming packet to be read. */ + uint32_t __packet_receive_read; + /** The next empty buffers to be read, one index per size. */ + uint8_t __buffer_read[NETIO_NUM_SIZES]; + /** Where the empty buffer we next request from the IPP will go, one index + * per size. */ + uint8_t __buffer_requested_write[NETIO_NUM_SIZES]; + /** PCIe interface flag. */ + uint8_t __pcie; + /** Number of packets left to be received before we send a credit update. */ + uint32_t __receive_credit_remaining; + /** Value placed in __receive_credit_remaining when it reaches zero. */ + uint32_t __receive_credit_interval; + /** First fast I/O routine index. */ + uint32_t __fastio_index; + /** Number of acknowledgements expected. */ + uint32_t __acks_outstanding; + /** Last completion number requested. */ + uint32_t __last_completion_req; + /** File descriptor for driver. */ + int __fd; +} +netio_queue_user_impl_t; + + +#define NETIO_GROUP_CHUNK_SIZE 64 /**< Max # groups in one IPP request */ +#define NETIO_BUCKET_CHUNK_SIZE 64 /**< Max # buckets in one IPP request */ + + +/** Internal structure used to convey packet send information to the + * hypervisor. FIXME: Actually, it's not used for that anymore, but + * netio_packet_send() still uses it internally. + */ +typedef struct +{ + uint16_t flags; /**< Packet flags (__NETIO_SEND_FLG_xxx) */ + uint16_t transfer_size; /**< Size of packet */ + uint32_t va; /**< VA of start of packet */ + __netio_pkt_handle_t handle; /**< Packet handle */ + uint32_t csum0; /**< First checksum word */ + uint32_t csum1; /**< Second checksum word */ +} +__netio_send_cmd_t; + + +/** Flags used in two contexts: + * - As the "flags" member in the __netio_send_cmd_t, above; used only + * for netio_pkt_send_{prepare,commit}. + * - As part of the flags passed to the various send packet fast I/O calls. + */ + +/** Need acknowledgement on this packet. Note that some code in the + * normal send_pkt fast I/O handler assumes that this is equal to 1. */ +#define __NETIO_SEND_FLG_ACK 0x1 + +/** Do checksum on this packet. (Only used with the __netio_send_cmd_t; + * normal packet sends use a special fast I/O index to denote checksumming, + * and multi-segment sends test the checksum descriptor.) */ +#define __NETIO_SEND_FLG_CSUM 0x2 + +/** Get a completion on this packet. Only used with multi-segment sends. */ +#define __NETIO_SEND_FLG_COMPLETION 0x4 + +/** Position of the number-of-extra-segments value in the flags word. + Only used with multi-segment sends. */ +#define __NETIO_SEND_FLG_XSEG_SHIFT 3 + +/** Width of the number-of-extra-segments value in the flags word. */ +#define __NETIO_SEND_FLG_XSEG_WIDTH 2 + +#endif /* __DRV_XGBE_IMPL_H__ */ diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h new file mode 100644 index 00000000..f13188ac --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_intf.h @@ -0,0 +1,615 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_xgbe_intf.h + * Interface to the hypervisor XGBE driver. + */ + +#ifndef __DRV_XGBE_INTF_H__ +#define __DRV_XGBE_INTF_H__ + +/** + * An object for forwarding VAs and PAs to the hypervisor. + * @ingroup types + * + * This allows the supervisor to specify a number of areas of memory to + * store packet buffers. + */ +typedef struct +{ + /** The physical address of the memory. */ + HV_PhysAddr pa; + /** Page table entry for the memory. This is only used to derive the + * memory's caching mode; the PA bits are ignored. */ + HV_PTE pte; + /** The virtual address of the memory. */ + HV_VirtAddr va; + /** Size (in bytes) of the memory area. */ + int size; + +} +netio_ipp_address_t; + +/** The various pread/pwrite offsets into the hypervisor-level driver. + * @ingroup types + */ +typedef enum +{ + /** Inform the Linux driver of the address of the NetIO arena memory. + * This offset is actually only used to convey information from netio + * to the Linux driver; it never makes it from there to the hypervisor. + * Write-only; takes a uint32_t specifying the VA address. */ + NETIO_FIXED_ADDR = 0x5000000000000000ULL, + + /** Inform the Linux driver of the size of the NetIO arena memory. + * This offset is actually only used to convey information from netio + * to the Linux driver; it never makes it from there to the hypervisor. + * Write-only; takes a uint32_t specifying the VA size. */ + NETIO_FIXED_SIZE = 0x5100000000000000ULL, + + /** Register current tile with IPP. Write then read: write, takes a + * netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */ + NETIO_IPP_INPUT_REGISTER_OFF = 0x6000000000000000ULL, + + /** Unregister current tile from IPP. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL, + + /** Start packets flowing. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_INIT_OFF = 0x6200000000000000ULL, + + /** Stop packets flowing. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_UNINIT_OFF = 0x6300000000000000ULL, + + /** Configure group (typically we group on VLAN). Write-only: takes an + * array of netio_group_t's, low 24 bits of the offset is the base group + * number times the size of a netio_group_t. */ + NETIO_IPP_INPUT_GROUP_CFG_OFF = 0x6400000000000000ULL, + + /** Configure bucket. Write-only: takes an array of netio_bucket_t's, low + * 24 bits of the offset is the base bucket number times the size of a + * netio_bucket_t. */ + NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL, + + /** Get/set a parameter. Read or write: read or write data is the parameter + * value, low 32 bits of the offset is a __netio_getset_offset_t. */ + NETIO_IPP_PARAM_OFF = 0x6600000000000000ULL, + + /** Get fast I/O index. Read-only; returns a 4-byte base index value. */ + NETIO_IPP_GET_FASTIO_OFF = 0x6700000000000000ULL, + + /** Configure hijack IP address. Packets with this IPv4 dest address + * go to bucket NETIO_NUM_BUCKETS - 1. Write-only: takes an IP address + * in some standard form. FIXME: Define the form! */ + NETIO_IPP_INPUT_HIJACK_CFG_OFF = 0x6800000000000000ULL, + + /** + * Offsets beyond this point are reserved for the supervisor (although that + * enforcement must be done by the supervisor driver itself). + */ + NETIO_IPP_USER_MAX_OFF = 0x6FFFFFFFFFFFFFFFULL, + + /** Register I/O memory. Write-only, takes a netio_ipp_address_t. */ + NETIO_IPP_IOMEM_REGISTER_OFF = 0x7000000000000000ULL, + + /** Unregister I/O memory. Write-only, takes a netio_ipp_address_t. */ + NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL, + + /* Offsets greater than 0x7FFFFFFF can't be used directly from Linux + * userspace code due to limitations in the pread/pwrite syscalls. */ + + /** Drain LIPP buffers. */ + NETIO_IPP_DRAIN_OFF = 0xFA00000000000000ULL, + + /** Supply a netio_ipp_address_t to be used as shared memory for the + * LEPP command queue. */ + NETIO_EPP_SHM_OFF = 0xFB00000000000000ULL, + + /* 0xFC... is currently unused. */ + + /** Stop IPP/EPP tiles. Write-only, takes a dummy argument. */ + NETIO_IPP_STOP_SHIM_OFF = 0xFD00000000000000ULL, + + /** Start IPP/EPP tiles. Write-only, takes a dummy argument. */ + NETIO_IPP_START_SHIM_OFF = 0xFE00000000000000ULL, + + /** Supply packet arena. Write-only, takes an array of + * netio_ipp_address_t values. */ + NETIO_IPP_ADDRESS_OFF = 0xFF00000000000000ULL, +} netio_hv_offset_t; + +/** Extract the base offset from an offset */ +#define NETIO_BASE_OFFSET(off) ((off) & 0xFF00000000000000ULL) +/** Extract the local offset from an offset */ +#define NETIO_LOCAL_OFFSET(off) ((off) & 0x00FFFFFFFFFFFFFFULL) + + +/** + * Get/set offset. + */ +typedef union +{ + struct + { + uint64_t addr:48; /**< Class-specific address */ + unsigned int class:8; /**< Class (e.g., NETIO_PARAM) */ + unsigned int opcode:8; /**< High 8 bits of NETIO_IPP_PARAM_OFF */ + } + bits; /**< Bitfields */ + uint64_t word; /**< Aggregated value to use as the offset */ +} +__netio_getset_offset_t; + +/** + * Fast I/O index offsets (must be contiguous). + */ +typedef enum +{ + NETIO_FASTIO_ALLOCATE = 0, /**< Get empty packet buffer */ + NETIO_FASTIO_FREE_BUFFER = 1, /**< Give buffer back to IPP */ + NETIO_FASTIO_RETURN_CREDITS = 2, /**< Give credits to IPP */ + NETIO_FASTIO_SEND_PKT_NOCK = 3, /**< Send a packet, no checksum */ + NETIO_FASTIO_SEND_PKT_CK = 4, /**< Send a packet, with checksum */ + NETIO_FASTIO_SEND_PKT_VEC = 5, /**< Send a vector of packets */ + NETIO_FASTIO_SENDV_PKT = 6, /**< Sendv one packet */ + NETIO_FASTIO_NUM_INDEX = 7, /**< Total number of fast I/O indices */ +} netio_fastio_index_t; + +/** 3-word return type for Fast I/O call. */ +typedef struct +{ + int err; /**< Error code. */ + uint32_t val0; /**< Value. Meaning depends upon the specific call. */ + uint32_t val1; /**< Value. Meaning depends upon the specific call. */ +} netio_fastio_rv3_t; + +/** 0-argument fast I/O call */ +int __netio_fastio0(uint32_t fastio_index); +/** 1-argument fast I/O call */ +int __netio_fastio1(uint32_t fastio_index, uint32_t arg0); +/** 3-argument fast I/O call, 2-word return value */ +netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0, + uint32_t arg1, uint32_t arg2); +/** 4-argument fast I/O call */ +int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3); +/** 6-argument fast I/O call */ +int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5); +/** 9-argument fast I/O call */ +int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5, + uint32_t arg6, uint32_t arg7, uint32_t arg8); + +/** Allocate an empty packet. + * @param fastio_index Fast I/O index. + * @param size Size of the packet to allocate. + */ +#define __netio_fastio_allocate(fastio_index, size) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size) + +/** Free a buffer. + * @param fastio_index Fast I/O index. + * @param handle Handle for the packet to free. + */ +#define __netio_fastio_free_buffer(fastio_index, handle) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle) + +/** Increment our receive credits. + * @param fastio_index Fast I/O index. + * @param credits Number of credits to add. + */ +#define __netio_fastio_return_credits(fastio_index, credits) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits) + +/** Send packet, no checksum. + * @param fastio_index Fast I/O index. + * @param ackflag Nonzero if we want an ack. + * @param size Size of the packet. + * @param va Virtual address of start of packet. + * @param handle Packet handle. + */ +#define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \ + __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \ + size, va, handle) + +/** Send packet, calculate checksum. + * @param fastio_index Fast I/O index. + * @param ackflag Nonzero if we want an ack. + * @param size Size of the packet. + * @param va Virtual address of start of packet. + * @param handle Packet handle. + * @param csum0 Shim checksum header. + * @param csum1 Checksum seed. + */ +#define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \ + csum0, csum1) \ + __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \ + size, va, handle, csum0, csum1) + + +/** Format for the "csum0" argument to the __netio_fastio_send routines + * and LEPP. Note that this is currently exactly identical to the + * ShimProtocolOffloadHeader. + */ +typedef union +{ + struct + { + unsigned int start_byte:7; /**< The first byte to be checksummed */ + unsigned int count:14; /**< Number of bytes to be checksummed. */ + unsigned int destination_byte:7; /**< The byte to write the checksum to. */ + unsigned int reserved:4; /**< Reserved. */ + } bits; /**< Decomposed method of access. */ + unsigned int word; /**< To send out the IDN. */ +} __netio_checksum_header_t; + + +/** Sendv packet with 1 or 2 segments. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus + * 1 in next 2 bits; expected checksum in high 16 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; if zero, no checksum. + * @param va_F Virtual address of first segment. + * @param va_L Virtual address of last segment, if 2 segments. + * @param len_F_L Length of first segment in low 16 bits; length of last + * segment, if 2 segments, in high 16 bits. + */ +#define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \ + va_F, va_L, len_F_L) \ + __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L) + +/** Send packet on PCIe interface. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe. + * @param va_F Virtual address of the packet buffer. + * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0. + * @param len_F_L Length of the packet buffer in low 16 bits. + */ +#define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \ + va_F, va_L, len_F_L) \ + __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L) + +/** Sendv packet with 3 or 4 segments. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus + * 1 in next 2 bits; expected checksum in high 16 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; if zero, no checksum. + * @param va_F Virtual address of first segment. + * @param va_L Virtual address of last segment (third segment if 3 segments, + * fourth segment if 4 segments). + * @param len_F_L Length of first segment in low 16 bits; length of last + * segment in high 16 bits. + * @param va_M0 Virtual address of "middle 0" segment; this segment is sent + * second when there are three segments, and third if there are four. + * @param va_M1 Virtual address of "middle 1" segment; this segment is sent + * second when there are four segments. + * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle + * 1 segment, if 4 segments, in high 16 bits. + */ +#define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \ + va_L, len_F_L, va_M0, va_M1, len_M0_M1) \ + __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1) + +/** Send vector of packets. + * @param fastio_index Fast I/O index. + * @param seqno Number of packets transmitted so far on this interface; + * used to decide which packets should be acknowledged. + * @param nentries Number of entries in vector. + * @param va Virtual address of start of vector entry array. + * @return 3-word netio_fastio_rv3_t structure. The structure's err member + * is an error code, or zero if no error. The val0 member is the + * updated value of seqno; it has been incremented by 1 for each + * packet sent. That increment may be less than nentries if an + * error occurred, or if some of the entries in the vector contain + * handles equal to NETIO_PKT_HANDLE_NONE. The val1 member is the + * updated value of nentries; it has been decremented by 1 for each + * vector entry processed. Again, that decrement may be less than + * nentries (leaving the returned value positive) if an error + * occurred. + */ +#define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \ + __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \ + nentries, va) + + +/** An egress DMA command for LEPP. */ +typedef struct +{ + /** Is this a TSO transfer? + * + * NOTE: This field is always 0, to distinguish it from + * lepp_tso_cmd_t. It must come first! + */ + uint8_t tso : 1; + + /** Unused padding bits. */ + uint8_t _unused : 3; + + /** Should this packet be sent directly from caches instead of DRAM, + * using hash-for-home to locate the packet data? + */ + uint8_t hash_for_home : 1; + + /** Should we compute a checksum? */ + uint8_t compute_checksum : 1; + + /** Is this the final buffer for this packet? + * + * A single packet can be split over several input buffers (a "gather" + * operation). This flag indicates that this is the last buffer + * in a packet. + */ + uint8_t end_of_packet : 1; + + /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */ + uint8_t send_completion : 1; + + /** High bits of Client Physical Address of the start of the buffer + * to be egressed. + * + * NOTE: Only 6 bits are actually needed here, as CPAs are + * currently 38 bits. So two bits could be scavenged from this. + */ + uint8_t cpa_hi; + + /** The number of bytes to be egressed. */ + uint16_t length; + + /** Low 32 bits of Client Physical Address of the start of the buffer + * to be egressed. + */ + uint32_t cpa_lo; + + /** Checksum information (only used if 'compute_checksum'). */ + __netio_checksum_header_t checksum_data; + +} lepp_cmd_t; + + +/** A chunk of physical memory for a TSO egress. */ +typedef struct +{ + /** The low bits of the CPA. */ + uint32_t cpa_lo; + /** The high bits of the CPA. */ + uint16_t cpa_hi : 15; + /** Should this packet be sent directly from caches instead of DRAM, + * using hash-for-home to locate the packet data? + */ + uint16_t hash_for_home : 1; + /** The length in bytes. */ + uint16_t length; +} lepp_frag_t; + + +/** An LEPP command that handles TSO. */ +typedef struct +{ + /** Is this a TSO transfer? + * + * NOTE: This field is always 1, to distinguish it from + * lepp_cmd_t. It must come first! + */ + uint8_t tso : 1; + + /** Unused padding bits. */ + uint8_t _unused : 7; + + /** Size of the header[] array in bytes. It must be in the range + * [40, 127], which are the smallest header for a TCP packet over + * Ethernet and the maximum possible prepend size supported by + * hardware, respectively. Note that the array storage must be + * padded out to a multiple of four bytes so that the following + * LEPP command is aligned properly. + */ + uint8_t header_size; + + /** Byte offset of the IP header in header[]. */ + uint8_t ip_offset; + + /** Byte offset of the TCP header in header[]. */ + uint8_t tcp_offset; + + /** The number of bytes to use for the payload of each packet, + * except of course the last one, which may not have enough bytes. + * This means that each Ethernet packet except the last will have a + * size of header_size + payload_size. + */ + uint16_t payload_size; + + /** The length of the 'frags' array that follows this struct. */ + uint16_t num_frags; + + /** The actual frags. */ + lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */]; + + /* + * The packet header template logically follows frags[], + * but you can't declare that in C. + * + * uint32_t header[header_size_in_words_rounded_up]; + */ + +} lepp_tso_cmd_t; + + +/** An LEPP completion ring entry. */ +typedef void* lepp_comp_t; + + +/** Maximum number of frags for one TSO command. This is adapted from + * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for + * our page size of exactly 65536. We add one for a "body" fragment. + */ +#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1) + +/** Total number of bytes needed for an lepp_tso_cmd_t. */ +#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \ + (sizeof(lepp_tso_cmd_t) + \ + (num_frags) * sizeof(lepp_frag_t) + \ + (((header_size) + 3) & -4)) + +/** The size of the lepp "cmd" queue. */ +#define LEPP_CMD_QUEUE_BYTES \ + (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \ + (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t)) + +/** The largest possible command that can go in lepp_queue_t::cmds[]. */ +#define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128) + +/** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive). + */ +#define LEPP_CMD_LIMIT \ + (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE) + +/** The maximum number of completions in an LEPP queue. */ +#define LEPP_COMP_QUEUE_SIZE \ + ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t)) + +/** Increment an index modulo the queue size. */ +#define LEPP_QINC(var) \ + (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1)) + +/** A queue used to convey egress commands from the client to LEPP. */ +typedef struct +{ + /** Index of first completion not yet processed by user code. + * If this is equal to comp_busy, there are no such completions. + * + * NOTE: This is only read/written by the user. + */ + unsigned int comp_head; + + /** Index of first completion record not yet completed. + * If this is equal to comp_tail, there are no such completions. + * This index gets advanced (modulo LEPP_QUEUE_SIZE) whenever + * a command with the 'completion' bit set is finished. + * + * NOTE: This is only written by LEPP, only read by the user. + */ + volatile unsigned int comp_busy; + + /** Index of the first empty slot in the completion ring. + * Entries from this up to but not including comp_head (in ring order) + * can be filled in with completion data. + * + * NOTE: This is only read/written by the user. + */ + unsigned int comp_tail; + + /** Byte index of first command enqueued for LEPP but not yet processed. + * + * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. + * + * NOTE: LEPP advances this counter as soon as it no longer needs + * the cmds[] storage for this entry, but the transfer is not actually + * complete (i.e. the buffer pointed to by the command is no longer + * needed) until comp_busy advances. + * + * If this is equal to cmd_tail, the ring is empty. + * + * NOTE: This is only written by LEPP, only read by the user. + */ + volatile unsigned int cmd_head; + + /** Byte index of first empty slot in the command ring. This field can + * be incremented up to but not equal to cmd_head (because that would + * mean the ring is empty). + * + * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. + * + * NOTE: This is read/written by the user, only read by LEPP. + */ + volatile unsigned int cmd_tail; + + /** A ring of variable-sized egress DMA commands. + * + * NOTE: Only written by the user, only read by LEPP. + */ + char cmds[LEPP_CMD_QUEUE_BYTES] + __attribute__((aligned(CHIP_L2_LINE_SIZE()))); + + /** A ring of user completion data. + * NOTE: Only read/written by the user. + */ + lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE] + __attribute__((aligned(CHIP_L2_LINE_SIZE()))); +} lepp_queue_t; + + +/** An internal helper function for determining the number of entries + * available in a ring buffer, given that there is one sentinel. + */ +static inline unsigned int +_lepp_num_free_slots(unsigned int head, unsigned int tail) +{ + /* + * One entry is reserved for use as a sentinel, to distinguish + * "empty" from "full". So we compute + * (head - tail - 1) % LEPP_QUEUE_SIZE, but without using a slow % operation. + */ + return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0); +} + + +/** Returns how many new comp entries can be enqueued. */ +static inline unsigned int +lepp_num_free_comp_slots(const lepp_queue_t* q) +{ + return _lepp_num_free_slots(q->comp_head, q->comp_tail); +} + +static inline int +lepp_qsub(int v1, int v2) +{ + int delta = v1 - v2; + return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE); +} + + +/** FIXME: Check this from linux, via a new "pwrite()" call. */ +#define LIPP_VERSION 1 + + +/** We use exactly two bytes of alignment padding. */ +#define LIPP_PACKET_PADDING 2 + +/** The minimum size of a "small" buffer (including the padding). */ +#define LIPP_SMALL_PACKET_SIZE 128 + +/* + * NOTE: The following two values should total to less than around + * 13582, to keep the total size used for "lipp_state_t" below 64K. + */ + +/** The maximum number of "small" buffers. + * This is enough for 53 network cpus with 128 credits. Note that + * if these are exhausted, we will fall back to using large buffers. + */ +#define LIPP_SMALL_BUFFERS 6785 + +/** The maximum number of "large" buffers. + * This is enough for 53 network cpus with 128 credits. + */ +#define LIPP_LARGE_BUFFERS 6785 + +#endif /* __DRV_XGBE_INTF_H__ */ diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h new file mode 100644 index 00000000..72ec1e97 --- /dev/null +++ b/arch/tile/include/hv/hypervisor.h @@ -0,0 +1,2427 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file hypervisor.h + * The hypervisor's public API. + */ + +#ifndef _TILE_HV_H +#define _TILE_HV_H + +#include <arch/chip.h> + +/* Linux builds want unsigned long constants, but assembler wants numbers */ +#ifdef __ASSEMBLER__ +/** One, for assembler */ +#define __HV_SIZE_ONE 1 +#elif !defined(__tile__) && CHIP_VA_WIDTH() > 32 +/** One, for 64-bit on host */ +#define __HV_SIZE_ONE 1ULL +#else +/** One, for Linux */ +#define __HV_SIZE_ONE 1UL +#endif + +/** The log2 of the span of a level-1 page table, in bytes. + */ +#define HV_LOG2_L1_SPAN 32 + +/** The span of a level-1 page table, in bytes. + */ +#define HV_L1_SPAN (__HV_SIZE_ONE << HV_LOG2_L1_SPAN) + +/** The log2 of the size of small pages, in bytes. This value should + * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL). + */ +#define HV_LOG2_PAGE_SIZE_SMALL 16 + +/** The size of small pages, in bytes. This value should be verified + * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL). + */ +#define HV_PAGE_SIZE_SMALL (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_SMALL) + +/** The log2 of the size of large pages, in bytes. This value should be + * verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE). + */ +#define HV_LOG2_PAGE_SIZE_LARGE 24 + +/** The size of large pages, in bytes. This value should be verified + * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE). + */ +#define HV_PAGE_SIZE_LARGE (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_LARGE) + +/** The log2 of the granularity at which page tables must be aligned; + * in other words, the CPA for a page table must have this many zero + * bits at the bottom of the address. + */ +#define HV_LOG2_PAGE_TABLE_ALIGN 11 + +/** The granularity at which page tables must be aligned. + */ +#define HV_PAGE_TABLE_ALIGN (__HV_SIZE_ONE << HV_LOG2_PAGE_TABLE_ALIGN) + +/** Normal start of hypervisor glue in client physical memory. */ +#define HV_GLUE_START_CPA 0x10000 + +/** This much space is reserved at HV_GLUE_START_CPA + * for the hypervisor glue. The client program must start at + * some address higher than this, and in particular the address of + * its text section should be equal to zero modulo HV_PAGE_SIZE_LARGE + * so that relative offsets to the HV glue are correct. + */ +#define HV_GLUE_RESERVED_SIZE 0x10000 + +/** Each entry in the hv dispatch array takes this many bytes. */ +#define HV_DISPATCH_ENTRY_SIZE 32 + +/** Version of the hypervisor interface defined by this file */ +#define _HV_VERSION 11 + +/* Index into hypervisor interface dispatch code blocks. + * + * Hypervisor calls are invoked from user space by calling code + * at an address HV_BASE_ADDRESS + (index) * HV_DISPATCH_ENTRY_SIZE, + * where index is one of these enum values. + * + * Normally a supervisor is expected to produce a set of symbols + * starting at HV_BASE_ADDRESS that obey this convention, but a user + * program could call directly through function pointers if desired. + * + * These numbers are part of the binary API and will not be changed + * without updating HV_VERSION, which should be a rare event. + */ + +/** reserved. */ +#define _HV_DISPATCH_RESERVED 0 + +/** hv_init */ +#define HV_DISPATCH_INIT 1 + +/** hv_install_context */ +#define HV_DISPATCH_INSTALL_CONTEXT 2 + +/** hv_sysconf */ +#define HV_DISPATCH_SYSCONF 3 + +/** hv_get_rtc */ +#define HV_DISPATCH_GET_RTC 4 + +/** hv_set_rtc */ +#define HV_DISPATCH_SET_RTC 5 + +/** hv_flush_asid */ +#define HV_DISPATCH_FLUSH_ASID 6 + +/** hv_flush_page */ +#define HV_DISPATCH_FLUSH_PAGE 7 + +/** hv_flush_pages */ +#define HV_DISPATCH_FLUSH_PAGES 8 + +/** hv_restart */ +#define HV_DISPATCH_RESTART 9 + +/** hv_halt */ +#define HV_DISPATCH_HALT 10 + +/** hv_power_off */ +#define HV_DISPATCH_POWER_OFF 11 + +/** hv_inquire_physical */ +#define HV_DISPATCH_INQUIRE_PHYSICAL 12 + +/** hv_inquire_memory_controller */ +#define HV_DISPATCH_INQUIRE_MEMORY_CONTROLLER 13 + +/** hv_inquire_virtual */ +#define HV_DISPATCH_INQUIRE_VIRTUAL 14 + +/** hv_inquire_asid */ +#define HV_DISPATCH_INQUIRE_ASID 15 + +/** hv_nanosleep */ +#define HV_DISPATCH_NANOSLEEP 16 + +/** hv_console_read_if_ready */ +#define HV_DISPATCH_CONSOLE_READ_IF_READY 17 + +/** hv_console_write */ +#define HV_DISPATCH_CONSOLE_WRITE 18 + +/** hv_downcall_dispatch */ +#define HV_DISPATCH_DOWNCALL_DISPATCH 19 + +/** hv_inquire_topology */ +#define HV_DISPATCH_INQUIRE_TOPOLOGY 20 + +/** hv_fs_findfile */ +#define HV_DISPATCH_FS_FINDFILE 21 + +/** hv_fs_fstat */ +#define HV_DISPATCH_FS_FSTAT 22 + +/** hv_fs_pread */ +#define HV_DISPATCH_FS_PREAD 23 + +/** hv_physaddr_read64 */ +#define HV_DISPATCH_PHYSADDR_READ64 24 + +/** hv_physaddr_write64 */ +#define HV_DISPATCH_PHYSADDR_WRITE64 25 + +/** hv_get_command_line */ +#define HV_DISPATCH_GET_COMMAND_LINE 26 + +/** hv_set_caching */ +#define HV_DISPATCH_SET_CACHING 27 + +/** hv_bzero_page */ +#define HV_DISPATCH_BZERO_PAGE 28 + +/** hv_register_message_state */ +#define HV_DISPATCH_REGISTER_MESSAGE_STATE 29 + +/** hv_send_message */ +#define HV_DISPATCH_SEND_MESSAGE 30 + +/** hv_receive_message */ +#define HV_DISPATCH_RECEIVE_MESSAGE 31 + +/** hv_inquire_context */ +#define HV_DISPATCH_INQUIRE_CONTEXT 32 + +/** hv_start_all_tiles */ +#define HV_DISPATCH_START_ALL_TILES 33 + +/** hv_dev_open */ +#define HV_DISPATCH_DEV_OPEN 34 + +/** hv_dev_close */ +#define HV_DISPATCH_DEV_CLOSE 35 + +/** hv_dev_pread */ +#define HV_DISPATCH_DEV_PREAD 36 + +/** hv_dev_pwrite */ +#define HV_DISPATCH_DEV_PWRITE 37 + +/** hv_dev_poll */ +#define HV_DISPATCH_DEV_POLL 38 + +/** hv_dev_poll_cancel */ +#define HV_DISPATCH_DEV_POLL_CANCEL 39 + +/** hv_dev_preada */ +#define HV_DISPATCH_DEV_PREADA 40 + +/** hv_dev_pwritea */ +#define HV_DISPATCH_DEV_PWRITEA 41 + +/** hv_flush_remote */ +#define HV_DISPATCH_FLUSH_REMOTE 42 + +/** hv_console_putc */ +#define HV_DISPATCH_CONSOLE_PUTC 43 + +/** hv_inquire_tiles */ +#define HV_DISPATCH_INQUIRE_TILES 44 + +/** hv_confstr */ +#define HV_DISPATCH_CONFSTR 45 + +/** hv_reexec */ +#define HV_DISPATCH_REEXEC 46 + +/** hv_set_command_line */ +#define HV_DISPATCH_SET_COMMAND_LINE 47 + +#if !CHIP_HAS_IPI() + +/** hv_clear_intr */ +#define HV_DISPATCH_CLEAR_INTR 48 + +/** hv_enable_intr */ +#define HV_DISPATCH_ENABLE_INTR 49 + +/** hv_disable_intr */ +#define HV_DISPATCH_DISABLE_INTR 50 + +/** hv_raise_intr */ +#define HV_DISPATCH_RAISE_INTR 51 + +/** hv_trigger_ipi */ +#define HV_DISPATCH_TRIGGER_IPI 52 + +#endif /* !CHIP_HAS_IPI() */ + +/** hv_store_mapping */ +#define HV_DISPATCH_STORE_MAPPING 53 + +/** hv_inquire_realpa */ +#define HV_DISPATCH_INQUIRE_REALPA 54 + +/** hv_flush_all */ +#define HV_DISPATCH_FLUSH_ALL 55 + +#if CHIP_HAS_IPI() +/** hv_get_ipi_pte */ +#define HV_DISPATCH_GET_IPI_PTE 56 +#endif + +/** One more than the largest dispatch value */ +#define _HV_DISPATCH_END 57 + + +#ifndef __ASSEMBLER__ + +#ifdef __KERNEL__ +#include <asm/types.h> +typedef u32 __hv32; /**< 32-bit value */ +typedef u64 __hv64; /**< 64-bit value */ +#else +#include <stdint.h> +typedef uint32_t __hv32; /**< 32-bit value */ +typedef uint64_t __hv64; /**< 64-bit value */ +#endif + + +/** Hypervisor physical address. */ +typedef __hv64 HV_PhysAddr; + +#if CHIP_VA_WIDTH() > 32 +/** Hypervisor virtual address. */ +typedef __hv64 HV_VirtAddr; +#else +/** Hypervisor virtual address. */ +typedef __hv32 HV_VirtAddr; +#endif /* CHIP_VA_WIDTH() > 32 */ + +/** Hypervisor ASID. */ +typedef unsigned int HV_ASID; + +/** Hypervisor tile location for a memory access + * ("location overridden target"). + */ +typedef unsigned int HV_LOTAR; + +/** Hypervisor size of a page. */ +typedef unsigned long HV_PageSize; + +/** A page table entry. + */ +typedef struct +{ + __hv64 val; /**< Value of PTE */ +} HV_PTE; + +/** Hypervisor error code. */ +typedef int HV_Errno; + +#endif /* !__ASSEMBLER__ */ + +#define HV_OK 0 /**< No error */ +#define HV_EINVAL -801 /**< Invalid argument */ +#define HV_ENODEV -802 /**< No such device */ +#define HV_ENOENT -803 /**< No such file or directory */ +#define HV_EBADF -804 /**< Bad file number */ +#define HV_EFAULT -805 /**< Bad address */ +#define HV_ERECIP -806 /**< Bad recipients */ +#define HV_E2BIG -807 /**< Message too big */ +#define HV_ENOTSUP -808 /**< Service not supported */ +#define HV_EBUSY -809 /**< Device busy */ +#define HV_ENOSYS -810 /**< Invalid syscall */ +#define HV_EPERM -811 /**< No permission */ +#define HV_ENOTREADY -812 /**< Device not ready */ +#define HV_EIO -813 /**< I/O error */ +#define HV_ENOMEM -814 /**< Out of memory */ +#define HV_EAGAIN -815 /**< Try again */ + +#define HV_ERR_MAX -801 /**< Largest HV error code */ +#define HV_ERR_MIN -815 /**< Smallest HV error code */ + +#ifndef __ASSEMBLER__ + +/** Pass HV_VERSION to hv_init to request this version of the interface. */ +typedef enum { HV_VERSION = _HV_VERSION } HV_VersionNumber; + +/** Initializes the hypervisor. + * + * @param interface_version_number The version of the hypervisor interface + * that this program expects, typically HV_VERSION. + * @param chip_num Architecture number of the chip the client was built for. + * @param chip_rev_num Revision number of the chip the client was built for. + */ +void hv_init(HV_VersionNumber interface_version_number, + int chip_num, int chip_rev_num); + + +/** Queries we can make for hv_sysconf(). + * + * These numbers are part of the binary API and guaranteed not to change. + */ +typedef enum { + /** An invalid value; do not use. */ + _HV_SYSCONF_RESERVED = 0, + + /** The length of the glue section containing the hv_ procs, in bytes. */ + HV_SYSCONF_GLUE_SIZE = 1, + + /** The size of small pages, in bytes. */ + HV_SYSCONF_PAGE_SIZE_SMALL = 2, + + /** The size of large pages, in bytes. */ + HV_SYSCONF_PAGE_SIZE_LARGE = 3, + + /** Processor clock speed, in hertz. */ + HV_SYSCONF_CPU_SPEED = 4, + + /** Processor temperature, in degrees Kelvin. The value + * HV_SYSCONF_TEMP_KTOC may be subtracted from this to get degrees + * Celsius. If that Celsius value is HV_SYSCONF_OVERTEMP, this indicates + * that the temperature has hit an upper limit and is no longer being + * accurately tracked. + */ + HV_SYSCONF_CPU_TEMP = 5, + + /** Board temperature, in degrees Kelvin. The value + * HV_SYSCONF_TEMP_KTOC may be subtracted from this to get degrees + * Celsius. If that Celsius value is HV_SYSCONF_OVERTEMP, this indicates + * that the temperature has hit an upper limit and is no longer being + * accurately tracked. + */ + HV_SYSCONF_BOARD_TEMP = 6 + +} HV_SysconfQuery; + +/** Offset to subtract from returned Kelvin temperature to get degrees + Celsius. */ +#define HV_SYSCONF_TEMP_KTOC 273 + +/** Pseudo-temperature value indicating that the temperature has + * pegged at its upper limit and is no longer accurate; note that this is + * the value after subtracting HV_SYSCONF_TEMP_KTOC. */ +#define HV_SYSCONF_OVERTEMP 999 + +/** Query a configuration value from the hypervisor. + * @param query Which value is requested (HV_SYSCONF_xxx). + * @return The requested value, or -1 the requested value is illegal or + * unavailable. + */ +long hv_sysconf(HV_SysconfQuery query); + + +/** Queries we can make for hv_confstr(). + * + * These numbers are part of the binary API and guaranteed not to change. + */ +typedef enum { + /** An invalid value; do not use. */ + _HV_CONFSTR_RESERVED = 0, + + /** Board part number. */ + HV_CONFSTR_BOARD_PART_NUM = 1, + + /** Board serial number. */ + HV_CONFSTR_BOARD_SERIAL_NUM = 2, + + /** Chip serial number. */ + HV_CONFSTR_CHIP_SERIAL_NUM = 3, + + /** Board revision level. */ + HV_CONFSTR_BOARD_REV = 4, + + /** Hypervisor software version. */ + HV_CONFSTR_HV_SW_VER = 5, + + /** The name for this chip model. */ + HV_CONFSTR_CHIP_MODEL = 6, + + /** Human-readable board description. */ + HV_CONFSTR_BOARD_DESC = 7, + + /** Human-readable description of the hypervisor configuration. */ + HV_CONFSTR_HV_CONFIG = 8, + + /** Human-readable version string for the boot image (for instance, + * who built it and when, what configuration file was used). */ + HV_CONFSTR_HV_CONFIG_VER = 9, + + /** Mezzanine part number. */ + HV_CONFSTR_MEZZ_PART_NUM = 10, + + /** Mezzanine serial number. */ + HV_CONFSTR_MEZZ_SERIAL_NUM = 11, + + /** Mezzanine revision level. */ + HV_CONFSTR_MEZZ_REV = 12, + + /** Human-readable mezzanine description. */ + HV_CONFSTR_MEZZ_DESC = 13, + + /** Control path for the onboard network switch. */ + HV_CONFSTR_SWITCH_CONTROL = 14, + + /** Chip revision level. */ + HV_CONFSTR_CHIP_REV = 15 + +} HV_ConfstrQuery; + +/** Query a configuration string from the hypervisor. + * + * @param query Identifier for the specific string to be retrieved + * (HV_CONFSTR_xxx). + * @param buf Buffer in which to place the string. + * @param len Length of the buffer. + * @return If query is valid, then the length of the corresponding string, + * including the trailing null; if this is greater than len, the string + * was truncated. If query is invalid, HV_EINVAL. If the specified + * buffer is not writable by the client, HV_EFAULT. + */ +int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len); + +/** Tile coordinate */ +typedef struct +{ + /** X coordinate, relative to supervisor's top-left coordinate */ + int x; + + /** Y coordinate, relative to supervisor's top-left coordinate */ + int y; +} HV_Coord; + + +#if CHIP_HAS_IPI() + +/** Get the PTE for sending an IPI to a particular tile. + * + * @param tile Tile which will receive the IPI. + * @param pl Indicates which IPI registers: 0 = IPI_0, 1 = IPI_1. + * @param pte Filled with resulting PTE. + * @result Zero if no error, non-zero for invalid parameters. + */ +int hv_get_ipi_pte(HV_Coord tile, int pl, HV_PTE* pte); + +#else /* !CHIP_HAS_IPI() */ + +/** A set of interrupts. */ +typedef __hv32 HV_IntrMask; + +/** The low interrupt numbers are reserved for use by the client in + * delivering IPIs. Any interrupt numbers higher than this value are + * reserved for use by HV device drivers. */ +#define HV_MAX_IPI_INTERRUPT 7 + +/** Enable a set of device interrupts. + * + * @param enab_mask Bitmap of interrupts to enable. + */ +void hv_enable_intr(HV_IntrMask enab_mask); + +/** Disable a set of device interrupts. + * + * @param disab_mask Bitmap of interrupts to disable. + */ +void hv_disable_intr(HV_IntrMask disab_mask); + +/** Clear a set of device interrupts. + * + * @param clear_mask Bitmap of interrupts to clear. + */ +void hv_clear_intr(HV_IntrMask clear_mask); + +/** Raise a set of device interrupts. + * + * @param raise_mask Bitmap of interrupts to raise. + */ +void hv_raise_intr(HV_IntrMask raise_mask); + +/** Trigger a one-shot interrupt on some tile + * + * @param tile Which tile to interrupt. + * @param interrupt Interrupt number to trigger; must be between 0 and + * HV_MAX_IPI_INTERRUPT. + * @return HV_OK on success, or a hypervisor error code. + */ +HV_Errno hv_trigger_ipi(HV_Coord tile, int interrupt); + +#endif /* !CHIP_HAS_IPI() */ + +/** Store memory mapping in debug memory so that external debugger can read it. + * A maximum of 16 entries can be stored. + * + * @param va VA of memory that is mapped. + * @param len Length of mapped memory. + * @param pa PA of memory that is mapped. + * @return 0 on success, -1 if the maximum number of mappings is exceeded. + */ +int hv_store_mapping(HV_VirtAddr va, unsigned int len, HV_PhysAddr pa); + +/** Given a client PA and a length, return its real (HV) PA. + * + * @param cpa Client physical address. + * @param len Length of mapped memory. + * @return physical address, or -1 if cpa or len is not valid. + */ +HV_PhysAddr hv_inquire_realpa(HV_PhysAddr cpa, unsigned int len); + +/** RTC return flag for no RTC chip present. + */ +#define HV_RTC_NO_CHIP 0x1 + +/** RTC return flag for low-voltage condition, indicating that battery had + * died and time read is unreliable. + */ +#define HV_RTC_LOW_VOLTAGE 0x2 + +/** Date/Time of day */ +typedef struct { +#if CHIP_WORD_SIZE() > 32 + __hv64 tm_sec; /**< Seconds, 0-59 */ + __hv64 tm_min; /**< Minutes, 0-59 */ + __hv64 tm_hour; /**< Hours, 0-23 */ + __hv64 tm_mday; /**< Day of month, 0-30 */ + __hv64 tm_mon; /**< Month, 0-11 */ + __hv64 tm_year; /**< Years since 1900, 0-199 */ + __hv64 flags; /**< Return flags, 0 if no error */ +#else + __hv32 tm_sec; /**< Seconds, 0-59 */ + __hv32 tm_min; /**< Minutes, 0-59 */ + __hv32 tm_hour; /**< Hours, 0-23 */ + __hv32 tm_mday; /**< Day of month, 0-30 */ + __hv32 tm_mon; /**< Month, 0-11 */ + __hv32 tm_year; /**< Years since 1900, 0-199 */ + __hv32 flags; /**< Return flags, 0 if no error */ +#endif +} HV_RTCTime; + +/** Read the current time-of-day clock. + * @return HV_RTCTime of current time (GMT). + */ +HV_RTCTime hv_get_rtc(void); + + +/** Set the current time-of-day clock. + * @param time time to reset time-of-day to (GMT). + */ +void hv_set_rtc(HV_RTCTime time); + +/** Installs a context, comprising a page table and other attributes. + * + * Once this service completes, page_table will be used to translate + * subsequent virtual address references to physical memory. + * + * Installing a context does not cause an implicit TLB flush. Before + * reusing an ASID value for a different address space, the client is + * expected to flush old references from the TLB with hv_flush_asid(). + * (Alternately, hv_flush_all() may be used to flush many ASIDs at once.) + * After invalidating a page table entry, changing its attributes, or + * changing its target CPA, the client is expected to flush old references + * from the TLB with hv_flush_page() or hv_flush_pages(). Making a + * previously invalid page valid does not require a flush. + * + * Specifying an invalid ASID, or an invalid CPA (client physical address) + * (either as page_table_pointer, or within the referenced table), + * or another page table data item documented as above as illegal may + * lead to client termination; since the validation of the table is + * done as needed, this may happen before the service returns, or at + * some later time, or never, depending upon the client's pattern of + * memory references. Page table entries which supply translations for + * invalid virtual addresses may result in client termination, or may + * be silently ignored. "Invalid" in this context means a value which + * was not provided to the client via the appropriate hv_inquire_* routine. + * + * To support changing the instruction VAs at the same time as + * installing the new page table, this call explicitly supports + * setting the "lr" register to a different address and then jumping + * directly to the hv_install_context() routine. In this case, the + * new page table does not need to contain any mapping for the + * hv_install_context address itself. + * + * @param page_table Root of the page table. + * @param access PTE providing info on how to read the page table. This + * value must be consistent between multiple tiles sharing a page table, + * and must also be consistent with any virtual mappings the client + * may be using to access the page table. + * @param asid HV_ASID the page table is to be used for. + * @param flags Context flags, denoting attributes or privileges of the + * current context (HV_CTX_xxx). + * @return Zero on success, or a hypervisor error code on failure. + */ +int hv_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid, + __hv32 flags); + +#endif /* !__ASSEMBLER__ */ + +#define HV_CTX_DIRECTIO 0x1 /**< Direct I/O requests are accepted from + PL0. */ + +#ifndef __ASSEMBLER__ + +/** Value returned from hv_inquire_context(). */ +typedef struct +{ + /** Physical address of page table */ + HV_PhysAddr page_table; + + /** PTE which defines access method for top of page table */ + HV_PTE access; + + /** ASID associated with this page table */ + HV_ASID asid; + + /** Context flags */ + __hv32 flags; +} HV_Context; + +/** Retrieve information about the currently installed context. + * @return The data passed to the last successful hv_install_context call. + */ +HV_Context hv_inquire_context(void); + + +/** Flushes all translations associated with the named address space + * identifier from the TLB and any other hypervisor data structures. + * Translations installed with the "global" bit are not flushed. + * + * Specifying an invalid ASID may lead to client termination. "Invalid" + * in this context means a value which was not provided to the client + * via <tt>hv_inquire_asid()</tt>. + * + * @param asid HV_ASID whose entries are to be flushed. + * @return Zero on success, or a hypervisor error code on failure. +*/ +int hv_flush_asid(HV_ASID asid); + + +/** Flushes all translations associated with the named virtual address + * and page size from the TLB and other hypervisor data structures. Only + * pages visible to the current ASID are affected; note that this includes + * global pages in addition to pages specific to the current ASID. + * + * The supplied VA need not be aligned; it may be anywhere in the + * subject page. + * + * Specifying an invalid virtual address may lead to client termination, + * or may silently succeed. "Invalid" in this context means a value + * which was not provided to the client via hv_inquire_virtual. + * + * @param address Address of the page to flush. + * @param page_size Size of pages to assume. + * @return Zero on success, or a hypervisor error code on failure. + */ +int hv_flush_page(HV_VirtAddr address, HV_PageSize page_size); + + +/** Flushes all translations associated with the named virtual address range + * and page size from the TLB and other hypervisor data structures. Only + * pages visible to the current ASID are affected; note that this includes + * global pages in addition to pages specific to the current ASID. + * + * The supplied VA need not be aligned; it may be anywhere in the + * subject page. + * + * Specifying an invalid virtual address may lead to client termination, + * or may silently succeed. "Invalid" in this context means a value + * which was not provided to the client via hv_inquire_virtual. + * + * @param start Address to flush. + * @param page_size Size of pages to assume. + * @param size The number of bytes to flush. Any page in the range + * [start, start + size) will be flushed from the TLB. + * @return Zero on success, or a hypervisor error code on failure. + */ +int hv_flush_pages(HV_VirtAddr start, HV_PageSize page_size, + unsigned long size); + + +/** Flushes all non-global translations (if preserve_global is true), + * or absolutely all translations (if preserve_global is false). + * + * @param preserve_global Non-zero if we want to preserve "global" mappings. + * @return Zero on success, or a hypervisor error code on failure. +*/ +int hv_flush_all(int preserve_global); + + +/** Restart machine with optional restart command and optional args. + * @param cmd Const pointer to command to restart with, or NULL + * @param args Const pointer to argument string to restart with, or NULL + */ +void hv_restart(HV_VirtAddr cmd, HV_VirtAddr args); + + +/** Halt machine. */ +void hv_halt(void); + + +/** Power off machine. */ +void hv_power_off(void); + + +/** Re-enter virtual-is-physical memory translation mode and restart + * execution at a given address. + * @param entry Client physical address at which to begin execution. + * @return A hypervisor error code on failure; if the operation is + * successful the call does not return. + */ +int hv_reexec(HV_PhysAddr entry); + + +/** Chip topology */ +typedef struct +{ + /** Relative coordinates of the querying tile */ + HV_Coord coord; + + /** Width of the querying supervisor's tile rectangle. */ + int width; + + /** Height of the querying supervisor's tile rectangle. */ + int height; + +} HV_Topology; + +/** Returns information about the tile coordinate system. + * + * Each supervisor is given a rectangle of tiles it potentially controls. + * These tiles are labeled using a relative coordinate system with (0,0) as + * the upper left tile regardless of their physical location on the chip. + * + * This call returns both the size of that rectangle and the position + * within that rectangle of the querying tile. + * + * Not all tiles within that rectangle may be available to the supervisor; + * to get the precise set of available tiles, you must also call + * hv_inquire_tiles(HV_INQ_TILES_AVAIL, ...). + **/ +HV_Topology hv_inquire_topology(void); + +/** Sets of tiles we can retrieve with hv_inquire_tiles(). + * + * These numbers are part of the binary API and guaranteed not to change. + */ +typedef enum { + /** An invalid value; do not use. */ + _HV_INQ_TILES_RESERVED = 0, + + /** All available tiles within the supervisor's tile rectangle. */ + HV_INQ_TILES_AVAIL = 1, + + /** The set of tiles used for hash-for-home caching. */ + HV_INQ_TILES_HFH_CACHE = 2, + + /** The set of tiles that can be legally used as a LOTAR for a PTE. */ + HV_INQ_TILES_LOTAR = 3 +} HV_InqTileSet; + +/** Returns specific information about various sets of tiles within the + * supervisor's tile rectangle. + * + * @param set Which set of tiles to retrieve. + * @param cpumask Pointer to a returned bitmask (in row-major order, + * supervisor-relative) of tiles. The low bit of the first word + * corresponds to the tile at the upper left-hand corner of the + * supervisor's rectangle. In order for the supervisor to know the + * buffer length to supply, it should first call hv_inquire_topology. + * @param length Number of bytes available for the returned bitmask. + **/ +HV_Errno hv_inquire_tiles(HV_InqTileSet set, HV_VirtAddr cpumask, int length); + + +/** An identifier for a memory controller. Multiple memory controllers + * may be connected to one chip, and this uniquely identifies each one. + */ +typedef int HV_MemoryController; + +/** A range of physical memory. */ +typedef struct +{ + HV_PhysAddr start; /**< Starting address. */ + __hv64 size; /**< Size in bytes. */ + HV_MemoryController controller; /**< Which memory controller owns this. */ +} HV_PhysAddrRange; + +/** Returns information about a range of physical memory. + * + * hv_inquire_physical() returns one of the ranges of client + * physical addresses which are available to this client. + * + * The first range is retrieved by specifying an idx of 0, and + * successive ranges are returned with subsequent idx values. Ranges + * are ordered by increasing start address (i.e., as idx increases, + * so does start), do not overlap, and do not touch (i.e., the + * available memory is described with the fewest possible ranges). + * + * If an out-of-range idx value is specified, the returned size will be zero. + * A client can count the number of ranges by increasing idx until the + * returned size is zero. There will always be at least one valid range. + * + * Some clients might not be prepared to deal with more than one + * physical address range; they still ought to call this routine and + * issue a warning message if they're given more than one range, on the + * theory that whoever configured the hypervisor to provide that memory + * should know that it's being wasted. + */ +HV_PhysAddrRange hv_inquire_physical(int idx); + +/** Possible DIMM types. */ +typedef enum +{ + NO_DIMM = 0, /**< No DIMM */ + DDR2 = 1, /**< DDR2 */ + DDR3 = 2 /**< DDR3 */ +} HV_DIMM_Type; + +#ifdef __tilegx__ + +/** Log2 of minimum DIMM bytes supported by the memory controller. */ +#define HV_MSH_MIN_DIMM_SIZE_SHIFT 29 + +/** Max number of DIMMs contained by one memory controller. */ +#define HV_MSH_MAX_DIMMS 8 + +#else + +/** Log2 of minimum DIMM bytes supported by the memory controller. */ +#define HV_MSH_MIN_DIMM_SIZE_SHIFT 26 + +/** Max number of DIMMs contained by one memory controller. */ +#define HV_MSH_MAX_DIMMS 2 + +#endif + +/** Number of bits to right-shift to get the DIMM type. */ +#define HV_DIMM_TYPE_SHIFT 0 + +/** Bits to mask to get the DIMM type. */ +#define HV_DIMM_TYPE_MASK 0xf + +/** Number of bits to right-shift to get the DIMM size. */ +#define HV_DIMM_SIZE_SHIFT 4 + +/** Bits to mask to get the DIMM size. */ +#define HV_DIMM_SIZE_MASK 0xf + +/** Memory controller information. */ +typedef struct +{ + HV_Coord coord; /**< Relative tile coordinates of the port used by a + specified tile to communicate with this controller. */ + __hv64 speed; /**< Speed of this controller in bytes per second. */ +} HV_MemoryControllerInfo; + +/** Returns information about a particular memory controller. + * + * hv_inquire_memory_controller(coord,idx) returns information about a + * particular controller. Two pieces of information are returned: + * - The relative coordinates of the port on the controller that the specified + * tile would use to contact it. The relative coordinates may lie + * outside the supervisor's rectangle, i.e. the controller may not + * be attached to a node managed by the querying node's supervisor. + * In particular note that x or y may be negative. + * - The speed of the memory controller. (This is a not-to-exceed value + * based on the raw hardware data rate, and may not be achievable in + * practice; it is provided to give clients information on the relative + * performance of the available controllers.) + * + * Clients should avoid calling this interface with invalid values. + * A client who does may be terminated. + * @param coord Tile for which to calculate the relative port position. + * @param controller Index of the controller; identical to value returned + * from other routines like hv_inquire_physical. + * @return Information about the controller. + */ +HV_MemoryControllerInfo hv_inquire_memory_controller(HV_Coord coord, + int controller); + + +/** A range of virtual memory. */ +typedef struct +{ + HV_VirtAddr start; /**< Starting address. */ + __hv64 size; /**< Size in bytes. */ +} HV_VirtAddrRange; + +/** Returns information about a range of virtual memory. + * + * hv_inquire_virtual() returns one of the ranges of client + * virtual addresses which are available to this client. + * + * The first range is retrieved by specifying an idx of 0, and + * successive ranges are returned with subsequent idx values. Ranges + * are ordered by increasing start address (i.e., as idx increases, + * so does start), do not overlap, and do not touch (i.e., the + * available memory is described with the fewest possible ranges). + * + * If an out-of-range idx value is specified, the returned size will be zero. + * A client can count the number of ranges by increasing idx until the + * returned size is zero. There will always be at least one valid range. + * + * Some clients may well have various virtual addresses hardwired + * into themselves; for instance, their instruction stream may + * have been compiled expecting to live at a particular address. + * Such clients should use this interface to verify they've been + * given the virtual address space they expect, and issue a (potentially + * fatal) warning message otherwise. + * + * Note that the returned size is a __hv64, not a __hv32, so it is + * possible to express a single range spanning the entire 32-bit + * address space. + */ +HV_VirtAddrRange hv_inquire_virtual(int idx); + + +/** A range of ASID values. */ +typedef struct +{ + HV_ASID start; /**< First ASID in the range. */ + unsigned int size; /**< Number of ASIDs. Zero for an invalid range. */ +} HV_ASIDRange; + +/** Returns information about a range of ASIDs. + * + * hv_inquire_asid() returns one of the ranges of address + * space identifiers which are available to this client. + * + * The first range is retrieved by specifying an idx of 0, and + * successive ranges are returned with subsequent idx values. Ranges + * are ordered by increasing start value (i.e., as idx increases, + * so does start), do not overlap, and do not touch (i.e., the + * available ASIDs are described with the fewest possible ranges). + * + * If an out-of-range idx value is specified, the returned size will be zero. + * A client can count the number of ranges by increasing idx until the + * returned size is zero. There will always be at least one valid range. + */ +HV_ASIDRange hv_inquire_asid(int idx); + + +/** Waits for at least the specified number of nanoseconds then returns. + * + * NOTE: this deprecated function currently assumes a 750 MHz clock, + * and is thus not generally suitable for use. New code should call + * hv_sysconf(HV_SYSCONF_CPU_SPEED), compute a cycle count to wait for, + * and delay by looping while checking the cycle counter SPR. + * + * @param nanosecs The number of nanoseconds to sleep. + */ +void hv_nanosleep(int nanosecs); + + +/** Reads a character from the console without blocking. + * + * @return A value from 0-255 indicates the value successfully read. + * A negative value means no value was ready. + */ +int hv_console_read_if_ready(void); + + +/** Writes a character to the console, blocking if the console is busy. + * + * This call cannot fail. If the console is broken for some reason, + * output will simply vanish. + * @param byte Character to write. + */ +void hv_console_putc(int byte); + + +/** Writes a string to the console, blocking if the console is busy. + * @param bytes Pointer to characters to write. + * @param len Number of characters to write. + * @return Number of characters written, or HV_EFAULT if the buffer is invalid. + */ +int hv_console_write(HV_VirtAddr bytes, int len); + + +/** Dispatch the next interrupt from the client downcall mechanism. + * + * The hypervisor uses downcalls to notify the client of asynchronous + * events. Some of these events are hypervisor-created (like incoming + * messages). Some are regular interrupts which initially occur in + * the hypervisor, and are normally handled directly by the client; + * when these occur in a client's interrupt critical section, they must + * be delivered through the downcall mechanism. + * + * A downcall is initially delivered to the client as an INTCTRL_CL + * interrupt, where CL is the client's PL. Upon entry to the INTCTRL_CL + * vector, the client must immediately invoke the hv_downcall_dispatch + * service. This service will not return; instead it will cause one of + * the client's actual downcall-handling interrupt vectors to be entered. + * The EX_CONTEXT registers in the client will be set so that when the + * client irets, it will return to the code which was interrupted by the + * INTCTRL_CL interrupt. + * + * Under some circumstances, the firing of INTCTRL_CL can race with + * the lowering of a device interrupt. In such a case, the + * hv_downcall_dispatch service may issue an iret instruction instead + * of entering one of the client's actual downcall-handling interrupt + * vectors. This will return execution to the location that was + * interrupted by INTCTRL_CL. + * + * Any saving of registers should be done by the actual handling + * vectors; no registers should be changed by the INTCTRL_CL handler. + * In particular, the client should not use a jal instruction to invoke + * the hv_downcall_dispatch service, as that would overwrite the client's + * lr register. Note that the hv_downcall_dispatch service may overwrite + * one or more of the client's system save registers. + * + * The client must not modify the INTCTRL_CL_STATUS SPR. The hypervisor + * will set this register to cause a downcall to happen, and will clear + * it when no further downcalls are pending. + * + * When a downcall vector is entered, the INTCTRL_CL interrupt will be + * masked. When the client is done processing a downcall, and is ready + * to accept another, it must unmask this interrupt; if more downcalls + * are pending, this will cause the INTCTRL_CL vector to be reentered. + * Currently the following interrupt vectors can be entered through a + * downcall: + * + * INT_MESSAGE_RCV_DWNCL (hypervisor message available) + * INT_DEV_INTR_DWNCL (device interrupt) + * INT_DMATLB_MISS_DWNCL (DMA TLB miss) + * INT_SNITLB_MISS_DWNCL (SNI TLB miss) + * INT_DMATLB_ACCESS_DWNCL (DMA TLB access violation) + */ +void hv_downcall_dispatch(void); + +#endif /* !__ASSEMBLER__ */ + +/** We use actual interrupt vectors which never occur (they're only there + * to allow setting MPLs for related SPRs) for our downcall vectors. + */ +/** Message receive downcall interrupt vector */ +#define INT_MESSAGE_RCV_DWNCL INT_BOOT_ACCESS +/** DMA TLB miss downcall interrupt vector */ +#define INT_DMATLB_MISS_DWNCL INT_DMA_ASID +/** Static nework processor instruction TLB miss interrupt vector */ +#define INT_SNITLB_MISS_DWNCL INT_SNI_ASID +/** DMA TLB access violation downcall interrupt vector */ +#define INT_DMATLB_ACCESS_DWNCL INT_DMA_CPL +/** Device interrupt downcall interrupt vector */ +#define INT_DEV_INTR_DWNCL INT_WORLD_ACCESS + +#ifndef __ASSEMBLER__ + +/** Requests the inode for a specific full pathname. + * + * Performs a lookup in the hypervisor filesystem for a given filename. + * Multiple calls with the same filename will always return the same inode. + * If there is no such filename, HV_ENOENT is returned. + * A bad filename pointer may result in HV_EFAULT instead. + * + * @param filename Constant pointer to name of requested file + * @return Inode of requested file + */ +int hv_fs_findfile(HV_VirtAddr filename); + + +/** Data returned from an fstat request. + * Note that this structure should be no more than 40 bytes in size so + * that it can always be returned completely in registers. + */ +typedef struct +{ + int size; /**< Size of file (or HV_Errno on error) */ + unsigned int flags; /**< Flags (see HV_FS_FSTAT_FLAGS) */ +} HV_FS_StatInfo; + +/** Bitmask flags for fstat request */ +typedef enum +{ + HV_FS_ISDIR = 0x0001 /**< Is the entry a directory? */ +} HV_FS_FSTAT_FLAGS; + +/** Get stat information on a given file inode. + * + * Return information on the file with the given inode. + * + * IF the HV_FS_ISDIR bit is set, the "file" is a directory. Reading + * it will return NUL-separated filenames (no directory part) relative + * to the path to the inode of the directory "file". These can be + * appended to the path to the directory "file" after a forward slash + * to create additional filenames. Note that it is not required + * that all valid paths be decomposable into valid parent directories; + * a filesystem may validly have just a few files, none of which have + * HV_FS_ISDIR set. However, if clients may wish to enumerate the + * files in the filesystem, it is recommended to include all the + * appropriate parent directory "files" to give a consistent view. + * + * An invalid file inode will cause an HV_EBADF error to be returned. + * + * @param inode The inode number of the query + * @return An HV_FS_StatInfo structure + */ +HV_FS_StatInfo hv_fs_fstat(int inode); + + +/** Read data from a specific hypervisor file. + * On error, may return HV_EBADF for a bad inode or HV_EFAULT for a bad buf. + * Reads near the end of the file will return fewer bytes than requested. + * Reads at or beyond the end of a file will return zero. + * + * @param inode the hypervisor file to read + * @param buf the buffer to read data into + * @param length the number of bytes of data to read + * @param offset the offset into the file to read the data from + * @return number of bytes successfully read, or an HV_Errno code + */ +int hv_fs_pread(int inode, HV_VirtAddr buf, int length, int offset); + + +/** Read a 64-bit word from the specified physical address. + * The address must be 8-byte aligned. + * Specifying an invalid physical address will lead to client termination. + * @param addr The physical address to read + * @param access The PTE describing how to read the memory + * @return The 64-bit value read from the given address + */ +unsigned long long hv_physaddr_read64(HV_PhysAddr addr, HV_PTE access); + + +/** Write a 64-bit word to the specified physical address. + * The address must be 8-byte aligned. + * Specifying an invalid physical address will lead to client termination. + * @param addr The physical address to write + * @param access The PTE that says how to write the memory + * @param val The 64-bit value to write to the given address + */ +void hv_physaddr_write64(HV_PhysAddr addr, HV_PTE access, + unsigned long long val); + + +/** Get the value of the command-line for the supervisor, if any. + * This will not include the filename of the booted supervisor, but may + * include configured-in boot arguments or the hv_restart() arguments. + * If the buffer is not long enough the hypervisor will NUL the first + * character of the buffer but not write any other data. + * @param buf The virtual address to write the command-line string to. + * @param length The length of buf, in characters. + * @return The actual length of the command line, including the trailing NUL + * (may be larger than "length"). + */ +int hv_get_command_line(HV_VirtAddr buf, int length); + + +/** Set a new value for the command-line for the supervisor, which will + * be returned from subsequent invocations of hv_get_command_line() on + * this tile. + * @param buf The virtual address to read the command-line string from. + * @param length The length of buf, in characters; must be no more than + * HV_COMMAND_LINE_LEN. + * @return Zero if successful, or a hypervisor error code. + */ +HV_Errno hv_set_command_line(HV_VirtAddr buf, int length); + +/** Maximum size of a command line passed to hv_set_command_line(); note + * that a line returned from hv_get_command_line() could be larger than + * this.*/ +#define HV_COMMAND_LINE_LEN 256 + +/** Tell the hypervisor how to cache non-priority pages + * (its own as well as pages explicitly represented in page tables). + * Normally these will be represented as red/black pages, but + * when the supervisor starts to allocate "priority" pages in the PTE + * the hypervisor will need to start marking those pages as (e.g.) "red" + * and non-priority pages as either "black" (if they cache-alias + * with the existing priority pages) or "red/black" (if they don't). + * The bitmask provides information on which parts of the cache + * have been used for pinned pages so far on this tile; if (1 << N) + * appears in the bitmask, that indicates that a page has been marked + * "priority" whose PFN equals N, mod 8. + * @param bitmask A bitmap of priority page set values + */ +void hv_set_caching(unsigned int bitmask); + + +/** Zero out a specified number of pages. + * The va and size must both be multiples of 4096. + * Caches are bypassed and memory is directly set to zero. + * This API is implemented only in the magic hypervisor and is intended + * to provide a performance boost to the minimal supervisor by + * giving it a fast way to zero memory pages when allocating them. + * @param va Virtual address where the page has been mapped + * @param size Number of bytes (must be a page size multiple) + */ +void hv_bzero_page(HV_VirtAddr va, unsigned int size); + + +/** State object for the hypervisor messaging subsystem. */ +typedef struct +{ +#if CHIP_VA_WIDTH() > 32 + __hv64 opaque[2]; /**< No user-serviceable parts inside */ +#else + __hv32 opaque[2]; /**< No user-serviceable parts inside */ +#endif +} +HV_MsgState; + +/** Register to receive incoming messages. + * + * This routine configures the current tile so that it can receive + * incoming messages. It must be called before the client can receive + * messages with the hv_receive_message routine, and must be called on + * each tile which will receive messages. + * + * msgstate is the virtual address of a state object of type HV_MsgState. + * Once the state is registered, the client must not read or write the + * state object; doing so will cause undefined results. + * + * If this routine is called with msgstate set to 0, the client's message + * state will be freed and it will no longer be able to receive messages. + * Note that this may cause the loss of any as-yet-undelivered messages + * for the client. + * + * If another client attempts to send a message to a client which has + * not yet called hv_register_message_state, or which has freed its + * message state, the message will not be delivered, as if the client + * had insufficient buffering. + * + * This routine returns HV_OK if the registration was successful, and + * HV_EINVAL if the supplied state object is unsuitable. Note that some + * errors may not be detected during this routine, but might be detected + * during a subsequent message delivery. + * @param msgstate State object. + **/ +HV_Errno hv_register_message_state(HV_MsgState* msgstate); + +/** Possible message recipient states. */ +typedef enum +{ + HV_TO_BE_SENT, /**< Not sent (not attempted, or recipient not ready) */ + HV_SENT, /**< Successfully sent */ + HV_BAD_RECIP /**< Bad recipient coordinates (permanent error) */ +} HV_Recip_State; + +/** Message recipient. */ +typedef struct +{ + /** X coordinate, relative to supervisor's top-left coordinate */ + unsigned int x:11; + + /** Y coordinate, relative to supervisor's top-left coordinate */ + unsigned int y:11; + + /** Status of this recipient */ + HV_Recip_State state:10; +} HV_Recipient; + +/** Send a message to a set of recipients. + * + * This routine sends a message to a set of recipients. + * + * recips is an array of HV_Recipient structures. Each specifies a tile, + * and a message state; initially, it is expected that the state will + * be set to HV_TO_BE_SENT. nrecip specifies the number of recipients + * in the recips array. + * + * For each recipient whose state is HV_TO_BE_SENT, the hypervisor attempts + * to send that tile the specified message. In order to successfully + * receive the message, the receiver must be a valid tile to which the + * sender has access, must not be the sending tile itself, and must have + * sufficient free buffer space. (The hypervisor guarantees that each + * tile which has called hv_register_message_state() will be able to + * buffer one message from every other tile which can legally send to it; + * more space may be provided but is not guaranteed.) If an invalid tile + * is specified, the recipient's state is set to HV_BAD_RECIP; this is a + * permanent delivery error. If the message is successfully delivered + * to the recipient's buffer, the recipient's state is set to HV_SENT. + * Otherwise, the recipient's state is unchanged. Message delivery is + * synchronous; all attempts to send messages are completed before this + * routine returns. + * + * If no permanent delivery errors were encountered, the routine returns + * the number of messages successfully sent: that is, the number of + * recipients whose states changed from HV_TO_BE_SENT to HV_SENT during + * this operation. If any permanent delivery errors were encountered, + * the routine returns HV_ERECIP. In the event of permanent delivery + * errors, it may be the case that delivery was not attempted to all + * recipients; if any messages were successfully delivered, however, + * recipients' state values will be updated appropriately. + * + * It is explicitly legal to specify a recipient structure whose state + * is not HV_TO_BE_SENT; such a recipient is ignored. One suggested way + * of using hv_send_message to send a message to multiple tiles is to set + * up a list of recipients, and then call the routine repeatedly with the + * same list, each time accumulating the number of messages successfully + * sent, until all messages are sent, a permanent error is encountered, + * or the desired number of attempts have been made. When used in this + * way, the routine will deliver each message no more than once to each + * recipient. + * + * Note that a message being successfully delivered to the recipient's + * buffer space does not guarantee that it is received by the recipient, + * either immediately or at any time in the future; the recipient might + * never call hv_receive_message, or could register a different state + * buffer, losing the message. + * + * Specifying the same recipient more than once in the recipient list + * is an error, which will not result in an error return but which may + * or may not result in more than one message being delivered to the + * recipient tile. + * + * buf and buflen specify the message to be sent. buf is a virtual address + * which must be currently mapped in the client's page table; if not, the + * routine returns HV_EFAULT. buflen must be greater than zero and less + * than or equal to HV_MAX_MESSAGE_SIZE, and nrecip must be less than the + * number of tiles to which the sender has access; if not, the routine + * returns HV_EINVAL. + * @param recips List of recipients. + * @param nrecip Number of recipients. + * @param buf Address of message data. + * @param buflen Length of message data. + **/ +int hv_send_message(HV_Recipient *recips, int nrecip, + HV_VirtAddr buf, int buflen); + +/** Maximum hypervisor message size, in bytes */ +#define HV_MAX_MESSAGE_SIZE 28 + + +/** Return value from hv_receive_message() */ +typedef struct +{ + int msglen; /**< Message length in bytes, or an error code */ + __hv32 source; /**< Code identifying message sender (HV_MSG_xxx) */ +} HV_RcvMsgInfo; + +#define HV_MSG_TILE 0x0 /**< Message source is another tile */ +#define HV_MSG_INTR 0x1 /**< Message source is a driver interrupt */ + +/** Receive a message. + * + * This routine retrieves a message from the client's incoming message + * buffer. + * + * Multiple messages sent from a particular sending tile to a particular + * receiving tile are received in the order that they were sent; however, + * no ordering is guaranteed between messages sent by different tiles. + * + * Whenever the a client's message buffer is empty, the first message + * subsequently received will cause the client's MESSAGE_RCV_DWNCL + * interrupt vector to be invoked through the interrupt downcall mechanism + * (see the description of the hv_downcall_dispatch() routine for details + * on downcalls). + * + * Another message-available downcall will not occur until a call to + * this routine is made when the message buffer is empty, and a message + * subsequently arrives. Note that such a downcall could occur while + * this routine is executing. If the calling code does not wish this + * to happen, it is recommended that this routine be called with the + * INTCTRL_1 interrupt masked, or inside an interrupt critical section. + * + * msgstate is the value previously passed to hv_register_message_state(). + * buf is the virtual address of the buffer into which the message will + * be written; buflen is the length of the buffer. + * + * This routine returns an HV_RcvMsgInfo structure. The msglen member + * of that structure is the length of the message received, zero if no + * message is available, or HV_E2BIG if the message is too large for the + * specified buffer. If the message is too large, it is not consumed, + * and may be retrieved by a subsequent call to this routine specifying + * a sufficiently large buffer. A buffer which is HV_MAX_MESSAGE_SIZE + * bytes long is guaranteed to be able to receive any possible message. + * + * The source member of the HV_RcvMsgInfo structure describes the sender + * of the message. For messages sent by another client tile via an + * hv_send_message() call, this value is HV_MSG_TILE; for messages sent + * as a result of a device interrupt, this value is HV_MSG_INTR. + */ + +HV_RcvMsgInfo hv_receive_message(HV_MsgState msgstate, HV_VirtAddr buf, + int buflen); + + +/** Start remaining tiles owned by this supervisor. Initially, only one tile + * executes the client program; after it calls this service, the other tiles + * are started. This allows the initial tile to do one-time configuration + * of shared data structures without having to lock them against simultaneous + * access. + */ +void hv_start_all_tiles(void); + + +/** Open a hypervisor device. + * + * This service initializes an I/O device and its hypervisor driver software, + * and makes it available for use. The open operation is per-device per-chip; + * once it has been performed, the device handle returned may be used in other + * device services calls made by any tile. + * + * @param name Name of the device. A base device name is just a text string + * (say, "pcie"). If there is more than one instance of a device, the + * base name is followed by a slash and a device number (say, "pcie/0"). + * Some devices may support further structure beneath those components; + * most notably, devices which require control operations do so by + * supporting reads and/or writes to a control device whose name + * includes a trailing "/ctl" (say, "pcie/0/ctl"). + * @param flags Flags (HV_DEV_xxx). + * @return A positive integer device handle, or a negative error code. + */ +int hv_dev_open(HV_VirtAddr name, __hv32 flags); + + +/** Close a hypervisor device. + * + * This service uninitializes an I/O device and its hypervisor driver + * software, and makes it unavailable for use. The close operation is + * per-device per-chip; once it has been performed, the device is no longer + * available. Normally there is no need to ever call the close service. + * + * @param devhdl Device handle of the device to be closed. + * @return Zero if the close is successful, otherwise, a negative error code. + */ +int hv_dev_close(int devhdl); + + +/** Read data from a hypervisor device synchronously. + * + * This service transfers data from a hypervisor device to a memory buffer. + * When the service returns, the data has been written from the memory buffer, + * and the buffer will not be further modified by the driver. + * + * No ordering is guaranteed between requests issued from different tiles. + * + * Devices may choose to support both the synchronous and asynchronous read + * operations, only one of them, or neither of them. + * + * @param devhdl Device handle of the device to be read from. + * @param flags Flags (HV_DEV_xxx). + * @param va Virtual address of the target data buffer. This buffer must + * be mapped in the currently installed page table; if not, HV_EFAULT + * may be returned. + * @param len Number of bytes to be transferred. + * @param offset Driver-dependent offset. For a random-access device, this is + * often a byte offset from the beginning of the device; in other cases, + * like on a control device, it may have a different meaning. + * @return A non-negative value if the read was at least partially successful; + * otherwise, a negative error code. The precise interpretation of + * the return value is driver-dependent, but many drivers will return + * the number of bytes successfully transferred. + */ +int hv_dev_pread(int devhdl, __hv32 flags, HV_VirtAddr va, __hv32 len, + __hv64 offset); + +#define HV_DEV_NB_EMPTY 0x1 /**< Don't block when no bytes of data can + be transferred. */ +#define HV_DEV_NB_PARTIAL 0x2 /**< Don't block when some bytes, but not all + of the requested bytes, can be + transferred. */ +#define HV_DEV_NOCACHE 0x4 /**< The caller warrants that none of the + cache lines which might contain data + from the requested buffer are valid. + Useful with asynchronous operations + only. */ + +#define HV_DEV_ALLFLAGS (HV_DEV_NB_EMPTY | HV_DEV_NB_PARTIAL | \ + HV_DEV_NOCACHE) /**< All HV_DEV_xxx flags */ + +/** Write data to a hypervisor device synchronously. + * + * This service transfers data from a memory buffer to a hypervisor device. + * When the service returns, the data has been read from the memory buffer, + * and the buffer may be overwritten by the client; the data may not + * necessarily have been conveyed to the actual hardware I/O interface. + * + * No ordering is guaranteed between requests issued from different tiles. + * + * Devices may choose to support both the synchronous and asynchronous write + * operations, only one of them, or neither of them. + * + * @param devhdl Device handle of the device to be written to. + * @param flags Flags (HV_DEV_xxx). + * @param va Virtual address of the source data buffer. This buffer must + * be mapped in the currently installed page table; if not, HV_EFAULT + * may be returned. + * @param len Number of bytes to be transferred. + * @param offset Driver-dependent offset. For a random-access device, this is + * often a byte offset from the beginning of the device; in other cases, + * like on a control device, it may have a different meaning. + * @return A non-negative value if the write was at least partially successful; + * otherwise, a negative error code. The precise interpretation of + * the return value is driver-dependent, but many drivers will return + * the number of bytes successfully transferred. + */ +int hv_dev_pwrite(int devhdl, __hv32 flags, HV_VirtAddr va, __hv32 len, + __hv64 offset); + + +/** Interrupt arguments, used in the asynchronous I/O interfaces. */ +#if CHIP_VA_WIDTH() > 32 +typedef __hv64 HV_IntArg; +#else +typedef __hv32 HV_IntArg; +#endif + +/** Interrupt messages are delivered via the mechanism as normal messages, + * but have a message source of HV_DEV_INTR. The message is formatted + * as an HV_IntrMsg structure. + */ + +typedef struct +{ + HV_IntArg intarg; /**< Interrupt argument, passed to the poll/preada/pwritea + services */ + HV_IntArg intdata; /**< Interrupt-specific interrupt data */ +} HV_IntrMsg; + +/** Request an interrupt message when a device condition is satisfied. + * + * This service requests that an interrupt message be delivered to the + * requesting tile when a device becomes readable or writable, or when any + * data queued to the device via previous write operations from this tile + * has been actually sent out on the hardware I/O interface. Devices may + * choose to support any, all, or none of the available conditions. + * + * If multiple conditions are specified, only one message will be + * delivered. If the event mask delivered to that interrupt handler + * indicates that some of the conditions have not yet occurred, the + * client must issue another poll() call if it wishes to wait for those + * conditions. + * + * Only one poll may be outstanding per device handle per tile. If more than + * one tile is polling on the same device and condition, they will all be + * notified when it happens. Because of this, clients may not assume that + * the condition signaled is necessarily still true when they request a + * subsequent service; for instance, the readable data which caused the + * poll call to interrupt may have been read by another tile in the interim. + * + * The notification interrupt message could come directly, or via the + * downcall (intctrl1) method, depending on what the tile is doing + * when the condition is satisfied. Note that it is possible for the + * requested interrupt to be delivered after this service is called but + * before it returns. + * + * @param devhdl Device handle of the device to be polled. + * @param events Flags denoting the events which will cause the interrupt to + * be delivered (HV_DEVPOLL_xxx). + * @param intarg Value which will be delivered as the intarg member of the + * eventual interrupt message; the intdata member will be set to a + * mask of HV_DEVPOLL_xxx values indicating which conditions have been + * satisifed. + * @return Zero if the interrupt was successfully scheduled; otherwise, a + * negative error code. + */ +int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg); + +#define HV_DEVPOLL_READ 0x1 /**< Test device for readability */ +#define HV_DEVPOLL_WRITE 0x2 /**< Test device for writability */ +#define HV_DEVPOLL_FLUSH 0x4 /**< Test device for output drained */ + + +/** Cancel a request for an interrupt when a device event occurs. + * + * This service requests that no interrupt be delivered when the events + * noted in the last-issued poll() call happen. Once this service returns, + * the interrupt has been canceled; however, it is possible for the interrupt + * to be delivered after this service is called but before it returns. + * + * @param devhdl Device handle of the device on which to cancel polling. + * @return Zero if the poll was successfully canceled; otherwise, a negative + * error code. + */ +int hv_dev_poll_cancel(int devhdl); + + +/** Scatter-gather list for preada/pwritea calls. */ +typedef struct +#if CHIP_VA_WIDTH() <= 32 +__attribute__ ((packed, aligned(4))) +#endif +{ + HV_PhysAddr pa; /**< Client physical address of the buffer segment. */ + HV_PTE pte; /**< Page table entry describing the caching and location + override characteristics of the buffer segment. Some + drivers ignore this element and will require that + the NOCACHE flag be set on their requests. */ + __hv32 len; /**< Length of the buffer segment. */ +} HV_SGL; + +#define HV_SGL_MAXLEN 16 /**< Maximum number of entries in a scatter-gather + list */ + +/** Read data from a hypervisor device asynchronously. + * + * This service transfers data from a hypervisor device to a memory buffer. + * When the service returns, the read has been scheduled. When the read + * completes, an interrupt message will be delivered, and the buffer will + * not be further modified by the driver. + * + * The number of possible outstanding asynchronous requests is defined by + * each driver, but it is recommended that it be at least two requests + * per tile per device. + * + * No ordering is guaranteed between synchronous and asynchronous requests, + * even those issued on the same tile. + * + * The completion interrupt message could come directly, or via the downcall + * (intctrl1) method, depending on what the tile is doing when the read + * completes. Interrupts do not coalesce; one is delivered for each + * asynchronous I/O request. Note that it is possible for the requested + * interrupt to be delivered after this service is called but before it + * returns. + * + * Devices may choose to support both the synchronous and asynchronous read + * operations, only one of them, or neither of them. + * + * @param devhdl Device handle of the device to be read from. + * @param flags Flags (HV_DEV_xxx). + * @param sgl_len Number of elements in the scatter-gather list. + * @param sgl Scatter-gather list describing the memory to which data will be + * written. + * @param offset Driver-dependent offset. For a random-access device, this is + * often a byte offset from the beginning of the device; in other cases, + * like on a control device, it may have a different meaning. + * @param intarg Value which will be delivered as the intarg member of the + * eventual interrupt message; the intdata member will be set to the + * normal return value from the read request. + * @return Zero if the read was successfully scheduled; otherwise, a negative + * error code. Note that some drivers may choose to pre-validate + * their arguments, and may thus detect certain device error + * conditions at this time rather than when the completion notification + * occurs, but this is not required. + */ +int hv_dev_preada(int devhdl, __hv32 flags, __hv32 sgl_len, + HV_SGL sgl[/* sgl_len */], __hv64 offset, HV_IntArg intarg); + + +/** Write data to a hypervisor device asynchronously. + * + * This service transfers data from a memory buffer to a hypervisor + * device. When the service returns, the write has been scheduled. + * When the write completes, an interrupt message will be delivered, + * and the buffer may be overwritten by the client; the data may not + * necessarily have been conveyed to the actual hardware I/O interface. + * + * The number of possible outstanding asynchronous requests is defined by + * each driver, but it is recommended that it be at least two requests + * per tile per device. + * + * No ordering is guaranteed between synchronous and asynchronous requests, + * even those issued on the same tile. + * + * The completion interrupt message could come directly, or via the downcall + * (intctrl1) method, depending on what the tile is doing when the read + * completes. Interrupts do not coalesce; one is delivered for each + * asynchronous I/O request. Note that it is possible for the requested + * interrupt to be delivered after this service is called but before it + * returns. + * + * Devices may choose to support both the synchronous and asynchronous write + * operations, only one of them, or neither of them. + * + * @param devhdl Device handle of the device to be read from. + * @param flags Flags (HV_DEV_xxx). + * @param sgl_len Number of elements in the scatter-gather list. + * @param sgl Scatter-gather list describing the memory from which data will be + * read. + * @param offset Driver-dependent offset. For a random-access device, this is + * often a byte offset from the beginning of the device; in other cases, + * like on a control device, it may have a different meaning. + * @param intarg Value which will be delivered as the intarg member of the + * eventual interrupt message; the intdata member will be set to the + * normal return value from the write request. + * @return Zero if the write was successfully scheduled; otherwise, a negative + * error code. Note that some drivers may choose to pre-validate + * their arguments, and may thus detect certain device error + * conditions at this time rather than when the completion notification + * occurs, but this is not required. + */ +int hv_dev_pwritea(int devhdl, __hv32 flags, __hv32 sgl_len, + HV_SGL sgl[/* sgl_len */], __hv64 offset, HV_IntArg intarg); + + +/** Define a pair of tile and ASID to identify a user process context. */ +typedef struct +{ + /** X coordinate, relative to supervisor's top-left coordinate */ + unsigned int x:11; + + /** Y coordinate, relative to supervisor's top-left coordinate */ + unsigned int y:11; + + /** ASID of the process on this x,y tile */ + HV_ASID asid:10; +} HV_Remote_ASID; + +/** Flush cache and/or TLB state on remote tiles. + * + * @param cache_pa Client physical address to flush from cache (ignored if + * the length encoded in cache_control is zero, or if + * HV_FLUSH_EVICT_L2 is set, or if cache_cpumask is NULL). + * @param cache_control This argument allows you to specify a length of + * physical address space to flush (maximum HV_FLUSH_MAX_CACHE_LEN). + * You can "or" in HV_FLUSH_EVICT_L2 to flush the whole L2 cache. + * You can "or" in HV_FLUSH_EVICT_L1I to flush the whole L1I cache. + * HV_FLUSH_ALL flushes all caches. + * @param cache_cpumask Bitmask (in row-major order, supervisor-relative) of + * tile indices to perform cache flush on. The low bit of the first + * word corresponds to the tile at the upper left-hand corner of the + * supervisor's rectangle. If passed as a NULL pointer, equivalent + * to an empty bitmask. On chips which support hash-for-home caching, + * if passed as -1, equivalent to a mask containing tiles which could + * be doing hash-for-home caching. + * @param tlb_va Virtual address to flush from TLB (ignored if + * tlb_length is zero or tlb_cpumask is NULL). + * @param tlb_length Number of bytes of data to flush from the TLB. + * @param tlb_pgsize Page size to use for TLB flushes. + * tlb_va and tlb_length need not be aligned to this size. + * @param tlb_cpumask Bitmask for tlb flush, like cache_cpumask. + * If passed as a NULL pointer, equivalent to an empty bitmask. + * @param asids Pointer to an HV_Remote_ASID array of tile/ASID pairs to flush. + * @param asidcount Number of HV_Remote_ASID entries in asids[]. + * @return Zero for success, or else HV_EINVAL or HV_EFAULT for errors that + * are detected while parsing the arguments. + */ +int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, + unsigned long* cache_cpumask, + HV_VirtAddr tlb_va, unsigned long tlb_length, + unsigned long tlb_pgsize, unsigned long* tlb_cpumask, + HV_Remote_ASID* asids, int asidcount); + +/** Include in cache_control to ensure a flush of the entire L2. */ +#define HV_FLUSH_EVICT_L2 (1UL << 31) + +/** Include in cache_control to ensure a flush of the entire L1I. */ +#define HV_FLUSH_EVICT_L1I (1UL << 30) + +/** Maximum legal size to use for the "length" component of cache_control. */ +#define HV_FLUSH_MAX_CACHE_LEN ((1UL << 30) - 1) + +/** Use for cache_control to ensure a flush of all caches. */ +#define HV_FLUSH_ALL -1UL + +#else /* __ASSEMBLER__ */ + +/** Include in cache_control to ensure a flush of the entire L2. */ +#define HV_FLUSH_EVICT_L2 (1 << 31) + +/** Include in cache_control to ensure a flush of the entire L1I. */ +#define HV_FLUSH_EVICT_L1I (1 << 30) + +/** Maximum legal size to use for the "length" component of cache_control. */ +#define HV_FLUSH_MAX_CACHE_LEN ((1 << 30) - 1) + +/** Use for cache_control to ensure a flush of all caches. */ +#define HV_FLUSH_ALL -1 + +#endif /* __ASSEMBLER__ */ + +#ifndef __ASSEMBLER__ + +/** Return a 64-bit value corresponding to the PTE if needed */ +#define hv_pte_val(pte) ((pte).val) + +/** Cast a 64-bit value to an HV_PTE */ +#define hv_pte(val) ((HV_PTE) { val }) + +#endif /* !__ASSEMBLER__ */ + + +/** Bits in the size of an HV_PTE */ +#define HV_LOG2_PTE_SIZE 3 + +/** Size of an HV_PTE */ +#define HV_PTE_SIZE (1 << HV_LOG2_PTE_SIZE) + + +/* Bits in HV_PTE's low word. */ +#define HV_PTE_INDEX_PRESENT 0 /**< PTE is valid */ +#define HV_PTE_INDEX_MIGRATING 1 /**< Page is migrating */ +#define HV_PTE_INDEX_CLIENT0 2 /**< Page client state 0 */ +#define HV_PTE_INDEX_CLIENT1 3 /**< Page client state 1 */ +#define HV_PTE_INDEX_NC 4 /**< L1$/L2$ incoherent with L3$ */ +#define HV_PTE_INDEX_NO_ALLOC_L1 5 /**< Page is uncached in local L1$ */ +#define HV_PTE_INDEX_NO_ALLOC_L2 6 /**< Page is uncached in local L2$ */ +#define HV_PTE_INDEX_CACHED_PRIORITY 7 /**< Page is priority cached */ +#define HV_PTE_INDEX_PAGE 8 /**< PTE describes a page */ +#define HV_PTE_INDEX_GLOBAL 9 /**< Page is global */ +#define HV_PTE_INDEX_USER 10 /**< Page is user-accessible */ +#define HV_PTE_INDEX_ACCESSED 11 /**< Page has been accessed */ +#define HV_PTE_INDEX_DIRTY 12 /**< Page has been written */ + /* Bits 13-15 are reserved for + future use. */ +#define HV_PTE_INDEX_MODE 16 /**< Page mode; see HV_PTE_MODE_xxx */ +#define HV_PTE_MODE_BITS 3 /**< Number of bits in mode */ + /* Bit 19 is reserved for + future use. */ +#define HV_PTE_INDEX_LOTAR 20 /**< Page's LOTAR; must be high bits + of word */ +#define HV_PTE_LOTAR_BITS 12 /**< Number of bits in a LOTAR */ + +/* Bits in HV_PTE's high word. */ +#define HV_PTE_INDEX_READABLE 32 /**< Page is readable */ +#define HV_PTE_INDEX_WRITABLE 33 /**< Page is writable */ +#define HV_PTE_INDEX_EXECUTABLE 34 /**< Page is executable */ +#define HV_PTE_INDEX_PTFN 35 /**< Page's PTFN; must be high bits + of word */ +#define HV_PTE_PTFN_BITS 29 /**< Number of bits in a PTFN */ + +/** Position of the PFN field within the PTE (subset of the PTFN). */ +#define HV_PTE_INDEX_PFN (HV_PTE_INDEX_PTFN + (HV_LOG2_PAGE_SIZE_SMALL - \ + HV_LOG2_PAGE_TABLE_ALIGN)) + +/** Length of the PFN field within the PTE (subset of the PTFN). */ +#define HV_PTE_INDEX_PFN_BITS (HV_PTE_INDEX_PTFN_BITS - \ + (HV_LOG2_PAGE_SIZE_SMALL - \ + HV_LOG2_PAGE_TABLE_ALIGN)) + +/* + * Legal values for the PTE's mode field + */ +/** Data is not resident in any caches; loads and stores access memory + * directly. + */ +#define HV_PTE_MODE_UNCACHED 1 + +/** Data is resident in the tile's local L1 and/or L2 caches; if a load + * or store misses there, it goes to memory. + * + * The copy in the local L1$/L2$ is not invalidated when the copy in + * memory is changed. + */ +#define HV_PTE_MODE_CACHE_NO_L3 2 + +/** Data is resident in the tile's local L1 and/or L2 caches. If a load + * or store misses there, it goes to an L3 cache in a designated tile; + * if it misses there, it goes to memory. + * + * If the NC bit is not set, the copy in the local L1$/L2$ is invalidated + * when the copy in the remote L3$ is changed. Otherwise, such + * invalidation will not occur. + * + * Chips for which CHIP_HAS_COHERENT_LOCAL_CACHE() is 0 do not support + * invalidation from an L3$ to another tile's L1$/L2$. If the NC bit is + * clear on such a chip, no copy is kept in the local L1$/L2$ in this mode. + */ +#define HV_PTE_MODE_CACHE_TILE_L3 3 + +/** Data is resident in the tile's local L1 and/or L2 caches. If a load + * or store misses there, it goes to an L3 cache in one of a set of + * designated tiles; if it misses there, it goes to memory. Which tile + * is chosen from the set depends upon a hash function applied to the + * physical address. This mode is not supported on chips for which + * CHIP_HAS_CBOX_HOME_MAP() is 0. + * + * If the NC bit is not set, the copy in the local L1$/L2$ is invalidated + * when the copy in the remote L3$ is changed. Otherwise, such + * invalidation will not occur. + * + * Chips for which CHIP_HAS_COHERENT_LOCAL_CACHE() is 0 do not support + * invalidation from an L3$ to another tile's L1$/L2$. If the NC bit is + * clear on such a chip, no copy is kept in the local L1$/L2$ in this mode. + */ +#define HV_PTE_MODE_CACHE_HASH_L3 4 + +/** Data is not resident in memory; accesses are instead made to an I/O + * device, whose tile coordinates are given by the PTE's LOTAR field. + * This mode is only supported on chips for which CHIP_HAS_MMIO() is 1. + * The EXECUTABLE bit may not be set in an MMIO PTE. + */ +#define HV_PTE_MODE_MMIO 5 + + +/* C wants 1ULL so it is typed as __hv64, but the assembler needs just numbers. + * The assembler can't handle shifts greater than 31, but treats them + * as shifts mod 32, so assembler code must be aware of which word + * the bit belongs in when using these macros. + */ +#ifdef __ASSEMBLER__ +#define __HV_PTE_ONE 1 /**< One, for assembler */ +#else +#define __HV_PTE_ONE 1ULL /**< One, for C */ +#endif + +/** Is this PTE present? + * + * If this bit is set, this PTE represents a valid translation or level-2 + * page table pointer. Otherwise, the page table does not contain a + * translation for the subject virtual pages. + * + * If this bit is not set, the other bits in the PTE are not + * interpreted by the hypervisor, and may contain any value. + */ +#define HV_PTE_PRESENT (__HV_PTE_ONE << HV_PTE_INDEX_PRESENT) + +/** Does this PTE map a page? + * + * If this bit is set in the level-1 page table, the entry should be + * interpreted as a level-2 page table entry mapping a large page. + * + * This bit should not be modified by the client while PRESENT is set, as + * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits. + * + * In a level-2 page table, this bit is ignored and must be zero. + */ +#define HV_PTE_PAGE (__HV_PTE_ONE << HV_PTE_INDEX_PAGE) + +/** Is this a global (non-ASID) mapping? + * + * If this bit is set, the translations established by this PTE will + * not be flushed from the TLB by the hv_flush_asid() service; they + * will be flushed by the hv_flush_page() or hv_flush_pages() services. + * + * Setting this bit for translations which are identical in all page + * tables (for instance, code and data belonging to a client OS) can + * be very beneficial, as it will reduce the number of TLB misses. + * Note that, while it is not an error which will be detected by the + * hypervisor, it is an extremely bad idea to set this bit for + * translations which are _not_ identical in all page tables. + * + * This bit should not be modified by the client while PRESENT is set, as + * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_GLOBAL (__HV_PTE_ONE << HV_PTE_INDEX_GLOBAL) + +/** Is this mapping accessible to users? + * + * If this bit is set, code running at any PL will be permitted to + * access the virtual addresses mapped by this PTE. Otherwise, only + * code running at PL 1 or above will be allowed to do so. + * + * This bit should not be modified by the client while PRESENT is set, as + * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_USER (__HV_PTE_ONE << HV_PTE_INDEX_USER) + +/** Has this mapping been accessed? + * + * This bit is set by the hypervisor when the memory described by the + * translation is accessed for the first time. It is never cleared by + * the hypervisor, but may be cleared by the client. After the bit + * has been cleared, subsequent references are not guaranteed to set + * it again until the translation has been flushed from the TLB. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_ACCESSED (__HV_PTE_ONE << HV_PTE_INDEX_ACCESSED) + +/** Is this mapping dirty? + * + * This bit is set by the hypervisor when the memory described by the + * translation is written for the first time. It is never cleared by + * the hypervisor, but may be cleared by the client. After the bit + * has been cleared, subsequent references are not guaranteed to set + * it again until the translation has been flushed from the TLB. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_DIRTY (__HV_PTE_ONE << HV_PTE_INDEX_DIRTY) + +/** Migrating bit in PTE. + * + * This bit is guaranteed not to be inspected or modified by the + * hypervisor. The name is indicative of the suggested use by the client + * to tag pages whose L3 cache is being migrated from one cpu to another. + */ +#define HV_PTE_MIGRATING (__HV_PTE_ONE << HV_PTE_INDEX_MIGRATING) + +/** Client-private bit in PTE. + * + * This bit is guaranteed not to be inspected or modified by the + * hypervisor. + */ +#define HV_PTE_CLIENT0 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT0) + +/** Client-private bit in PTE. + * + * This bit is guaranteed not to be inspected or modified by the + * hypervisor. + */ +#define HV_PTE_CLIENT1 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT1) + +/** Non-coherent (NC) bit in PTE. + * + * If this bit is set, the mapping that is set up will be non-coherent + * (also known as non-inclusive). This means that changes to the L3 + * cache will not cause a local copy to be invalidated. It is generally + * recommended only for read-only mappings. + * + * In level-1 PTEs, if the Page bit is clear, this bit determines how the + * level-2 page table is accessed. + */ +#define HV_PTE_NC (__HV_PTE_ONE << HV_PTE_INDEX_NC) + +/** Is this page prevented from filling the L1$? + * + * If this bit is set, the page described by the PTE will not be cached + * the local cpu's L1 cache. + * + * If CHIP_HAS_NC_AND_NOALLOC_BITS() is not true in <chip.h> for this chip, + * it is illegal to use this attribute, and may cause client termination. + * + * In level-1 PTEs, if the Page bit is clear, this bit + * determines how the level-2 page table is accessed. + */ +#define HV_PTE_NO_ALLOC_L1 (__HV_PTE_ONE << HV_PTE_INDEX_NO_ALLOC_L1) + +/** Is this page prevented from filling the L2$? + * + * If this bit is set, the page described by the PTE will not be cached + * the local cpu's L2 cache. + * + * If CHIP_HAS_NC_AND_NOALLOC_BITS() is not true in <chip.h> for this chip, + * it is illegal to use this attribute, and may cause client termination. + * + * In level-1 PTEs, if the Page bit is clear, this bit determines how the + * level-2 page table is accessed. + */ +#define HV_PTE_NO_ALLOC_L2 (__HV_PTE_ONE << HV_PTE_INDEX_NO_ALLOC_L2) + +/** Is this a priority page? + * + * If this bit is set, the page described by the PTE will be given + * priority in the cache. Normally this translates into allowing the + * page to use only the "red" half of the cache. The client may wish to + * then use the hv_set_caching service to specify that other pages which + * alias this page will use only the "black" half of the cache. + * + * If the Cached Priority bit is clear, the hypervisor uses the + * current hv_set_caching() value to choose how to cache the page. + * + * It is illegal to set the Cached Priority bit if the Non-Cached bit + * is set and the Cached Remotely bit is clear, i.e. if requests to + * the page map directly to memory. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_CACHED_PRIORITY (__HV_PTE_ONE << \ + HV_PTE_INDEX_CACHED_PRIORITY) + +/** Is this a readable mapping? + * + * If this bit is set, code will be permitted to read from (e.g., + * issue load instructions against) the virtual addresses mapped by + * this PTE. + * + * It is illegal for this bit to be clear if the Writable bit is set. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_READABLE (__HV_PTE_ONE << HV_PTE_INDEX_READABLE) + +/** Is this a writable mapping? + * + * If this bit is set, code will be permitted to write to (e.g., issue + * store instructions against) the virtual addresses mapped by this + * PTE. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_WRITABLE (__HV_PTE_ONE << HV_PTE_INDEX_WRITABLE) + +/** Is this an executable mapping? + * + * If this bit is set, code will be permitted to execute from + * (e.g., jump to) the virtual addresses mapped by this PTE. + * + * This bit applies to any processor on the tile, if there are more + * than one. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_EXECUTABLE (__HV_PTE_ONE << HV_PTE_INDEX_EXECUTABLE) + +/** The width of a LOTAR's x or y bitfield. */ +#define HV_LOTAR_WIDTH 11 + +/** Converts an x,y pair to a LOTAR value. */ +#define HV_XY_TO_LOTAR(x, y) ((HV_LOTAR)(((x) << HV_LOTAR_WIDTH) | (y))) + +/** Extracts the X component of a lotar. */ +#define HV_LOTAR_X(lotar) ((lotar) >> HV_LOTAR_WIDTH) + +/** Extracts the Y component of a lotar. */ +#define HV_LOTAR_Y(lotar) ((lotar) & ((1 << HV_LOTAR_WIDTH) - 1)) + +#ifndef __ASSEMBLER__ + +/** Define accessor functions for a PTE bit. */ +#define _HV_BIT(name, bit) \ +static __inline int \ +hv_pte_get_##name(HV_PTE pte) \ +{ \ + return (pte.val >> HV_PTE_INDEX_##bit) & 1; \ +} \ + \ +static __inline HV_PTE \ +hv_pte_set_##name(HV_PTE pte) \ +{ \ + pte.val |= 1ULL << HV_PTE_INDEX_##bit; \ + return pte; \ +} \ + \ +static __inline HV_PTE \ +hv_pte_clear_##name(HV_PTE pte) \ +{ \ + pte.val &= ~(1ULL << HV_PTE_INDEX_##bit); \ + return pte; \ +} + +/* Generate accessors to get, set, and clear various PTE flags. + */ +_HV_BIT(present, PRESENT) +_HV_BIT(page, PAGE) +_HV_BIT(client0, CLIENT0) +_HV_BIT(client1, CLIENT1) +_HV_BIT(migrating, MIGRATING) +_HV_BIT(nc, NC) +_HV_BIT(readable, READABLE) +_HV_BIT(writable, WRITABLE) +_HV_BIT(executable, EXECUTABLE) +_HV_BIT(accessed, ACCESSED) +_HV_BIT(dirty, DIRTY) +_HV_BIT(no_alloc_l1, NO_ALLOC_L1) +_HV_BIT(no_alloc_l2, NO_ALLOC_L2) +_HV_BIT(cached_priority, CACHED_PRIORITY) +_HV_BIT(global, GLOBAL) +_HV_BIT(user, USER) + +#undef _HV_BIT + +/** Get the page mode from the PTE. + * + * This field generally determines whether and how accesses to the page + * are cached; the HV_PTE_MODE_xxx symbols define the legal values for the + * page mode. The NC, NO_ALLOC_L1, and NO_ALLOC_L2 bits modify this + * general policy. + */ +static __inline unsigned int +hv_pte_get_mode(const HV_PTE pte) +{ + return (((__hv32) pte.val) >> HV_PTE_INDEX_MODE) & + ((1 << HV_PTE_MODE_BITS) - 1); +} + +/** Set the page mode into a PTE. See hv_pte_get_mode. */ +static __inline HV_PTE +hv_pte_set_mode(HV_PTE pte, unsigned int val) +{ + pte.val &= ~(((1ULL << HV_PTE_MODE_BITS) - 1) << HV_PTE_INDEX_MODE); + pte.val |= val << HV_PTE_INDEX_MODE; + return pte; +} + +/** Get the page frame number from the PTE. + * + * This field contains the upper bits of the CPA (client physical + * address) of the target page; the complete CPA is this field with + * HV_LOG2_PAGE_SIZE_SMALL zero bits appended to it. + * + * For PTEs in a level-1 page table where the Page bit is set, the + * CPA must be aligned modulo the large page size. + */ +static __inline unsigned int +hv_pte_get_pfn(const HV_PTE pte) +{ + return pte.val >> HV_PTE_INDEX_PFN; +} + + +/** Set the page frame number into a PTE. See hv_pte_get_pfn. */ +static __inline HV_PTE +hv_pte_set_pfn(HV_PTE pte, unsigned int val) +{ + /* + * Note that the use of "PTFN" in the next line is intentional; we + * don't want any garbage lower bits left in that field. + */ + pte.val &= ~(((1ULL << HV_PTE_PTFN_BITS) - 1) << HV_PTE_INDEX_PTFN); + pte.val |= (__hv64) val << HV_PTE_INDEX_PFN; + return pte; +} + +/** Get the page table frame number from the PTE. + * + * This field contains the upper bits of the CPA (client physical + * address) of the target page table; the complete CPA is this field with + * with HV_PAGE_TABLE_ALIGN zero bits appended to it. + * + * For PTEs in a level-1 page table when the Page bit is not set, the + * CPA must be aligned modulo the sticter of HV_PAGE_TABLE_ALIGN and + * the level-2 page table size. + */ +static __inline unsigned long +hv_pte_get_ptfn(const HV_PTE pte) +{ + return pte.val >> HV_PTE_INDEX_PTFN; +} + + +/** Set the page table frame number into a PTE. See hv_pte_get_ptfn. */ +static __inline HV_PTE +hv_pte_set_ptfn(HV_PTE pte, unsigned long val) +{ + pte.val &= ~(((1ULL << HV_PTE_PTFN_BITS)-1) << HV_PTE_INDEX_PTFN); + pte.val |= (__hv64) val << HV_PTE_INDEX_PTFN; + return pte; +} + + +/** Get the remote tile caching this page. + * + * Specifies the remote tile which is providing the L3 cache for this page. + * + * This field is ignored unless the page mode is HV_PTE_MODE_CACHE_TILE_L3. + * + * In level-1 PTEs, if the Page bit is clear, this field determines how the + * level-2 page table is accessed. + */ +static __inline unsigned int +hv_pte_get_lotar(const HV_PTE pte) +{ + unsigned int lotar = ((__hv32) pte.val) >> HV_PTE_INDEX_LOTAR; + + return HV_XY_TO_LOTAR( (lotar >> (HV_PTE_LOTAR_BITS / 2)), + (lotar & ((1 << (HV_PTE_LOTAR_BITS / 2)) - 1)) ); +} + + +/** Set the remote tile caching a page into a PTE. See hv_pte_get_lotar. */ +static __inline HV_PTE +hv_pte_set_lotar(HV_PTE pte, unsigned int val) +{ + unsigned int x = HV_LOTAR_X(val); + unsigned int y = HV_LOTAR_Y(val); + + pte.val &= ~(((1ULL << HV_PTE_LOTAR_BITS)-1) << HV_PTE_INDEX_LOTAR); + pte.val |= (x << (HV_PTE_INDEX_LOTAR + HV_PTE_LOTAR_BITS / 2)) | + (y << HV_PTE_INDEX_LOTAR); + return pte; +} + +#endif /* !__ASSEMBLER__ */ + +/** Converts a client physical address to a pfn. */ +#define HV_CPA_TO_PFN(p) ((p) >> HV_LOG2_PAGE_SIZE_SMALL) + +/** Converts a pfn to a client physical address. */ +#define HV_PFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_SIZE_SMALL) + +/** Converts a client physical address to a ptfn. */ +#define HV_CPA_TO_PTFN(p) ((p) >> HV_LOG2_PAGE_TABLE_ALIGN) + +/** Converts a ptfn to a client physical address. */ +#define HV_PTFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_TABLE_ALIGN) + +/** Converts a ptfn to a pfn. */ +#define HV_PTFN_TO_PFN(p) \ + ((p) >> (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN)) + +/** Converts a pfn to a ptfn. */ +#define HV_PFN_TO_PTFN(p) \ + ((p) << (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN)) + +#if CHIP_VA_WIDTH() > 32 + +/** Log number of HV_PTE entries in L0 page table */ +#define HV_LOG2_L0_ENTRIES (CHIP_VA_WIDTH() - HV_LOG2_L1_SPAN) + +/** Number of HV_PTE entries in L0 page table */ +#define HV_L0_ENTRIES (1 << HV_LOG2_L0_ENTRIES) + +/** Log size of L0 page table in bytes */ +#define HV_LOG2_L0_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L0_ENTRIES) + +/** Size of L0 page table in bytes */ +#define HV_L0_SIZE (1 << HV_LOG2_L0_SIZE) + +#ifdef __ASSEMBLER__ + +/** Index in L0 for a specific VA */ +#define HV_L0_INDEX(va) \ + (((va) >> HV_LOG2_L1_SPAN) & (HV_L0_ENTRIES - 1)) + +#else + +/** Index in L1 for a specific VA */ +#define HV_L0_INDEX(va) \ + (((HV_VirtAddr)(va) >> HV_LOG2_L1_SPAN) & (HV_L0_ENTRIES - 1)) + +#endif + +#endif /* CHIP_VA_WIDTH() > 32 */ + +/** Log number of HV_PTE entries in L1 page table */ +#define HV_LOG2_L1_ENTRIES (HV_LOG2_L1_SPAN - HV_LOG2_PAGE_SIZE_LARGE) + +/** Number of HV_PTE entries in L1 page table */ +#define HV_L1_ENTRIES (1 << HV_LOG2_L1_ENTRIES) + +/** Log size of L1 page table in bytes */ +#define HV_LOG2_L1_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L1_ENTRIES) + +/** Size of L1 page table in bytes */ +#define HV_L1_SIZE (1 << HV_LOG2_L1_SIZE) + +/** Log number of HV_PTE entries in level-2 page table */ +#define HV_LOG2_L2_ENTRIES (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL) + +/** Number of HV_PTE entries in level-2 page table */ +#define HV_L2_ENTRIES (1 << HV_LOG2_L2_ENTRIES) + +/** Log size of level-2 page table in bytes */ +#define HV_LOG2_L2_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L2_ENTRIES) + +/** Size of level-2 page table in bytes */ +#define HV_L2_SIZE (1 << HV_LOG2_L2_SIZE) + +#ifdef __ASSEMBLER__ + +#if CHIP_VA_WIDTH() > 32 + +/** Index in L1 for a specific VA */ +#define HV_L1_INDEX(va) \ + (((va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1)) + +#else /* CHIP_VA_WIDTH() > 32 */ + +/** Index in L1 for a specific VA */ +#define HV_L1_INDEX(va) \ + (((va) >> HV_LOG2_PAGE_SIZE_LARGE)) + +#endif /* CHIP_VA_WIDTH() > 32 */ + +/** Index in level-2 page table for a specific VA */ +#define HV_L2_INDEX(va) \ + (((va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1)) + +#else /* __ASSEMBLER __ */ + +#if CHIP_VA_WIDTH() > 32 + +/** Index in L1 for a specific VA */ +#define HV_L1_INDEX(va) \ + (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1)) + +#else /* CHIP_VA_WIDTH() > 32 */ + +/** Index in L1 for a specific VA */ +#define HV_L1_INDEX(va) \ + (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE)) + +#endif /* CHIP_VA_WIDTH() > 32 */ + +/** Index in level-2 page table for a specific VA */ +#define HV_L2_INDEX(va) \ + (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1)) + +#endif /* __ASSEMBLER __ */ + +#endif /* _TILE_HV_H */ diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h new file mode 100644 index 00000000..e1591bff --- /dev/null +++ b/arch/tile/include/hv/netio_errors.h @@ -0,0 +1,122 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Error codes returned from NetIO routines. + */ + +#ifndef __NETIO_ERRORS_H__ +#define __NETIO_ERRORS_H__ + +/** + * @addtogroup error + * + * @brief The error codes returned by NetIO functions. + * + * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and + * a negative value if an error occurs. + * + * In cases where a NetIO function failed due to a error reported by + * system libraries, the error code will be the negation of the + * system errno at the time of failure. The @ref netio_strerror() + * function will deliver error strings for both NetIO and system error + * codes. + * + * @{ + */ + +/** The set of all NetIO errors. */ +typedef enum +{ + /** Operation successfully completed. */ + NETIO_NO_ERROR = 0, + + /** A packet was successfully retrieved from an input queue. */ + NETIO_PKT = 0, + + /** Largest NetIO error number. */ + NETIO_ERR_MAX = -701, + + /** The tile is not registered with the IPP. */ + NETIO_NOT_REGISTERED = -701, + + /** No packet was available to retrieve from the input queue. */ + NETIO_NOPKT = -702, + + /** The requested function is not implemented. */ + NETIO_NOT_IMPLEMENTED = -703, + + /** On a registration operation, the target queue already has the maximum + * number of tiles registered for it, and no more may be added. On a + * packet send operation, the output queue is full and nothing more can + * be queued until some of the queued packets are actually transmitted. */ + NETIO_QUEUE_FULL = -704, + + /** The calling process or thread is not bound to exactly one CPU. */ + NETIO_BAD_AFFINITY = -705, + + /** Cannot allocate memory on requested controllers. */ + NETIO_CANNOT_HOME = -706, + + /** On a registration operation, the IPP specified is not configured + * to support the options requested; for instance, the application + * wants a specific type of tagged headers which the configured IPP + * doesn't support. Or, the supplied configuration information is + * not self-consistent, or is out of range; for instance, specifying + * both NETIO_RECV and NETIO_NO_RECV, or asking for more than + * NETIO_MAX_SEND_BUFFERS to be preallocated. On a VLAN or bucket + * configure operation, the number of items, or the base item, was + * out of range. + */ + NETIO_BAD_CONFIG = -707, + + /** Too many tiles have registered to transmit packets. */ + NETIO_TOOMANY_XMIT = -708, + + /** Packet transmission was attempted on a queue which was registered + with transmit disabled. */ + NETIO_UNREG_XMIT = -709, + + /** This tile is already registered with the IPP. */ + NETIO_ALREADY_REGISTERED = -710, + + /** The Ethernet link is down. The application should try again later. */ + NETIO_LINK_DOWN = -711, + + /** An invalid memory buffer has been specified. This may be an unmapped + * virtual address, or one which does not meet alignment requirements. + * For netio_input_register(), this error may be returned when multiple + * processes specify different memory regions to be used for NetIO + * buffers. That can happen if these processes specify explicit memory + * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init() + * has not been called by a common ancestor of the processes. + */ + NETIO_FAULT = -712, + + /** Cannot combine user-managed shared memory and cache coherence. */ + NETIO_BAD_CACHE_CONFIG = -713, + + /** Smallest NetIO error number. */ + NETIO_ERR_MIN = -713, + +#ifndef __DOXYGEN__ + /** Used internally to mean that no response is needed; never returned to + * an application. */ + NETIO_NO_RESPONSE = 1 +#endif +} netio_error_t; + +/** @} */ + +#endif /* __NETIO_ERRORS_H__ */ diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h new file mode 100644 index 00000000..8d20972a --- /dev/null +++ b/arch/tile/include/hv/netio_intf.h @@ -0,0 +1,2975 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * NetIO interface structures and macros. + */ + +#ifndef __NETIO_INTF_H__ +#define __NETIO_INTF_H__ + +#include <hv/netio_errors.h> + +#ifdef __KERNEL__ +#include <linux/types.h> +#else +#include <stdint.h> +#endif + +#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) +#include <assert.h> +#define netio_assert assert /**< Enable assertions from macros */ +#else +#define netio_assert(...) ((void)(0)) /**< Disable assertions from macros */ +#endif + +/* + * If none of these symbols are defined, we're building libnetio in an + * environment where we have pthreads, so we'll enable locking. + */ +#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \ + !defined(__NEWLIB__) +#define _NETIO_PTHREAD /**< Include a mutex in netio_queue_t below */ + +/* + * If NETIO_UNLOCKED is defined, we don't do use per-cpu locks on + * per-packet NetIO operations. We still do pthread locking on things + * like netio_input_register, though. This is used for building + * libnetio_unlocked. + */ +#ifndef NETIO_UNLOCKED + +/* Avoid PLT overhead by using our own inlined per-cpu lock. */ +#include <sched.h> +typedef int _netio_percpu_mutex_t; + +static __inline int +_netio_percpu_mutex_init(_netio_percpu_mutex_t* lock) +{ + *lock = 0; + return 0; +} + +static __inline int +_netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock) +{ + while (__builtin_expect(__insn_tns(lock), 0)) + sched_yield(); + return 0; +} + +static __inline int +_netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock) +{ + *lock = 0; + return 0; +} + +#else /* NETIO_UNLOCKED */ + +/* Don't do any locking for per-packet NetIO operations. */ +typedef int _netio_percpu_mutex_t; +#define _netio_percpu_mutex_init(L) +#define _netio_percpu_mutex_lock(L) +#define _netio_percpu_mutex_unlock(L) + +#endif /* NETIO_UNLOCKED */ +#endif /* !__HV__, !__BOGUX, !__KERNEL__, !__NEWLIB__ */ + +/** How many tiles can register for a given queue. + * @ingroup setup */ +#define NETIO_MAX_TILES_PER_QUEUE 64 + + +/** Largest permissible queue identifier. + * @ingroup setup */ +#define NETIO_MAX_QUEUE_ID 255 + + +#ifndef __DOXYGEN__ + +/* Metadata packet checksum/ethertype flags. */ + +/** The L4 checksum has not been calculated. */ +#define _NETIO_PKT_NO_L4_CSUM_SHIFT 0 +#define _NETIO_PKT_NO_L4_CSUM_RMASK 1 +#define _NETIO_PKT_NO_L4_CSUM_MASK \ + (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT) + +/** The L3 checksum has not been calculated. */ +#define _NETIO_PKT_NO_L3_CSUM_SHIFT 1 +#define _NETIO_PKT_NO_L3_CSUM_RMASK 1 +#define _NETIO_PKT_NO_L3_CSUM_MASK \ + (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT) + +/** The L3 checksum is incorrect (or perhaps has not been calculated). */ +#define _NETIO_PKT_BAD_L3_CSUM_SHIFT 2 +#define _NETIO_PKT_BAD_L3_CSUM_RMASK 1 +#define _NETIO_PKT_BAD_L3_CSUM_MASK \ + (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT) + +/** The Ethernet packet type is unrecognized. */ +#define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT 3 +#define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK 1 +#define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \ + (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \ + _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT) + +/* Metadata packet type flags. */ + +/** Where the packet type bits are; this field is the index into + * _netio_pkt_info. */ +#define _NETIO_PKT_TYPE_SHIFT 4 +#define _NETIO_PKT_TYPE_RMASK 0x3F + +/** How many VLAN tags the packet has, and, if we have two, which one we + * actually grouped on. A VLAN within a proprietary (Marvell or Broadcom) + * tag is counted here. */ +#define _NETIO_PKT_VLAN_SHIFT 4 +#define _NETIO_PKT_VLAN_RMASK 0x3 +#define _NETIO_PKT_VLAN_MASK \ + (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT) +#define _NETIO_PKT_VLAN_NONE 0 /* No VLAN tag. */ +#define _NETIO_PKT_VLAN_ONE 1 /* One VLAN tag. */ +#define _NETIO_PKT_VLAN_TWO_OUTER 2 /* Two VLAN tags, outer one used. */ +#define _NETIO_PKT_VLAN_TWO_INNER 3 /* Two VLAN tags, inner one used. */ + +/** Which proprietary tags the packet has. */ +#define _NETIO_PKT_TAG_SHIFT 6 +#define _NETIO_PKT_TAG_RMASK 0x3 +#define _NETIO_PKT_TAG_MASK \ + (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT) +#define _NETIO_PKT_TAG_NONE 0 /* No proprietary tags. */ +#define _NETIO_PKT_TAG_MRVL 1 /* Marvell HyperG.Stack tags. */ +#define _NETIO_PKT_TAG_MRVL_EXT 2 /* HyperG.Stack extended tags. */ +#define _NETIO_PKT_TAG_BRCM 3 /* Broadcom HiGig tags. */ + +/** Whether a packet has an LLC + SNAP header. */ +#define _NETIO_PKT_SNAP_SHIFT 8 +#define _NETIO_PKT_SNAP_RMASK 0x1 +#define _NETIO_PKT_SNAP_MASK \ + (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT) + +/* NOTE: Bits 9 and 10 are unused. */ + +/** Length of any custom data before the L2 header, in words. */ +#define _NETIO_PKT_CUSTOM_LEN_SHIFT 11 +#define _NETIO_PKT_CUSTOM_LEN_RMASK 0x1F +#define _NETIO_PKT_CUSTOM_LEN_MASK \ + (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT) + +/** The L4 checksum is incorrect (or perhaps has not been calculated). */ +#define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16 +#define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1 +#define _NETIO_PKT_BAD_L4_CSUM_MASK \ + (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT) + +/** Length of the L2 header, in words. */ +#define _NETIO_PKT_L2_LEN_SHIFT 17 +#define _NETIO_PKT_L2_LEN_RMASK 0x1F +#define _NETIO_PKT_L2_LEN_MASK \ + (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT) + + +/* Flags in minimal packet metadata. */ + +/** We need an eDMA checksum on this packet. */ +#define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT 0 +#define _NETIO_PKT_NEED_EDMA_CSUM_RMASK 1 +#define _NETIO_PKT_NEED_EDMA_CSUM_MASK \ + (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT) + +/* Data within the packet information table. */ + +/* Note that, for efficiency, code which uses these fields assumes that none + * of the shift values below are zero. See uses below for an explanation. */ + +/** Offset within the L2 header of the innermost ethertype (in halfwords). */ +#define _NETIO_PKT_INFO_ETYPE_SHIFT 6 +#define _NETIO_PKT_INFO_ETYPE_RMASK 0x1F + +/** Offset within the L2 header of the VLAN tag (in halfwords). */ +#define _NETIO_PKT_INFO_VLAN_SHIFT 11 +#define _NETIO_PKT_INFO_VLAN_RMASK 0x1F + +#endif + + +/** The size of a memory buffer representing a small packet. + * @ingroup egress */ +#define SMALL_PACKET_SIZE 256 + +/** The size of a memory buffer representing a large packet. + * @ingroup egress */ +#define LARGE_PACKET_SIZE 2048 + +/** The size of a memory buffer representing a jumbo packet. + * @ingroup egress */ +#define JUMBO_PACKET_SIZE (12 * 1024) + + +/* Common ethertypes. + * @ingroup ingress */ +/** @{ */ +/** The ethertype of IPv4. */ +#define ETHERTYPE_IPv4 (0x0800) +/** The ethertype of ARP. */ +#define ETHERTYPE_ARP (0x0806) +/** The ethertype of VLANs. */ +#define ETHERTYPE_VLAN (0x8100) +/** The ethertype of a Q-in-Q header. */ +#define ETHERTYPE_Q_IN_Q (0x9100) +/** The ethertype of IPv6. */ +#define ETHERTYPE_IPv6 (0x86DD) +/** The ethertype of MPLS. */ +#define ETHERTYPE_MPLS (0x8847) +/** @} */ + + +/** The possible return values of NETIO_PKT_STATUS. + * @ingroup ingress + */ +typedef enum +{ + /** No problems were detected with this packet. */ + NETIO_PKT_STATUS_OK, + /** The packet is undersized; this is expected behavior if the packet's + * ethertype is unrecognized, but otherwise the packet is likely corrupt. */ + NETIO_PKT_STATUS_UNDERSIZE, + /** The packet is oversized and some trailing bytes have been discarded. + This is expected behavior for short packets, since it's impossible to + precisely determine the amount of padding which may have been added to + them to make them meet the minimum Ethernet packet size. */ + NETIO_PKT_STATUS_OVERSIZE, + /** The packet was judged to be corrupt by hardware (for instance, it had + a bad CRC, or part of it was discarded due to lack of buffer space in + the I/O shim) and should be discarded. */ + NETIO_PKT_STATUS_BAD +} netio_pkt_status_t; + + +/** Log2 of how many buckets we have. */ +#define NETIO_LOG2_NUM_BUCKETS (10) + +/** How many buckets we have. + * @ingroup ingress */ +#define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS) + + +/** + * @brief A group-to-bucket identifier. + * + * @ingroup setup + * + * This tells us what to do with a given group. + */ +typedef union { + /** The header broken down into bits. */ + struct { + /** Whether we should balance on L4, if available */ + unsigned int __balance_on_l4:1; + /** Whether we should balance on L3, if available */ + unsigned int __balance_on_l3:1; + /** Whether we should balance on L2, if available */ + unsigned int __balance_on_l2:1; + /** Reserved for future use */ + unsigned int __reserved:1; + /** The base bucket to use to send traffic */ + unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS; + /** The mask to apply to the balancing value. This must be one less + * than a power of two, e.g. 0x3 or 0xFF. + */ + unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS; + /** Pad to 32 bits */ + unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS); + } bits; + /** To send out the IDN. */ + unsigned int word; +} +netio_group_t; + + +/** + * @brief A VLAN-to-bucket identifier. + * + * @ingroup setup + * + * This tells us what to do with a given VLAN. + */ +typedef netio_group_t netio_vlan_t; + + +/** + * A bucket-to-queue mapping. + * @ingroup setup + */ +typedef unsigned char netio_bucket_t; + + +/** + * A packet size can always fit in a netio_size_t. + * @ingroup setup + */ +typedef unsigned int netio_size_t; + + +/** + * @brief Ethernet standard (ingress) packet metadata. + * + * @ingroup ingress + * + * This is additional data associated with each packet. + * This structure is opaque and accessed through the @ref ingress. + * + * Also, the buffer population operation currently assumes that standard + * metadata is at least as large as minimal metadata, and will need to be + * modified if that is no longer the case. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[24]; +#else + /** The overall ordinal of the packet */ + unsigned int __packet_ordinal; + /** The ordinal of the packet within the group */ + unsigned int __group_ordinal; + /** The best flow hash IPP could compute. */ + unsigned int __flow_hash; + /** Flags pertaining to checksum calculation, packet type, etc. */ + unsigned int __flags; + /** The first word of "user data". */ + unsigned int __user_data_0; + /** The second word of "user data". */ + unsigned int __user_data_1; +#endif +} +netio_pkt_metadata_t; + + +/** To ensure that the L3 header is aligned mod 4, the L2 header should be + * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes + * long. The standard way to do this is to simply add 2 bytes of padding + * before the L2 header. + */ +#define NETIO_PACKET_PADDING 2 + + + +/** + * @brief Ethernet minimal (egress) packet metadata. + * + * @ingroup egress + * + * This structure represents information about packets which have + * been processed by @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer(). This structure is opaque + * and accessed through the @ref egress. + * + * @internal This structure is actually copied into the memory used by + * standard metadata, which is assumed to be large enough. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[14]; +#else + /** The offset of the L2 header from the start of the packet data. */ + unsigned short l2_offset; + /** The offset of the L3 header from the start of the packet data. */ + unsigned short l3_offset; + /** Where to write the checksum. */ + unsigned char csum_location; + /** Where to start checksumming from. */ + unsigned char csum_start; + /** Flags pertaining to checksum calculation etc. */ + unsigned short flags; + /** The L2 length of the packet. */ + unsigned short l2_length; + /** The checksum with which to seed the checksum generator. */ + unsigned short csum_seed; + /** How much to checksum. */ + unsigned short csum_length; +#endif +} +netio_pkt_minimal_metadata_t; + + +#ifndef __DOXYGEN__ + +/** + * @brief An I/O notification header. + * + * This is the first word of data received from an I/O shim in a notification + * packet. It contains framing and status information. + */ +typedef union +{ + unsigned int word; /**< The whole word. */ + /** The various fields. */ + struct + { + unsigned int __channel:7; /**< Resource channel. */ + unsigned int __type:4; /**< Type. */ + unsigned int __ack:1; /**< Whether an acknowledgement is needed. */ + unsigned int __reserved:1; /**< Reserved. */ + unsigned int __protocol:1; /**< A protocol-specific word is added. */ + unsigned int __status:2; /**< Status of the transfer. */ + unsigned int __framing:2; /**< Framing of the transfer. */ + unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */ + } bits; +} +__netio_pkt_notif_t; + + +/** + * Returns the base address of the packet. + */ +#define _NETIO_PKT_HANDLE_BASE(p) \ + ((unsigned char*)((p).word & 0xFFFFFFC0)) + +/** + * Returns the base address of the packet. + */ +#define _NETIO_PKT_BASE(p) \ + _NETIO_PKT_HANDLE_BASE(p->__packet) + +/** + * @brief An I/O notification packet (second word) + * + * This is the second word of data received from an I/O shim in a notification + * packet. This is the virtual address of the packet buffer, plus some flag + * bits. (The virtual address of the packet is always 256-byte aligned so we + * have room for 8 bits' worth of flags in the low 8 bits.) + * + * @internal + * NOTE: The low two bits must contain "__queue", so the "packet size" + * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly. + * + * If __addr or __offset are moved, _NETIO_PKT_BASE + * (defined right below this) must be changed. + */ +typedef union +{ + unsigned int word; /**< The whole word. */ + /** The various fields. */ + struct + { + /** Which queue the packet will be returned to once it is sent back to + the IPP. This is one of the SIZE_xxx values. */ + unsigned int __queue:2; + + /** The IPP handle of the sending IPP. */ + unsigned int __ipp_handle:2; + + /** Reserved for future use. */ + unsigned int __reserved:1; + + /** If 1, this packet has minimal (egress) metadata; otherwise, it + has standard (ingress) metadata. */ + unsigned int __minimal:1; + + /** Offset of the metadata within the packet. This value is multiplied + * by 64 and added to the base packet address to get the metadata + * address. Note that this field is aligned within the word such that + * you can easily extract the metadata address with a 26-bit mask. */ + unsigned int __offset:2; + + /** The top 24 bits of the packet's virtual address. */ + unsigned int __addr:24; + } bits; +} +__netio_pkt_handle_t; + +#endif /* !__DOXYGEN__ */ + + +/** + * @brief A handle for an I/O packet's storage. + * @ingroup ingress + * + * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its + * packet metadata removed. It is a much smaller type that exists to + * facilitate applications where the full ::netio_pkt_t type is too + * large, such as those that cache enormous numbers of packets or wish + * to transmit packet descriptors over the UDN. + * + * Because there is no metadata, most ::netio_pkt_t operations cannot be + * performed on a netio_pkt_handle_t. It supports only + * netio_free_handle() (to free the buffer) and + * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents). + * The application must acquire any additional metadata it wants from the + * original ::netio_pkt_t and record it separately. + * + * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling + * NETIO_PKT_HANDLE(). An invalid handle (analogous to NULL) can be + * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can + * be tested for validity with NETIO_PKT_HANDLE_IS_VALID(). + */ +typedef struct +{ + unsigned int word; /**< Opaque bits. */ +} netio_pkt_handle_t; + +/** + * @brief A packet descriptor. + * + * @ingroup ingress + * @ingroup egress + * + * This data structure represents a packet. The structure is manipulated + * through the @ref ingress and the @ref egress. + * + * While the contents of a netio_pkt_t are opaque, the structure itself is + * portable. This means that it may be shared between all tiles which have + * done a netio_input_register() call for the interface on which the pkt_t + * was initially received (via netio_get_packet()) or retrieved (via + * netio_get_buffer()). The contents of a netio_pkt_t can be transmitted to + * another tile via shared memory, or via a UDN message, or by other means. + * The destination tile may then use the pkt_t as if it had originally been + * received locally; it may read or write the packet's data, read its + * metadata, free the packet, send the packet, transfer the netio_pkt_t to + * yet another tile, and so forth. + * + * Once a netio_pkt_t has been transferred to a second tile, the first tile + * should not reference the original copy; in particular, if more than one + * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will + * become corrupted. Note also that each tile which reads or modifies + * packet data must obey the memory coherency rules outlined in @ref input. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[32]; +#else + /** For an ingress packet (one with standard metadata), this is the + * notification header we got from the I/O shim. For an egress packet + * (one with minimal metadata), this word is zero if the packet has not + * been populated, and nonzero if it has. */ + __netio_pkt_notif_t __notif_header; + + /** Virtual address of the packet buffer, plus state flags. */ + __netio_pkt_handle_t __packet; + + /** Metadata associated with the packet. */ + netio_pkt_metadata_t __metadata; +#endif +} +netio_pkt_t; + + +#ifndef __DOXYGEN__ + +#define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header) +#define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle) +#define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue) +#define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header) +#define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle) +#define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal) +#define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue) +#define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags) + +/* Packet information table, used by the attribute access functions below. */ +extern const uint16_t _netio_pkt_info[]; + +#endif /* __DOXYGEN__ */ + + +#ifndef __DOXYGEN__ +/* These macros are deprecated and will disappear in a future MDE release. */ +#define NETIO_PKT_GOOD_CHECKSUM(pkt) \ + NETIO_PKT_L4_CSUM_CORRECT(pkt) +#define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \ + NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt) +#endif /* __DOXYGEN__ */ + + +/* Packet attribute access functions. */ + +/** Return a pointer to the metadata for a packet. + * @ingroup ingress + * + * Calling this function once and passing the result to other retrieval + * functions with a "_M" suffix usually improves performance. This + * function must be called on an 'ingress' packet (i.e. one retrieved + * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer have not been called). Use of this + * function on an 'egress' packet will cause an assertion failure. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's standard metadata. + */ +static __inline netio_pkt_metadata_t* +NETIO_PKT_METADATA(netio_pkt_t* pkt) +{ + netio_assert(!pkt->__packet.bits.__minimal); + return &pkt->__metadata; +} + + +/** Return a pointer to the minimal metadata for a packet. + * @ingroup egress + * + * Calling this function once and passing the result to other retrieval + * functions with a "_MM" suffix usually improves performance. This + * function must be called on an 'egress' packet (i.e. one on which + * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer() + * have been called, or one retrieved by @ref netio_get_buffer()). Use of + * this function on an 'ingress' packet will cause an assertion failure. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's standard metadata. + */ +static __inline netio_pkt_minimal_metadata_t* +NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt) +{ + netio_assert(pkt->__packet.bits.__minimal); + return (netio_pkt_minimal_metadata_t*) &pkt->__metadata; +} + + +/** Determine whether a packet has 'minimal' metadata. + * @ingroup pktfuncs + * + * This function will return nonzero if the packet is an 'egress' + * packet (i.e. one on which @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer() have been called, or one + * retrieved by @ref netio_get_buffer()), and zero if the packet + * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(), + * which has not been converted into an 'egress' packet). + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet has minimal metadata. + */ +static __inline unsigned int +NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt) +{ + return pkt->__packet.bits.__minimal; +} + + +/** Return a handle for a packet's storage. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A handle for the packet's storage. + */ +static __inline netio_pkt_handle_t +NETIO_PKT_HANDLE(netio_pkt_t* pkt) +{ + netio_pkt_handle_t h; + h.word = pkt->__packet.word; + return h; +} + + +/** A special reserved value indicating the absence of a packet handle. + * + * @ingroup pktfuncs + */ +#define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 }) + + +/** Test whether a packet handle is valid. + * + * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE + * to indicate no packet at all. This function tests to see if a packet + * handle is a real handle, not this special reserved value. + * + * @ingroup pktfuncs + * + * @param[in] handle Handle on which to operate. + * @return One if the packet handle is valid, else zero. + */ +static __inline unsigned int +NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle) +{ + return handle.word != 0; +} + + + +/** Return a pointer to the start of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] handle Handle on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle) +{ + return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING; +} + + +/** Return the length of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's custom header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + /* + * Note that we effectively need to extract a quantity from the flags word + * which is measured in words, and then turn it into bytes by shifting + * it left by 2. We do this all at once by just shifting right two less + * bits, and shifting the mask up two bits. + */ + return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) & + (_NETIO_PKT_CUSTOM_LEN_RMASK << 2)); +} + + +/** Return the length of the packet, starting with the custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (__NETIO_PKT_NOTIF_HEADER(pkt).bits.__transfer_size - + NETIO_PACKET_PADDING); +} + + +/** Return a pointer to the start of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt)); +} + + +/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + /* + * Note that we effectively need to extract a quantity from the flags word + * which is measured in words, and then turn it into bytes by shifting + * it left by 2. We do this all at once by just shifting right two less + * bits, and shifting the mask up two bits. We then add two bytes. + */ + return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) & + (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2; +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt) - + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda,pkt)); +} + + +/** Return a pointer to the start of the packet's L2 (Ethernet) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_CUSTOM_DATA_M(mda, pkt) + + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt)); +} + + +/** Retrieve the length of the packet, starting with the L3 (generally, + * the IP) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_LENGTH_M(mda, pkt) - + NETIO_PKT_L2_HEADER_LENGTH_M(mda,pkt)); +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup ingress + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_DATA_M(mda, pkt) + + NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt)); +} + + +/** Return the ordinal of the packet. + * @ingroup ingress + * + * Each packet is given an ordinal number when it is delivered by the IPP. + * In the medium term, the ordinal is unique and monotonically increasing, + * being incremented by 1 for each packet; the ordinal of the first packet + * delivered after the IPP starts is zero. (Since the ordinal is of finite + * size, given enough input packets, it will eventually wrap around to zero; + * in the long term, therefore, ordinals are not unique.) The ordinals + * handed out by different IPPs are not disjoint, so two packets from + * different IPPs may have identical ordinals. Packets dropped by the + * IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP packet ordinal. + */ +static __inline unsigned int +NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__packet_ordinal; +} + + +/** Return the per-group ordinal of the packet. + * @ingroup ingress + * + * Each packet is given a per-group ordinal number when it is + * delivered by the IPP. By default, the group is the packet's VLAN, + * although IPP can be recompiled to use different values. In + * the medium term, the ordinal is unique and monotonically + * increasing, being incremented by 1 for each packet; the ordinal of + * the first packet distributed to a particular group is zero. + * (Since the ordinal is of finite size, given enough input packets, + * it will eventually wrap around to zero; in the long term, + * therefore, ordinals are not unique.) The ordinals handed out by + * different IPPs are not disjoint, so two packets from different IPPs + * may have identical ordinals; similarly, packets distributed to + * different groups may have identical ordinals. Packets dropped by + * the IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP, per-group ordinal. + */ +static __inline unsigned int +NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__group_ordinal; +} + + +/** Return the VLAN ID assigned to the packet. + * @ingroup ingress + * + * This value is usually contained within the packet header. + * + * This value will be zero if the packet does not have a VLAN tag, or if + * this value was not extracted from the packet. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's VLAN ID. + */ +static __inline unsigned short +NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + int vl = (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK; + unsigned short* pkt_p; + int index; + unsigned short val; + + if (vl == _NETIO_PKT_VLAN_NONE) + return 0; + + pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); + index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; + + val = pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_VLAN_SHIFT) & + _NETIO_PKT_INFO_VLAN_RMASK]; + +#ifdef __TILECC__ + return (__insn_bytex(val) >> 16) & 0xFFF; +#else + return (__builtin_bswap32(val) >> 16) & 0xFFF; +#endif +} + + +/** Return the ethertype of the packet. + * @ingroup ingress + * + * This value is usually contained within the packet header. + * + * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M() + * returns true, and otherwise, may not be well defined. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's ethertype. + */ +static __inline unsigned short +NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + unsigned short* pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); + int index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; + + unsigned short val = + pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) & + _NETIO_PKT_INFO_ETYPE_RMASK]; + + return __builtin_bswap32(val) >> 16; +} + + +/** Return the flow hash computed on the packet. + * @ingroup ingress + * + * For TCP and UDP packets, this hash is calculated by hashing together + * the "5-tuple" values, specifically the source IP address, destination + * IP address, protocol type, source port and destination port. + * The hash value is intended to be helpful for millions of distinct + * flows. + * + * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is + * derived by hashing together the source and destination IP addresses. + * + * For MPLS-encapsulated packets, the flow hash is derived by hashing + * the first MPLS label. + * + * For all other packets the flow hash is computed from the source + * and destination Ethernet addresses. + * + * The hash is symmetric, meaning it produces the same value if the + * source and destination are swapped. The only exceptions are + * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple + * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 + * (Encap Security Payload), which use only the destination address + * since the source address is not meaningful. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's 32-bit flow hash. + */ +static __inline unsigned int +NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__flow_hash; +} + + +/** Return the first word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first + * word of user data contains the least significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_low()). + * + * See the <em>System Programmer's Guide</em> for details. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's first word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__user_data_0; +} + + +/** Return the second word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second + * word of user data contains the most significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_high()). + * + * See the <em>System Programmer's Guide</em> for details. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's second word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__user_data_1; +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L4 checksum was calculated. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to + * be correct. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & + (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK)); +} + + +/** Determine whether the L3 (IP) checksum was calculated. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L3 (IP) checksum was calculated. +*/ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK); +} + + +/** Determine whether the L3 (IP) checksum was calculated and found to be + * correct. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & + (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK)); +} + + +/** Determine whether the ethertype was recognized and L3 packet data was + * processed. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the ethertype was recognized and L3 packet data was + * processed. + */ +static __inline unsigned int +NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK); +} + + +/** Retrieve the status of a packet and any errors that may have occurred + * during ingress processing (length mismatches, CRC errors, etc.). + * @ingroup ingress + * + * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns zero are always reported as underlength, as there is no a priori + * means to determine their length. Normally, applications should use + * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this + * function. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's status. + */ +static __inline netio_pkt_status_t +NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; +} + + +/** Report whether a packet is bad (i.e., was shorter than expected based on + * its headers, or had a bad CRC). + * @ingroup ingress + * + * Note that this function does not verify L3 or L4 checksums. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet is bad and should be discarded. + */ +static __inline unsigned int +NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return ((NETIO_PKT_STATUS_M(mda, pkt) & 1) && + (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) || + NETIO_PKT_STATUS_M(mda, pkt) == NETIO_PKT_STATUS_BAD)); +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return mmd->l2_length; +} + + +/** Return the length of the L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ + return mmd->l3_offset - mmd->l2_offset; +} + + +/** Return the length of the packet, starting with the L3 (IP) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_LENGTH_MM(mmd, pkt) - + NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt)); +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup egress + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return _NETIO_PKT_BASE(pkt) + mmd->l3_offset; +} + + +/** Return a pointer to the packet's L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return _NETIO_PKT_BASE(pkt) + mmd->l2_offset; +} + + +/** Retrieve the status of a packet and any errors that may have occurred + * during ingress processing (length mismatches, CRC errors, etc.). + * @ingroup ingress + * + * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns zero are always reported as underlength, as there is no a priori + * means to determine their length. Normally, applications should use + * @ref NETIO_PKT_BAD() instead of explicitly checking status with this + * function. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's status. + */ +static __inline netio_pkt_status_t +NETIO_PKT_STATUS(netio_pkt_t* pkt) +{ + netio_assert(!pkt->__packet.bits.__minimal); + + return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; +} + + +/** Report whether a packet is bad (i.e., was shorter than expected based on + * its headers, or had a bad CRC). + * @ingroup ingress + * + * Note that this function does not verify L3 or L4 checksums. + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet is bad and should be discarded. + */ +static __inline unsigned int +NETIO_PKT_BAD(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_BAD_M(mda, pkt); +} + + +/** Return the length of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's custom header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); +} + + +/** Return the length of the packet, starting with the custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt); +} + + +/** Return a pointer to the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_DATA_M(mda, pkt); +} + + +/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt); + } +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_LENGTH_M(mda, pkt); + } +} + + +/** Return a pointer to the packet's L2 (Ethernet) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_DATA_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_DATA_M(mda, pkt); + } +} + + +/** Retrieve the length of the packet, starting with the L3 (generally, the IP) + * header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L3_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_LENGTH_M(mda, pkt); + } +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup pktfuncs + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L3_DATA_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_DATA_M(mda, pkt); + } +} + + +/** Return the ordinal of the packet. + * @ingroup ingress + * + * Each packet is given an ordinal number when it is delivered by the IPP. + * In the medium term, the ordinal is unique and monotonically increasing, + * being incremented by 1 for each packet; the ordinal of the first packet + * delivered after the IPP starts is zero. (Since the ordinal is of finite + * size, given enough input packets, it will eventually wrap around to zero; + * in the long term, therefore, ordinals are not unique.) The ordinals + * handed out by different IPPs are not disjoint, so two packets from + * different IPPs may have identical ordinals. Packets dropped by the + * IPP or by the I/O shim are not assigned ordinals. + * + * + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP packet ordinal. + */ +static __inline unsigned int +NETIO_PKT_ORDINAL(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ORDINAL_M(mda, pkt); +} + + +/** Return the per-group ordinal of the packet. + * @ingroup ingress + * + * Each packet is given a per-group ordinal number when it is + * delivered by the IPP. By default, the group is the packet's VLAN, + * although IPP can be recompiled to use different values. In + * the medium term, the ordinal is unique and monotonically + * increasing, being incremented by 1 for each packet; the ordinal of + * the first packet distributed to a particular group is zero. + * (Since the ordinal is of finite size, given enough input packets, + * it will eventually wrap around to zero; in the long term, + * therefore, ordinals are not unique.) The ordinals handed out by + * different IPPs are not disjoint, so two packets from different IPPs + * may have identical ordinals; similarly, packets distributed to + * different groups may have identical ordinals. Packets dropped by + * the IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP, per-group ordinal. + */ +static __inline unsigned int +NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_GROUP_ORDINAL_M(mda, pkt); +} + + +/** Return the VLAN ID assigned to the packet. + * @ingroup ingress + * + * This is usually also contained within the packet header. If the packet + * does not have a VLAN tag, the VLAN ID returned by this function is zero. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's VLAN ID. + */ +static __inline unsigned short +NETIO_PKT_VLAN_ID(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_VLAN_ID_M(mda, pkt); +} + + +/** Return the ethertype of the packet. + * @ingroup ingress + * + * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns true, and otherwise, may not be well defined. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's ethertype. + */ +static __inline unsigned short +NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ETHERTYPE_M(mda, pkt); +} + + +/** Return the flow hash computed on the packet. + * @ingroup ingress + * + * For TCP and UDP packets, this hash is calculated by hashing together + * the "5-tuple" values, specifically the source IP address, destination + * IP address, protocol type, source port and destination port. + * The hash value is intended to be helpful for millions of distinct + * flows. + * + * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is + * derived by hashing together the source and destination IP addresses. + * + * For MPLS-encapsulated packets, the flow hash is derived by hashing + * the first MPLS label. + * + * For all other packets the flow hash is computed from the source + * and destination Ethernet addresses. + * + * The hash is symmetric, meaning it produces the same value if the + * source and destination are swapped. The only exceptions are + * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple + * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 + * (Encap Security Payload), which use only the destination address + * since the source address is not meaningful. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's 32-bit flow hash. + */ +static __inline unsigned int +NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_FLOW_HASH_M(mda, pkt); +} + + +/** Return the first word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first + * word of user data contains the least significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_low()). + * + * See the <em>System Programmer's Guide</em> for details. + * + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return The packet's first word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_USER_DATA_0_M(mda, pkt); +} + + +/** Return the second word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second + * word of user data contains the most significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_high()). + * + * See the <em>System Programmer's Guide</em> for details. + * + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return The packet's second word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_USER_DATA_1_M(mda, pkt); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L4 checksum was calculated. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L4_CSUM_CALCULATED_M(mda, pkt); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to + * be correct. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt); +} + + +/** Determine whether the L3 (IP) checksum was calculated. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L3 (IP) checksum was calculated. +*/ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_CSUM_CALCULATED_M(mda, pkt); +} + + +/** Determine whether the L3 (IP) checksum was calculated and found to be + * correct. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_CSUM_CORRECT_M(mda, pkt); +} + + +/** Determine whether the Ethertype was recognized and L3 packet data was + * processed. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the Ethertype was recognized and L3 packet data was + * processed. + */ +static __inline unsigned int +NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt); +} + + +/** Set an egress packet's L2 length, using a metadata pointer to speed the + * computation. + * @ingroup egress + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] len Packet L2 length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt, + int len) +{ + mmd->l2_length = len; +} + + +/** Set an egress packet's L2 length. + * @ingroup egress + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] len Packet L2 length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + NETIO_PKT_SET_L2_LENGTH_MM(mmd, pkt, len); +} + + +/** Set an egress packet's L2 header length, using a metadata pointer to + * speed the computation. + * @ingroup egress + * + * It is not normally necessary to call this routine; only the L2 length, + * not the header length, is needed to transmit a packet. It may be useful if + * the egress packet will later be processed by code which expects to use + * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] len Packet L2 header length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt, int len) +{ + mmd->l3_offset = mmd->l2_offset + len; +} + + +/** Set an egress packet's L2 header length. + * @ingroup egress + * + * It is not normally necessary to call this routine; only the L2 length, + * not the header length, is needed to transmit a packet. It may be useful if + * the egress packet will later be processed by code which expects to use + * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] len Packet L2 header length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + NETIO_PKT_SET_L2_HEADER_LENGTH_MM(mmd, pkt, len); +} + + +/** Set up an egress packet for hardware checksum computation, using a + * metadata pointer to speed the operation. + * @ingroup egress + * + * NetIO provides the ability to automatically calculate a standard + * 16-bit Internet checksum on transmitted packets. The application + * may specify the point in the packet where the checksum starts, the + * number of bytes to be checksummed, and the two bytes in the packet + * which will be replaced with the completed checksum. (If the range + * of bytes to be checksummed includes the bytes to be replaced, the + * initial values of those bytes will be included in the checksum.) + * + * For some protocols, the packet checksum covers data which is not present + * in the packet, or is at least not contiguous to the main data payload. + * For instance, the TCP checksum includes a "pseudo-header" which includes + * the source and destination IP addresses of the packet. To accommodate + * this, the checksum engine may be "seeded" with an initial value, which + * the application would need to compute based on the specific protocol's + * requirements. Note that the seed is given in host byte order (little- + * endian), not network byte order (big-endian); code written to compute a + * pseudo-header checksum in network byte order will need to byte-swap it + * before use as the seed. + * + * Note that the checksum is computed as part of the transmission process, + * so it will not be present in the packet upon completion of this routine. + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] start Offset within L2 packet of the first byte to include in + * the checksum. + * @param[in] length Number of bytes to include in the checksum. + * the checksum. + * @param[in] location Offset within L2 packet of the first of the two bytes + * to be replaced with the calculated checksum. + * @param[in] seed Initial value of the running checksum before any of the + * packet data is added. + */ +static __inline void +NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt, int start, int length, + int location, uint16_t seed) +{ + mmd->csum_start = start; + mmd->csum_length = length; + mmd->csum_location = location; + mmd->csum_seed = seed; + mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK; +} + + +/** Set up an egress packet for hardware checksum computation. + * @ingroup egress + * + * NetIO provides the ability to automatically calculate a standard + * 16-bit Internet checksum on transmitted packets. The application + * may specify the point in the packet where the checksum starts, the + * number of bytes to be checksummed, and the two bytes in the packet + * which will be replaced with the completed checksum. (If the range + * of bytes to be checksummed includes the bytes to be replaced, the + * initial values of those bytes will be included in the checksum.) + * + * For some protocols, the packet checksum covers data which is not present + * in the packet, or is at least not contiguous to the main data payload. + * For instance, the TCP checksum includes a "pseudo-header" which includes + * the source and destination IP addresses of the packet. To accommodate + * this, the checksum engine may be "seeded" with an initial value, which + * the application would need to compute based on the specific protocol's + * requirements. Note that the seed is given in host byte order (little- + * endian), not network byte order (big-endian); code written to compute a + * pseudo-header checksum in network byte order will need to byte-swap it + * before use as the seed. + * + * Note that the checksum is computed as part of the transmission process, + * so it will not be present in the packet upon completion of this routine. + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] start Offset within L2 packet of the first byte to include in + * the checksum. + * @param[in] length Number of bytes to include in the checksum. + * the checksum. + * @param[in] location Offset within L2 packet of the first of the two bytes + * to be replaced with the calculated checksum. + * @param[in] seed Initial value of the running checksum before any of the + * packet data is added. + */ +static __inline void +NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length, + int location, uint16_t seed) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, pkt, start, length, location, seed); +} + + +/** Return the number of bytes which could be prepended to a packet, using a + * metadata pointer to speed the operation. + * See @ref netio_populate_prepend_buffer() to get a full description of + * prepending. + * + * @param[in,out] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline int +NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (pkt->__packet.bits.__offset << 6) + + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); +} + + +/** Return the number of bytes which could be prepended to a packet, using a + * metadata pointer to speed the operation. + * See @ref netio_populate_prepend_buffer() to get a full description of + * prepending. + * @ingroup egress + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline int +NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset; +} + + +/** Return the number of bytes which could be prepended to a packet. + * See @ref netio_populate_prepend_buffer() to get a full description of + * prepending. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline int +NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_PREPEND_AVAIL_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_PREPEND_AVAIL_M(mda, pkt); + } +} + + +/** Flush a packet's minimal metadata from the cache, using a metadata pointer + * to speed the operation. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's minimal metadata from the cache, using a metadata + * pointer to speed the operation. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's minimal metadata from the cache, + * using a metadata pointer to speed the operation. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's metadata from the cache, using a metadata pointer + * to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's metadata from the cache, using a metadata + * pointer to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's metadata from the cache, + * using a metadata pointer to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt) +{ +} + +/** Number of NUMA nodes we can distribute buffers to. + * @ingroup setup */ +#define NETIO_NUM_NODE_WEIGHTS 16 + +/** + * @brief An object for specifying the characteristics of NetIO communication + * endpoint. + * + * @ingroup setup + * + * The @ref netio_input_register() function uses this structure to define + * how an application tile will communicate with an IPP. + * + * + * Future updates to NetIO may add new members to this structure, + * which can affect the success of the registration operation. Thus, + * if dynamically initializing the structure, applications are urged to + * zero it out first, for example: + * + * @code + * netio_input_config_t config; + * memset(&config, 0, sizeof (config)); + * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE; + * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS; + * config.queue_id = 0; + * . + * . + * . + * @endcode + * + * since that guarantees that any unused structure members, including + * members which did not exist when the application was first developed, + * will not have unexpected values. + * + * If statically initializing the structure, we strongly recommend use of + * C99-style named initializers, for example: + * + * @code + * netio_input_config_t config = { + * .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, + * .num_receive_packets = NETIO_MAX_RECEIVE_PKTS, + * .queue_id = 0, + * }, + * @endcode + * + * instead of the old-style structure initialization: + * + * @code + * // Bad example! Currently equivalent to the above, but don't do this. + * netio_input_config_t config = { + * NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0 + * }, + * @endcode + * + * since the C99 style requires no changes to the code if elements of the + * config structure are rearranged. (It also makes the initialization much + * easier to understand.) + * + * Except for items which address a particular tile's transmit or receive + * characteristics, such as the ::NETIO_RECV flag, applications are advised + * to specify the same set of configuration data on all registrations. + * This prevents differing results if multiple tiles happen to do their + * registration operations in a different order on different invocations of + * the application. This is particularly important for things like link + * management flags, and buffer size and homing specifications. + * + * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO + * buffer pool is automatically created and mapped into the application's + * virtual address space at an address chosen by the operating system, + * using the common memory (cmem) facility in the Tilera Multicore + * Components library. The cmem facility allows multiple processes to gain + * access to shared memory which is mapped into each process at an + * identical virtual address. In order for this to work, the processes + * must have a common ancestor, which must create the common memory using + * tmc_cmem_init(). + * + * In programs using the iLib process creation API, or in programs which use + * only one process (which include programs using the pthreads library), + * tmc_cmem_init() is called automatically. All other applications + * must call it explicitly, before any child processes which might call + * netio_input_register() are created. + */ +typedef struct +{ + /** Registration characteristics. + + This value determines several characteristics of the registration; + flags for different types of behavior are ORed together to make the + final flag value. Generally applications should specify exactly + one flag from each of the following categories: + + - Whether the application will be receiving packets on this queue + (::NETIO_RECV or ::NETIO_NO_RECV). + + - Whether the application will be transmitting packets on this queue, + and if so, whether it will request egress checksum calculation + (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT). It is + legal to call netio_get_buffer() without one of the XMIT flags, + as long as ::NETIO_RECV is specified; in this case, the retrieved + buffers must be passed to another tile for transmission. + + - Whether the application expects any vendor-specific tags in + its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM, + or ::NETIO_TAG_MRVL). This must match the configuration of the + target IPP. + + To accommodate applications written to previous versions of the NetIO + interface, none of the flags above are currently required; if omitted, + NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM | + ::NETIO_TAG_NONE were used. However, explicit specification of + the relevant flags allows NetIO to do a better job of resource + allocation, allows earlier detection of certain configuration errors, + and may enable advanced features or higher performance in the future, + so their use is strongly recommended. + + Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT + is a special case, intended primarily for use by programs which + retrieve network statistics or do link management operations. + When these flags are both specified, the resulting queue may not + be used with NetIO routines other than netio_get(), netio_set(), + and netio_input_unregister(). See @ref link for more information + on link management. + + Other flags are optional; their use is described below. + */ + int flags; + + /** Interface name. This is a string which identifies the specific + Ethernet controller hardware to be used. The format of the string + is a device type and a device index, separated by a slash; so, + the first 10 Gigabit Ethernet controller is named "xgbe/0", while + the second 10/100/1000 Megabit Ethernet controller is named "gbe/1". + */ + const char* interface; + + /** Receive packet queue size. This specifies the maximum number + of ingress packets that can be received on this queue without + being retrieved by @ref netio_get_packet(). If the IPP's distribution + algorithm calls for a packet to be sent to this queue, and this + number of packets are already pending there, the new packet + will either be discarded, or sent to another tile registered + for the same queue_id (see @ref drops). This value must + be at least ::NETIO_MIN_RECEIVE_PKTS, can always be at least + ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain + interfaces. + */ + int num_receive_packets; + + /** The queue ID being requested. Legal values for this range from 0 + to ::NETIO_MAX_QUEUE_ID, inclusive. ::NETIO_MAX_QUEUE_ID is always + greater than or equal to the number of tiles; this allows one queue + for each tile, plus at least one additional queue. Some applications + may wish to use the additional queue as a destination for unwanted + packets, since packets delivered to queues for which no tiles have + registered are discarded. + */ + unsigned int queue_id; + + /** Maximum number of small send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds + empty small egress buffers requested from the IPP but not yet + retrieved via @ref netio_get_buffer(). This value must be greater + than zero if the application will ever use @ref netio_get_buffer() + to allocate empty small egress buffers; it may be no larger than + ::NETIO_MAX_SEND_BUFFERS. See @ref epp for more details on empty + buffer caching. + */ + int num_send_buffers_small_total; + + /** Number of small send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty small egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_small_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_small_prealloc; + + /** Maximum number of large send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds empty + large egress buffers requested from the IPP but not yet retrieved via + @ref netio_get_buffer(). This value must be greater than zero if the + application will ever use @ref netio_get_buffer() to allocate empty + large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. + See @ref epp for more details on empty buffer caching. + */ + int num_send_buffers_large_total; + + /** Number of large send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty large egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_large_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_large_prealloc; + + /** Maximum number of jumbo send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds empty + jumbo egress buffers requested from the IPP but not yet retrieved via + @ref netio_get_buffer(). This value must be greater than zero if the + application will ever use @ref netio_get_buffer() to allocate empty + jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. + See @ref epp for more details on empty buffer caching. + */ + int num_send_buffers_jumbo_total; + + /** Number of jumbo send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty jumbo egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_jumbo_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_jumbo_prealloc; + + /** Total packet buffer size. This determines the total size, in bytes, + of the NetIO buffer pool. Note that the maximum number of available + buffers of each size is determined during hypervisor configuration + (see the <em>System Programmer's Guide</em> for details); this just + influences how much host memory is allocated for those buffers. + + The buffer pool is allocated from common memory, which will be + automatically initialized if needed. If your buffer pool is larger + than 240 MB, you might need to explicitly call @c tmc_cmem_init(), + as described in the Application Libraries Reference Manual (UG227). + + Packet buffers are currently allocated in chunks of 16 MB; this + value will be rounded up to the next larger multiple of 16 MB. + If this value is zero, a default of 32 MB will be used; this was + the value used by previous versions of NetIO. Note that taking this + default also affects the placement of buffers on Linux NUMA nodes. + See @ref buffer_node_weights for an explanation of buffer placement. + + In order to successfully allocate packet buffers, Linux must have + available huge pages on the relevant Linux NUMA nodes. See the + <em>System Programmer's Guide</em> for information on configuring + huge page support in Linux. + */ + uint64_t total_buffer_size; + + /** Buffer placement weighting factors. + + This array specifies the relative amount of buffering to place + on each of the available Linux NUMA nodes. This array is + indexed by the NUMA node, and the values in the array are + proportional to the amount of buffer space to allocate on that + node. + + If memory striping is enabled in the Hypervisor, then there is + only one logical NUMA node (node 0). In that case, NetIO will by + default ignore the suggested buffer node weights, and buffers + will be striped across the physical memory controllers. See + UG209 System Programmer's Guide for a description of the + hypervisor option that controls memory striping. + + If memory striping is disabled, then there are up to four NUMA + nodes, corresponding to the four DDRAM controllers in the TILE + processor architecture. See UG100 Tile Processor Architecture + Overview for a diagram showing the location of each of the DDRAM + controllers relative to the tile array. + + For instance, if memory striping is disabled, the following + configuration strucure: + + @code + netio_input_config_t config = { + . + . + . + .total_buffer_size = 4 * 16 * 1024 * 1024; + .buffer_node_weights = { 1, 0, 1, 0 }, + }, + @endcode + + would result in 32 MB of buffers being placed on controller 0, and + 32 MB on controller 2. (Since buffers are allocated in units of + 16 MB, some sets of weights will not be able to be matched exactly.) + + For the weights to be effective, @ref total_buffer_size must be + nonzero. If @ref total_buffer_size is zero, causing the default + 32 MB of buffer space to be used, then any specified weights will + be ignored, and buffers will positioned as they were in previous + versions of NetIO: + + - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1, + and the other 16 MB will be placed on controller 2. + + - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2, + and the other 16 MB will be placed on controller 3. + + If @ref total_buffer_size is nonzero, but all weights are zero, + then all buffer space will be allocated on Linux NUMA node zero. + + By default, the specified buffer placement is treated as a hint; + if sufficient free memory is not available on the specified + controllers, the buffers will be allocated elsewhere. However, + if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a + failure to allocate buffer space exactly as requested will cause the + registration operation to fail with an error of ::NETIO_CANNOT_HOME. + + Note that maximal network performance cannot be achieved with + only one memory controller. + */ + uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS]; + + /** Fixed virtual address for packet buffers. Only valid when + ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the + description of that flag for details. + */ + void* fixed_buffer_va; + + /** + Maximum number of outstanding send packet requests. This value is + only relevant when an EPP is in use; it determines the number of + slots in the EPP's outgoing packet queue which this tile is allowed + to consume, and thus the number of packets which may be sent before + the sending tile must wait for an acknowledgment from the EPP. + Modifying this value is generally only helpful when using @ref + netio_send_packet_vector(), where it can help improve performance by + allowing a single vector send operation to process more packets. + Typically it is not specified, and the default, which divides the + outgoing packet slots evenly between all tiles on the chip, is used. + + If a registration asks for more outgoing packet queue slots than are + available, ::NETIO_TOOMANY_XMIT will be returned. The total number + of packet queue slots which are available for all tiles for each EPP + is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING. + + + This value is ignored if ::NETIO_XMIT is not specified in flags. + If you want to specify a large value here for a specific tile, you are + advised to specify NETIO_NO_XMIT on other, non-transmitting tiles so + that they do not consume a default number of packet slots. Any tile + transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING + slots allocated to it; values less than that will be silently + increased by the NetIO library. + */ + int num_sends_outstanding; +} +netio_input_config_t; + + +/** Registration flags; used in the @ref netio_input_config_t structure. + * @addtogroup setup + */ +/** @{ */ + +/** Fail a registration request if we can't put packet buffers + on the specified memory controllers. */ +#define NETIO_STRICT_HOMING 0x00000002 + +/** This application expects no tags on its L2 headers. */ +#define NETIO_TAG_NONE 0x00000004 + +/** This application expects Marvell extended tags on its L2 headers. */ +#define NETIO_TAG_MRVL 0x00000008 + +/** This application expects Broadcom tags on its L2 headers. */ +#define NETIO_TAG_BRCM 0x00000010 + +/** This registration may call routines which receive packets. */ +#define NETIO_RECV 0x00000020 + +/** This registration may not call routines which receive packets. */ +#define NETIO_NO_RECV 0x00000040 + +/** This registration may call routines which transmit packets. */ +#define NETIO_XMIT 0x00000080 + +/** This registration may call routines which transmit packets with + checksum acceleration. */ +#define NETIO_XMIT_CSUM 0x00000100 + +/** This registration may not call routines which transmit packets. */ +#define NETIO_NO_XMIT 0x00000200 + +/** This registration wants NetIO buffers mapped at an application-specified + virtual address. + + NetIO buffers are by default created by the TMC common memory facility, + which must be configured by a common ancestor of all processes sharing + a network interface. When this flag is specified, NetIO buffers are + instead mapped at an address chosen by the application (and specified + in @ref netio_input_config_t::fixed_buffer_va). This allows multiple + unrelated but cooperating processes to share a NetIO interface. + All processes sharing the same interface must specify this flag, + and all must specify the same fixed virtual address. + + @ref netio_input_config_t::fixed_buffer_va must be a + multiple of 16 MB, and the packet buffers will occupy @ref + netio_input_config_t::total_buffer_size bytes of virtual address + space, beginning at that address. If any of those virtual addresses + are currently occupied by other memory objects, like application or + shared library code or data, @ref netio_input_register() will return + ::NETIO_FAULT. While it is impossible to provide a fixed_buffer_va + which will work for all applications, a good first guess might be to + use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size. + If that fails, it might be helpful to consult the running application's + virtual address description file (/proc/<em>pid</em>/maps) to see + which regions of virtual address space are available. + */ +#define NETIO_FIXED_BUFFER_VA 0x00000400 + +/** This registration call will not complete unless the network link + is up. The process will wait several seconds for this to happen (the + precise interval is link-dependent), but if the link does not come up, + ::NETIO_LINK_DOWN will be returned. This flag is the default if + ::NETIO_NOREQUIRE_LINK_UP is not specified. Note that this flag by + itself does not request that the link be brought up; that can be done + with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the + latter is the default if no NETIO_AUTO_LINK_xxx flags are specified), + or by explicitly setting the link's desired state via netio_set(). + If the link is not brought up by one of those methods, and this flag + is specified, the registration operation will return ::NETIO_LINK_DOWN. + This flag is ignored if it is specified along with ::NETIO_NO_XMIT and + ::NETIO_NO_RECV. See @ref link for more information on link + management. + */ +#define NETIO_REQUIRE_LINK_UP 0x00000800 + +/** This registration call will complete even if the network link is not up. + Whenever the link is not up, packets will not be sent or received: + netio_get_packet() will return ::NETIO_NOPKT once all queued packets + have been drained, and netio_send_packet() and similar routines will + return NETIO_QUEUE_FULL once the outgoing packet queue in the EPP + or the I/O shim is full. See @ref link for more information on link + management. + */ +#define NETIO_NOREQUIRE_LINK_UP 0x00001000 + +#ifndef __DOXYGEN__ +/* + * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags, + * but should not be used directly by applications, and are thus not + * documented. + */ +#define _NETIO_AUTO_UP 0x00002000 +#define _NETIO_AUTO_DN 0x00004000 +#define _NETIO_AUTO_PRESENT 0x00008000 +#endif + +/** Set the desired state of the link to up, allowing any speeds which are + supported by the link hardware, as part of this registration operation. + Do not take down the link automatically. This is the default if + no other NETIO_AUTO_LINK_xxx flags are specified. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_UP (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP) + +/** Set the desired state of the link to up, allowing any speeds which are + supported by the link hardware, as part of this registration operation. + Set the desired state of the link to down the next time no tiles are + registered for packet reception or transmission. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_UPDN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \ + _NETIO_AUTO_DN) + +/** Set the desired state of the link to down the next time no tiles are + registered for packet reception or transmission. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_DN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN) + +/** Do not bring up the link automatically as part of this registration + operation. Do not take down the link automatically. This flag + is ignored if it is specified along with ::NETIO_NO_XMIT and + ::NETIO_NO_RECV. See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_NONE _NETIO_AUTO_PRESENT + + +/** Minimum number of receive packets. */ +#define NETIO_MIN_RECEIVE_PKTS 16 + +/** Lower bound on the maximum number of receive packets; may be higher + than this on some interfaces. */ +#define NETIO_MAX_RECEIVE_PKTS 128 + +/** Maximum number of send buffers, per packet size. */ +#define NETIO_MAX_SEND_BUFFERS 16 + +/** Number of EPP queue slots, and thus outstanding sends, per EPP. */ +#define NETIO_TOTAL_SENDS_OUTSTANDING 2015 + +/** Minimum number of EPP queue slots, and thus outstanding sends, per + * transmitting tile. */ +#define NETIO_MIN_SENDS_OUTSTANDING 16 + + +/**@}*/ + +#ifndef __DOXYGEN__ + +/** + * An object for providing Ethernet packets to a process. + */ +struct __netio_queue_impl_t; + +/** + * An object for managing the user end of a NetIO queue. + */ +struct __netio_queue_user_impl_t; + +#endif /* !__DOXYGEN__ */ + + +/** A netio_queue_t describes a NetIO communications endpoint. + * @ingroup setup + */ +typedef struct +{ +#ifdef __DOXYGEN__ + uint8_t opaque[8]; /**< This is an opaque structure. */ +#else + struct __netio_queue_impl_t* __system_part; /**< The system part. */ + struct __netio_queue_user_impl_t* __user_part; /**< The user part. */ +#ifdef _NETIO_PTHREAD + _netio_percpu_mutex_t lock; /**< Queue lock. */ +#endif +#endif +} +netio_queue_t; + + +/** + * @brief Packet send context. + * + * @ingroup egress + * + * Packet send context for use with netio_send_packet_prepare and _commit. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + uint8_t opaque[44]; /**< This is an opaque structure. */ +#else + uint8_t flags; /**< Defined below */ + uint8_t datalen; /**< Number of valid words pointed to by data. */ + uint32_t request[9]; /**< Request to be sent to the EPP or shim. Note + that this is smaller than the 11-word maximum + request size, since some constant values are + not saved in the context. */ + uint32_t *data; /**< Data to be sent to the EPP or shim via IDN. */ +#endif +} +netio_send_pkt_context_t; + + +#ifndef __DOXYGEN__ +#define SEND_PKT_CTX_USE_EPP 1 /**< We're sending to an EPP. */ +#define SEND_PKT_CTX_SEND_CSUM 2 /**< Request includes a checksum. */ +#endif + +/** + * @brief Packet vector entry. + * + * @ingroup egress + * + * This data structure is used with netio_send_packet_vector() to send multiple + * packets with one NetIO call. The structure should be initialized by + * calling netio_pkt_vector_set(), rather than by setting the fields + * directly. + * + * This structure is guaranteed to be a power of two in size, no + * bigger than one L2 cache line, and to be aligned modulo its size. + */ +typedef struct +#ifndef __DOXYGEN__ +__attribute__((aligned(8))) +#endif +{ + /** Reserved for use by the user application. When initialized with + * the netio_set_pkt_vector_entry() function, this field is guaranteed + * to be visible to readers only after all other fields are already + * visible. This way it can be used as a valid flag or generation + * counter. */ + uint8_t user_data; + + /* Structure members below this point should not be accessed directly by + * applications, as they may change in the future. */ + + /** Low 8 bits of the packet address to send. The high bits are + * acquired from the 'handle' field. */ + uint8_t buffer_address_low; + + /** Number of bytes to transmit. */ + uint16_t size; + + /** The raw handle from a netio_pkt_t. If this is NETIO_PKT_HANDLE_NONE, + * this vector entry will be skipped and no packet will be transmitted. */ + netio_pkt_handle_t handle; +} +netio_pkt_vector_entry_t; + + +/** + * @brief Initialize fields in a packet vector entry. + * + * @ingroup egress + * + * @param[out] v Pointer to the vector entry to be initialized. + * @param[in] pkt Packet to be transmitted when the vector entry is passed to + * netio_send_packet_vector(). Note that the packet's attributes + * (e.g., its L2 offset and length) are captured at the time this + * routine is called; subsequent changes in those attributes will not + * be reflected in the packet which is actually transmitted. + * Changes in the packet's contents, however, will be so reflected. + * If this is NULL, no packet will be transmitted. + * @param[in] user_data User data to be set in the vector entry. + * This function guarantees that the "user_data" field will become + * visible to a reader only after all other fields have become visible. + * This allows a structure in a ring buffer to be written and read + * by a polling reader without any locks or other synchronization. + */ +static __inline void +netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt, + uint8_t user_data) +{ + if (pkt) + { + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = + (netio_pkt_minimal_metadata_t*) &pkt->__metadata; + v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF; + v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = &pkt->__metadata; + v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF; + v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt); + } + v->handle.word = pkt->__packet.word; + } + else + { + v->handle.word = 0; /* Set handle to NETIO_PKT_HANDLE_NONE. */ + } + + __asm__("" : : : "memory"); + + v->user_data = user_data; +} + + +/** + * Flags and structures for @ref netio_get() and @ref netio_set(). + * @ingroup config + */ + +/** @{ */ +/** Parameter class; addr is a NETIO_PARAM_xxx value. */ +#define NETIO_PARAM 0 +/** Interface MAC address. This address is only valid with @ref netio_get(). + * The value is a 6-byte MAC address. Depending upon the overall system + * design, a MAC address may or may not be available for each interface. */ +#define NETIO_PARAM_MAC 0 + +/** Determine whether to suspend output on the receipt of pause frames. + * If the value is nonzero, the I/O shim will suspend output when a pause + * frame is received. If the value is zero, pause frames will be ignored. */ +#define NETIO_PARAM_PAUSE_IN 1 + +/** Determine whether to send pause frames if the I/O shim packet FIFOs are + * nearly full. If the value is zero, pause frames are not sent. If + * the value is nonzero, it is the delay value which will be sent in any + * pause frames which are output, in units of 512 bit times. */ +#define NETIO_PARAM_PAUSE_OUT 2 + +/** Jumbo frame support. The value is a 4-byte integer. If the value is + * nonzero, the MAC will accept frames of up to 10240 bytes. If the value + * is zero, the MAC will only accept frames of up to 1544 bytes. */ +#define NETIO_PARAM_JUMBO 3 + +/** I/O shim's overflow statistics register. The value is two 16-bit integers. + * The first 16-bit value (or the low 16 bits, if the value is treated as a + * 32-bit number) is the count of packets which were completely dropped and + * not delivered by the shim. The second 16-bit value (or the high 16 bits, + * if the value is treated as a 32-bit number) is the count of packets + * which were truncated and thus only partially delivered by the shim. This + * register is automatically reset to zero after it has been read. + */ +#define NETIO_PARAM_OVERFLOW 4 + +/** IPP statistics. This address is only valid with @ref netio_get(). The + * value is a netio_stat_t structure. Unlike the I/O shim statistics, the + * IPP statistics are not all reset to zero on read; see the description + * of the netio_stat_t for details. */ +#define NETIO_PARAM_STAT 5 + +/** Possible link state. The value is a combination of "NETIO_LINK_xxx" + * flags. With @ref netio_get(), this will indicate which flags are + * actually supported by the hardware. + * + * For historical reasons, specifying this value to netio_set() will have + * the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is + * discouraged. + */ +#define NETIO_PARAM_LINK_POSSIBLE_STATE 6 + +/** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags. + * With @ref netio_set(), this will attempt to immediately bring up the + * link using whichever of the requested flags are supported by the + * hardware, or take down the link if the flags are zero; if this is + * not possible, an error will be returned. Many programs will want + * to use ::NETIO_PARAM_LINK_DESIRED_STATE instead. + * + * For historical reasons, specifying this value to netio_get() will + * have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE, + * but this usage is discouraged. + */ +#define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE + +/** Current link state. This address is only valid with @ref netio_get(). + * The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together. + * If the link is down, the value ANDed with NETIO_LINK_SPEED will be + * zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will + * result in exactly one of the NETIO_LINK_xxx values, indicating the + * current speed. */ +#define NETIO_PARAM_LINK_CURRENT_STATE 7 + +/** Variant symbol for current state, retained for compatibility with + * pre-MDE-2.1 programs. */ +#define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE + +/** Packet Coherence protocol. This address is only valid with @ref netio_get(). + * The value is nonzero if the interface is configured for cache-coherent DMA. + */ +#define NETIO_PARAM_COHERENT 8 + +/** Desired link state. The value is a conbination of "NETIO_LINK_xxx" + * flags, which specify the desired state for the link. With @ref + * netio_set(), this will, in the background, attempt to bring up the link + * using whichever of the requested flags are reasonable, or take down the + * link if the flags are zero. The actual link up or down operation may + * happen after this call completes. If the link state changes in the + * future, the system will continue to try to get back to the desired link + * state; for instance, if the link is brought up successfully, and then + * the network cable is disconnected, the link will go down. However, the + * desired state of the link is still up, so if the cable is reconnected, + * the link will be brought up again. + * + * With @ref netio_get(), this will indicate the desired state for the + * link, as set with a previous netio_set() call, or implicitly by a + * netio_input_register() or netio_input_unregister() operation. This may + * not reflect the current state of the link; to get that, use + * ::NETIO_PARAM_LINK_CURRENT_STATE. */ +#define NETIO_PARAM_LINK_DESIRED_STATE 9 + +/** NetIO statistics structure. Retrieved using the ::NETIO_PARAM_STAT + * address passed to @ref netio_get(). */ +typedef struct +{ + /** Number of packets which have been received by the IPP and forwarded + * to a tile's receive queue for processing. This value wraps at its + * maximum, and is not cleared upon read. */ + uint32_t packets_received; + + /** Number of packets which have been dropped by the IPP, because they could + * not be received, or could not be forwarded to a tile. The former happens + * when the IPP does not have a free packet buffer of suitable size for an + * incoming frame. The latter happens when all potential destination tiles + * for a packet, as defined by the group, bucket, and queue configuration, + * have full receive queues. This value wraps at its maximum, and is not + * cleared upon read. */ + uint32_t packets_dropped; + + /* + * Note: the #defines after each of the following four one-byte values + * denote their location within the third word of the netio_stat_t. They + * are intended for use only by the IPP implementation and are thus omitted + * from the Doxygen output. + */ + + /** Number of packets dropped because no worker was able to accept a new + * packet. This value saturates at its maximum, and is cleared upon + * read. */ + uint8_t drops_no_worker; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_WORKER 0 +#endif + + /** Number of packets dropped because no small buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_smallbuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_SMALLBUF 1 +#endif + + /** Number of packets dropped because no large buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_largebuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_LARGEBUF 2 +#endif + + /** Number of packets dropped because no jumbo buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_jumbobuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_JUMBOBUF 3 +#endif +} +netio_stat_t; + + +/** Link can run, should run, or is running at 10 Mbps. */ +#define NETIO_LINK_10M 0x01 + +/** Link can run, should run, or is running at 100 Mbps. */ +#define NETIO_LINK_100M 0x02 + +/** Link can run, should run, or is running at 1 Gbps. */ +#define NETIO_LINK_1G 0x04 + +/** Link can run, should run, or is running at 10 Gbps. */ +#define NETIO_LINK_10G 0x08 + +/** Link should run at the highest speed supported by the link and by + * the device connected to the link. Only usable as a value for + * the link's desired state; never returned as a value for the current + * or possible states. */ +#define NETIO_LINK_ANYSPEED 0x10 + +/** All legal link speeds. */ +#define NETIO_LINK_SPEED (NETIO_LINK_10M | \ + NETIO_LINK_100M | \ + NETIO_LINK_1G | \ + NETIO_LINK_10G | \ + NETIO_LINK_ANYSPEED) + + +/** MAC register class. Addr is a register offset within the MAC. + * Registers within the XGbE and GbE MACs are documented in the Tile + * Processor I/O Device Guide (UG104). MAC registers start at address + * 0x4000, and do not include the MAC_INTERFACE registers. */ +#define NETIO_MAC 1 + +/** MDIO register class (IEEE 802.3 clause 22 format). Addr is the "addr" + * member of a netio_mdio_addr_t structure. */ +#define NETIO_MDIO 2 + +/** MDIO register class (IEEE 802.3 clause 45 format). Addr is the "addr" + * member of a netio_mdio_addr_t structure. */ +#define NETIO_MDIO_CLAUSE45 3 + +/** NetIO MDIO address type. Retrieved or provided using the ::NETIO_MDIO + * address passed to @ref netio_get() or @ref netio_set(). */ +typedef union +{ + struct + { + unsigned int reg:16; /**< MDIO register offset. For clause 22 access, + must be less than 32. */ + unsigned int phy:5; /**< Which MDIO PHY to access. */ + unsigned int dev:5; /**< Which MDIO device to access within that PHY. + Applicable for clause 45 access only; ignored + for clause 22 access. */ + } + bits; /**< Container for bitfields. */ + uint64_t addr; /**< Value to pass to @ref netio_get() or + * @ref netio_set(). */ +} +netio_mdio_addr_t; + +/** @} */ + +#endif /* __NETIO_INTF_H__ */ diff --git a/arch/tile/include/hv/syscall_public.h b/arch/tile/include/hv/syscall_public.h new file mode 100644 index 00000000..9cc0837e --- /dev/null +++ b/arch/tile/include/hv/syscall_public.h @@ -0,0 +1,42 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file syscall.h + * Indices for the hypervisor system calls that are intended to be called + * directly, rather than only through hypervisor-generated "glue" code. + */ + +#ifndef _SYS_HV_INCLUDE_SYSCALL_PUBLIC_H +#define _SYS_HV_INCLUDE_SYSCALL_PUBLIC_H + +/** Fast syscall flag bit location. When this bit is set, the hypervisor + * handles the syscall specially. + */ +#define HV_SYS_FAST_SHIFT 14 + +/** Fast syscall flag bit mask. */ +#define HV_SYS_FAST_MASK (1 << HV_SYS_FAST_SHIFT) + +/** Bit location for flagging fast syscalls that can be called from PL0. */ +#define HV_SYS_FAST_PLO_SHIFT 13 + +/** Fast syscall allowing PL0 bit mask. */ +#define HV_SYS_FAST_PL0_MASK (1 << HV_SYS_FAST_PLO_SHIFT) + +/** Perform an MF that waits for all victims to reach DRAM. */ +#define HV_SYS_fence_incoherent (51 | HV_SYS_FAST_MASK \ + | HV_SYS_FAST_PL0_MASK) + +#endif /* !_SYS_HV_INCLUDE_SYSCALL_PUBLIC_H */ diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile new file mode 100644 index 00000000..b4dbc057 --- /dev/null +++ b/arch/tile/kernel/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for the Linux/TILE kernel. +# + +extra-y := vmlinux.lds head_$(BITS).o +obj-y := backtrace.o entry.o init_task.o irq.o messaging.o \ + pci-dma.o proc.o process.o ptrace.o reboot.o \ + setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \ + intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o + +obj-$(CONFIG_HARDWALL) += hardwall.o +obj-$(CONFIG_TILEGX) += futex_64.o +obj-$(CONFIG_COMPAT) += compat.o compat_signal.o +obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_PCI) += pci.o diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c new file mode 100644 index 00000000..01ddf19c --- /dev/null +++ b/arch/tile/kernel/asm-offsets.c @@ -0,0 +1,76 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Generates definitions from c-type structures used by assembly sources. + */ + +#include <linux/kbuild.h> +#include <linux/thread_info.h> +#include <linux/sched.h> +#include <linux/hardirq.h> +#include <linux/ptrace.h> +#include <hv/hypervisor.h> + +/* Check for compatible compiler early in the build. */ +#ifdef CONFIG_TILEGX +# ifndef __tilegx__ +# error Can only build TILE-Gx configurations with tilegx compiler +# endif +# ifndef __LP64__ +# error Must not specify -m32 when building the TILE-Gx kernel +# endif +#else +# ifdef __tilegx__ +# error Can not build TILEPro/TILE64 configurations with tilegx compiler +# endif +#endif + +void foo(void) +{ + DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET, \ + offsetof(struct single_step_state, buffer)); + DEFINE(SINGLESTEP_STATE_FLAGS_OFFSET, \ + offsetof(struct single_step_state, flags)); + DEFINE(SINGLESTEP_STATE_ORIG_PC_OFFSET, \ + offsetof(struct single_step_state, orig_pc)); + DEFINE(SINGLESTEP_STATE_NEXT_PC_OFFSET, \ + offsetof(struct single_step_state, next_pc)); + DEFINE(SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET, \ + offsetof(struct single_step_state, branch_next_pc)); + DEFINE(SINGLESTEP_STATE_UPDATE_VALUE_OFFSET, \ + offsetof(struct single_step_state, update_value)); + + DEFINE(THREAD_INFO_TASK_OFFSET, \ + offsetof(struct thread_info, task)); + DEFINE(THREAD_INFO_FLAGS_OFFSET, \ + offsetof(struct thread_info, flags)); + DEFINE(THREAD_INFO_STATUS_OFFSET, \ + offsetof(struct thread_info, status)); + DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET, \ + offsetof(struct thread_info, homecache_cpu)); + DEFINE(THREAD_INFO_STEP_STATE_OFFSET, \ + offsetof(struct thread_info, step_state)); + + DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET, + offsetof(struct task_struct, thread.ksp)); + DEFINE(TASK_STRUCT_THREAD_PC_OFFSET, + offsetof(struct task_struct, thread.pc)); + + DEFINE(HV_TOPOLOGY_WIDTH_OFFSET, \ + offsetof(HV_Topology, width)); + DEFINE(HV_TOPOLOGY_HEIGHT_OFFSET, \ + offsetof(HV_Topology, height)); + + DEFINE(IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET, \ + offsetof(irq_cpustat_t, irq_syscall_count)); +} diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c new file mode 100644 index 00000000..9092ce8a --- /dev/null +++ b/arch/tile/kernel/backtrace.c @@ -0,0 +1,678 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/kernel.h> +#include <linux/string.h> +#include <asm/backtrace.h> +#include <asm/tile-desc.h> +#include <arch/abi.h> + +#ifdef __tilegx__ +#define TILE_MAX_INSTRUCTIONS_PER_BUNDLE TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE +#define tile_decoded_instruction tilegx_decoded_instruction +#define tile_mnemonic tilegx_mnemonic +#define parse_insn_tile parse_insn_tilegx +#define TILE_OPC_IRET TILEGX_OPC_IRET +#define TILE_OPC_ADDI TILEGX_OPC_ADDI +#define TILE_OPC_ADDLI TILEGX_OPC_ADDLI +#define TILE_OPC_INFO TILEGX_OPC_INFO +#define TILE_OPC_INFOL TILEGX_OPC_INFOL +#define TILE_OPC_JRP TILEGX_OPC_JRP +#define TILE_OPC_MOVE TILEGX_OPC_MOVE +#define OPCODE_STORE TILEGX_OPC_ST +typedef long long bt_int_reg_t; +#else +#define TILE_MAX_INSTRUCTIONS_PER_BUNDLE TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE +#define tile_decoded_instruction tilepro_decoded_instruction +#define tile_mnemonic tilepro_mnemonic +#define parse_insn_tile parse_insn_tilepro +#define TILE_OPC_IRET TILEPRO_OPC_IRET +#define TILE_OPC_ADDI TILEPRO_OPC_ADDI +#define TILE_OPC_ADDLI TILEPRO_OPC_ADDLI +#define TILE_OPC_INFO TILEPRO_OPC_INFO +#define TILE_OPC_INFOL TILEPRO_OPC_INFOL +#define TILE_OPC_JRP TILEPRO_OPC_JRP +#define TILE_OPC_MOVE TILEPRO_OPC_MOVE +#define OPCODE_STORE TILEPRO_OPC_SW +typedef int bt_int_reg_t; +#endif + +/* A decoded bundle used for backtracer analysis. */ +struct BacktraceBundle { + tile_bundle_bits bits; + int num_insns; + struct tile_decoded_instruction + insns[TILE_MAX_INSTRUCTIONS_PER_BUNDLE]; +}; + + +/* Locates an instruction inside the given bundle that + * has the specified mnemonic, and whose first 'num_operands_to_match' + * operands exactly match those in 'operand_values'. + */ +static const struct tile_decoded_instruction *find_matching_insn( + const struct BacktraceBundle *bundle, + tile_mnemonic mnemonic, + const int *operand_values, + int num_operands_to_match) +{ + int i, j; + bool match; + + for (i = 0; i < bundle->num_insns; i++) { + const struct tile_decoded_instruction *insn = + &bundle->insns[i]; + + if (insn->opcode->mnemonic != mnemonic) + continue; + + match = true; + for (j = 0; j < num_operands_to_match; j++) { + if (operand_values[j] != insn->operand_values[j]) { + match = false; + break; + } + } + + if (match) + return insn; + } + + return NULL; +} + +/* Does this bundle contain an 'iret' instruction? */ +static inline bool bt_has_iret(const struct BacktraceBundle *bundle) +{ + return find_matching_insn(bundle, TILE_OPC_IRET, NULL, 0) != NULL; +} + +/* Does this bundle contain an 'addi sp, sp, OFFSET' or + * 'addli sp, sp, OFFSET' instruction, and if so, what is OFFSET? + */ +static bool bt_has_addi_sp(const struct BacktraceBundle *bundle, int *adjust) +{ + static const int vals[2] = { TREG_SP, TREG_SP }; + + const struct tile_decoded_instruction *insn = + find_matching_insn(bundle, TILE_OPC_ADDI, vals, 2); + if (insn == NULL) + insn = find_matching_insn(bundle, TILE_OPC_ADDLI, vals, 2); +#ifdef __tilegx__ + if (insn == NULL) + insn = find_matching_insn(bundle, TILEGX_OPC_ADDXLI, vals, 2); + if (insn == NULL) + insn = find_matching_insn(bundle, TILEGX_OPC_ADDXI, vals, 2); +#endif + if (insn == NULL) + return false; + + *adjust = insn->operand_values[2]; + return true; +} + +/* Does this bundle contain any 'info OP' or 'infol OP' + * instruction, and if so, what are their OP? Note that OP is interpreted + * as an unsigned value by this code since that's what the caller wants. + * Returns the number of info ops found. + */ +static int bt_get_info_ops(const struct BacktraceBundle *bundle, + int operands[MAX_INFO_OPS_PER_BUNDLE]) +{ + int num_ops = 0; + int i; + + for (i = 0; i < bundle->num_insns; i++) { + const struct tile_decoded_instruction *insn = + &bundle->insns[i]; + + if (insn->opcode->mnemonic == TILE_OPC_INFO || + insn->opcode->mnemonic == TILE_OPC_INFOL) { + operands[num_ops++] = insn->operand_values[0]; + } + } + + return num_ops; +} + +/* Does this bundle contain a jrp instruction, and if so, to which + * register is it jumping? + */ +static bool bt_has_jrp(const struct BacktraceBundle *bundle, int *target_reg) +{ + const struct tile_decoded_instruction *insn = + find_matching_insn(bundle, TILE_OPC_JRP, NULL, 0); + if (insn == NULL) + return false; + + *target_reg = insn->operand_values[0]; + return true; +} + +/* Does this bundle modify the specified register in any way? */ +static bool bt_modifies_reg(const struct BacktraceBundle *bundle, int reg) +{ + int i, j; + for (i = 0; i < bundle->num_insns; i++) { + const struct tile_decoded_instruction *insn = + &bundle->insns[i]; + + if (insn->opcode->implicitly_written_register == reg) + return true; + + for (j = 0; j < insn->opcode->num_operands; j++) + if (insn->operands[j]->is_dest_reg && + insn->operand_values[j] == reg) + return true; + } + + return false; +} + +/* Does this bundle modify sp? */ +static inline bool bt_modifies_sp(const struct BacktraceBundle *bundle) +{ + return bt_modifies_reg(bundle, TREG_SP); +} + +/* Does this bundle modify lr? */ +static inline bool bt_modifies_lr(const struct BacktraceBundle *bundle) +{ + return bt_modifies_reg(bundle, TREG_LR); +} + +/* Does this bundle contain the instruction 'move fp, sp'? */ +static inline bool bt_has_move_r52_sp(const struct BacktraceBundle *bundle) +{ + static const int vals[2] = { 52, TREG_SP }; + return find_matching_insn(bundle, TILE_OPC_MOVE, vals, 2) != NULL; +} + +/* Does this bundle contain a store of lr to sp? */ +static inline bool bt_has_sw_sp_lr(const struct BacktraceBundle *bundle) +{ + static const int vals[2] = { TREG_SP, TREG_LR }; + return find_matching_insn(bundle, OPCODE_STORE, vals, 2) != NULL; +} + +#ifdef __tilegx__ +/* Track moveli values placed into registers. */ +static inline void bt_update_moveli(const struct BacktraceBundle *bundle, + int moveli_args[]) +{ + int i; + for (i = 0; i < bundle->num_insns; i++) { + const struct tile_decoded_instruction *insn = + &bundle->insns[i]; + + if (insn->opcode->mnemonic == TILEGX_OPC_MOVELI) { + int reg = insn->operand_values[0]; + moveli_args[reg] = insn->operand_values[1]; + } + } +} + +/* Does this bundle contain an 'add sp, sp, reg' instruction + * from a register that we saw a moveli into, and if so, what + * is the value in the register? + */ +static bool bt_has_add_sp(const struct BacktraceBundle *bundle, int *adjust, + int moveli_args[]) +{ + static const int vals[2] = { TREG_SP, TREG_SP }; + + const struct tile_decoded_instruction *insn = + find_matching_insn(bundle, TILEGX_OPC_ADDX, vals, 2); + if (insn) { + int reg = insn->operand_values[2]; + if (moveli_args[reg]) { + *adjust = moveli_args[reg]; + return true; + } + } + return false; +} +#endif + +/* Locates the caller's PC and SP for a program starting at the + * given address. + */ +static void find_caller_pc_and_caller_sp(CallerLocation *location, + const unsigned long start_pc, + BacktraceMemoryReader read_memory_func, + void *read_memory_func_extra) +{ + /* Have we explicitly decided what the sp is, + * rather than just the default? + */ + bool sp_determined = false; + + /* Has any bundle seen so far modified lr? */ + bool lr_modified = false; + + /* Have we seen a move from sp to fp? */ + bool sp_moved_to_r52 = false; + + /* Have we seen a terminating bundle? */ + bool seen_terminating_bundle = false; + + /* Cut down on round-trip reading overhead by reading several + * bundles at a time. + */ + tile_bundle_bits prefetched_bundles[32]; + int num_bundles_prefetched = 0; + int next_bundle = 0; + unsigned long pc; + +#ifdef __tilegx__ + /* Naively try to track moveli values to support addx for -m32. */ + int moveli_args[TILEGX_NUM_REGISTERS] = { 0 }; +#endif + + /* Default to assuming that the caller's sp is the current sp. + * This is necessary to handle the case where we start backtracing + * right at the end of the epilog. + */ + location->sp_location = SP_LOC_OFFSET; + location->sp_offset = 0; + + /* Default to having no idea where the caller PC is. */ + location->pc_location = PC_LOC_UNKNOWN; + + /* Don't even try if the PC is not aligned. */ + if (start_pc % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0) + return; + + for (pc = start_pc;; pc += sizeof(tile_bundle_bits)) { + + struct BacktraceBundle bundle; + int num_info_ops, info_operands[MAX_INFO_OPS_PER_BUNDLE]; + int one_ago, jrp_reg; + bool has_jrp; + + if (next_bundle >= num_bundles_prefetched) { + /* Prefetch some bytes, but don't cross a page + * boundary since that might cause a read failure we + * don't care about if we only need the first few + * bytes. Note: we don't care what the actual page + * size is; using the minimum possible page size will + * prevent any problems. + */ + unsigned int bytes_to_prefetch = 4096 - (pc & 4095); + if (bytes_to_prefetch > sizeof prefetched_bundles) + bytes_to_prefetch = sizeof prefetched_bundles; + + if (!read_memory_func(prefetched_bundles, pc, + bytes_to_prefetch, + read_memory_func_extra)) { + if (pc == start_pc) { + /* The program probably called a bad + * address, such as a NULL pointer. + * So treat this as if we are at the + * start of the function prolog so the + * backtrace will show how we got here. + */ + location->pc_location = PC_LOC_IN_LR; + return; + } + + /* Unreadable address. Give up. */ + break; + } + + next_bundle = 0; + num_bundles_prefetched = + bytes_to_prefetch / sizeof(tile_bundle_bits); + } + + /* Decode the next bundle. */ + bundle.bits = prefetched_bundles[next_bundle++]; + bundle.num_insns = + parse_insn_tile(bundle.bits, pc, bundle.insns); + num_info_ops = bt_get_info_ops(&bundle, info_operands); + + /* First look at any one_ago info ops if they are interesting, + * since they should shadow any non-one-ago info ops. + */ + for (one_ago = (pc != start_pc) ? 1 : 0; + one_ago >= 0; one_ago--) { + int i; + for (i = 0; i < num_info_ops; i++) { + int info_operand = info_operands[i]; + if (info_operand < CALLER_UNKNOWN_BASE) { + /* Weird; reserved value, ignore it. */ + continue; + } + + /* Skip info ops which are not in the + * "one_ago" mode we want right now. + */ + if (((info_operand & ONE_BUNDLE_AGO_FLAG) != 0) + != (one_ago != 0)) + continue; + + /* Clear the flag to make later checking + * easier. */ + info_operand &= ~ONE_BUNDLE_AGO_FLAG; + + /* Default to looking at PC_IN_LR_FLAG. */ + if (info_operand & PC_IN_LR_FLAG) + location->pc_location = + PC_LOC_IN_LR; + else + location->pc_location = + PC_LOC_ON_STACK; + + switch (info_operand) { + case CALLER_UNKNOWN_BASE: + location->pc_location = PC_LOC_UNKNOWN; + location->sp_location = SP_LOC_UNKNOWN; + return; + + case CALLER_SP_IN_R52_BASE: + case CALLER_SP_IN_R52_BASE | PC_IN_LR_FLAG: + location->sp_location = SP_LOC_IN_R52; + return; + + default: + { + const unsigned int val = info_operand + - CALLER_SP_OFFSET_BASE; + const unsigned int sp_offset = + (val >> NUM_INFO_OP_FLAGS) * 8; + if (sp_offset < 32768) { + /* This is a properly encoded + * SP offset. */ + location->sp_location = + SP_LOC_OFFSET; + location->sp_offset = + sp_offset; + return; + } else { + /* This looked like an SP + * offset, but it's outside + * the legal range, so this + * must be an unrecognized + * info operand. Ignore it. + */ + } + } + break; + } + } + } + + if (seen_terminating_bundle) { + /* We saw a terminating bundle during the previous + * iteration, so we were only looking for an info op. + */ + break; + } + + if (bundle.bits == 0) { + /* Wacky terminating bundle. Stop looping, and hope + * we've already seen enough to find the caller. + */ + break; + } + + /* + * Try to determine caller's SP. + */ + + if (!sp_determined) { + int adjust; + if (bt_has_addi_sp(&bundle, &adjust) +#ifdef __tilegx__ + || bt_has_add_sp(&bundle, &adjust, moveli_args) +#endif + ) { + location->sp_location = SP_LOC_OFFSET; + + if (adjust <= 0) { + /* We are in prolog about to adjust + * SP. */ + location->sp_offset = 0; + } else { + /* We are in epilog restoring SP. */ + location->sp_offset = adjust; + } + + sp_determined = true; + } else { + if (bt_has_move_r52_sp(&bundle)) { + /* Maybe in prolog, creating an + * alloca-style frame. But maybe in + * the middle of a fixed-size frame + * clobbering r52 with SP. + */ + sp_moved_to_r52 = true; + } + + if (bt_modifies_sp(&bundle)) { + if (sp_moved_to_r52) { + /* We saw SP get saved into + * r52 earlier (or now), which + * must have been in the + * prolog, so we now know that + * SP is still holding the + * caller's sp value. + */ + location->sp_location = + SP_LOC_OFFSET; + location->sp_offset = 0; + } else { + /* Someone must have saved + * aside the caller's SP value + * into r52, so r52 holds the + * current value. + */ + location->sp_location = + SP_LOC_IN_R52; + } + sp_determined = true; + } + } + +#ifdef __tilegx__ + /* Track moveli arguments for -m32 mode. */ + bt_update_moveli(&bundle, moveli_args); +#endif + } + + if (bt_has_iret(&bundle)) { + /* This is a terminating bundle. */ + seen_terminating_bundle = true; + continue; + } + + /* + * Try to determine caller's PC. + */ + + jrp_reg = -1; + has_jrp = bt_has_jrp(&bundle, &jrp_reg); + if (has_jrp) + seen_terminating_bundle = true; + + if (location->pc_location == PC_LOC_UNKNOWN) { + if (has_jrp) { + if (jrp_reg == TREG_LR && !lr_modified) { + /* Looks like a leaf function, or else + * lr is already restored. */ + location->pc_location = + PC_LOC_IN_LR; + } else { + location->pc_location = + PC_LOC_ON_STACK; + } + } else if (bt_has_sw_sp_lr(&bundle)) { + /* In prolog, spilling initial lr to stack. */ + location->pc_location = PC_LOC_IN_LR; + } else if (bt_modifies_lr(&bundle)) { + lr_modified = true; + } + } + } +} + +/* Initializes a backtracer to start from the given location. + * + * If the frame pointer cannot be determined it is set to -1. + * + * state: The state to be filled in. + * read_memory_func: A callback that reads memory. + * read_memory_func_extra: An arbitrary argument to read_memory_func. + * pc: The current PC. + * lr: The current value of the 'lr' register. + * sp: The current value of the 'sp' register. + * r52: The current value of the 'r52' register. + */ +void backtrace_init(BacktraceIterator *state, + BacktraceMemoryReader read_memory_func, + void *read_memory_func_extra, + unsigned long pc, unsigned long lr, + unsigned long sp, unsigned long r52) +{ + CallerLocation location; + unsigned long fp, initial_frame_caller_pc; + + /* Find out where we are in the initial frame. */ + find_caller_pc_and_caller_sp(&location, pc, + read_memory_func, read_memory_func_extra); + + switch (location.sp_location) { + case SP_LOC_UNKNOWN: + /* Give up. */ + fp = -1; + break; + + case SP_LOC_IN_R52: + fp = r52; + break; + + case SP_LOC_OFFSET: + fp = sp + location.sp_offset; + break; + + default: + /* Give up. */ + fp = -1; + break; + } + + /* If the frame pointer is not aligned to the basic word size + * something terrible happened and we should mark it as invalid. + */ + if (fp % sizeof(bt_int_reg_t) != 0) + fp = -1; + + /* -1 means "don't know initial_frame_caller_pc". */ + initial_frame_caller_pc = -1; + + switch (location.pc_location) { + case PC_LOC_UNKNOWN: + /* Give up. */ + fp = -1; + break; + + case PC_LOC_IN_LR: + if (lr == 0 || lr % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0) { + /* Give up. */ + fp = -1; + } else { + initial_frame_caller_pc = lr; + } + break; + + case PC_LOC_ON_STACK: + /* Leave initial_frame_caller_pc as -1, + * meaning check the stack. + */ + break; + + default: + /* Give up. */ + fp = -1; + break; + } + + state->pc = pc; + state->sp = sp; + state->fp = fp; + state->initial_frame_caller_pc = initial_frame_caller_pc; + state->read_memory_func = read_memory_func; + state->read_memory_func_extra = read_memory_func_extra; +} + +/* Handle the case where the register holds more bits than the VA. */ +static bool valid_addr_reg(bt_int_reg_t reg) +{ + return ((unsigned long)reg == reg); +} + +/* Advances the backtracing state to the calling frame, returning + * true iff successful. + */ +bool backtrace_next(BacktraceIterator *state) +{ + unsigned long next_fp, next_pc; + bt_int_reg_t next_frame[2]; + + if (state->fp == -1) { + /* No parent frame. */ + return false; + } + + /* Try to read the frame linkage data chaining to the next function. */ + if (!state->read_memory_func(&next_frame, state->fp, sizeof next_frame, + state->read_memory_func_extra)) { + return false; + } + + next_fp = next_frame[1]; + if (!valid_addr_reg(next_frame[1]) || + next_fp % sizeof(bt_int_reg_t) != 0) { + /* Caller's frame pointer is suspect, so give up. */ + return false; + } + + if (state->initial_frame_caller_pc != -1) { + /* We must be in the initial stack frame and already know the + * caller PC. + */ + next_pc = state->initial_frame_caller_pc; + + /* Force reading stack next time, in case we were in the + * initial frame. We don't do this above just to paranoidly + * avoid changing the struct at all when we return false. + */ + state->initial_frame_caller_pc = -1; + } else { + /* Get the caller PC from the frame linkage area. */ + next_pc = next_frame[0]; + if (!valid_addr_reg(next_frame[0]) || next_pc == 0 || + next_pc % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0) { + /* The PC is suspect, so give up. */ + return false; + } + } + + /* Update state to become the caller's stack frame. */ + state->pc = next_pc; + state->sp = state->fp; + state->fp = next_fp; + + return true; +} diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c new file mode 100644 index 00000000..d67459b9 --- /dev/null +++ b/arch/tile/kernel/compat.c @@ -0,0 +1,118 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Adjust unistd.h to provide 32-bit numbers and functions. */ +#define __SYSCALL_COMPAT + +#include <linux/compat.h> +#include <linux/syscalls.h> +#include <linux/kdev_t.h> +#include <linux/fs.h> +#include <linux/fcntl.h> +#include <linux/uaccess.h> +#include <linux/signal.h> +#include <asm/syscalls.h> + +/* + * Syscalls that take 64-bit numbers traditionally take them in 32-bit + * "high" and "low" value parts on 32-bit architectures. + * In principle, one could imagine passing some register arguments as + * fully 64-bit on TILE-Gx in 32-bit mode, but it seems easier to + * adapt the usual convention. + */ + +long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high) +{ + return sys_truncate(filename, ((loff_t)high << 32) | low); +} + +long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high) +{ + return sys_ftruncate(fd, ((loff_t)high << 32) | low); +} + +long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count, + u32 dummy, u32 low, u32 high) +{ + return sys_pread64(fd, ubuf, count, ((loff_t)high << 32) | low); +} + +long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count, + u32 dummy, u32 low, u32 high) +{ + return sys_pwrite64(fd, ubuf, count, ((loff_t)high << 32) | low); +} + +long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len) +{ + return sys_lookup_dcookie(((loff_t)high << 32) | low, buf, len); +} + +long compat_sys_sync_file_range2(int fd, unsigned int flags, + u32 offset_lo, u32 offset_hi, + u32 nbytes_lo, u32 nbytes_hi) +{ + return sys_sync_file_range(fd, ((loff_t)offset_hi << 32) | offset_lo, + ((loff_t)nbytes_hi << 32) | nbytes_lo, + flags); +} + +long compat_sys_fallocate(int fd, int mode, + u32 offset_lo, u32 offset_hi, + u32 len_lo, u32 len_hi) +{ + return sys_fallocate(fd, mode, ((loff_t)offset_hi << 32) | offset_lo, + ((loff_t)len_hi << 32) | len_lo); +} + + + +long compat_sys_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval) +{ + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + ret = sys_sched_rr_get_interval(pid, + (struct timespec __force __user *)&t); + set_fs(old_fs); + if (put_compat_timespec(&t, interval)) + return -EFAULT; + return ret; +} + +/* Provide the compat syscall number to call mapping. */ +#undef __SYSCALL +#define __SYSCALL(nr, call) [nr] = (call), + +/* See comments in sys.c */ +#define compat_sys_fadvise64_64 sys32_fadvise64_64 +#define compat_sys_readahead sys32_readahead + +/* Call the trampolines to manage pt_regs where necessary. */ +#define compat_sys_execve _compat_sys_execve +#define compat_sys_sigaltstack _compat_sys_sigaltstack +#define compat_sys_rt_sigreturn _compat_sys_rt_sigreturn +#define sys_clone _sys_clone + +/* + * Note that we can't include <linux/unistd.h> here since the header + * guard will defeat us; <asm/unistd.h> checks for __SYSCALL as well. + */ +void *compat_sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls-1] = sys_ni_syscall, +#include <asm/unistd.h> +}; diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c new file mode 100644 index 00000000..cdef6e5e --- /dev/null +++ b/arch/tile/kernel/compat_signal.c @@ -0,0 +1,431 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/personality.h> +#include <linux/suspend.h> +#include <linux/ptrace.h> +#include <linux/elf.h> +#include <linux/compat.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <asm/processor.h> +#include <asm/ucontext.h> +#include <asm/sigframe.h> +#include <asm/syscalls.h> +#include <arch/interrupts.h> + +struct compat_sigaction { + compat_uptr_t sa_handler; + compat_ulong_t sa_flags; + compat_uptr_t sa_restorer; + sigset_t sa_mask __packed; +}; + +struct compat_sigaltstack { + compat_uptr_t ss_sp; + int ss_flags; + compat_size_t ss_size; +}; + +struct compat_ucontext { + compat_ulong_t uc_flags; + compat_uptr_t uc_link; + struct compat_sigaltstack uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#define COMPAT_SI_PAD_SIZE ((SI_MAX_SIZE - 3 * sizeof(int)) / sizeof(int)) + +struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[COMPAT_SI_PAD_SIZE]; + + /* kill() */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + int _overrun_incr; /* amount to add to overrun */ + } _timer; + + /* POSIX.1b signals */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + unsigned int _addr; /* faulting insn/memory ref. */ +#ifdef __ARCH_SI_TRAPNO + int _trapno; /* TRAP # which caused the signal */ +#endif + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +}; + +struct compat_rt_sigframe { + unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */ + struct compat_siginfo info; + struct compat_ucontext uc; +}; + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act, + struct compat_sigaction __user *oact, + size_t sigsetsize) +{ + struct k_sigaction new_sa, old_sa; + int ret = -EINVAL; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + goto out; + + if (act) { + compat_uptr_t handler, restorer; + + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(handler, &act->sa_handler) || + __get_user(new_sa.sa.sa_flags, &act->sa_flags) || + __get_user(restorer, &act->sa_restorer) || + __copy_from_user(&new_sa.sa.sa_mask, &act->sa_mask, + sizeof(sigset_t))) + return -EFAULT; + new_sa.sa.sa_handler = compat_ptr(handler); + new_sa.sa.sa_restorer = compat_ptr(restorer); + } + + ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL); + + if (!ret && oact) { + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(ptr_to_compat(old_sa.sa.sa_handler), + &oact->sa_handler) || + __put_user(ptr_to_compat(old_sa.sa.sa_restorer), + &oact->sa_restorer) || + __put_user(old_sa.sa.sa_flags, &oact->sa_flags) || + __copy_to_user(&oact->sa_mask, &old_sa.sa.sa_mask, + sizeof(sigset_t))) + return -EFAULT; + } +out: + return ret; +} + +long compat_sys_rt_sigqueueinfo(int pid, int sig, + struct compat_siginfo __user *uinfo) +{ + siginfo_t info; + int ret; + mm_segment_t old_fs = get_fs(); + + if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + set_fs(KERNEL_DS); + ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __force __user *)&info); + set_fs(old_fs); + return ret; +} + +int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from) +{ + int err; + + if (!access_ok(VERIFY_WRITE, to, sizeof(struct compat_siginfo))) + return -EFAULT; + + /* If you change siginfo_t structure, please make sure that + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + + if (from->si_code < 0) { + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); + } else { + /* + * First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) + */ + err |= __put_user(from->_sifields._pad[0], + &to->_sifields._pad[0]); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); + /* FALL THROUGH */ + default: + case __SI_KILL >> 16: + err |= __put_user(from->si_uid, &to->si_uid); + break; + case __SI_POLL >> 16: + err |= __put_user(from->si_fd, &to->si_fd); + break; + case __SI_TIMER >> 16: + err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user(ptr_to_compat(from->si_ptr), + &to->si_ptr); + break; + /* This is not generated by the kernel as of now. */ + case __SI_RT >> 16: + case __SI_MESGQ >> 16: + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(from->si_int, &to->si_int); + break; + } + } + return err; +} + +int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) +{ + int err; + u32 ptr32; + + if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo))) + return -EFAULT; + + err = __get_user(to->si_signo, &from->si_signo); + err |= __get_user(to->si_errno, &from->si_errno); + err |= __get_user(to->si_code, &from->si_code); + + err |= __get_user(to->si_pid, &from->si_pid); + err |= __get_user(to->si_uid, &from->si_uid); + err |= __get_user(ptr32, &from->si_ptr); + to->si_ptr = compat_ptr(ptr32); + + return err; +} + +long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, + struct compat_sigaltstack __user *uoss_ptr, + struct pt_regs *regs) +{ + stack_t uss, uoss; + int ret; + mm_segment_t seg; + + if (uss_ptr) { + u32 ptr; + + memset(&uss, 0, sizeof(stack_t)); + if (!access_ok(VERIFY_READ, uss_ptr, sizeof(*uss_ptr)) || + __get_user(ptr, &uss_ptr->ss_sp) || + __get_user(uss.ss_flags, &uss_ptr->ss_flags) || + __get_user(uss.ss_size, &uss_ptr->ss_size)) + return -EFAULT; + uss.ss_sp = compat_ptr(ptr); + } + seg = get_fs(); + set_fs(KERNEL_DS); + ret = do_sigaltstack(uss_ptr ? (stack_t __user __force *)&uss : NULL, + (stack_t __user __force *)&uoss, + (unsigned long)compat_ptr(regs->sp)); + set_fs(seg); + if (ret >= 0 && uoss_ptr) { + if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(*uoss_ptr)) || + __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || + __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || + __put_user(uoss.ss_size, &uoss_ptr->ss_size)) + ret = -EFAULT; + } + return ret; +} + +/* The assembly shim for this function arranges to ignore the return value. */ +long compat_sys_rt_sigreturn(struct pt_regs *regs) +{ + struct compat_rt_sigframe __user *frame = + (struct compat_rt_sigframe __user *) compat_ptr(regs->sp); + sigset_t set; + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + goto badframe; + + if (compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0) + goto badframe; + + return 0; + +badframe: + signal_fault("bad sigreturn frame", regs, frame, 0); + return 0; +} + +/* + * Determine which stack to use.. + */ +static inline void __user *compat_get_sigframe(struct k_sigaction *ka, + struct pt_regs *regs, + size_t frame_size) +{ + unsigned long sp; + + /* Default to using normal stack */ + sp = (unsigned long)compat_ptr(regs->sp); + + /* + * If we are on the alternate signal stack and would overflow + * it, don't. Return an always-bogus address instead so we + * will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) + return (void __user __force *)-1UL; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(sp) == 0) + sp = current->sas_ss_sp + current->sas_ss_size; + } + + sp -= frame_size; + /* + * Align the stack pointer according to the TILE ABI, + * i.e. so that on function entry (sp & 15) == 0. + */ + sp &= -16UL; + return (void __user *) sp; +} + +int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + unsigned long restorer; + struct compat_rt_sigframe __user *frame; + int err = 0; + int usig; + + frame = compat_get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + usig = current_thread_info()->exec_domain + && current_thread_info()->exec_domain->signal_invmap + && sig < 32 + ? current_thread_info()->exec_domain->signal_invmap[sig] + : sig; + + /* Always write at least the signal number for the stack backtracer. */ + if (ka->sa.sa_flags & SA_SIGINFO) { + /* At sigreturn time, restore the callee-save registers too. */ + err |= copy_siginfo_to_user32(&frame->info, info); + regs->flags |= PT_FLAGS_RESTORE_REGS; + } else { + err |= __put_user(info->si_signo, &frame->info.si_signo); + } + + /* Create the ucontext. */ + err |= __clear_user(&frame->save_area, sizeof(frame->save_area)); + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(ptr_to_compat((void *)(current->sas_ss_sp)), + &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + restorer = VDSO_BASE; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ptr_to_compat_reg(ka->sa.sa_restorer); + + /* + * Set up registers for signal handler. + * Registers that we don't modify keep the value they had from + * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). + */ + regs->pc = ptr_to_compat_reg(ka->sa.sa_handler); + regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */ + regs->sp = ptr_to_compat_reg(frame); + regs->lr = restorer; + regs->regs[0] = (unsigned long) usig; + regs->regs[1] = ptr_to_compat_reg(&frame->info); + regs->regs[2] = ptr_to_compat_reg(&frame->uc); + regs->flags |= PT_FLAGS_CALLER_SAVES; + + /* + * Notify any tracer that was single-stepping it. + * The tracer may want to single-step inside the + * handler too. + */ + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); + + return 0; + +give_sigsegv: + signal_fault("bad setup frame", regs, frame, sig); + return -EFAULT; +} diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c new file mode 100644 index 00000000..afb9c9a0 --- /dev/null +++ b/arch/tile/kernel/early_printk.c @@ -0,0 +1,110 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/console.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/irqflags.h> +#include <asm/setup.h> +#include <hv/hypervisor.h> + +static void early_hv_write(struct console *con, const char *s, unsigned n) +{ + hv_console_write((HV_VirtAddr) s, n); +} + +static struct console early_hv_console = { + .name = "earlyhv", + .write = early_hv_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +/* Direct interface for emergencies */ +static struct console *early_console = &early_hv_console; +static int early_console_initialized; +static int early_console_complete; + +static void early_vprintk(const char *fmt, va_list ap) +{ + char buf[512]; + int n = vscnprintf(buf, sizeof(buf), fmt, ap); + early_console->write(early_console, buf, n); +} + +void early_printk(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + early_vprintk(fmt, ap); + va_end(ap); +} + +void early_panic(const char *fmt, ...) +{ + va_list ap; + arch_local_irq_disable_all(); + va_start(ap, fmt); + early_printk("Kernel panic - not syncing: "); + early_vprintk(fmt, ap); + early_console->write(early_console, "\n", 1); + va_end(ap); + dump_stack(); + hv_halt(); +} + +static int __initdata keep_early; + +static int __init setup_early_printk(char *str) +{ + if (early_console_initialized) + return 1; + + if (str != NULL && strncmp(str, "keep", 4) == 0) + keep_early = 1; + + early_console = &early_hv_console; + early_console_initialized = 1; + register_console(early_console); + + return 0; +} + +void __init disable_early_printk(void) +{ + early_console_complete = 1; + if (!early_console_initialized || !early_console) + return; + if (!keep_early) { + early_printk("disabling early console\n"); + unregister_console(early_console); + early_console_initialized = 0; + } else { + early_printk("keeping early console\n"); + } +} + +void warn_early_printk(void) +{ + if (early_console_complete || early_console_initialized) + return; + early_printk("\ +Machine shutting down before console output is fully initialized.\n\ +You may wish to reboot and add the option 'earlyprintk' to your\n\ +boot command line to see any diagnostic early console output.\n\ +"); +} + +early_param("earlyprintk", setup_early_printk); diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S new file mode 100644 index 00000000..ec91568d --- /dev/null +++ b/arch/tile/kernel/entry.S @@ -0,0 +1,111 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/linkage.h> +#include <linux/unistd.h> +#include <asm/irqflags.h> +#include <asm/processor.h> +#include <arch/abi.h> +#include <arch/spr_def.h> + +#ifdef __tilegx__ +#define bnzt bnezt +#endif + +STD_ENTRY(current_text_addr) + { move r0, lr; jrp lr } + STD_ENDPROC(current_text_addr) + +/* + * Implement execve(). The i386 code has a note that forking from kernel + * space results in no copy on write until the execve, so we should be + * careful not to write to the stack here. + */ +STD_ENTRY(kernel_execve) + moveli TREG_SYSCALL_NR_NAME, __NR_execve + swint1 + jrp lr + STD_ENDPROC(kernel_execve) + +/* + * We don't run this function directly, but instead copy it to a page + * we map into every user process. See vdso_setup(). + * + * Note that libc has a copy of this function that it uses to compare + * against the PC when a stack backtrace ends, so if this code is + * changed, the libc implementation(s) should also be updated. + */ + .pushsection .data +ENTRY(__rt_sigreturn) + moveli TREG_SYSCALL_NR_NAME,__NR_rt_sigreturn + swint1 + ENDPROC(__rt_sigreturn) + ENTRY(__rt_sigreturn_end) + .popsection + +STD_ENTRY(dump_stack) + { move r2, lr; lnk r1 } + { move r4, r52; addli r1, r1, dump_stack - . } + { move r3, sp; j _dump_stack } + jrp lr /* keep backtracer happy */ + STD_ENDPROC(dump_stack) + +STD_ENTRY(KBacktraceIterator_init_current) + { move r2, lr; lnk r1 } + { move r4, r52; addli r1, r1, KBacktraceIterator_init_current - . } + { move r3, sp; j _KBacktraceIterator_init_current } + jrp lr /* keep backtracer happy */ + STD_ENDPROC(KBacktraceIterator_init_current) + +/* + * Reset our stack to r1/r2 (sp and ksp0+cpu respectively), then + * free the old stack (passed in r0) and re-invoke cpu_idle(). + * We update sp and ksp0 simultaneously to avoid backtracer warnings. + */ +STD_ENTRY(cpu_idle_on_new_stack) + { + move sp, r1 + mtspr SPR_SYSTEM_SAVE_K_0, r2 + } + jal free_thread_info + j cpu_idle + STD_ENDPROC(cpu_idle_on_new_stack) + +/* Loop forever on a nap during SMP boot. */ +STD_ENTRY(smp_nap) + nap + nop /* avoid provoking the icache prefetch with a jump */ + j smp_nap /* we are not architecturally guaranteed not to exit nap */ + jrp lr /* clue in the backtracer */ + STD_ENDPROC(smp_nap) + +/* + * Enable interrupts racelessly and then nap until interrupted. + * Architecturally, we are guaranteed that enabling interrupts via + * mtspr to INTERRUPT_CRITICAL_SECTION only interrupts at the next PC. + * This function's _cpu_idle_nap address is special; see intvec.S. + * When interrupted at _cpu_idle_nap, we bump the PC forward 8, and + * as a result return to the function that called _cpu_idle(). + */ +STD_ENTRY(_cpu_idle) + movei r1, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r1 + IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ + mtspr INTERRUPT_CRITICAL_SECTION, zero + .global _cpu_idle_nap +_cpu_idle_nap: + nap + nop /* avoid provoking the icache prefetch with a jump */ + jrp lr + STD_ENDPROC(_cpu_idle) diff --git a/arch/tile/kernel/futex_64.S b/arch/tile/kernel/futex_64.S new file mode 100644 index 00000000..f465d1ed --- /dev/null +++ b/arch/tile/kernel/futex_64.S @@ -0,0 +1,55 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Atomically access user memory, but use MMU to avoid propagating + * kernel exceptions. + */ + +#include <linux/linkage.h> +#include <asm/errno.h> +#include <asm/futex.h> +#include <asm/page.h> +#include <asm/processor.h> + +/* + * Provide a set of atomic memory operations supporting <asm/futex.h>. + * + * r0: user address to manipulate + * r1: new value to write, or for cmpxchg, old value to compare against + * r2: (cmpxchg only) new value to write + * + * Return __get_user struct, r0 with value, r1 with error. + */ +#define FUTEX_OP(name, ...) \ +STD_ENTRY(futex_##name) \ + __VA_ARGS__; \ + { \ + move r1, zero; \ + jrp lr \ + }; \ + STD_ENDPROC(futex_##name); \ + .pushsection __ex_table,"a"; \ + .quad 1b, get_user_fault; \ + .popsection + + .pushsection .fixup,"ax" +get_user_fault: + { movei r1, -EFAULT; jrp lr } + ENDPROC(get_user_fault) + .popsection + +FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2) +FUTEX_OP(set, 1: exch4 r0, r0, r1) +FUTEX_OP(add, 1: fetchadd4 r0, r0, r1) +FUTEX_OP(or, 1: fetchor4 r0, r0, r1) +FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1) diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c new file mode 100644 index 00000000..8c41891a --- /dev/null +++ b/arch/tile/kernel/hardwall.c @@ -0,0 +1,837 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/fs.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/rwsem.h> +#include <linux/kprobes.h> +#include <linux/sched.h> +#include <linux/hardirq.h> +#include <linux/uaccess.h> +#include <linux/smp.h> +#include <linux/cdev.h> +#include <linux/compat.h> +#include <asm/hardwall.h> +#include <asm/traps.h> +#include <asm/siginfo.h> +#include <asm/irq_regs.h> + +#include <arch/interrupts.h> +#include <arch/spr_def.h> + + +/* + * This data structure tracks the rectangle data, etc., associated + * one-to-one with a "struct file *" from opening HARDWALL_FILE. + * Note that the file's private data points back to this structure. + */ +struct hardwall_info { + struct list_head list; /* "rectangles" list */ + struct list_head task_head; /* head of tasks in this hardwall */ + struct cpumask cpumask; /* cpus in the rectangle */ + int ulhc_x; /* upper left hand corner x coord */ + int ulhc_y; /* upper left hand corner y coord */ + int width; /* rectangle width */ + int height; /* rectangle height */ + int id; /* integer id for this hardwall */ + int teardown_in_progress; /* are we tearing this one down? */ +}; + +/* Currently allocated hardwall rectangles */ +static LIST_HEAD(rectangles); + +/* /proc/tile/hardwall */ +static struct proc_dir_entry *hardwall_proc_dir; + +/* Functions to manage files in /proc/tile/hardwall. */ +static void hardwall_add_proc(struct hardwall_info *rect); +static void hardwall_remove_proc(struct hardwall_info *rect); + +/* + * Guard changes to the hardwall data structures. + * This could be finer grained (e.g. one lock for the list of hardwall + * rectangles, then separate embedded locks for each one's list of tasks), + * but there are subtle correctness issues when trying to start with + * a task's "hardwall" pointer and lock the correct rectangle's embedded + * lock in the presence of a simultaneous deactivation, so it seems + * easier to have a single lock, given that none of these data + * structures are touched very frequently during normal operation. + */ +static DEFINE_SPINLOCK(hardwall_lock); + +/* Allow disabling UDN access. */ +static int udn_disabled; +static int __init noudn(char *str) +{ + pr_info("User-space UDN access is disabled\n"); + udn_disabled = 1; + return 0; +} +early_param("noudn", noudn); + + +/* + * Low-level primitives + */ + +/* Set a CPU bit if the CPU is online. */ +#define cpu_online_set(cpu, dst) do { \ + if (cpu_online(cpu)) \ + cpumask_set_cpu(cpu, dst); \ +} while (0) + + +/* Does the given rectangle contain the given x,y coordinate? */ +static int contains(struct hardwall_info *r, int x, int y) +{ + return (x >= r->ulhc_x && x < r->ulhc_x + r->width) && + (y >= r->ulhc_y && y < r->ulhc_y + r->height); +} + +/* Compute the rectangle parameters and validate the cpumask. */ +static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) +{ + int x, y, cpu, ulhc, lrhc; + + /* The first cpu is the ULHC, the last the LRHC. */ + ulhc = find_first_bit(cpumask_bits(mask), nr_cpumask_bits); + lrhc = find_last_bit(cpumask_bits(mask), nr_cpumask_bits); + + /* Compute the rectangle attributes from the cpus. */ + r->ulhc_x = cpu_x(ulhc); + r->ulhc_y = cpu_y(ulhc); + r->width = cpu_x(lrhc) - r->ulhc_x + 1; + r->height = cpu_y(lrhc) - r->ulhc_y + 1; + cpumask_copy(&r->cpumask, mask); + r->id = ulhc; /* The ulhc cpu id can be the hardwall id. */ + + /* Width and height must be positive */ + if (r->width <= 0 || r->height <= 0) + return -EINVAL; + + /* Confirm that the cpumask is exactly the rectangle. */ + for (y = 0, cpu = 0; y < smp_height; ++y) + for (x = 0; x < smp_width; ++x, ++cpu) + if (cpumask_test_cpu(cpu, mask) != contains(r, x, y)) + return -EINVAL; + + /* + * Note that offline cpus can't be drained when this UDN + * rectangle eventually closes. We used to detect this + * situation and print a warning, but it annoyed users and + * they ignored it anyway, so now we just return without a + * warning. + */ + return 0; +} + +/* Do the two given rectangles overlap on any cpu? */ +static int overlaps(struct hardwall_info *a, struct hardwall_info *b) +{ + return a->ulhc_x + a->width > b->ulhc_x && /* A not to the left */ + b->ulhc_x + b->width > a->ulhc_x && /* B not to the left */ + a->ulhc_y + a->height > b->ulhc_y && /* A not above */ + b->ulhc_y + b->height > a->ulhc_y; /* B not above */ +} + + +/* + * Hardware management of hardwall setup, teardown, trapping, + * and enabling/disabling PL0 access to the networks. + */ + +/* Bit field values to mask together for writes to SPR_XDN_DIRECTION_PROTECT */ +enum direction_protect { + N_PROTECT = (1 << 0), + E_PROTECT = (1 << 1), + S_PROTECT = (1 << 2), + W_PROTECT = (1 << 3) +}; + +static void enable_firewall_interrupts(void) +{ + arch_local_irq_unmask_now(INT_UDN_FIREWALL); +} + +static void disable_firewall_interrupts(void) +{ + arch_local_irq_mask_now(INT_UDN_FIREWALL); +} + +/* Set up hardwall on this cpu based on the passed hardwall_info. */ +static void hardwall_setup_ipi_func(void *info) +{ + struct hardwall_info *r = info; + int cpu = smp_processor_id(); + int x = cpu % smp_width; + int y = cpu / smp_width; + int bits = 0; + if (x == r->ulhc_x) + bits |= W_PROTECT; + if (x == r->ulhc_x + r->width - 1) + bits |= E_PROTECT; + if (y == r->ulhc_y) + bits |= N_PROTECT; + if (y == r->ulhc_y + r->height - 1) + bits |= S_PROTECT; + BUG_ON(bits == 0); + __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, bits); + enable_firewall_interrupts(); + +} + +/* Set up all cpus on edge of rectangle to enable/disable hardwall SPRs. */ +static void hardwall_setup(struct hardwall_info *r) +{ + int x, y, cpu, delta; + struct cpumask rect_cpus; + + cpumask_clear(&rect_cpus); + + /* First include the top and bottom edges */ + cpu = r->ulhc_y * smp_width + r->ulhc_x; + delta = (r->height - 1) * smp_width; + for (x = 0; x < r->width; ++x, ++cpu) { + cpu_online_set(cpu, &rect_cpus); + cpu_online_set(cpu + delta, &rect_cpus); + } + + /* Then the left and right edges */ + cpu -= r->width; + delta = r->width - 1; + for (y = 0; y < r->height; ++y, cpu += smp_width) { + cpu_online_set(cpu, &rect_cpus); + cpu_online_set(cpu + delta, &rect_cpus); + } + + /* Then tell all the cpus to set up their protection SPR */ + on_each_cpu_mask(&rect_cpus, hardwall_setup_ipi_func, r, 1); +} + +void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) +{ + struct hardwall_info *rect; + struct task_struct *p; + struct siginfo info; + int x, y; + int cpu = smp_processor_id(); + int found_processes; + unsigned long flags; + + struct pt_regs *old_regs = set_irq_regs(regs); + irq_enter(); + + /* This tile trapped a network access; find the rectangle. */ + x = cpu % smp_width; + y = cpu / smp_width; + spin_lock_irqsave(&hardwall_lock, flags); + list_for_each_entry(rect, &rectangles, list) { + if (contains(rect, x, y)) + break; + } + + /* + * It shouldn't be possible not to find this cpu on the + * rectangle list, since only cpus in rectangles get hardwalled. + * The hardwall is only removed after the UDN is drained. + */ + BUG_ON(&rect->list == &rectangles); + + /* + * If we already started teardown on this hardwall, don't worry; + * the abort signal has been sent and we are just waiting for things + * to quiesce. + */ + if (rect->teardown_in_progress) { + pr_notice("cpu %d: detected hardwall violation %#lx" + " while teardown already in progress\n", + cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT)); + goto done; + } + + /* + * Kill off any process that is activated in this rectangle. + * We bypass security to deliver the signal, since it must be + * one of the activated processes that generated the UDN + * message that caused this trap, and all the activated + * processes shared a single open file so are pretty tightly + * bound together from a security point of view to begin with. + */ + rect->teardown_in_progress = 1; + wmb(); /* Ensure visibility of rectangle before notifying processes. */ + pr_notice("cpu %d: detected hardwall violation %#lx...\n", + cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT)); + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_HARDWALL; + found_processes = 0; + list_for_each_entry(p, &rect->task_head, thread.hardwall_list) { + BUG_ON(p->thread.hardwall != rect); + if (!(p->flags & PF_EXITING)) { + found_processes = 1; + pr_notice("hardwall: killing %d\n", p->pid); + do_send_sig_info(info.si_signo, &info, p, false); + } + } + if (!found_processes) + pr_notice("hardwall: no associated processes!\n"); + + done: + spin_unlock_irqrestore(&hardwall_lock, flags); + + /* + * We have to disable firewall interrupts now, or else when we + * return from this handler, we will simply re-interrupt back to + * it. However, we can't clear the protection bits, since we + * haven't yet drained the network, and that would allow packets + * to cross out of the hardwall region. + */ + disable_firewall_interrupts(); + + irq_exit(); + set_irq_regs(old_regs); +} + +/* Allow access from user space to the UDN. */ +void grant_network_mpls(void) +{ + __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_0, 1); + __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_0, 1); + __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_0, 1); + __insn_mtspr(SPR_MPL_UDN_TIMER_SET_0, 1); +#if !CHIP_HAS_REV1_XDN() + __insn_mtspr(SPR_MPL_UDN_REFILL_SET_0, 1); + __insn_mtspr(SPR_MPL_UDN_CA_SET_0, 1); +#endif +} + +/* Deny access from user space to the UDN. */ +void restrict_network_mpls(void) +{ + __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_1, 1); + __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_1, 1); + __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_1, 1); + __insn_mtspr(SPR_MPL_UDN_TIMER_SET_1, 1); +#if !CHIP_HAS_REV1_XDN() + __insn_mtspr(SPR_MPL_UDN_REFILL_SET_1, 1); + __insn_mtspr(SPR_MPL_UDN_CA_SET_1, 1); +#endif +} + + +/* + * Code to create, activate, deactivate, and destroy hardwall rectangles. + */ + +/* Create a hardwall for the given rectangle */ +static struct hardwall_info *hardwall_create( + size_t size, const unsigned char __user *bits) +{ + struct hardwall_info *iter, *rect; + struct cpumask mask; + unsigned long flags; + int rc; + + /* Reject crazy sizes out of hand, a la sys_mbind(). */ + if (size > PAGE_SIZE) + return ERR_PTR(-EINVAL); + + /* Copy whatever fits into a cpumask. */ + if (copy_from_user(&mask, bits, min(sizeof(struct cpumask), size))) + return ERR_PTR(-EFAULT); + + /* + * If the size was short, clear the rest of the mask; + * otherwise validate that the rest of the user mask was zero + * (we don't try hard to be efficient when validating huge masks). + */ + if (size < sizeof(struct cpumask)) { + memset((char *)&mask + size, 0, sizeof(struct cpumask) - size); + } else if (size > sizeof(struct cpumask)) { + size_t i; + for (i = sizeof(struct cpumask); i < size; ++i) { + char c; + if (get_user(c, &bits[i])) + return ERR_PTR(-EFAULT); + if (c) + return ERR_PTR(-EINVAL); + } + } + + /* Allocate a new rectangle optimistically. */ + rect = kmalloc(sizeof(struct hardwall_info), + GFP_KERNEL | __GFP_ZERO); + if (rect == NULL) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&rect->task_head); + + /* Compute the rectangle size and validate that it's plausible. */ + rc = setup_rectangle(rect, &mask); + if (rc != 0) { + kfree(rect); + return ERR_PTR(rc); + } + + /* Confirm it doesn't overlap and add it to the list. */ + spin_lock_irqsave(&hardwall_lock, flags); + list_for_each_entry(iter, &rectangles, list) { + if (overlaps(iter, rect)) { + spin_unlock_irqrestore(&hardwall_lock, flags); + kfree(rect); + return ERR_PTR(-EBUSY); + } + } + list_add_tail(&rect->list, &rectangles); + spin_unlock_irqrestore(&hardwall_lock, flags); + + /* Set up appropriate hardwalling on all affected cpus. */ + hardwall_setup(rect); + + /* Create a /proc/tile/hardwall entry. */ + hardwall_add_proc(rect); + + return rect; +} + +/* Activate a given hardwall on this cpu for this process. */ +static int hardwall_activate(struct hardwall_info *rect) +{ + int cpu, x, y; + unsigned long flags; + struct task_struct *p = current; + struct thread_struct *ts = &p->thread; + + /* Require a rectangle. */ + if (rect == NULL) + return -ENODATA; + + /* Not allowed to activate a rectangle that is being torn down. */ + if (rect->teardown_in_progress) + return -EINVAL; + + /* + * Get our affinity; if we're not bound to this tile uniquely, + * we can't access the network registers. + */ + if (cpumask_weight(&p->cpus_allowed) != 1) + return -EPERM; + + /* Make sure we are bound to a cpu in this rectangle. */ + cpu = smp_processor_id(); + BUG_ON(cpumask_first(&p->cpus_allowed) != cpu); + x = cpu_x(cpu); + y = cpu_y(cpu); + if (!contains(rect, x, y)) + return -EINVAL; + + /* If we are already bound to this hardwall, it's a no-op. */ + if (ts->hardwall) { + BUG_ON(ts->hardwall != rect); + return 0; + } + + /* Success! This process gets to use the user networks on this cpu. */ + ts->hardwall = rect; + spin_lock_irqsave(&hardwall_lock, flags); + list_add(&ts->hardwall_list, &rect->task_head); + spin_unlock_irqrestore(&hardwall_lock, flags); + grant_network_mpls(); + printk(KERN_DEBUG "Pid %d (%s) activated for hardwall: cpu %d\n", + p->pid, p->comm, cpu); + return 0; +} + +/* + * Deactivate a task's hardwall. Must hold hardwall_lock. + * This method may be called from free_task(), so we don't want to + * rely on too many fields of struct task_struct still being valid. + * We assume the cpus_allowed, pid, and comm fields are still valid. + */ +static void _hardwall_deactivate(struct task_struct *task) +{ + struct thread_struct *ts = &task->thread; + + if (cpumask_weight(&task->cpus_allowed) != 1) { + pr_err("pid %d (%s) releasing networks with" + " an affinity mask containing %d cpus!\n", + task->pid, task->comm, + cpumask_weight(&task->cpus_allowed)); + BUG(); + } + + BUG_ON(ts->hardwall == NULL); + ts->hardwall = NULL; + list_del(&ts->hardwall_list); + if (task == current) + restrict_network_mpls(); +} + +/* Deactivate a task's hardwall. */ +int hardwall_deactivate(struct task_struct *task) +{ + unsigned long flags; + int activated; + + spin_lock_irqsave(&hardwall_lock, flags); + activated = (task->thread.hardwall != NULL); + if (activated) + _hardwall_deactivate(task); + spin_unlock_irqrestore(&hardwall_lock, flags); + + if (!activated) + return -EINVAL; + + printk(KERN_DEBUG "Pid %d (%s) deactivated for hardwall: cpu %d\n", + task->pid, task->comm, smp_processor_id()); + return 0; +} + +/* Stop a UDN switch before draining the network. */ +static void stop_udn_switch(void *ignored) +{ +#if !CHIP_HAS_REV1_XDN() + /* Freeze the switch and the demux. */ + __insn_mtspr(SPR_UDN_SP_FREEZE, + SPR_UDN_SP_FREEZE__SP_FRZ_MASK | + SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK | + SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK); +#endif +} + +/* Drain all the state from a stopped switch. */ +static void drain_udn_switch(void *ignored) +{ +#if !CHIP_HAS_REV1_XDN() + int i; + int from_tile_words, ca_count; + + /* Empty out the 5 switch point fifos. */ + for (i = 0; i < 5; i++) { + int words, j; + __insn_mtspr(SPR_UDN_SP_FIFO_SEL, i); + words = __insn_mfspr(SPR_UDN_SP_STATE) & 0xF; + for (j = 0; j < words; j++) + (void) __insn_mfspr(SPR_UDN_SP_FIFO_DATA); + BUG_ON((__insn_mfspr(SPR_UDN_SP_STATE) & 0xF) != 0); + } + + /* Dump out the 3 word fifo at top. */ + from_tile_words = (__insn_mfspr(SPR_UDN_DEMUX_STATUS) >> 10) & 0x3; + for (i = 0; i < from_tile_words; i++) + (void) __insn_mfspr(SPR_UDN_DEMUX_WRITE_FIFO); + + /* Empty out demuxes. */ + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0)) + (void) __tile_udn0_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1)) + (void) __tile_udn1_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2)) + (void) __tile_udn2_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3)) + (void) __tile_udn3_receive(); + BUG_ON((__insn_mfspr(SPR_UDN_DATA_AVAIL) & 0xF) != 0); + + /* Empty out catch all. */ + ca_count = __insn_mfspr(SPR_UDN_DEMUX_CA_COUNT); + for (i = 0; i < ca_count; i++) + (void) __insn_mfspr(SPR_UDN_CA_DATA); + BUG_ON(__insn_mfspr(SPR_UDN_DEMUX_CA_COUNT) != 0); + + /* Clear demux logic. */ + __insn_mtspr(SPR_UDN_DEMUX_CTL, 1); + + /* + * Write switch state; experimentation indicates that 0xc3000 + * is an idle switch point. + */ + for (i = 0; i < 5; i++) { + __insn_mtspr(SPR_UDN_SP_FIFO_SEL, i); + __insn_mtspr(SPR_UDN_SP_STATE, 0xc3000); + } +#endif +} + +/* Reset random UDN state registers at boot up and during hardwall teardown. */ +void reset_network_state(void) +{ +#if !CHIP_HAS_REV1_XDN() + /* Reset UDN coordinates to their standard value */ + unsigned int cpu = smp_processor_id(); + unsigned int x = cpu % smp_width; + unsigned int y = cpu / smp_width; +#endif + + if (udn_disabled) + return; + +#if !CHIP_HAS_REV1_XDN() + __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7)); + + /* Set demux tags to predefined values and enable them. */ + __insn_mtspr(SPR_UDN_TAG_VALID, 0xf); + __insn_mtspr(SPR_UDN_TAG_0, (1 << 0)); + __insn_mtspr(SPR_UDN_TAG_1, (1 << 1)); + __insn_mtspr(SPR_UDN_TAG_2, (1 << 2)); + __insn_mtspr(SPR_UDN_TAG_3, (1 << 3)); +#endif + + /* Clear out other random registers so we have a clean slate. */ + __insn_mtspr(SPR_UDN_AVAIL_EN, 0); + __insn_mtspr(SPR_UDN_DEADLOCK_TIMEOUT, 0); +#if !CHIP_HAS_REV1_XDN() + __insn_mtspr(SPR_UDN_REFILL_EN, 0); + __insn_mtspr(SPR_UDN_DEMUX_QUEUE_SEL, 0); + __insn_mtspr(SPR_UDN_SP_FIFO_SEL, 0); +#endif + + /* Start the switch and demux. */ +#if !CHIP_HAS_REV1_XDN() + __insn_mtspr(SPR_UDN_SP_FREEZE, 0); +#endif +} + +/* Restart a UDN switch after draining. */ +static void restart_udn_switch(void *ignored) +{ + reset_network_state(); + + /* Disable firewall interrupts. */ + __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, 0); + disable_firewall_interrupts(); +} + +/* Build a struct cpumask containing all valid tiles in bounding rectangle. */ +static void fill_mask(struct hardwall_info *r, struct cpumask *result) +{ + int x, y, cpu; + + cpumask_clear(result); + + cpu = r->ulhc_y * smp_width + r->ulhc_x; + for (y = 0; y < r->height; ++y, cpu += smp_width - r->width) { + for (x = 0; x < r->width; ++x, ++cpu) + cpu_online_set(cpu, result); + } +} + +/* Last reference to a hardwall is gone, so clear the network. */ +static void hardwall_destroy(struct hardwall_info *rect) +{ + struct task_struct *task; + unsigned long flags; + struct cpumask mask; + + /* Make sure this file actually represents a rectangle. */ + if (rect == NULL) + return; + + /* + * Deactivate any remaining tasks. It's possible to race with + * some other thread that is exiting and hasn't yet called + * deactivate (when freeing its thread_info), so we carefully + * deactivate any remaining tasks before freeing the + * hardwall_info object itself. + */ + spin_lock_irqsave(&hardwall_lock, flags); + list_for_each_entry(task, &rect->task_head, thread.hardwall_list) + _hardwall_deactivate(task); + spin_unlock_irqrestore(&hardwall_lock, flags); + + /* Drain the UDN. */ + printk(KERN_DEBUG "Clearing hardwall rectangle %dx%d %d,%d\n", + rect->width, rect->height, rect->ulhc_x, rect->ulhc_y); + fill_mask(rect, &mask); + on_each_cpu_mask(&mask, stop_udn_switch, NULL, 1); + on_each_cpu_mask(&mask, drain_udn_switch, NULL, 1); + + /* Restart switch and disable firewall. */ + on_each_cpu_mask(&mask, restart_udn_switch, NULL, 1); + + /* Remove the /proc/tile/hardwall entry. */ + hardwall_remove_proc(rect); + + /* Now free the rectangle from the list. */ + spin_lock_irqsave(&hardwall_lock, flags); + BUG_ON(!list_empty(&rect->task_head)); + list_del(&rect->list); + spin_unlock_irqrestore(&hardwall_lock, flags); + kfree(rect); +} + + +static int hardwall_proc_show(struct seq_file *sf, void *v) +{ + struct hardwall_info *rect = sf->private; + char buf[256]; + + int rc = cpulist_scnprintf(buf, sizeof(buf), &rect->cpumask); + buf[rc++] = '\n'; + seq_write(sf, buf, rc); + return 0; +} + +static int hardwall_proc_open(struct inode *inode, + struct file *file) +{ + return single_open(file, hardwall_proc_show, PDE(inode)->data); +} + +static const struct file_operations hardwall_proc_fops = { + .open = hardwall_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void hardwall_add_proc(struct hardwall_info *rect) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%d", rect->id); + proc_create_data(buf, 0444, hardwall_proc_dir, + &hardwall_proc_fops, rect); +} + +static void hardwall_remove_proc(struct hardwall_info *rect) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%d", rect->id); + remove_proc_entry(buf, hardwall_proc_dir); +} + +int proc_pid_hardwall(struct task_struct *task, char *buffer) +{ + struct hardwall_info *rect = task->thread.hardwall; + return rect ? sprintf(buffer, "%d\n", rect->id) : 0; +} + +void proc_tile_hardwall_init(struct proc_dir_entry *root) +{ + if (!udn_disabled) + hardwall_proc_dir = proc_mkdir("hardwall", root); +} + + +/* + * Character device support via ioctl/close. + */ + +static long hardwall_ioctl(struct file *file, unsigned int a, unsigned long b) +{ + struct hardwall_info *rect = file->private_data; + + if (_IOC_TYPE(a) != HARDWALL_IOCTL_BASE) + return -EINVAL; + + switch (_IOC_NR(a)) { + case _HARDWALL_CREATE: + if (udn_disabled) + return -ENOSYS; + if (rect != NULL) + return -EALREADY; + rect = hardwall_create(_IOC_SIZE(a), + (const unsigned char __user *)b); + if (IS_ERR(rect)) + return PTR_ERR(rect); + file->private_data = rect; + return 0; + + case _HARDWALL_ACTIVATE: + return hardwall_activate(rect); + + case _HARDWALL_DEACTIVATE: + if (current->thread.hardwall != rect) + return -EINVAL; + return hardwall_deactivate(current); + + case _HARDWALL_GET_ID: + return rect ? rect->id : -EINVAL; + + default: + return -EINVAL; + } +} + +#ifdef CONFIG_COMPAT +static long hardwall_compat_ioctl(struct file *file, + unsigned int a, unsigned long b) +{ + /* Sign-extend the argument so it can be used as a pointer. */ + return hardwall_ioctl(file, a, (unsigned long)compat_ptr(b)); +} +#endif + +/* The user process closed the file; revoke access to user networks. */ +static int hardwall_flush(struct file *file, fl_owner_t owner) +{ + struct hardwall_info *rect = file->private_data; + struct task_struct *task, *tmp; + unsigned long flags; + + if (rect) { + /* + * NOTE: if multiple threads are activated on this hardwall + * file, the other threads will continue having access to the + * UDN until they are context-switched out and back in again. + * + * NOTE: A NULL files pointer means the task is being torn + * down, so in that case we also deactivate it. + */ + spin_lock_irqsave(&hardwall_lock, flags); + list_for_each_entry_safe(task, tmp, &rect->task_head, + thread.hardwall_list) { + if (task->files == owner || task->files == NULL) + _hardwall_deactivate(task); + } + spin_unlock_irqrestore(&hardwall_lock, flags); + } + + return 0; +} + +/* This hardwall is gone, so destroy it. */ +static int hardwall_release(struct inode *inode, struct file *file) +{ + hardwall_destroy(file->private_data); + return 0; +} + +static const struct file_operations dev_hardwall_fops = { + .open = nonseekable_open, + .unlocked_ioctl = hardwall_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = hardwall_compat_ioctl, +#endif + .flush = hardwall_flush, + .release = hardwall_release, +}; + +static struct cdev hardwall_dev; + +static int __init dev_hardwall_init(void) +{ + int rc; + dev_t dev; + + rc = alloc_chrdev_region(&dev, 0, 1, "hardwall"); + if (rc < 0) + return rc; + cdev_init(&hardwall_dev, &dev_hardwall_fops); + rc = cdev_add(&hardwall_dev, dev, 1); + if (rc < 0) + return rc; + + return 0; +} +late_initcall(dev_hardwall_init); diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S new file mode 100644 index 00000000..1a39b7c1 --- /dev/null +++ b/arch/tile/kernel/head_32.S @@ -0,0 +1,184 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE startup code. + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/thread_info.h> +#include <asm/processor.h> +#include <asm/asm-offsets.h> +#include <hv/hypervisor.h> +#include <arch/chip.h> +#include <arch/spr_def.h> + +/* + * This module contains the entry code for kernel images. It performs the + * minimal setup needed to call the generic C routines. + */ + + __HEAD +ENTRY(_start) + /* Notify the hypervisor of what version of the API we want */ + { + movei r1, TILE_CHIP + movei r2, TILE_CHIP_REV + } + { + moveli r0, _HV_VERSION + jal hv_init + } + /* Get a reasonable default ASID in r0 */ + { + move r0, zero + jal hv_inquire_asid + } + /* Install the default page table */ + { + moveli r6, lo16(swapper_pgprot - PAGE_OFFSET) + move r4, r0 /* use starting ASID of range for this page table */ + } + { + moveli r0, lo16(swapper_pg_dir - PAGE_OFFSET) + auli r6, r6, ha16(swapper_pgprot - PAGE_OFFSET) + } + { + lw r2, r6 + addi r6, r6, 4 + } + { + lw r3, r6 + auli r0, r0, ha16(swapper_pg_dir - PAGE_OFFSET) + } + { + inv r6 + move r1, zero /* high 32 bits of CPA is zero */ + } + { + moveli lr, lo16(1f) + move r5, zero + } + { + auli lr, lr, ha16(1f) + j hv_install_context + } +1: + + /* Get our processor number and save it away in SAVE_K_0. */ + jal hv_inquire_topology + mulll_uu r4, r1, r2 /* r1 == y, r2 == width */ + add r4, r4, r0 /* r0 == x, so r4 == cpu == y*width + x */ + +#ifdef CONFIG_SMP + /* + * Load up our per-cpu offset. When the first (master) tile + * boots, this value is still zero, so we will load boot_pc + * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. + * The master tile initializes the per-cpu offset array, so that + * when subsequent (secondary) tiles boot, they will instead load + * from their per-cpu versions of boot_sp and boot_pc. + */ + moveli r5, lo16(__per_cpu_offset) + auli r5, r5, ha16(__per_cpu_offset) + s2a r5, r4, r5 + lw r5, r5 + bnz r5, 1f + + /* + * Save the width and height to the smp_topology variable + * for later use. + */ + moveli r0, lo16(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + auli r0, r0, ha16(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + { + sw r0, r2 + addi r0, r0, (HV_TOPOLOGY_HEIGHT_OFFSET - HV_TOPOLOGY_WIDTH_OFFSET) + } + sw r0, r3 +1: +#else + move r5, zero +#endif + + /* Load and go with the correct pc and sp. */ + { + addli r1, r5, lo16(boot_sp) + addli r0, r5, lo16(boot_pc) + } + { + auli r1, r1, ha16(boot_sp) + auli r0, r0, ha16(boot_pc) + } + lw r0, r0 + lw sp, r1 + or r4, sp, r4 + mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ + addi sp, sp, -STACK_TOP_DELTA + { + move lr, zero /* stop backtraces in the called function */ + jr r0 + } + ENDPROC(_start) + +__PAGE_ALIGNED_BSS + .align PAGE_SIZE +ENTRY(empty_zero_page) + .fill PAGE_SIZE,1,0 + END(empty_zero_page) + + .macro PTE va, cpa, bits1, no_org=0 + .ifeq \no_org + .org swapper_pg_dir + HV_L1_INDEX(\va) * HV_PTE_SIZE + .endif + .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \ + (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) + .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << (HV_PTE_INDEX_PFN - 32)) + .endm + +__PAGE_ALIGNED_DATA + .align PAGE_SIZE +ENTRY(swapper_pg_dir) + /* + * All data pages from PAGE_OFFSET to MEM_USER_INTRPT are mapped as + * VA = PA + PAGE_OFFSET. We remap things with more precise access + * permissions and more respect for size of RAM later. + */ + .set addr, 0 + .rept (MEM_USER_INTRPT - PAGE_OFFSET) >> PGDIR_SHIFT + PTE addr + PAGE_OFFSET, addr, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ + (1 << (HV_PTE_INDEX_WRITABLE - 32)) + .set addr, addr + PGDIR_SIZE + .endr + + /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ + PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ + (1 << (HV_PTE_INDEX_EXECUTABLE - 32)) + .org swapper_pg_dir + HV_L1_SIZE + END(swapper_pg_dir) + + /* + * Isolate swapper_pgprot to its own cache line, since each cpu + * starting up will read it using VA-is-PA and local homing. + * This would otherwise likely conflict with other data on the cache + * line, once we have set its permanent home in the page tables. + */ + __INITDATA + .align CHIP_L2_LINE_SIZE() +ENTRY(swapper_pgprot) + PTE 0, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ + (1 << (HV_PTE_INDEX_WRITABLE - 32)), 1 + .align CHIP_L2_LINE_SIZE() + END(swapper_pgprot) diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S new file mode 100644 index 00000000..6bc3a932 --- /dev/null +++ b/arch/tile/kernel/head_64.S @@ -0,0 +1,269 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE startup code. + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/thread_info.h> +#include <asm/processor.h> +#include <asm/asm-offsets.h> +#include <hv/hypervisor.h> +#include <arch/chip.h> +#include <arch/spr_def.h> + +/* + * This module contains the entry code for kernel images. It performs the + * minimal setup needed to call the generic C routines. + */ + + __HEAD +ENTRY(_start) + /* Notify the hypervisor of what version of the API we want */ + { + movei r1, TILE_CHIP + movei r2, TILE_CHIP_REV + } + { + moveli r0, _HV_VERSION + jal hv_init + } + /* Get a reasonable default ASID in r0 */ + { + move r0, zero + jal hv_inquire_asid + } + + /* + * Install the default page table. The relocation required to + * statically define the table is a bit too complex, so we have + * to plug in the pointer from the L0 to the L1 table by hand. + * We only do this on the first cpu to boot, though, since the + * other CPUs should see a properly-constructed page table. + */ + { + v4int_l r2, zero, r0 /* ASID for hv_install_context */ + moveli r4, hw1_last(swapper_pgprot - PAGE_OFFSET) + } + { + shl16insli r4, r4, hw0(swapper_pgprot - PAGE_OFFSET) + } + { + ld r1, r4 /* access_pte for hv_install_context */ + } + { + moveli r0, hw1_last(.Lsv_data_pmd - PAGE_OFFSET) + moveli r6, hw1_last(temp_data_pmd - PAGE_OFFSET) + } + { + /* After initializing swapper_pgprot, HV_PTE_GLOBAL is set. */ + bfextu r7, r1, HV_PTE_INDEX_GLOBAL, HV_PTE_INDEX_GLOBAL + inv r4 + } + bnez r7, .Lno_write + { + shl16insli r0, r0, hw0(.Lsv_data_pmd - PAGE_OFFSET) + shl16insli r6, r6, hw0(temp_data_pmd - PAGE_OFFSET) + } + { + /* Cut off the low bits of the PT address. */ + shrui r6, r6, HV_LOG2_PAGE_TABLE_ALIGN + /* Start with our access pte. */ + move r5, r1 + } + { + /* Stuff the address into the page table pointer slot of the PTE. */ + bfins r5, r6, HV_PTE_INDEX_PTFN, \ + HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1 + } + { + /* Store the L0 data PTE. */ + st r0, r5 + addli r6, r6, (temp_code_pmd - temp_data_pmd) >> \ + HV_LOG2_PAGE_TABLE_ALIGN + } + { + addli r0, r0, .Lsv_code_pmd - .Lsv_data_pmd + bfins r5, r6, HV_PTE_INDEX_PTFN, \ + HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1 + } + /* Store the L0 code PTE. */ + st r0, r5 + +.Lno_write: + moveli lr, hw2_last(1f) + { + shl16insli lr, lr, hw1(1f) + moveli r0, hw1_last(swapper_pg_dir - PAGE_OFFSET) + } + { + shl16insli lr, lr, hw0(1f) + shl16insli r0, r0, hw0(swapper_pg_dir - PAGE_OFFSET) + } + { + move r3, zero + j hv_install_context + } +1: + + /* Install the interrupt base. */ + moveli r0, hw2_last(MEM_SV_START) + shl16insli r0, r0, hw1(MEM_SV_START) + shl16insli r0, r0, hw0(MEM_SV_START) + mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0 + + /* + * Get our processor number and save it away in SAVE_K_0. + * Extract stuff from the topology structure: r4 = y, r6 = x, + * r5 = width. FIXME: consider whether we want to just make these + * 64-bit values (and if so fix smp_topology write below, too). + */ + jal hv_inquire_topology + { + v4int_l r5, zero, r1 /* r5 = width */ + shrui r4, r0, 32 /* r4 = y */ + } + { + v4int_l r6, zero, r0 /* r6 = x */ + mul_lu_lu r4, r4, r5 + } + { + add r4, r4, r6 /* r4 == cpu == y*width + x */ + } + +#ifdef CONFIG_SMP + /* + * Load up our per-cpu offset. When the first (master) tile + * boots, this value is still zero, so we will load boot_pc + * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. + * The master tile initializes the per-cpu offset array, so that + * when subsequent (secondary) tiles boot, they will instead load + * from their per-cpu versions of boot_sp and boot_pc. + */ + moveli r5, hw2_last(__per_cpu_offset) + shl16insli r5, r5, hw1(__per_cpu_offset) + shl16insli r5, r5, hw0(__per_cpu_offset) + shl3add r5, r4, r5 + ld r5, r5 + bnez r5, 1f + + /* + * Save the width and height to the smp_topology variable + * for later use. + */ + moveli r0, hw2_last(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + shl16insli r0, r0, hw1(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + shl16insli r0, r0, hw0(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + st r0, r1 +1: +#else + move r5, zero +#endif + + /* Load and go with the correct pc and sp. */ + { + moveli r1, hw2_last(boot_sp) + moveli r0, hw2_last(boot_pc) + } + { + shl16insli r1, r1, hw1(boot_sp) + shl16insli r0, r0, hw1(boot_pc) + } + { + shl16insli r1, r1, hw0(boot_sp) + shl16insli r0, r0, hw0(boot_pc) + } + { + add r1, r1, r5 + add r0, r0, r5 + } + ld r0, r0 + ld sp, r1 + or r4, sp, r4 + mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ + addi sp, sp, -STACK_TOP_DELTA + { + move lr, zero /* stop backtraces in the called function */ + jr r0 + } + ENDPROC(_start) + +__PAGE_ALIGNED_BSS + .align PAGE_SIZE +ENTRY(empty_zero_page) + .fill PAGE_SIZE,1,0 + END(empty_zero_page) + + .macro PTE cpa, bits1 + .quad HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |\ + HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |\ + (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN) + .endm + +__PAGE_ALIGNED_DATA + .align PAGE_SIZE +ENTRY(swapper_pg_dir) + .org swapper_pg_dir + HV_L0_INDEX(PAGE_OFFSET) * HV_PTE_SIZE +.Lsv_data_pmd: + .quad 0 /* PTE temp_data_pmd - PAGE_OFFSET, 0 */ + .org swapper_pg_dir + HV_L0_INDEX(MEM_SV_START) * HV_PTE_SIZE +.Lsv_code_pmd: + .quad 0 /* PTE temp_code_pmd - PAGE_OFFSET, 0 */ + .org swapper_pg_dir + HV_L0_SIZE + END(swapper_pg_dir) + + .align HV_PAGE_TABLE_ALIGN +ENTRY(temp_data_pmd) + /* + * We fill the PAGE_OFFSET pmd with huge pages with + * VA = PA + PAGE_OFFSET. We remap things with more precise access + * permissions later. + */ + .set addr, 0 + .rept HV_L1_ENTRIES + PTE addr, HV_PTE_READABLE | HV_PTE_WRITABLE + .set addr, addr + HV_PAGE_SIZE_LARGE + .endr + .org temp_data_pmd + HV_L1_SIZE + END(temp_data_pmd) + + .align HV_PAGE_TABLE_ALIGN +ENTRY(temp_code_pmd) + /* + * We fill the MEM_SV_START pmd with huge pages with + * VA = PA + PAGE_OFFSET. We remap things with more precise access + * permissions later. + */ + .set addr, 0 + .rept HV_L1_ENTRIES + PTE addr, HV_PTE_READABLE | HV_PTE_EXECUTABLE + .set addr, addr + HV_PAGE_SIZE_LARGE + .endr + .org temp_code_pmd + HV_L1_SIZE + END(temp_code_pmd) + + /* + * Isolate swapper_pgprot to its own cache line, since each cpu + * starting up will read it using VA-is-PA and local homing. + * This would otherwise likely conflict with other data on the cache + * line, once we have set its permanent home in the page tables. + */ + __INITDATA + .align CHIP_L2_LINE_SIZE() +ENTRY(swapper_pgprot) + .quad HV_PTE_PRESENT | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) + .align CHIP_L2_LINE_SIZE() + END(swapper_pgprot) diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds new file mode 100644 index 00000000..2b7cd0a6 --- /dev/null +++ b/arch/tile/kernel/hvglue.lds @@ -0,0 +1,58 @@ +/* Hypervisor call vector addresses; see <hv/hypervisor.h> */ +hv_init = TEXT_OFFSET + 0x10020; +hv_install_context = TEXT_OFFSET + 0x10040; +hv_sysconf = TEXT_OFFSET + 0x10060; +hv_get_rtc = TEXT_OFFSET + 0x10080; +hv_set_rtc = TEXT_OFFSET + 0x100a0; +hv_flush_asid = TEXT_OFFSET + 0x100c0; +hv_flush_page = TEXT_OFFSET + 0x100e0; +hv_flush_pages = TEXT_OFFSET + 0x10100; +hv_restart = TEXT_OFFSET + 0x10120; +hv_halt = TEXT_OFFSET + 0x10140; +hv_power_off = TEXT_OFFSET + 0x10160; +hv_inquire_physical = TEXT_OFFSET + 0x10180; +hv_inquire_memory_controller = TEXT_OFFSET + 0x101a0; +hv_inquire_virtual = TEXT_OFFSET + 0x101c0; +hv_inquire_asid = TEXT_OFFSET + 0x101e0; +hv_nanosleep = TEXT_OFFSET + 0x10200; +hv_console_read_if_ready = TEXT_OFFSET + 0x10220; +hv_console_write = TEXT_OFFSET + 0x10240; +hv_downcall_dispatch = TEXT_OFFSET + 0x10260; +hv_inquire_topology = TEXT_OFFSET + 0x10280; +hv_fs_findfile = TEXT_OFFSET + 0x102a0; +hv_fs_fstat = TEXT_OFFSET + 0x102c0; +hv_fs_pread = TEXT_OFFSET + 0x102e0; +hv_physaddr_read64 = TEXT_OFFSET + 0x10300; +hv_physaddr_write64 = TEXT_OFFSET + 0x10320; +hv_get_command_line = TEXT_OFFSET + 0x10340; +hv_set_caching = TEXT_OFFSET + 0x10360; +hv_bzero_page = TEXT_OFFSET + 0x10380; +hv_register_message_state = TEXT_OFFSET + 0x103a0; +hv_send_message = TEXT_OFFSET + 0x103c0; +hv_receive_message = TEXT_OFFSET + 0x103e0; +hv_inquire_context = TEXT_OFFSET + 0x10400; +hv_start_all_tiles = TEXT_OFFSET + 0x10420; +hv_dev_open = TEXT_OFFSET + 0x10440; +hv_dev_close = TEXT_OFFSET + 0x10460; +hv_dev_pread = TEXT_OFFSET + 0x10480; +hv_dev_pwrite = TEXT_OFFSET + 0x104a0; +hv_dev_poll = TEXT_OFFSET + 0x104c0; +hv_dev_poll_cancel = TEXT_OFFSET + 0x104e0; +hv_dev_preada = TEXT_OFFSET + 0x10500; +hv_dev_pwritea = TEXT_OFFSET + 0x10520; +hv_flush_remote = TEXT_OFFSET + 0x10540; +hv_console_putc = TEXT_OFFSET + 0x10560; +hv_inquire_tiles = TEXT_OFFSET + 0x10580; +hv_confstr = TEXT_OFFSET + 0x105a0; +hv_reexec = TEXT_OFFSET + 0x105c0; +hv_set_command_line = TEXT_OFFSET + 0x105e0; +hv_clear_intr = TEXT_OFFSET + 0x10600; +hv_enable_intr = TEXT_OFFSET + 0x10620; +hv_disable_intr = TEXT_OFFSET + 0x10640; +hv_raise_intr = TEXT_OFFSET + 0x10660; +hv_trigger_ipi = TEXT_OFFSET + 0x10680; +hv_store_mapping = TEXT_OFFSET + 0x106a0; +hv_inquire_realpa = TEXT_OFFSET + 0x106c0; +hv_flush_all = TEXT_OFFSET + 0x106e0; +hv_get_ipi_pte = TEXT_OFFSET + 0x10700; +hv_glue_internals = TEXT_OFFSET + 0x10720; diff --git a/arch/tile/kernel/init_task.c b/arch/tile/kernel/init_task.c new file mode 100644 index 00000000..928b3187 --- /dev/null +++ b/arch/tile/kernel/init_task.c @@ -0,0 +1,59 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/init_task.h> +#include <linux/mqueue.h> +#include <linux/module.h> +#include <linux/start_kernel.h> +#include <linux/uaccess.h> + +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); + +/* + * Initial thread structure. + * + * We need to make sure that this is THREAD_SIZE aligned due to the + * way process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union thread_union init_thread_union __init_task_data = { + INIT_THREAD_INFO(init_task) +}; + +/* + * Initial task structure. + * + * All other task structs will be allocated on slabs in fork.c + */ +struct task_struct init_task = INIT_TASK(init_task); +EXPORT_SYMBOL(init_task); + +/* + * per-CPU stack and boot info. + */ +DEFINE_PER_CPU(unsigned long, boot_sp) = + (unsigned long)init_stack + THREAD_SIZE; + +#ifdef CONFIG_SMP +DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel; +#else +/* + * The variable must be __initdata since it references __init code. + * With CONFIG_SMP it is per-cpu data, which is exempt from validation. + */ +unsigned long __initdata boot_pc = (unsigned long)start_kernel; +#endif diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S new file mode 100644 index 00000000..69435151 --- /dev/null +++ b/arch/tile/kernel/intvec_32.S @@ -0,0 +1,1944 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Linux interrupt vectors. + */ + +#include <linux/linkage.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/unistd.h> +#include <asm/ptrace.h> +#include <asm/thread_info.h> +#include <asm/irqflags.h> +#include <asm/atomic_32.h> +#include <asm/asm-offsets.h> +#include <hv/hypervisor.h> +#include <arch/abi.h> +#include <arch/interrupts.h> +#include <arch/spr_def.h> + +#ifdef CONFIG_PREEMPT +# error "No support for kernel preemption currently" +#endif + +#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) + +#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) + +#if !CHIP_HAS_WH64() + /* By making this an empty macro, we can use wh64 in the code. */ + .macro wh64 reg + .endm +#endif + + .macro push_reg reg, ptr=sp, delta=-4 + { + sw \ptr, \reg + addli \ptr, \ptr, \delta + } + .endm + + .macro pop_reg reg, ptr=sp, delta=4 + { + lw \reg, \ptr + addli \ptr, \ptr, \delta + } + .endm + + .macro pop_reg_zero reg, zreg, ptr=sp, delta=4 + { + move \zreg, zero + lw \reg, \ptr + addi \ptr, \ptr, \delta + } + .endm + + .macro push_extra_callee_saves reg + PTREGS_PTR(\reg, PTREGS_OFFSET_REG(51)) + push_reg r51, \reg + push_reg r50, \reg + push_reg r49, \reg + push_reg r48, \reg + push_reg r47, \reg + push_reg r46, \reg + push_reg r45, \reg + push_reg r44, \reg + push_reg r43, \reg + push_reg r42, \reg + push_reg r41, \reg + push_reg r40, \reg + push_reg r39, \reg + push_reg r38, \reg + push_reg r37, \reg + push_reg r36, \reg + push_reg r35, \reg + push_reg r34, \reg, PTREGS_OFFSET_BASE - PTREGS_OFFSET_REG(34) + .endm + + .macro panic str + .pushsection .rodata, "a" +1: + .asciz "\str" + .popsection + { + moveli r0, lo16(1b) + } + { + auli r0, r0, ha16(1b) + jal panic + } + .endm + +#ifdef __COLLECT_LINKER_FEEDBACK__ + .pushsection .text.intvec_feedback,"ax" +intvec_feedback: + .popsection +#endif + + /* + * Default interrupt handler. + * + * vecnum is where we'll put this code. + * c_routine is the C routine we'll call. + * + * The C routine is passed two arguments: + * - A pointer to the pt_regs state. + * - The interrupt vector number. + * + * The "processing" argument specifies the code for processing + * the interrupt. Defaults to "handle_interrupt". + */ + .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt + .org (\vecnum << 8) +intvec_\vecname: + .ifc \vecnum, INT_SWINT_1 + blz TREG_SYSCALL_NR_NAME, sys_cmpxchg + .endif + + /* Temporarily save a register so we have somewhere to work. */ + + mtspr SPR_SYSTEM_SAVE_K_1, r0 + mfspr r0, SPR_EX_CONTEXT_K_1 + + /* The cmpxchg code clears sp to force us to reset it here on fault. */ + { + bz sp, 2f + andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + } + + .ifc \vecnum, INT_DOUBLE_FAULT + /* + * For double-faults from user-space, fall through to the normal + * register save and stack setup path. Otherwise, it's the + * hypervisor giving us one last chance to dump diagnostics, and we + * branch to the kernel_double_fault routine to do so. + */ + bz r0, 1f + j _kernel_double_fault +1: + .else + /* + * If we're coming from user-space, then set sp to the top of + * the kernel stack. Otherwise, assume sp is already valid. + */ + { + bnz r0, 0f + move r0, sp + } + .endif + + .ifc \c_routine, do_page_fault + /* + * The page_fault handler may be downcalled directly by the + * hypervisor even when Linux is running and has ICS set. + * + * In this case the contents of EX_CONTEXT_K_1 reflect the + * previous fault and can't be relied on to choose whether or + * not to reinitialize the stack pointer. So we add a test + * to see whether SYSTEM_SAVE_K_2 has the high bit set, + * and if so we don't reinitialize sp, since we must be coming + * from Linux. (In fact the precise case is !(val & ~1), + * but any Linux PC has to have the high bit set.) + * + * Note that the hypervisor *always* sets SYSTEM_SAVE_K_2 for + * any path that turns into a downcall to one of our TLB handlers. + */ + mfspr r0, SPR_SYSTEM_SAVE_K_2 + { + blz r0, 0f /* high bit in S_S_1_2 is for a PC to use */ + move r0, sp + } + .endif + +2: + /* + * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and + * the current stack top in the higher bits. So we recover + * our stack top by just masking off the low bits, then + * point sp at the top aligned address on the actual stack page. + */ + mfspr r0, SPR_SYSTEM_SAVE_K_0 + mm r0, r0, zero, LOG2_THREAD_SIZE, 31 + +0: + /* + * Align the stack mod 64 so we can properly predict what + * cache lines we need to write-hint to reduce memory fetch + * latency as we enter the kernel. The layout of memory is + * as follows, with cache line 0 at the lowest VA, and cache + * line 4 just below the r0 value this "andi" computes. + * Note that we never write to cache line 4, and we skip + * cache line 1 for syscalls. + * + * cache line 4: ptregs padding (two words) + * cache line 3: r46...lr, pc, ex1, faultnum, orig_r0, flags, pad + * cache line 2: r30...r45 + * cache line 1: r14...r29 + * cache line 0: 2 x frame, r0..r13 + */ + andi r0, r0, -64 + + /* + * Push the first four registers on the stack, so that we can set + * them to vector-unique values before we jump to the common code. + * + * Registers are pushed on the stack as a struct pt_regs, + * with the sp initially just above the struct, and when we're + * done, sp points to the base of the struct, minus + * C_ABI_SAVE_AREA_SIZE, so we can directly jal to C code. + * + * This routine saves just the first four registers, plus the + * stack context so we can do proper backtracing right away, + * and defers to handle_interrupt to save the rest. + * The backtracer needs pc, ex1, lr, sp, r52, and faultnum. + */ + addli r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP) + wh64 r0 /* cache line 3 */ + { + sw r0, lr + addli r0, r0, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR + } + { + sw r0, sp + addli sp, r0, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_SP + } + { + sw sp, r52 + addli sp, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(52) + } + wh64 sp /* cache line 0 */ + { + sw sp, r1 + addli sp, sp, PTREGS_OFFSET_REG(2) - PTREGS_OFFSET_REG(1) + } + { + sw sp, r2 + addli sp, sp, PTREGS_OFFSET_REG(3) - PTREGS_OFFSET_REG(2) + } + { + sw sp, r3 + addli sp, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(3) + } + mfspr r0, SPR_EX_CONTEXT_K_0 + .ifc \processing,handle_syscall + /* + * Bump the saved PC by one bundle so that when we return, we won't + * execute the same swint instruction again. We need to do this while + * we're in the critical section. + */ + addi r0, r0, 8 + .endif + { + sw sp, r0 + addli sp, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC + } + mfspr r0, SPR_EX_CONTEXT_K_1 + { + sw sp, r0 + addi sp, sp, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1 + /* + * Use r0 for syscalls so it's a temporary; use r1 for interrupts + * so that it gets passed through unchanged to the handler routine. + * Note that the .if conditional confusingly spans bundles. + */ + .ifc \processing,handle_syscall + movei r0, \vecnum + } + { + sw sp, r0 + .else + movei r1, \vecnum + } + { + sw sp, r1 + .endif + addli sp, sp, PTREGS_OFFSET_REG(0) - PTREGS_OFFSET_FAULTNUM + } + mfspr r0, SPR_SYSTEM_SAVE_K_1 /* Original r0 */ + { + sw sp, r0 + addi sp, sp, -PTREGS_OFFSET_REG(0) - 4 + } + { + sw sp, zero /* write zero into "Next SP" frame pointer */ + addi sp, sp, -4 /* leave SP pointing at bottom of frame */ + } + .ifc \processing,handle_syscall + j handle_syscall + .else + /* + * Capture per-interrupt SPR context to registers. + * We overload the meaning of r3 on this path such that if its bit 31 + * is set, we have to mask all interrupts including NMIs before + * clearing the interrupt critical section bit. + * See discussion below at "finish_interrupt_save". + */ + .ifc \c_routine, do_page_fault + mfspr r2, SPR_SYSTEM_SAVE_K_3 /* address of page fault */ + mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */ + .else + .ifc \vecnum, INT_DOUBLE_FAULT + { + mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */ + movei r3, 0 + } + .else + .ifc \c_routine, do_trap + { + mfspr r2, GPV_REASON + movei r3, 0 + } + .else + .ifc \c_routine, op_handle_perf_interrupt + { + mfspr r2, PERF_COUNT_STS + movei r3, -1 /* not used, but set for consistency */ + } + .else +#if CHIP_HAS_AUX_PERF_COUNTERS() + .ifc \c_routine, op_handle_aux_perf_interrupt + { + mfspr r2, AUX_PERF_COUNT_STS + movei r3, -1 /* not used, but set for consistency */ + } + .else +#endif + movei r3, 0 +#if CHIP_HAS_AUX_PERF_COUNTERS() + .endif +#endif + .endif + .endif + .endif + .endif + /* Put function pointer in r0 */ + moveli r0, lo16(\c_routine) + { + auli r0, r0, ha16(\c_routine) + j \processing + } + .endif + ENDPROC(intvec_\vecname) + +#ifdef __COLLECT_LINKER_FEEDBACK__ + .pushsection .text.intvec_feedback,"ax" + .org (\vecnum << 5) + FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8) + jrp lr + .popsection +#endif + + .endm + + + /* + * Save the rest of the registers that we didn't save in the actual + * vector itself. We can't use r0-r10 inclusive here. + */ + .macro finish_interrupt_save, function + + /* If it's a syscall, save a proper orig_r0, otherwise just zero. */ + PTREGS_PTR(r52, PTREGS_OFFSET_ORIG_R0) + { + .ifc \function,handle_syscall + sw r52, r0 + .else + sw r52, zero + .endif + PTREGS_PTR(r52, PTREGS_OFFSET_TP) + } + + /* + * For ordinary syscalls, we save neither caller- nor callee- + * save registers, since the syscall invoker doesn't expect the + * caller-saves to be saved, and the called kernel functions will + * take care of saving the callee-saves for us. + * + * For interrupts we save just the caller-save registers. Saving + * them is required (since the "caller" can't save them). Again, + * the called kernel functions will restore the callee-save + * registers for us appropriately. + * + * On return, we normally restore nothing special for syscalls, + * and just the caller-save registers for interrupts. + * + * However, there are some important caveats to all this: + * + * - We always save a few callee-save registers to give us + * some scratchpad registers to carry across function calls. + * + * - fork/vfork/etc require us to save all the callee-save + * registers, which we do in PTREGS_SYSCALL_ALL_REGS, below. + * + * - We always save r0..r5 and r10 for syscalls, since we need + * to reload them a bit later for the actual kernel call, and + * since we might need them for -ERESTARTNOINTR, etc. + * + * - Before invoking a signal handler, we save the unsaved + * callee-save registers so they are visible to the + * signal handler or any ptracer. + * + * - If the unsaved callee-save registers are modified, we set + * a bit in pt_regs so we know to reload them from pt_regs + * and not just rely on the kernel function unwinding. + * (Done for ptrace register writes and SA_SIGINFO handler.) + */ + { + sw r52, tp + PTREGS_PTR(r52, PTREGS_OFFSET_REG(33)) + } + wh64 r52 /* cache line 2 */ + push_reg r33, r52 + push_reg r32, r52 + push_reg r31, r52 + .ifc \function,handle_syscall + push_reg r30, r52, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(30) + push_reg TREG_SYSCALL_NR_NAME, r52, \ + PTREGS_OFFSET_REG(5) - PTREGS_OFFSET_SYSCALL + .else + + push_reg r30, r52, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(30) + wh64 r52 /* cache line 1 */ + push_reg r29, r52 + push_reg r28, r52 + push_reg r27, r52 + push_reg r26, r52 + push_reg r25, r52 + push_reg r24, r52 + push_reg r23, r52 + push_reg r22, r52 + push_reg r21, r52 + push_reg r20, r52 + push_reg r19, r52 + push_reg r18, r52 + push_reg r17, r52 + push_reg r16, r52 + push_reg r15, r52 + push_reg r14, r52 + push_reg r13, r52 + push_reg r12, r52 + push_reg r11, r52 + push_reg r10, r52 + push_reg r9, r52 + push_reg r8, r52 + push_reg r7, r52 + push_reg r6, r52 + + .endif + + push_reg r5, r52 + sw r52, r4 + + /* Load tp with our per-cpu offset. */ +#ifdef CONFIG_SMP + { + mfspr r20, SPR_SYSTEM_SAVE_K_0 + moveli r21, lo16(__per_cpu_offset) + } + { + auli r21, r21, ha16(__per_cpu_offset) + mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1 + } + s2a r20, r20, r21 + lw tp, r20 +#else + move tp, zero +#endif + + /* + * If we will be returning to the kernel, we will need to + * reset the interrupt masks to the state they had before. + * Set DISABLE_IRQ in flags iff we came from PL1 with irqs disabled. + * We load flags in r32 here so we can jump to .Lrestore_regs + * directly after do_page_fault_ics() if necessary. + */ + mfspr r32, SPR_EX_CONTEXT_K_1 + { + andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS) + } + bzt r32, 1f /* zero if from user space */ + IRQS_DISABLED(r32) /* zero if irqs enabled */ +#if PT_FLAGS_DISABLE_IRQ != 1 +# error Value of IRQS_DISABLED used to set PT_FLAGS_DISABLE_IRQ; fix +#endif +1: + .ifnc \function,handle_syscall + /* Record the fact that we saved the caller-save registers above. */ + ori r32, r32, PT_FLAGS_CALLER_SAVES + .endif + sw r21, r32 + +#ifdef __COLLECT_LINKER_FEEDBACK__ + /* + * Notify the feedback routines that we were in the + * appropriate fixed interrupt vector area. Note that we + * still have ICS set at this point, so we can't invoke any + * atomic operations or we will panic. The feedback + * routines internally preserve r0..r10 and r30 up. + */ + .ifnc \function,handle_syscall + shli r20, r1, 5 + .else + moveli r20, INT_SWINT_1 << 5 + .endif + addli r20, r20, lo16(intvec_feedback) + auli r20, r20, ha16(intvec_feedback) + jalr r20 + + /* And now notify the feedback routines that we are here. */ + FEEDBACK_ENTER(\function) +#endif + + /* + * we've captured enough state to the stack (including in + * particular our EX_CONTEXT state) that we can now release + * the interrupt critical section and replace it with our + * standard "interrupts disabled" mask value. This allows + * synchronous interrupts (and profile interrupts) to punch + * through from this point onwards. + * + * If bit 31 of r3 is set during a non-NMI interrupt, we know we + * are on the path where the hypervisor has punched through our + * ICS with a page fault, so we call out to do_page_fault_ics() + * to figure out what to do with it. If the fault was in + * an atomic op, we unlock the atomic lock, adjust the + * saved register state a little, and return "zero" in r4, + * falling through into the normal page-fault interrupt code. + * If the fault was in a kernel-space atomic operation, then + * do_page_fault_ics() resolves it itself, returns "one" in r4, + * and as a result goes directly to restoring registers and iret, + * without trying to adjust the interrupt masks at all. + * The do_page_fault_ics() API involves passing and returning + * a five-word struct (in registers) to avoid writing the + * save and restore code here. + */ + .ifc \function,handle_nmi + IRQ_DISABLE_ALL(r20) + .else + .ifnc \function,handle_syscall + bgezt r3, 1f + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + jal do_page_fault_ics + } + FEEDBACK_REENTER(\function) + bzt r4, 1f + j .Lrestore_regs +1: + .endif + IRQ_DISABLE(r20, r21) + .endif + mtspr INTERRUPT_CRITICAL_SECTION, zero + +#if CHIP_HAS_WH64() + /* + * Prepare the first 256 stack bytes to be rapidly accessible + * without having to fetch the background data. We don't really + * know how far to write-hint, but kernel stacks generally + * aren't that big, and write-hinting here does take some time. + */ + addi r52, sp, -64 + { + wh64 r52 + addi r52, r52, -64 + } + { + wh64 r52 + addi r52, r52, -64 + } + { + wh64 r52 + addi r52, r52, -64 + } + wh64 r52 +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS + .ifnc \function,handle_nmi + /* + * We finally have enough state set up to notify the irq + * tracing code that irqs were disabled on entry to the handler. + * The TRACE_IRQS_OFF call clobbers registers r0-r29. + * For syscalls, we already have the register state saved away + * on the stack, so we don't bother to do any register saves here, + * and later we pop the registers back off the kernel stack. + * For interrupt handlers, save r0-r3 in callee-saved registers. + */ + .ifnc \function,handle_syscall + { move r30, r0; move r31, r1 } + { move r32, r2; move r33, r3 } + .endif + TRACE_IRQS_OFF + .ifnc \function,handle_syscall + { move r0, r30; move r1, r31 } + { move r2, r32; move r3, r33 } + .endif + .endif +#endif + + .endm + + .macro check_single_stepping, kind, not_single_stepping + /* + * Check for single stepping in user-level priv + * kind can be "normal", "ill", or "syscall" + * At end, if fall-thru + * r29: thread_info->step_state + * r28: &pt_regs->pc + * r27: pt_regs->pc + * r26: thread_info->step_state->buffer + */ + + /* Check for single stepping */ + GET_THREAD_INFO(r29) + { + /* Get pointer to field holding step state */ + addi r29, r29, THREAD_INFO_STEP_STATE_OFFSET + + /* Get pointer to EX1 in register state */ + PTREGS_PTR(r27, PTREGS_OFFSET_EX1) + } + { + /* Get pointer to field holding PC */ + PTREGS_PTR(r28, PTREGS_OFFSET_PC) + + /* Load the pointer to the step state */ + lw r29, r29 + } + /* Load EX1 */ + lw r27, r27 + { + /* Points to flags */ + addi r23, r29, SINGLESTEP_STATE_FLAGS_OFFSET + + /* No single stepping if there is no step state structure */ + bzt r29, \not_single_stepping + } + { + /* mask off ICS and any other high bits */ + andi r27, r27, SPR_EX_CONTEXT_1_1__PL_MASK + + /* Load pointer to single step instruction buffer */ + lw r26, r29 + } + /* Check priv state */ + bnz r27, \not_single_stepping + + /* Get flags */ + lw r22, r23 + { + /* Branch if single-step mode not enabled */ + bbnst r22, \not_single_stepping + + /* Clear enabled flag */ + andi r22, r22, ~SINGLESTEP_STATE_MASK_IS_ENABLED + } + .ifc \kind,normal + { + /* Load PC */ + lw r27, r28 + + /* Point to the entry containing the original PC */ + addi r24, r29, SINGLESTEP_STATE_ORIG_PC_OFFSET + } + { + /* Disable single stepping flag */ + sw r23, r22 + } + { + /* Get the original pc */ + lw r24, r24 + + /* See if the PC is at the start of the single step buffer */ + seq r25, r26, r27 + } + /* + * NOTE: it is really expected that the PC be in the single step buffer + * at this point + */ + bzt r25, \not_single_stepping + + /* Restore the original PC */ + sw r28, r24 + .else + .ifc \kind,syscall + { + /* Load PC */ + lw r27, r28 + + /* Point to the entry containing the next PC */ + addi r24, r29, SINGLESTEP_STATE_NEXT_PC_OFFSET + } + { + /* Increment the stopped PC by the bundle size */ + addi r26, r26, 8 + + /* Disable single stepping flag */ + sw r23, r22 + } + { + /* Get the next pc */ + lw r24, r24 + + /* + * See if the PC is one bundle past the start of the + * single step buffer + */ + seq r25, r26, r27 + } + { + /* + * NOTE: it is really expected that the PC be in the + * single step buffer at this point + */ + bzt r25, \not_single_stepping + } + /* Set to the next PC */ + sw r28, r24 + .else + { + /* Point to 3rd bundle in buffer */ + addi r25, r26, 16 + + /* Load PC */ + lw r27, r28 + } + { + /* Disable single stepping flag */ + sw r23, r22 + + /* See if the PC is in the single step buffer */ + slte_u r24, r26, r27 + } + { + slte_u r25, r27, r25 + + /* + * NOTE: it is really expected that the PC be in the + * single step buffer at this point + */ + bzt r24, \not_single_stepping + } + bzt r25, \not_single_stepping + .endif + .endif + .endm + + /* + * Redispatch a downcall. + */ + .macro dc_dispatch vecnum, vecname + .org (\vecnum << 8) +intvec_\vecname: + j hv_downcall_dispatch + ENDPROC(intvec_\vecname) + .endm + + /* + * Common code for most interrupts. The C function we're eventually + * going to is in r0, and the faultnum is in r1; the original + * values for those registers are on the stack. + */ + .pushsection .text.handle_interrupt,"ax" +handle_interrupt: + finish_interrupt_save handle_interrupt + + /* + * Check for if we are single stepping in user level. If so, then + * we need to restore the PC. + */ + + check_single_stepping normal, .Ldispatch_interrupt +.Ldispatch_interrupt: + + /* Jump to the C routine; it should enable irqs as soon as possible. */ + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_interrupt) + { + movei r30, 0 /* not an NMI */ + j interrupt_return + } + STD_ENDPROC(handle_interrupt) + +/* + * This routine takes a boolean in r30 indicating if this is an NMI. + * If so, we also expect a boolean in r31 indicating whether to + * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. + */ +STD_ENTRY(interrupt_return) + /* If we're resuming to kernel space, don't check thread flags. */ + { + bnz r30, .Lrestore_all /* NMIs don't special-case user-space */ + PTREGS_PTR(r29, PTREGS_OFFSET_EX1) + } + lw r29, r29 + andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + { + bzt r29, .Lresume_userspace + PTREGS_PTR(r29, PTREGS_OFFSET_PC) + } + + /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */ + { + lw r28, r29 + moveli r27, lo16(_cpu_idle_nap) + } + { + auli r27, r27, ha16(_cpu_idle_nap) + } + { + seq r27, r27, r28 + } + { + bbns r27, .Lrestore_all + addi r28, r28, 8 + } + sw r29, r28 + j .Lrestore_all + +.Lresume_userspace: + FEEDBACK_REENTER(interrupt_return) + + /* + * Use r33 to hold whether we have already loaded the callee-saves + * into ptregs. We don't want to do it twice in this loop, since + * then we'd clobber whatever changes are made by ptrace, etc. + * Get base of stack in r32. + */ + { + GET_THREAD_INFO(r32) + movei r33, 0 + } + +.Lretry_work_pending: + /* + * Disable interrupts so as to make sure we don't + * miss an interrupt that sets any of the thread flags (like + * need_resched or sigpending) between sampling and the iret. + * Routines like schedule() or do_signal() may re-enable + * interrupts before returning. + */ + IRQ_DISABLE(r20, r21) + TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */ + + + /* Check to see if there is any work to do before returning to user. */ + { + addi r29, r32, THREAD_INFO_FLAGS_OFFSET + moveli r1, lo16(_TIF_ALLWORK_MASK) + } + { + lw r29, r29 + auli r1, r1, ha16(_TIF_ALLWORK_MASK) + } + and r1, r29, r1 + bzt r1, .Lrestore_all + + /* + * Make sure we have all the registers saved for signal + * handling, notify-resume, or single-step. Call out to C + * code to figure out exactly what we need to do for each flag bit, + * then if necessary, reload the flags and recheck. + */ + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + bnz r33, 1f + } + push_extra_callee_saves r0 + movei r33, 1 +1: jal do_work_pending + bnz r0, .Lretry_work_pending + + /* + * In the NMI case we + * omit the call to single_process_check_nohz, which normally checks + * to see if we should start or stop the scheduler tick, because + * we can't call arbitrary Linux code from an NMI context. + * We always call the homecache TLB deferral code to re-trigger + * the deferral mechanism. + * + * The other chunk of responsibility this code has is to reset the + * interrupt masks appropriately to reset irqs and NMIs. We have + * to call TRACE_IRQS_OFF and TRACE_IRQS_ON to support all the + * lockdep-type stuff, but we can't set ICS until afterwards, since + * ICS can only be used in very tight chunks of code to avoid + * tripping over various assertions that it is off. + * + * (There is what looks like a window of vulnerability here since + * we might take a profile interrupt between the two SPR writes + * that set the mask, but since we write the low SPR word first, + * and our interrupt entry code checks the low SPR word, any + * profile interrupt will actually disable interrupts in both SPRs + * before returning, which is OK.) + */ +.Lrestore_all: + PTREGS_PTR(r0, PTREGS_OFFSET_EX1) + { + lw r0, r0 + PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS) + } + { + andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK + lw r32, r32 + } + bnz r0, 1f + j 2f +#if PT_FLAGS_DISABLE_IRQ != 1 +# error Assuming PT_FLAGS_DISABLE_IRQ == 1 so we can use bbnst below +#endif +1: bbnst r32, 2f + IRQ_DISABLE(r20,r21) + TRACE_IRQS_OFF + movei r0, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r0 + bzt r30, .Lrestore_regs + j 3f +2: TRACE_IRQS_ON + movei r0, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r0 + IRQ_ENABLE(r20, r21) + bzt r30, .Lrestore_regs +3: + + + /* + * We now commit to returning from this interrupt, since we will be + * doing things like setting EX_CONTEXT SPRs and unwinding the stack + * frame. No calls should be made to any other code after this point. + * This code should only be entered with ICS set. + * r32 must still be set to ptregs.flags. + * We launch loads to each cache line separately first, so we can + * get some parallelism out of the memory subsystem. + * We start zeroing caller-saved registers throughout, since + * that will save some cycles if this turns out to be a syscall. + */ +.Lrestore_regs: + FEEDBACK_REENTER(interrupt_return) /* called from elsewhere */ + + /* + * Rotate so we have one high bit and one low bit to test. + * - low bit says whether to restore all the callee-saved registers, + * or just r30-r33, and r52 up. + * - high bit (i.e. sign bit) says whether to restore all the + * caller-saved registers, or just r0. + */ +#if PT_FLAGS_CALLER_SAVES != 2 || PT_FLAGS_RESTORE_REGS != 4 +# error Rotate trick does not work :-) +#endif + { + rli r20, r32, 30 + PTREGS_PTR(sp, PTREGS_OFFSET_REG(0)) + } + + /* + * Load cache lines 0, 2, and 3 in that order, then use + * the last loaded value, which makes it likely that the other + * cache lines have also loaded, at which point we should be + * able to safely read all the remaining words on those cache + * lines without waiting for the memory subsystem. + */ + pop_reg_zero r0, r28, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0) + pop_reg_zero r30, r2, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(30) + pop_reg_zero r21, r3, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC + pop_reg_zero lr, r4, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_EX1 + { + mtspr SPR_EX_CONTEXT_K_0, r21 + move r5, zero + } + { + mtspr SPR_EX_CONTEXT_K_1, lr + andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + } + + /* Restore callee-saveds that we actually use. */ + pop_reg_zero r52, r6, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_REG(52) + pop_reg_zero r31, r7 + pop_reg_zero r32, r8 + pop_reg_zero r33, r9, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(33) + + /* + * If we modified other callee-saveds, restore them now. + * This is rare, but could be via ptrace or signal handler. + */ + { + move r10, zero + bbs r20, .Lrestore_callees + } +.Lcontinue_restore_regs: + + /* Check if we're returning from a syscall. */ + { + move r11, zero + blzt r20, 1f /* no, so go restore callee-save registers */ + } + + /* + * Check if we're returning to userspace. + * Note that if we're not, we don't worry about zeroing everything. + */ + { + addli sp, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(29) + bnz lr, .Lkernel_return + } + + /* + * On return from syscall, we've restored r0 from pt_regs, but we + * clear the remainder of the caller-saved registers. We could + * restore the syscall arguments, but there's not much point, + * and it ensures user programs aren't trying to use the + * caller-saves if we clear them, as well as avoiding leaking + * kernel pointers into userspace. + */ + pop_reg_zero lr, r12, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR + pop_reg_zero tp, r13, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP + { + lw sp, sp + move r14, zero + move r15, zero + } + { move r16, zero; move r17, zero } + { move r18, zero; move r19, zero } + { move r20, zero; move r21, zero } + { move r22, zero; move r23, zero } + { move r24, zero; move r25, zero } + { move r26, zero; move r27, zero } + + /* Set r1 to errno if we are returning an error, otherwise zero. */ + { + moveli r29, 4096 + sub r1, zero, r0 + } + slt_u r29, r1, r29 + { + mnz r1, r29, r1 + move r29, zero + } + iret + + /* + * Not a syscall, so restore caller-saved registers. + * First kick off a load for cache line 1, which we're touching + * for the first time here. + */ + .align 64 +1: pop_reg r29, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(29) + pop_reg r1 + pop_reg r2 + pop_reg r3 + pop_reg r4 + pop_reg r5 + pop_reg r6 + pop_reg r7 + pop_reg r8 + pop_reg r9 + pop_reg r10 + pop_reg r11 + pop_reg r12 + pop_reg r13 + pop_reg r14 + pop_reg r15 + pop_reg r16 + pop_reg r17 + pop_reg r18 + pop_reg r19 + pop_reg r20 + pop_reg r21 + pop_reg r22 + pop_reg r23 + pop_reg r24 + pop_reg r25 + pop_reg r26 + pop_reg r27 + pop_reg r28, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(28) + /* r29 already restored above */ + bnz lr, .Lkernel_return + pop_reg lr, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR + pop_reg tp, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP + lw sp, sp + iret + + /* + * We can't restore tp when in kernel mode, since a thread might + * have migrated from another cpu and brought a stale tp value. + */ +.Lkernel_return: + pop_reg lr, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR + lw sp, sp + iret + + /* Restore callee-saved registers from r34 to r51. */ +.Lrestore_callees: + addli sp, sp, PTREGS_OFFSET_REG(34) - PTREGS_OFFSET_REG(29) + pop_reg r34 + pop_reg r35 + pop_reg r36 + pop_reg r37 + pop_reg r38 + pop_reg r39 + pop_reg r40 + pop_reg r41 + pop_reg r42 + pop_reg r43 + pop_reg r44 + pop_reg r45 + pop_reg r46 + pop_reg r47 + pop_reg r48 + pop_reg r49 + pop_reg r50 + pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51) + j .Lcontinue_restore_regs + STD_ENDPROC(interrupt_return) + + /* + * Some interrupts don't check for single stepping + */ + .pushsection .text.handle_interrupt_no_single_step,"ax" +handle_interrupt_no_single_step: + finish_interrupt_save handle_interrupt_no_single_step + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_interrupt_no_single_step) + { + movei r30, 0 /* not an NMI */ + j interrupt_return + } + STD_ENDPROC(handle_interrupt_no_single_step) + + /* + * "NMI" interrupts mask ALL interrupts before calling the + * handler, and don't check thread flags, etc., on the way + * back out. In general, the only things we do here for NMIs + * are the register save/restore, fixing the PC if we were + * doing single step, and the dataplane kernel-TLB management. + * We don't (for example) deal with start/stop of the sched tick. + */ + .pushsection .text.handle_nmi,"ax" +handle_nmi: + finish_interrupt_save handle_nmi + check_single_stepping normal, .Ldispatch_nmi +.Ldispatch_nmi: + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_nmi) + j interrupt_return + STD_ENDPROC(handle_nmi) + + /* + * Parallel code for syscalls to handle_interrupt. + */ + .pushsection .text.handle_syscall,"ax" +handle_syscall: + finish_interrupt_save handle_syscall + + /* + * Check for if we are single stepping in user level. If so, then + * we need to restore the PC. + */ + check_single_stepping syscall, .Ldispatch_syscall +.Ldispatch_syscall: + + /* Enable irqs. */ + TRACE_IRQS_ON + IRQ_ENABLE(r20, r21) + + /* Bump the counter for syscalls made on this tile. */ + moveli r20, lo16(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + auli r20, r20, ha16(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + add r20, r20, tp + lw r21, r20 + addi r21, r21, 1 + { + sw r20, r21 + GET_THREAD_INFO(r31) + } + + /* Trace syscalls, if requested. */ + addi r31, r31, THREAD_INFO_FLAGS_OFFSET + lw r30, r31 + andi r30, r30, _TIF_SYSCALL_TRACE + bzt r30, .Lrestore_syscall_regs + jal do_syscall_trace + FEEDBACK_REENTER(handle_syscall) + + /* + * We always reload our registers from the stack at this + * point. They might be valid, if we didn't build with + * TRACE_IRQFLAGS, and this isn't a dataplane tile, and we're not + * doing syscall tracing, but there are enough cases now that it + * seems simplest just to do the reload unconditionally. + */ +.Lrestore_syscall_regs: + PTREGS_PTR(r11, PTREGS_OFFSET_REG(0)) + pop_reg r0, r11 + pop_reg r1, r11 + pop_reg r2, r11 + pop_reg r3, r11 + pop_reg r4, r11 + pop_reg r5, r11, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(5) + pop_reg TREG_SYSCALL_NR_NAME, r11 + + /* Ensure that the syscall number is within the legal range. */ + moveli r21, __NR_syscalls + { + slt_u r21, TREG_SYSCALL_NR_NAME, r21 + moveli r20, lo16(sys_call_table) + } + { + bbns r21, .Linvalid_syscall + auli r20, r20, ha16(sys_call_table) + } + s2a r20, TREG_SYSCALL_NR_NAME, r20 + lw r20, r20 + + /* Jump to syscall handler. */ + jalr r20 +.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */ + + /* + * Write our r0 onto the stack so it gets restored instead + * of whatever the user had there before. + */ + PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) + sw r29, r0 + +.Lsyscall_sigreturn_skip: + FEEDBACK_REENTER(handle_syscall) + + /* Do syscall trace again, if requested. */ + lw r30, r31 + andi r30, r30, _TIF_SYSCALL_TRACE + bzt r30, 1f + jal do_syscall_trace + FEEDBACK_REENTER(handle_syscall) +1: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + +.Linvalid_syscall: + /* Report an invalid syscall back to the user program */ + { + PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) + movei r28, -ENOSYS + } + sw r29, r28 + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(handle_syscall) + + /* Return the address for oprofile to suppress in backtraces. */ +STD_ENTRY_SECTION(handle_syscall_link_address, .text.handle_syscall) + lnk r0 + { + addli r0, r0, .Lhandle_syscall_link - . + jrp lr + } + STD_ENDPROC(handle_syscall_link_address) + +STD_ENTRY(ret_from_fork) + jal sim_notify_fork + jal schedule_tail + FEEDBACK_REENTER(ret_from_fork) + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(ret_from_fork) + + /* + * Code for ill interrupt. + */ + .pushsection .text.handle_ill,"ax" +handle_ill: + finish_interrupt_save handle_ill + + /* + * Check for if we are single stepping in user level. If so, then + * we need to restore the PC. + */ + check_single_stepping ill, .Ldispatch_normal_ill + + { + /* See if the PC is the 1st bundle in the buffer */ + seq r25, r27, r26 + + /* Point to the 2nd bundle in the buffer */ + addi r26, r26, 8 + } + { + /* Point to the original pc */ + addi r24, r29, SINGLESTEP_STATE_ORIG_PC_OFFSET + + /* Branch if the PC is the 1st bundle in the buffer */ + bnz r25, 3f + } + { + /* See if the PC is the 2nd bundle of the buffer */ + seq r25, r27, r26 + + /* Set PC to next instruction */ + addi r24, r29, SINGLESTEP_STATE_NEXT_PC_OFFSET + } + { + /* Point to flags */ + addi r25, r29, SINGLESTEP_STATE_FLAGS_OFFSET + + /* Branch if PC is in the second bundle */ + bz r25, 2f + } + /* Load flags */ + lw r25, r25 + { + /* + * Get the offset for the register to restore + * Note: the lower bound is 2, so we have implicit scaling by 4. + * No multiplication of the register number by the size of a register + * is needed. + */ + mm r27, r25, zero, SINGLESTEP_STATE_TARGET_LB, \ + SINGLESTEP_STATE_TARGET_UB + + /* Mask Rewrite_LR */ + andi r25, r25, SINGLESTEP_STATE_MASK_UPDATE + } + { + addi r29, r29, SINGLESTEP_STATE_UPDATE_VALUE_OFFSET + + /* Don't rewrite temp register */ + bz r25, 3f + } + { + /* Get the temp value */ + lw r29, r29 + + /* Point to where the register is stored */ + add r27, r27, sp + } + + /* Add in the C ABI save area size to the register offset */ + addi r27, r27, C_ABI_SAVE_AREA_SIZE + + /* Restore the user's register with the temp value */ + sw r27, r29 + j 3f + +2: + /* Must be in the third bundle */ + addi r24, r29, SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET + +3: + /* set PC and continue */ + lw r26, r24 + { + sw r28, r26 + GET_THREAD_INFO(r0) + } + + /* + * Clear TIF_SINGLESTEP to prevent recursion if we execute an ill. + * The normal non-arch flow redundantly clears TIF_SINGLESTEP, but we + * need to clear it here and can't really impose on all other arches. + * So what's another write between friends? + */ + + addi r1, r0, THREAD_INFO_FLAGS_OFFSET + { + lw r2, r1 + addi r0, r0, THREAD_INFO_TASK_OFFSET /* currently a no-op */ + } + andi r2, r2, ~_TIF_SINGLESTEP + sw r1, r2 + + /* Issue a sigtrap */ + { + lw r0, r0 /* indirect thru thread_info to get task_info*/ + addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */ + move r2, zero /* load error code into r2 */ + } + + jal send_sigtrap /* issue a SIGTRAP */ + FEEDBACK_REENTER(handle_ill) + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + +.Ldispatch_normal_ill: + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_ill) + { + movei r30, 0 /* not an NMI */ + j interrupt_return + } + STD_ENDPROC(handle_ill) + +/* Various stub interrupt handlers and syscall handlers */ + +STD_ENTRY_LOCAL(_kernel_double_fault) + mfspr r1, SPR_EX_CONTEXT_K_0 + move r2, lr + move r3, sp + move r4, r52 + addi sp, sp, -C_ABI_SAVE_AREA_SIZE + j kernel_double_fault + STD_ENDPROC(_kernel_double_fault) + +STD_ENTRY_LOCAL(bad_intr) + mfspr r2, SPR_EX_CONTEXT_K_0 + panic "Unhandled interrupt %#x: PC %#lx" + STD_ENDPROC(bad_intr) + +/* Put address of pt_regs in reg and jump. */ +#define PTREGS_SYSCALL(x, reg) \ + STD_ENTRY(_##x); \ + { \ + PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ + j x \ + }; \ + STD_ENDPROC(_##x) + +/* + * Special-case sigreturn to not write r0 to the stack on return. + * This is technically more efficient, but it also avoids difficulties + * in the 64-bit OS when handling 32-bit compat code, since we must not + * sign-extend r0 for the sigreturn return-value case. + */ +#define PTREGS_SYSCALL_SIGRETURN(x, reg) \ + STD_ENTRY(_##x); \ + addli lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \ + { \ + PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ + j x \ + }; \ + STD_ENDPROC(_##x) + +PTREGS_SYSCALL(sys_execve, r3) +PTREGS_SYSCALL(sys_sigaltstack, r2) +PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0) +PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1) + +/* Save additional callee-saves to pt_regs, put address in r4 and jump. */ +STD_ENTRY(_sys_clone) + push_extra_callee_saves r4 + j sys_clone + STD_ENDPROC(_sys_clone) + +/* + * This entrypoint is taken for the cmpxchg and atomic_update fast + * swints. We may wish to generalize it to other fast swints at some + * point, but for now there are just two very similar ones, which + * makes it faster. + * + * The fast swint code is designed to have a small footprint. It does + * not save or restore any GPRs, counting on the caller-save registers + * to be available to it on entry. It does not modify any callee-save + * registers (including "lr"). It does not check what PL it is being + * called at, so you'd better not call it other than at PL0. + * The <atomic.h> wrapper assumes it only clobbers r20-r29, so if + * it ever is necessary to use more registers, be aware. + * + * It does not use the stack, but since it might be re-interrupted by + * a page fault which would assume the stack was valid, it does + * save/restore the stack pointer and zero it out to make sure it gets reset. + * Since we always keep interrupts disabled, the hypervisor won't + * clobber our EX_CONTEXT_K_x registers, so we don't save/restore them + * (other than to advance the PC on return). + * + * We have to manually validate the user vs kernel address range + * (since at PL1 we can read/write both), and for performance reasons + * we don't allow cmpxchg on the fc000000 memory region, since we only + * validate that the user address is below PAGE_OFFSET. + * + * We place it in the __HEAD section to ensure it is relatively + * near to the intvec_SWINT_1 code (reachable by a conditional branch). + * + * Our use of ATOMIC_LOCK_REG here must match do_page_fault_ics(). + * + * As we do in lib/atomic_asm_32.S, we bypass a store if the value we + * would store is the same as the value we just loaded. + */ + __HEAD + .align 64 + /* Align much later jump on the start of a cache line. */ +#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() + nop +#if PAGE_SIZE >= 0x10000 + nop +#endif +#endif +ENTRY(sys_cmpxchg) + + /* + * Save "sp" and set it zero for any possible page fault. + * + * HACK: We want to both zero sp and check r0's alignment, + * so we do both at once. If "sp" becomes nonzero we + * know r0 is unaligned and branch to the error handler that + * restores sp, so this is OK. + * + * ICS is disabled right now so having a garbage but nonzero + * sp is OK, since we won't execute any faulting instructions + * when it is nonzero. + */ + { + move r27, sp + andi sp, r0, 3 + } + + /* + * Get the lock address in ATOMIC_LOCK_REG, and also validate that the + * address is less than PAGE_OFFSET, since that won't trap at PL1. + * We only use bits less than PAGE_SHIFT to avoid having to worry + * about aliasing among multiple mappings of the same physical page, + * and we ignore the low 3 bits so we have one lock that covers + * both a cmpxchg64() and a cmpxchg() on either its low or high word. + * NOTE: this must match __atomic_hashed_lock() in lib/atomic_32.c. + */ + +#if (PAGE_OFFSET & 0xffff) != 0 +# error Code here assumes PAGE_OFFSET can be loaded with just hi16() +#endif + +#if ATOMIC_LOCKS_FOUND_VIA_TABLE() + { + /* Check for unaligned input. */ + bnz sp, .Lcmpxchg_badaddr + mm r25, r0, zero, 3, PAGE_SHIFT-1 + } + { + crc32_32 r25, zero, r25 + moveli r21, lo16(atomic_lock_ptr) + } + { + auli r21, r21, ha16(atomic_lock_ptr) + auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */ + } + { + shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT + slt_u r23, r0, r23 + lw r26, r0 /* see comment in the "#else" for the "lw r26". */ + } + { + s2a r21, r20, r21 + bbns r23, .Lcmpxchg_badaddr + } + { + lw r21, r21 + seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64 + andi r25, r25, ATOMIC_HASH_L2_SIZE - 1 + } + { + /* Branch away at this point if we're doing a 64-bit cmpxchg. */ + bbs r23, .Lcmpxchg64 + andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */ + } + { + s2a ATOMIC_LOCK_REG_NAME, r25, r21 + j .Lcmpxchg32_tns /* see comment in the #else for the jump. */ + } + +#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ + { + /* Check for unaligned input. */ + bnz sp, .Lcmpxchg_badaddr + auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */ + } + { + /* + * Slide bits into position for 'mm'. We want to ignore + * the low 3 bits of r0, and consider only the next + * ATOMIC_HASH_SHIFT bits. + * Because of C pointer arithmetic, we want to compute this: + * + * ((char*)atomic_locks + + * (((r0 >> 3) & (1 << (ATOMIC_HASH_SIZE - 1))) << 2)) + * + * Instead of two shifts we just ">> 1", and use 'mm' + * to ignore the low and high bits we don't want. + */ + shri r25, r0, 1 + + slt_u r23, r0, r23 + + /* + * Ensure that the TLB is loaded before we take out the lock. + * On tilepro, this will start fetching the value all the way + * into our L1 as well (and if it gets modified before we + * grab the lock, it will be invalidated from our cache + * before we reload it). On tile64, we'll start fetching it + * into our L1 if we're the home, and if we're not, we'll + * still at least start fetching it into the home's L2. + */ + lw r26, r0 + } + { + auli r21, zero, ha16(atomic_locks) + + bbns r23, .Lcmpxchg_badaddr + } +#if PAGE_SIZE < 0x10000 + /* atomic_locks is page-aligned so for big pages we don't need this. */ + addli r21, r21, lo16(atomic_locks) +#endif + { + /* + * Insert the hash bits into the page-aligned pointer. + * ATOMIC_HASH_SHIFT is so big that we don't actually hash + * the unmasked address bits, as that may cause unnecessary + * collisions. + */ + mm ATOMIC_LOCK_REG_NAME, r25, r21, 2, (ATOMIC_HASH_SHIFT + 2) - 1 + + seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64 + } + { + /* Branch away at this point if we're doing a 64-bit cmpxchg. */ + bbs r23, .Lcmpxchg64 + andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */ + } + { + /* + * We very carefully align the code that actually runs with + * the lock held (twelve bundles) so that we know it is all in + * the icache when we start. This instruction (the jump) is + * at the start of the first cache line, address zero mod 64; + * we jump to the very end of the second cache line to get that + * line loaded in the icache, then fall through to issue the tns + * in the third cache line, at which point it's all cached. + * Note that is for performance, not correctness. + */ + j .Lcmpxchg32_tns + } + +#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ + +/* Symbol for do_page_fault_ics() to use to compare against the PC. */ +.global __sys_cmpxchg_grab_lock +__sys_cmpxchg_grab_lock: + + /* + * Perform the actual cmpxchg or atomic_update. + */ +.Ldo_cmpxchg32: + { + lw r21, r0 + seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_atomic_update + move r24, r2 + } + { + seq r22, r21, r1 /* See if cmpxchg matches. */ + and r25, r21, r1 /* If atomic_update, compute (*mem & mask) */ + } + { + or r22, r22, r23 /* Skip compare branch for atomic_update. */ + add r25, r25, r2 /* Compute (*mem & mask) + addend. */ + } + { + mvnz r24, r23, r25 /* Use atomic_update value if appropriate. */ + bbns r22, .Lcmpxchg32_nostore + } + seq r22, r24, r21 /* Are we storing the value we loaded? */ + bbs r22, .Lcmpxchg32_nostore + sw r0, r24 + + /* The following instruction is the start of the second cache line. */ + /* Do slow mtspr here so the following "mf" waits less. */ + { + move sp, r27 + mtspr SPR_EX_CONTEXT_K_0, r28 + } + mf + + { + move r0, r21 + sw ATOMIC_LOCK_REG_NAME, zero + } + iret + + /* Duplicated code here in the case where we don't overlap "mf" */ +.Lcmpxchg32_nostore: + { + move r0, r21 + sw ATOMIC_LOCK_REG_NAME, zero + } + { + move sp, r27 + mtspr SPR_EX_CONTEXT_K_0, r28 + } + iret + + /* + * The locking code is the same for 32-bit cmpxchg/atomic_update, + * and for 64-bit cmpxchg. We provide it as a macro and put + * it into both versions. We can't share the code literally + * since it depends on having the right branch-back address. + */ + .macro cmpxchg_lock, bitwidth + + /* Lock; if we succeed, jump back up to the read-modify-write. */ +#ifdef CONFIG_SMP + tns r21, ATOMIC_LOCK_REG_NAME +#else + /* + * Non-SMP preserves all the lock infrastructure, to keep the + * code simpler for the interesting (SMP) case. However, we do + * one small optimization here and in atomic_asm.S, which is + * to fake out acquiring the actual lock in the atomic_lock table. + */ + movei r21, 0 +#endif + + /* Issue the slow SPR here while the tns result is in flight. */ + mfspr r28, SPR_EX_CONTEXT_K_0 + + { + addi r28, r28, 8 /* return to the instruction after the swint1 */ + bzt r21, .Ldo_cmpxchg\bitwidth + } + /* + * The preceding instruction is the last thing that must be + * hot in the icache before we do the "tns" above. + */ + +#ifdef CONFIG_SMP + /* + * We failed to acquire the tns lock on our first try. Now use + * bounded exponential backoff to retry, like __atomic_spinlock(). + */ + { + moveli r23, 2048 /* maximum backoff time in cycles */ + moveli r25, 32 /* starting backoff time in cycles */ + } +1: mfspr r26, CYCLE_LOW /* get start point for this backoff */ +2: mfspr r22, CYCLE_LOW /* test to see if we've backed off enough */ + sub r22, r22, r26 + slt r22, r22, r25 + bbst r22, 2b + { + shli r25, r25, 1 /* double the backoff; retry the tns */ + tns r21, ATOMIC_LOCK_REG_NAME + } + slt r26, r23, r25 /* is the proposed backoff too big? */ + { + mvnz r25, r26, r23 + bzt r21, .Ldo_cmpxchg\bitwidth + } + j 1b +#endif /* CONFIG_SMP */ + .endm + +.Lcmpxchg32_tns: + /* + * This is the last instruction on the second cache line. + * The nop here loads the second line, then we fall through + * to the tns to load the third line before we take the lock. + */ + nop + cmpxchg_lock 32 + + /* + * This code is invoked from sys_cmpxchg after most of the + * preconditions have been checked. We still need to check + * that r0 is 8-byte aligned, since if it's not we won't + * actually be atomic. However, ATOMIC_LOCK_REG has the atomic + * lock pointer and r27/r28 have the saved SP/PC. + * r23 is holding "r0 & 7" so we can test for alignment. + * The compare value is in r2/r3; the new value is in r4/r5. + * On return, we must put the old value in r0/r1. + */ + .align 64 +.Lcmpxchg64: + { +#if ATOMIC_LOCKS_FOUND_VIA_TABLE() + s2a ATOMIC_LOCK_REG_NAME, r25, r21 +#endif + bzt r23, .Lcmpxchg64_tns + } + j .Lcmpxchg_badaddr + +.Ldo_cmpxchg64: + { + lw r21, r0 + addi r25, r0, 4 + } + { + lw r1, r25 + } + seq r26, r21, r2 + { + bz r26, .Lcmpxchg64_mismatch + seq r26, r1, r3 + } + { + bz r26, .Lcmpxchg64_mismatch + } + sw r0, r4 + sw r25, r5 + + /* + * The 32-bit path provides optimized "match" and "mismatch" + * iret paths, but we don't have enough bundles in this cache line + * to do that, so we just make even the "mismatch" path do an "mf". + */ +.Lcmpxchg64_mismatch: + { + move sp, r27 + mtspr SPR_EX_CONTEXT_K_0, r28 + } + mf + { + move r0, r21 + sw ATOMIC_LOCK_REG_NAME, zero + } + iret + +.Lcmpxchg64_tns: + cmpxchg_lock 64 + + + /* + * Reset sp and revector to sys_cmpxchg_badaddr(), which will + * just raise the appropriate signal and exit. Doing it this + * way means we don't have to duplicate the code in intvec.S's + * int_hand macro that locates the top of the stack. + */ +.Lcmpxchg_badaddr: + { + moveli TREG_SYSCALL_NR_NAME, __NR_cmpxchg_badaddr + move sp, r27 + } + j intvec_SWINT_1 + ENDPROC(sys_cmpxchg) + ENTRY(__sys_cmpxchg_end) + + +/* The single-step support may need to read all the registers. */ +int_unalign: + push_extra_callee_saves r0 + j do_trap + +/* Include .intrpt1 array of interrupt vectors */ + .section ".intrpt1", "ax" + +#define op_handle_perf_interrupt bad_intr +#define op_handle_aux_perf_interrupt bad_intr + +#ifndef CONFIG_HARDWALL +#define do_hardwall_trap bad_intr +#endif + + int_hand INT_ITLB_MISS, ITLB_MISS, \ + do_page_fault, handle_interrupt_no_single_step + int_hand INT_MEM_ERROR, MEM_ERROR, bad_intr + int_hand INT_ILL, ILL, do_trap, handle_ill + int_hand INT_GPV, GPV, do_trap + int_hand INT_SN_ACCESS, SN_ACCESS, do_trap + int_hand INT_IDN_ACCESS, IDN_ACCESS, do_trap + int_hand INT_UDN_ACCESS, UDN_ACCESS, do_trap + int_hand INT_IDN_REFILL, IDN_REFILL, bad_intr + int_hand INT_UDN_REFILL, UDN_REFILL, bad_intr + int_hand INT_IDN_COMPLETE, IDN_COMPLETE, bad_intr + int_hand INT_UDN_COMPLETE, UDN_COMPLETE, bad_intr + int_hand INT_SWINT_3, SWINT_3, do_trap + int_hand INT_SWINT_2, SWINT_2, do_trap + int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall + int_hand INT_SWINT_0, SWINT_0, do_trap + int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign + int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault + int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault + int_hand INT_DMATLB_MISS, DMATLB_MISS, do_page_fault + int_hand INT_DMATLB_ACCESS, DMATLB_ACCESS, do_page_fault + int_hand INT_SNITLB_MISS, SNITLB_MISS, do_page_fault + int_hand INT_SN_NOTIFY, SN_NOTIFY, bad_intr + int_hand INT_SN_FIREWALL, SN_FIREWALL, do_hardwall_trap + int_hand INT_IDN_FIREWALL, IDN_FIREWALL, bad_intr + int_hand INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap + int_hand INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt + int_hand INT_IDN_TIMER, IDN_TIMER, bad_intr + int_hand INT_UDN_TIMER, UDN_TIMER, bad_intr + int_hand INT_DMA_NOTIFY, DMA_NOTIFY, bad_intr + int_hand INT_IDN_CA, IDN_CA, bad_intr + int_hand INT_UDN_CA, UDN_CA, bad_intr + int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr + int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr + int_hand INT_PERF_COUNT, PERF_COUNT, \ + op_handle_perf_interrupt, handle_nmi + int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + dc_dispatch INT_INTCTRL_2, INTCTRL_2 + int_hand INT_INTCTRL_1, INTCTRL_1, bad_intr +#else + int_hand INT_INTCTRL_2, INTCTRL_2, bad_intr + dc_dispatch INT_INTCTRL_1, INTCTRL_1 +#endif + int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr + int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ + hv_message_intr + int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \ + tile_dev_intr + int_hand INT_I_ASID, I_ASID, bad_intr + int_hand INT_D_ASID, D_ASID, bad_intr + int_hand INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \ + do_page_fault + int_hand INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \ + do_page_fault + int_hand INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \ + do_page_fault + int_hand INT_SN_CPL, SN_CPL, bad_intr + int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap +#if CHIP_HAS_AUX_PERF_COUNTERS() + int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ + op_handle_aux_perf_interrupt, handle_nmi +#endif + + /* Synthetic interrupt delivered only by the simulator */ + int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S new file mode 100644 index 00000000..30ae76e5 --- /dev/null +++ b/arch/tile/kernel/intvec_64.S @@ -0,0 +1,1289 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Linux interrupt vectors. + */ + +#include <linux/linkage.h> +#include <linux/errno.h> +#include <linux/unistd.h> +#include <asm/ptrace.h> +#include <asm/thread_info.h> +#include <asm/irqflags.h> +#include <asm/asm-offsets.h> +#include <asm/types.h> +#include <asm/signal.h> +#include <hv/hypervisor.h> +#include <arch/abi.h> +#include <arch/interrupts.h> +#include <arch/spr_def.h> + +#ifdef CONFIG_PREEMPT +# error "No support for kernel preemption currently" +#endif + +#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) + +#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) + + + .macro push_reg reg, ptr=sp, delta=-8 + { + st \ptr, \reg + addli \ptr, \ptr, \delta + } + .endm + + .macro pop_reg reg, ptr=sp, delta=8 + { + ld \reg, \ptr + addli \ptr, \ptr, \delta + } + .endm + + .macro pop_reg_zero reg, zreg, ptr=sp, delta=8 + { + move \zreg, zero + ld \reg, \ptr + addi \ptr, \ptr, \delta + } + .endm + + .macro push_extra_callee_saves reg + PTREGS_PTR(\reg, PTREGS_OFFSET_REG(51)) + push_reg r51, \reg + push_reg r50, \reg + push_reg r49, \reg + push_reg r48, \reg + push_reg r47, \reg + push_reg r46, \reg + push_reg r45, \reg + push_reg r44, \reg + push_reg r43, \reg + push_reg r42, \reg + push_reg r41, \reg + push_reg r40, \reg + push_reg r39, \reg + push_reg r38, \reg + push_reg r37, \reg + push_reg r36, \reg + push_reg r35, \reg + push_reg r34, \reg, PTREGS_OFFSET_BASE - PTREGS_OFFSET_REG(34) + .endm + + .macro panic str + .pushsection .rodata, "a" +1: + .asciz "\str" + .popsection + { + moveli r0, hw2_last(1b) + } + { + shl16insli r0, r0, hw1(1b) + } + { + shl16insli r0, r0, hw0(1b) + jal panic + } + .endm + + +#ifdef __COLLECT_LINKER_FEEDBACK__ + .pushsection .text.intvec_feedback,"ax" +intvec_feedback: + .popsection +#endif + + /* + * Default interrupt handler. + * + * vecnum is where we'll put this code. + * c_routine is the C routine we'll call. + * + * The C routine is passed two arguments: + * - A pointer to the pt_regs state. + * - The interrupt vector number. + * + * The "processing" argument specifies the code for processing + * the interrupt. Defaults to "handle_interrupt". + */ + .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt + .org (\vecnum << 8) +intvec_\vecname: + /* Temporarily save a register so we have somewhere to work. */ + + mtspr SPR_SYSTEM_SAVE_K_1, r0 + mfspr r0, SPR_EX_CONTEXT_K_1 + + andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + + .ifc \vecnum, INT_DOUBLE_FAULT + /* + * For double-faults from user-space, fall through to the normal + * register save and stack setup path. Otherwise, it's the + * hypervisor giving us one last chance to dump diagnostics, and we + * branch to the kernel_double_fault routine to do so. + */ + beqz r0, 1f + j _kernel_double_fault +1: + .else + /* + * If we're coming from user-space, then set sp to the top of + * the kernel stack. Otherwise, assume sp is already valid. + */ + { + bnez r0, 0f + move r0, sp + } + .endif + + .ifc \c_routine, do_page_fault + /* + * The page_fault handler may be downcalled directly by the + * hypervisor even when Linux is running and has ICS set. + * + * In this case the contents of EX_CONTEXT_K_1 reflect the + * previous fault and can't be relied on to choose whether or + * not to reinitialize the stack pointer. So we add a test + * to see whether SYSTEM_SAVE_K_2 has the high bit set, + * and if so we don't reinitialize sp, since we must be coming + * from Linux. (In fact the precise case is !(val & ~1), + * but any Linux PC has to have the high bit set.) + * + * Note that the hypervisor *always* sets SYSTEM_SAVE_K_2 for + * any path that turns into a downcall to one of our TLB handlers. + * + * FIXME: if we end up never using this path, perhaps we should + * prevent the hypervisor from generating downcalls in this case. + * The advantage of getting a downcall is we can panic in Linux. + */ + mfspr r0, SPR_SYSTEM_SAVE_K_2 + { + bltz r0, 0f /* high bit in S_S_1_2 is for a PC to use */ + move r0, sp + } + .endif + + + /* + * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and + * the current stack top in the higher bits. So we recover + * our stack top by just masking off the low bits, then + * point sp at the top aligned address on the actual stack page. + */ + mfspr r0, SPR_SYSTEM_SAVE_K_0 + mm r0, zero, LOG2_THREAD_SIZE, 63 + +0: + /* + * Align the stack mod 64 so we can properly predict what + * cache lines we need to write-hint to reduce memory fetch + * latency as we enter the kernel. The layout of memory is + * as follows, with cache line 0 at the lowest VA, and cache + * line 8 just below the r0 value this "andi" computes. + * Note that we never write to cache line 8, and we skip + * cache lines 1-3 for syscalls. + * + * cache line 8: ptregs padding (two words) + * cache line 7: sp, lr, pc, ex1, faultnum, orig_r0, flags, cmpexch + * cache line 6: r46...r53 (tp) + * cache line 5: r38...r45 + * cache line 4: r30...r37 + * cache line 3: r22...r29 + * cache line 2: r14...r21 + * cache line 1: r6...r13 + * cache line 0: 2 x frame, r0..r5 + */ + andi r0, r0, -64 + + /* + * Push the first four registers on the stack, so that we can set + * them to vector-unique values before we jump to the common code. + * + * Registers are pushed on the stack as a struct pt_regs, + * with the sp initially just above the struct, and when we're + * done, sp points to the base of the struct, minus + * C_ABI_SAVE_AREA_SIZE, so we can directly jal to C code. + * + * This routine saves just the first four registers, plus the + * stack context so we can do proper backtracing right away, + * and defers to handle_interrupt to save the rest. + * The backtracer needs pc, ex1, lr, sp, r52, and faultnum. + */ + addli r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP) + wh64 r0 /* cache line 7 */ + { + st r0, lr + addli r0, r0, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR + } + { + st r0, sp + addli sp, r0, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_SP + } + wh64 sp /* cache line 6 */ + { + st sp, r52 + addli sp, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(52) + } + wh64 sp /* cache line 0 */ + { + st sp, r1 + addli sp, sp, PTREGS_OFFSET_REG(2) - PTREGS_OFFSET_REG(1) + } + { + st sp, r2 + addli sp, sp, PTREGS_OFFSET_REG(3) - PTREGS_OFFSET_REG(2) + } + { + st sp, r3 + addli sp, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(3) + } + mfspr r0, SPR_EX_CONTEXT_K_0 + .ifc \processing,handle_syscall + /* + * Bump the saved PC by one bundle so that when we return, we won't + * execute the same swint instruction again. We need to do this while + * we're in the critical section. + */ + addi r0, r0, 8 + .endif + { + st sp, r0 + addli sp, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC + } + mfspr r0, SPR_EX_CONTEXT_K_1 + { + st sp, r0 + addi sp, sp, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1 + /* + * Use r0 for syscalls so it's a temporary; use r1 for interrupts + * so that it gets passed through unchanged to the handler routine. + * Note that the .if conditional confusingly spans bundles. + */ + .ifc \processing,handle_syscall + movei r0, \vecnum + } + { + st sp, r0 + .else + movei r1, \vecnum + } + { + st sp, r1 + .endif + addli sp, sp, PTREGS_OFFSET_REG(0) - PTREGS_OFFSET_FAULTNUM + } + mfspr r0, SPR_SYSTEM_SAVE_K_1 /* Original r0 */ + { + st sp, r0 + addi sp, sp, -PTREGS_OFFSET_REG(0) - 8 + } + { + st sp, zero /* write zero into "Next SP" frame pointer */ + addi sp, sp, -8 /* leave SP pointing at bottom of frame */ + } + .ifc \processing,handle_syscall + j handle_syscall + .else + /* Capture per-interrupt SPR context to registers. */ + .ifc \c_routine, do_page_fault + mfspr r2, SPR_SYSTEM_SAVE_K_3 /* address of page fault */ + mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */ + .else + .ifc \vecnum, INT_ILL_TRANS + mfspr r2, ILL_TRANS_REASON + .else + .ifc \vecnum, INT_DOUBLE_FAULT + mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */ + .else + .ifc \c_routine, do_trap + mfspr r2, GPV_REASON + .else + .ifc \c_routine, op_handle_perf_interrupt + mfspr r2, PERF_COUNT_STS +#if CHIP_HAS_AUX_PERF_COUNTERS() + .else + .ifc \c_routine, op_handle_aux_perf_interrupt + mfspr r2, AUX_PERF_COUNT_STS + .endif +#endif + .endif + .endif + .endif + .endif + .endif + /* Put function pointer in r0 */ + moveli r0, hw2_last(\c_routine) + shl16insli r0, r0, hw1(\c_routine) + { + shl16insli r0, r0, hw0(\c_routine) + j \processing + } + .endif + ENDPROC(intvec_\vecname) + +#ifdef __COLLECT_LINKER_FEEDBACK__ + .pushsection .text.intvec_feedback,"ax" + .org (\vecnum << 5) + FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8) + jrp lr + .popsection +#endif + + .endm + + + /* + * Save the rest of the registers that we didn't save in the actual + * vector itself. We can't use r0-r10 inclusive here. + */ + .macro finish_interrupt_save, function + + /* If it's a syscall, save a proper orig_r0, otherwise just zero. */ + PTREGS_PTR(r52, PTREGS_OFFSET_ORIG_R0) + { + .ifc \function,handle_syscall + st r52, r0 + .else + st r52, zero + .endif + PTREGS_PTR(r52, PTREGS_OFFSET_TP) + } + st r52, tp + { + mfspr tp, CMPEXCH_VALUE + PTREGS_PTR(r52, PTREGS_OFFSET_CMPEXCH) + } + + /* + * For ordinary syscalls, we save neither caller- nor callee- + * save registers, since the syscall invoker doesn't expect the + * caller-saves to be saved, and the called kernel functions will + * take care of saving the callee-saves for us. + * + * For interrupts we save just the caller-save registers. Saving + * them is required (since the "caller" can't save them). Again, + * the called kernel functions will restore the callee-save + * registers for us appropriately. + * + * On return, we normally restore nothing special for syscalls, + * and just the caller-save registers for interrupts. + * + * However, there are some important caveats to all this: + * + * - We always save a few callee-save registers to give us + * some scratchpad registers to carry across function calls. + * + * - fork/vfork/etc require us to save all the callee-save + * registers, which we do in PTREGS_SYSCALL_ALL_REGS, below. + * + * - We always save r0..r5 and r10 for syscalls, since we need + * to reload them a bit later for the actual kernel call, and + * since we might need them for -ERESTARTNOINTR, etc. + * + * - Before invoking a signal handler, we save the unsaved + * callee-save registers so they are visible to the + * signal handler or any ptracer. + * + * - If the unsaved callee-save registers are modified, we set + * a bit in pt_regs so we know to reload them from pt_regs + * and not just rely on the kernel function unwinding. + * (Done for ptrace register writes and SA_SIGINFO handler.) + */ + { + st r52, tp + PTREGS_PTR(r52, PTREGS_OFFSET_REG(33)) + } + wh64 r52 /* cache line 4 */ + push_reg r33, r52 + push_reg r32, r52 + push_reg r31, r52 + .ifc \function,handle_syscall + push_reg r30, r52, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(30) + push_reg TREG_SYSCALL_NR_NAME, r52, \ + PTREGS_OFFSET_REG(5) - PTREGS_OFFSET_SYSCALL + .else + + push_reg r30, r52, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(30) + wh64 r52 /* cache line 3 */ + push_reg r29, r52 + push_reg r28, r52 + push_reg r27, r52 + push_reg r26, r52 + push_reg r25, r52 + push_reg r24, r52 + push_reg r23, r52 + push_reg r22, r52 + wh64 r52 /* cache line 2 */ + push_reg r21, r52 + push_reg r20, r52 + push_reg r19, r52 + push_reg r18, r52 + push_reg r17, r52 + push_reg r16, r52 + push_reg r15, r52 + push_reg r14, r52 + wh64 r52 /* cache line 1 */ + push_reg r13, r52 + push_reg r12, r52 + push_reg r11, r52 + push_reg r10, r52 + push_reg r9, r52 + push_reg r8, r52 + push_reg r7, r52 + push_reg r6, r52 + + .endif + + push_reg r5, r52 + st r52, r4 + + /* Load tp with our per-cpu offset. */ +#ifdef CONFIG_SMP + { + mfspr r20, SPR_SYSTEM_SAVE_K_0 + moveli r21, hw2_last(__per_cpu_offset) + } + { + shl16insli r21, r21, hw1(__per_cpu_offset) + bfextu r20, r20, 0, LOG2_THREAD_SIZE-1 + } + shl16insli r21, r21, hw0(__per_cpu_offset) + shl3add r20, r20, r21 + ld tp, r20 +#else + move tp, zero +#endif + + /* + * If we will be returning to the kernel, we will need to + * reset the interrupt masks to the state they had before. + * Set DISABLE_IRQ in flags iff we came from PL1 with irqs disabled. + */ + mfspr r32, SPR_EX_CONTEXT_K_1 + { + andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS) + } + beqzt r32, 1f /* zero if from user space */ + IRQS_DISABLED(r32) /* zero if irqs enabled */ +#if PT_FLAGS_DISABLE_IRQ != 1 +# error Value of IRQS_DISABLED used to set PT_FLAGS_DISABLE_IRQ; fix +#endif +1: + .ifnc \function,handle_syscall + /* Record the fact that we saved the caller-save registers above. */ + ori r32, r32, PT_FLAGS_CALLER_SAVES + .endif + st r21, r32 + +#ifdef __COLLECT_LINKER_FEEDBACK__ + /* + * Notify the feedback routines that we were in the + * appropriate fixed interrupt vector area. Note that we + * still have ICS set at this point, so we can't invoke any + * atomic operations or we will panic. The feedback + * routines internally preserve r0..r10 and r30 up. + */ + .ifnc \function,handle_syscall + shli r20, r1, 5 + .else + moveli r20, INT_SWINT_1 << 5 + .endif + moveli r21, hw2_last(intvec_feedback) + shl16insli r21, r21, hw1(intvec_feedback) + shl16insli r21, r21, hw0(intvec_feedback) + add r20, r20, r21 + jalr r20 + + /* And now notify the feedback routines that we are here. */ + FEEDBACK_ENTER(\function) +#endif + + /* + * we've captured enough state to the stack (including in + * particular our EX_CONTEXT state) that we can now release + * the interrupt critical section and replace it with our + * standard "interrupts disabled" mask value. This allows + * synchronous interrupts (and profile interrupts) to punch + * through from this point onwards. + */ + .ifc \function,handle_nmi + IRQ_DISABLE_ALL(r20) + .else + IRQ_DISABLE(r20, r21) + .endif + mtspr INTERRUPT_CRITICAL_SECTION, zero + + /* + * Prepare the first 256 stack bytes to be rapidly accessible + * without having to fetch the background data. + */ + addi r52, sp, -64 + { + wh64 r52 + addi r52, r52, -64 + } + { + wh64 r52 + addi r52, r52, -64 + } + { + wh64 r52 + addi r52, r52, -64 + } + wh64 r52 + +#ifdef CONFIG_TRACE_IRQFLAGS + .ifnc \function,handle_nmi + /* + * We finally have enough state set up to notify the irq + * tracing code that irqs were disabled on entry to the handler. + * The TRACE_IRQS_OFF call clobbers registers r0-r29. + * For syscalls, we already have the register state saved away + * on the stack, so we don't bother to do any register saves here, + * and later we pop the registers back off the kernel stack. + * For interrupt handlers, save r0-r3 in callee-saved registers. + */ + .ifnc \function,handle_syscall + { move r30, r0; move r31, r1 } + { move r32, r2; move r33, r3 } + .endif + TRACE_IRQS_OFF + .ifnc \function,handle_syscall + { move r0, r30; move r1, r31 } + { move r2, r32; move r3, r33 } + .endif + .endif +#endif + + .endm + + /* + * Redispatch a downcall. + */ + .macro dc_dispatch vecnum, vecname + .org (\vecnum << 8) +intvec_\vecname: + j hv_downcall_dispatch + ENDPROC(intvec_\vecname) + .endm + + /* + * Common code for most interrupts. The C function we're eventually + * going to is in r0, and the faultnum is in r1; the original + * values for those registers are on the stack. + */ + .pushsection .text.handle_interrupt,"ax" +handle_interrupt: + finish_interrupt_save handle_interrupt + + /* Jump to the C routine; it should enable irqs as soon as possible. */ + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_interrupt) + { + movei r30, 0 /* not an NMI */ + j interrupt_return + } + STD_ENDPROC(handle_interrupt) + +/* + * This routine takes a boolean in r30 indicating if this is an NMI. + * If so, we also expect a boolean in r31 indicating whether to + * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. + */ +STD_ENTRY(interrupt_return) + /* If we're resuming to kernel space, don't check thread flags. */ + { + bnez r30, .Lrestore_all /* NMIs don't special-case user-space */ + PTREGS_PTR(r29, PTREGS_OFFSET_EX1) + } + ld r29, r29 + andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + { + beqzt r29, .Lresume_userspace + PTREGS_PTR(r29, PTREGS_OFFSET_PC) + } + + /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */ + moveli r27, hw2_last(_cpu_idle_nap) + { + ld r28, r29 + shl16insli r27, r27, hw1(_cpu_idle_nap) + } + { + shl16insli r27, r27, hw0(_cpu_idle_nap) + } + { + cmpeq r27, r27, r28 + } + { + blbc r27, .Lrestore_all + addi r28, r28, 8 + } + st r29, r28 + j .Lrestore_all + +.Lresume_userspace: + FEEDBACK_REENTER(interrupt_return) + + /* + * Use r33 to hold whether we have already loaded the callee-saves + * into ptregs. We don't want to do it twice in this loop, since + * then we'd clobber whatever changes are made by ptrace, etc. + */ + { + movei r33, 0 + move r32, sp + } + + /* Get base of stack in r32. */ + EXTRACT_THREAD_INFO(r32) + +.Lretry_work_pending: + /* + * Disable interrupts so as to make sure we don't + * miss an interrupt that sets any of the thread flags (like + * need_resched or sigpending) between sampling and the iret. + * Routines like schedule() or do_signal() may re-enable + * interrupts before returning. + */ + IRQ_DISABLE(r20, r21) + TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */ + + + /* Check to see if there is any work to do before returning to user. */ + { + addi r29, r32, THREAD_INFO_FLAGS_OFFSET + moveli r1, hw1_last(_TIF_ALLWORK_MASK) + } + { + ld r29, r29 + shl16insli r1, r1, hw0(_TIF_ALLWORK_MASK) + } + and r1, r29, r1 + beqzt r1, .Lrestore_all + + /* + * Make sure we have all the registers saved for signal + * handling or notify-resume. Call out to C code to figure out + * exactly what we need to do for each flag bit, then if + * necessary, reload the flags and recheck. + */ + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + bnez r33, 1f + } + push_extra_callee_saves r0 + movei r33, 1 +1: jal do_work_pending + bnez r0, .Lretry_work_pending + + /* + * In the NMI case we + * omit the call to single_process_check_nohz, which normally checks + * to see if we should start or stop the scheduler tick, because + * we can't call arbitrary Linux code from an NMI context. + * We always call the homecache TLB deferral code to re-trigger + * the deferral mechanism. + * + * The other chunk of responsibility this code has is to reset the + * interrupt masks appropriately to reset irqs and NMIs. We have + * to call TRACE_IRQS_OFF and TRACE_IRQS_ON to support all the + * lockdep-type stuff, but we can't set ICS until afterwards, since + * ICS can only be used in very tight chunks of code to avoid + * tripping over various assertions that it is off. + */ +.Lrestore_all: + PTREGS_PTR(r0, PTREGS_OFFSET_EX1) + { + ld r0, r0 + PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS) + } + { + andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK + ld r32, r32 + } + bnez r0, 1f + j 2f +#if PT_FLAGS_DISABLE_IRQ != 1 +# error Assuming PT_FLAGS_DISABLE_IRQ == 1 so we can use blbct below +#endif +1: blbct r32, 2f + IRQ_DISABLE(r20,r21) + TRACE_IRQS_OFF + movei r0, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r0 + beqzt r30, .Lrestore_regs + j 3f +2: TRACE_IRQS_ON + movei r0, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r0 + IRQ_ENABLE(r20, r21) + beqzt r30, .Lrestore_regs +3: + + + /* + * We now commit to returning from this interrupt, since we will be + * doing things like setting EX_CONTEXT SPRs and unwinding the stack + * frame. No calls should be made to any other code after this point. + * This code should only be entered with ICS set. + * r32 must still be set to ptregs.flags. + * We launch loads to each cache line separately first, so we can + * get some parallelism out of the memory subsystem. + * We start zeroing caller-saved registers throughout, since + * that will save some cycles if this turns out to be a syscall. + */ +.Lrestore_regs: + FEEDBACK_REENTER(interrupt_return) /* called from elsewhere */ + + /* + * Rotate so we have one high bit and one low bit to test. + * - low bit says whether to restore all the callee-saved registers, + * or just r30-r33, and r52 up. + * - high bit (i.e. sign bit) says whether to restore all the + * caller-saved registers, or just r0. + */ +#if PT_FLAGS_CALLER_SAVES != 2 || PT_FLAGS_RESTORE_REGS != 4 +# error Rotate trick does not work :-) +#endif + { + rotli r20, r32, 62 + PTREGS_PTR(sp, PTREGS_OFFSET_REG(0)) + } + + /* + * Load cache lines 0, 4, 6 and 7, in that order, then use + * the last loaded value, which makes it likely that the other + * cache lines have also loaded, at which point we should be + * able to safely read all the remaining words on those cache + * lines without waiting for the memory subsystem. + */ + pop_reg r0, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0) + pop_reg r30, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_REG(30) + pop_reg_zero r52, r3, sp, PTREGS_OFFSET_CMPEXCH - PTREGS_OFFSET_REG(52) + pop_reg_zero r21, r27, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_CMPEXCH + pop_reg_zero lr, r2, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_EX1 + { + mtspr CMPEXCH_VALUE, r21 + move r4, zero + } + pop_reg r21, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_PC + { + mtspr SPR_EX_CONTEXT_K_1, lr + andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + } + { + mtspr SPR_EX_CONTEXT_K_0, r21 + move r5, zero + } + + /* Restore callee-saveds that we actually use. */ + pop_reg_zero r31, r6 + pop_reg_zero r32, r7 + pop_reg_zero r33, r8, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(33) + + /* + * If we modified other callee-saveds, restore them now. + * This is rare, but could be via ptrace or signal handler. + */ + { + move r9, zero + blbs r20, .Lrestore_callees + } +.Lcontinue_restore_regs: + + /* Check if we're returning from a syscall. */ + { + move r10, zero + bltzt r20, 1f /* no, so go restore callee-save registers */ + } + + /* + * Check if we're returning to userspace. + * Note that if we're not, we don't worry about zeroing everything. + */ + { + addli sp, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(29) + bnez lr, .Lkernel_return + } + + /* + * On return from syscall, we've restored r0 from pt_regs, but we + * clear the remainder of the caller-saved registers. We could + * restore the syscall arguments, but there's not much point, + * and it ensures user programs aren't trying to use the + * caller-saves if we clear them, as well as avoiding leaking + * kernel pointers into userspace. + */ + pop_reg_zero lr, r11, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR + pop_reg_zero tp, r12, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP + { + ld sp, sp + move r13, zero + move r14, zero + } + { move r15, zero; move r16, zero } + { move r17, zero; move r18, zero } + { move r19, zero; move r20, zero } + { move r21, zero; move r22, zero } + { move r23, zero; move r24, zero } + { move r25, zero; move r26, zero } + + /* Set r1 to errno if we are returning an error, otherwise zero. */ + { + moveli r29, 4096 + sub r1, zero, r0 + } + { + move r28, zero + cmpltu r29, r1, r29 + } + { + mnz r1, r29, r1 + move r29, zero + } + iret + + /* + * Not a syscall, so restore caller-saved registers. + * First kick off loads for cache lines 1-3, which we're touching + * for the first time here. + */ + .align 64 +1: pop_reg r29, sp, PTREGS_OFFSET_REG(21) - PTREGS_OFFSET_REG(29) + pop_reg r21, sp, PTREGS_OFFSET_REG(13) - PTREGS_OFFSET_REG(21) + pop_reg r13, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(13) + pop_reg r1 + pop_reg r2 + pop_reg r3 + pop_reg r4 + pop_reg r5 + pop_reg r6 + pop_reg r7 + pop_reg r8 + pop_reg r9 + pop_reg r10 + pop_reg r11 + pop_reg r12, sp, 16 + /* r13 already restored above */ + pop_reg r14 + pop_reg r15 + pop_reg r16 + pop_reg r17 + pop_reg r18 + pop_reg r19 + pop_reg r20, sp, 16 + /* r21 already restored above */ + pop_reg r22 + pop_reg r23 + pop_reg r24 + pop_reg r25 + pop_reg r26 + pop_reg r27 + pop_reg r28, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(28) + /* r29 already restored above */ + bnez lr, .Lkernel_return + pop_reg lr, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR + pop_reg tp, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP + ld sp, sp + iret + + /* + * We can't restore tp when in kernel mode, since a thread might + * have migrated from another cpu and brought a stale tp value. + */ +.Lkernel_return: + pop_reg lr, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR + ld sp, sp + iret + + /* Restore callee-saved registers from r34 to r51. */ +.Lrestore_callees: + addli sp, sp, PTREGS_OFFSET_REG(34) - PTREGS_OFFSET_REG(29) + pop_reg r34 + pop_reg r35 + pop_reg r36 + pop_reg r37 + pop_reg r38 + pop_reg r39 + pop_reg r40 + pop_reg r41 + pop_reg r42 + pop_reg r43 + pop_reg r44 + pop_reg r45 + pop_reg r46 + pop_reg r47 + pop_reg r48 + pop_reg r49 + pop_reg r50 + pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51) + j .Lcontinue_restore_regs + STD_ENDPROC(interrupt_return) + + /* + * "NMI" interrupts mask ALL interrupts before calling the + * handler, and don't check thread flags, etc., on the way + * back out. In general, the only things we do here for NMIs + * are register save/restore and dataplane kernel-TLB management. + * We don't (for example) deal with start/stop of the sched tick. + */ + .pushsection .text.handle_nmi,"ax" +handle_nmi: + finish_interrupt_save handle_nmi + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_nmi) + { + movei r30, 1 + move r31, r0 + } + j interrupt_return + STD_ENDPROC(handle_nmi) + + /* + * Parallel code for syscalls to handle_interrupt. + */ + .pushsection .text.handle_syscall,"ax" +handle_syscall: + finish_interrupt_save handle_syscall + + /* Enable irqs. */ + TRACE_IRQS_ON + IRQ_ENABLE(r20, r21) + + /* Bump the counter for syscalls made on this tile. */ + moveli r20, hw2_last(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + shl16insli r20, r20, hw1(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + shl16insli r20, r20, hw0(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + add r20, r20, tp + ld4s r21, r20 + { + addi r21, r21, 1 + move r31, sp + } + { + st4 r20, r21 + EXTRACT_THREAD_INFO(r31) + } + + /* Trace syscalls, if requested. */ + addi r31, r31, THREAD_INFO_FLAGS_OFFSET + ld r30, r31 + andi r30, r30, _TIF_SYSCALL_TRACE + { + addi r30, r31, THREAD_INFO_STATUS_OFFSET - THREAD_INFO_FLAGS_OFFSET + beqzt r30, .Lrestore_syscall_regs + } + jal do_syscall_trace + FEEDBACK_REENTER(handle_syscall) + + /* + * We always reload our registers from the stack at this + * point. They might be valid, if we didn't build with + * TRACE_IRQFLAGS, and this isn't a dataplane tile, and we're not + * doing syscall tracing, but there are enough cases now that it + * seems simplest just to do the reload unconditionally. + */ +.Lrestore_syscall_regs: + { + ld r30, r30 + PTREGS_PTR(r11, PTREGS_OFFSET_REG(0)) + } + pop_reg r0, r11 + pop_reg r1, r11 + pop_reg r2, r11 + pop_reg r3, r11 + pop_reg r4, r11 + pop_reg r5, r11, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(5) + { + ld TREG_SYSCALL_NR_NAME, r11 + moveli r21, __NR_syscalls + } + + /* Ensure that the syscall number is within the legal range. */ + { + moveli r20, hw2(sys_call_table) + blbs r30, .Lcompat_syscall + } + { + cmpltu r21, TREG_SYSCALL_NR_NAME, r21 + shl16insli r20, r20, hw1(sys_call_table) + } + { + blbc r21, .Linvalid_syscall + shl16insli r20, r20, hw0(sys_call_table) + } +.Lload_syscall_pointer: + shl3add r20, TREG_SYSCALL_NR_NAME, r20 + ld r20, r20 + + /* Jump to syscall handler. */ + jalr r20 +.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */ + + /* + * Write our r0 onto the stack so it gets restored instead + * of whatever the user had there before. + * In compat mode, sign-extend r0 before storing it. + */ + { + PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) + blbct r30, 1f + } + addxi r0, r0, 0 +1: st r29, r0 + +.Lsyscall_sigreturn_skip: + FEEDBACK_REENTER(handle_syscall) + + /* Do syscall trace again, if requested. */ + ld r30, r31 + andi r0, r30, _TIF_SYSCALL_TRACE + { + andi r0, r30, _TIF_SINGLESTEP + beqzt r0, 1f + } + jal do_syscall_trace + FEEDBACK_REENTER(handle_syscall) + andi r0, r30, _TIF_SINGLESTEP + +1: beqzt r0, 2f + + /* Single stepping -- notify ptrace. */ + { + movei r0, SIGTRAP + jal ptrace_notify + } + FEEDBACK_REENTER(handle_syscall) + +2: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + +.Lcompat_syscall: + /* + * Load the base of the compat syscall table in r20, and + * range-check the syscall number (duplicated from 64-bit path). + * Sign-extend all the user's passed arguments to make them consistent. + * Also save the original "r(n)" values away in "r(11+n)" in + * case the syscall table entry wants to validate them. + */ + moveli r20, hw2(compat_sys_call_table) + { + cmpltu r21, TREG_SYSCALL_NR_NAME, r21 + shl16insli r20, r20, hw1(compat_sys_call_table) + } + { + blbc r21, .Linvalid_syscall + shl16insli r20, r20, hw0(compat_sys_call_table) + } + { move r11, r0; addxi r0, r0, 0 } + { move r12, r1; addxi r1, r1, 0 } + { move r13, r2; addxi r2, r2, 0 } + { move r14, r3; addxi r3, r3, 0 } + { move r15, r4; addxi r4, r4, 0 } + { move r16, r5; addxi r5, r5, 0 } + j .Lload_syscall_pointer + +.Linvalid_syscall: + /* Report an invalid syscall back to the user program */ + { + PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) + movei r28, -ENOSYS + } + st r29, r28 + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(handle_syscall) + + /* Return the address for oprofile to suppress in backtraces. */ +STD_ENTRY_SECTION(handle_syscall_link_address, .text.handle_syscall) + lnk r0 + { + addli r0, r0, .Lhandle_syscall_link - . + jrp lr + } + STD_ENDPROC(handle_syscall_link_address) + +STD_ENTRY(ret_from_fork) + jal sim_notify_fork + jal schedule_tail + FEEDBACK_REENTER(ret_from_fork) + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(ret_from_fork) + +/* Various stub interrupt handlers and syscall handlers */ + +STD_ENTRY_LOCAL(_kernel_double_fault) + mfspr r1, SPR_EX_CONTEXT_K_0 + move r2, lr + move r3, sp + move r4, r52 + addi sp, sp, -C_ABI_SAVE_AREA_SIZE + j kernel_double_fault + STD_ENDPROC(_kernel_double_fault) + +STD_ENTRY_LOCAL(bad_intr) + mfspr r2, SPR_EX_CONTEXT_K_0 + panic "Unhandled interrupt %#x: PC %#lx" + STD_ENDPROC(bad_intr) + +/* Put address of pt_regs in reg and jump. */ +#define PTREGS_SYSCALL(x, reg) \ + STD_ENTRY(_##x); \ + { \ + PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ + j x \ + }; \ + STD_ENDPROC(_##x) + +/* + * Special-case sigreturn to not write r0 to the stack on return. + * This is technically more efficient, but it also avoids difficulties + * in the 64-bit OS when handling 32-bit compat code, since we must not + * sign-extend r0 for the sigreturn return-value case. + */ +#define PTREGS_SYSCALL_SIGRETURN(x, reg) \ + STD_ENTRY(_##x); \ + addli lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \ + { \ + PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ + j x \ + }; \ + STD_ENDPROC(_##x) + +PTREGS_SYSCALL(sys_execve, r3) +PTREGS_SYSCALL(sys_sigaltstack, r2) +PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0) +#ifdef CONFIG_COMPAT +PTREGS_SYSCALL(compat_sys_execve, r3) +PTREGS_SYSCALL(compat_sys_sigaltstack, r2) +PTREGS_SYSCALL_SIGRETURN(compat_sys_rt_sigreturn, r0) +#endif + +/* Save additional callee-saves to pt_regs, put address in r4 and jump. */ +STD_ENTRY(_sys_clone) + push_extra_callee_saves r4 + j sys_clone + STD_ENDPROC(_sys_clone) + +/* The single-step support may need to read all the registers. */ +int_unalign: + push_extra_callee_saves r0 + j do_trap + +/* Fill the return address stack with nonzero entries. */ +STD_ENTRY(fill_ra_stack) + { + move r0, lr + jal 1f + } +1: jal 2f +2: jal 3f +3: jal 4f +4: jrp r0 + STD_ENDPROC(fill_ra_stack) + +/* Include .intrpt1 array of interrupt vectors */ + .section ".intrpt1", "ax" + +#define op_handle_perf_interrupt bad_intr +#define op_handle_aux_perf_interrupt bad_intr + +#ifndef CONFIG_HARDWALL +#define do_hardwall_trap bad_intr +#endif + + int_hand INT_MEM_ERROR, MEM_ERROR, do_trap + int_hand INT_SINGLE_STEP_3, SINGLE_STEP_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, gx_singlestep_handle + int_hand INT_SINGLE_STEP_1, SINGLE_STEP_1, bad_intr +#else + int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, bad_intr + int_hand INT_SINGLE_STEP_1, SINGLE_STEP_1, gx_singlestep_handle +#endif + int_hand INT_SINGLE_STEP_0, SINGLE_STEP_0, bad_intr + int_hand INT_IDN_COMPLETE, IDN_COMPLETE, bad_intr + int_hand INT_UDN_COMPLETE, UDN_COMPLETE, bad_intr + int_hand INT_ITLB_MISS, ITLB_MISS, do_page_fault + int_hand INT_ILL, ILL, do_trap + int_hand INT_GPV, GPV, do_trap + int_hand INT_IDN_ACCESS, IDN_ACCESS, do_trap + int_hand INT_UDN_ACCESS, UDN_ACCESS, do_trap + int_hand INT_SWINT_3, SWINT_3, do_trap + int_hand INT_SWINT_2, SWINT_2, do_trap + int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall + int_hand INT_SWINT_0, SWINT_0, do_trap + int_hand INT_ILL_TRANS, ILL_TRANS, do_trap + int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign + int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault + int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault + int_hand INT_IDN_FIREWALL, IDN_FIREWALL, bad_intr + int_hand INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap + int_hand INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt + int_hand INT_IDN_TIMER, IDN_TIMER, bad_intr + int_hand INT_UDN_TIMER, UDN_TIMER, bad_intr + int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr + int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr + int_hand INT_IPI_3, IPI_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + int_hand INT_IPI_2, IPI_2, tile_dev_intr + int_hand INT_IPI_1, IPI_1, bad_intr +#else + int_hand INT_IPI_2, IPI_2, bad_intr + int_hand INT_IPI_1, IPI_1, tile_dev_intr +#endif + int_hand INT_IPI_0, IPI_0, bad_intr + int_hand INT_PERF_COUNT, PERF_COUNT, \ + op_handle_perf_interrupt, handle_nmi + int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ + op_handle_perf_interrupt, handle_nmi + int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + dc_dispatch INT_INTCTRL_2, INTCTRL_2 + int_hand INT_INTCTRL_1, INTCTRL_1, bad_intr +#else + int_hand INT_INTCTRL_2, INTCTRL_2, bad_intr + dc_dispatch INT_INTCTRL_1, INTCTRL_1 +#endif + int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr + int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ + hv_message_intr + int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, bad_intr + int_hand INT_I_ASID, I_ASID, bad_intr + int_hand INT_D_ASID, D_ASID, bad_intr + int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap + + /* Synthetic interrupt delivered only by the simulator */ + int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c new file mode 100644 index 00000000..02e62806 --- /dev/null +++ b/arch/tile/kernel/irq.c @@ -0,0 +1,295 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel_stat.h> +#include <linux/uaccess.h> +#include <hv/drv_pcie_rc_intf.h> +#include <arch/spr_def.h> +#include <asm/traps.h> + +/* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */ +#define IS_HW_CLEARED 1 + +/* + * The set of interrupts we enable for arch_local_irq_enable(). + * This is initialized to have just a single interrupt that the kernel + * doesn't actually use as a sentinel. During kernel init, + * interrupts are added as the kernel gets prepared to support them. + * NOTE: we could probably initialize them all statically up front. + */ +DEFINE_PER_CPU(unsigned long long, interrupts_enabled_mask) = + INITIAL_INTERRUPTS_ENABLED; +EXPORT_PER_CPU_SYMBOL(interrupts_enabled_mask); + +/* Define per-tile device interrupt statistics state. */ +DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; +EXPORT_PER_CPU_SYMBOL(irq_stat); + +/* + * Define per-tile irq disable mask; the hardware/HV only has a single + * mask that we use to implement both masking and disabling. + */ +static DEFINE_PER_CPU(unsigned long, irq_disable_mask) + ____cacheline_internodealigned_in_smp; + +/* + * Per-tile IRQ nesting depth. Used to make sure we enable newly + * enabled IRQs before exiting the outermost interrupt. + */ +static DEFINE_PER_CPU(int, irq_depth); + +/* State for allocating IRQs on Gx. */ +#if CHIP_HAS_IPI() +static unsigned long available_irqs = ~(1UL << IRQ_RESCHEDULE); +static DEFINE_SPINLOCK(available_irqs_lock); +#endif + +#if CHIP_HAS_IPI() +/* Use SPRs to manipulate device interrupts. */ +#define mask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_SET_K, irq_mask) +#define unmask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_RESET_K, irq_mask) +#define clear_irqs(irq_mask) __insn_mtspr(SPR_IPI_EVENT_RESET_K, irq_mask) +#else +/* Use HV to manipulate device interrupts. */ +#define mask_irqs(irq_mask) hv_disable_intr(irq_mask) +#define unmask_irqs(irq_mask) hv_enable_intr(irq_mask) +#define clear_irqs(irq_mask) hv_clear_intr(irq_mask) +#endif + +/* + * The interrupt handling path, implemented in terms of HV interrupt + * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx. + */ +void tile_dev_intr(struct pt_regs *regs, int intnum) +{ + int depth = __get_cpu_var(irq_depth)++; + unsigned long original_irqs; + unsigned long remaining_irqs; + struct pt_regs *old_regs; + +#if CHIP_HAS_IPI() + /* + * Pending interrupts are listed in an SPR. We might be + * nested, so be sure to only handle irqs that weren't already + * masked by a previous interrupt. Then, mask out the ones + * we're going to handle. + */ + unsigned long masked = __insn_mfspr(SPR_IPI_MASK_K); + original_irqs = __insn_mfspr(SPR_IPI_EVENT_K) & ~masked; + __insn_mtspr(SPR_IPI_MASK_SET_K, original_irqs); +#else + /* + * Hypervisor performs the equivalent of the Gx code above and + * then puts the pending interrupt mask into a system save reg + * for us to find. + */ + original_irqs = __insn_mfspr(SPR_SYSTEM_SAVE_K_3); +#endif + remaining_irqs = original_irqs; + + /* Track time spent here in an interrupt context. */ + old_regs = set_irq_regs(regs); + irq_enter(); + +#ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: less than 1/8th stack free? */ + { + long sp = stack_pointer - (long) current_thread_info(); + if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { + pr_emerg("tile_dev_intr: " + "stack overflow: %ld\n", + sp - sizeof(struct thread_info)); + dump_stack(); + } + } +#endif + while (remaining_irqs) { + unsigned long irq = __ffs(remaining_irqs); + remaining_irqs &= ~(1UL << irq); + + /* Count device irqs; Linux IPIs are counted elsewhere. */ + if (irq != IRQ_RESCHEDULE) + __get_cpu_var(irq_stat).irq_dev_intr_count++; + + generic_handle_irq(irq); + } + + /* + * If we weren't nested, turn on all enabled interrupts, + * including any that were reenabled during interrupt + * handling. + */ + if (depth == 0) + unmask_irqs(~__get_cpu_var(irq_disable_mask)); + + __get_cpu_var(irq_depth)--; + + /* + * Track time spent against the current process again and + * process any softirqs if they are waiting. + */ + irq_exit(); + set_irq_regs(old_regs); +} + + +/* + * Remove an irq from the disabled mask. If we're in an interrupt + * context, defer enabling the HW interrupt until we leave. + */ +static void tile_irq_chip_enable(struct irq_data *d) +{ + get_cpu_var(irq_disable_mask) &= ~(1UL << d->irq); + if (__get_cpu_var(irq_depth) == 0) + unmask_irqs(1UL << d->irq); + put_cpu_var(irq_disable_mask); +} + +/* + * Add an irq to the disabled mask. We disable the HW interrupt + * immediately so that there's no possibility of it firing. If we're + * in an interrupt context, the return path is careful to avoid + * unmasking a newly disabled interrupt. + */ +static void tile_irq_chip_disable(struct irq_data *d) +{ + get_cpu_var(irq_disable_mask) |= (1UL << d->irq); + mask_irqs(1UL << d->irq); + put_cpu_var(irq_disable_mask); +} + +/* Mask an interrupt. */ +static void tile_irq_chip_mask(struct irq_data *d) +{ + mask_irqs(1UL << d->irq); +} + +/* Unmask an interrupt. */ +static void tile_irq_chip_unmask(struct irq_data *d) +{ + unmask_irqs(1UL << d->irq); +} + +/* + * Clear an interrupt before processing it so that any new assertions + * will trigger another irq. + */ +static void tile_irq_chip_ack(struct irq_data *d) +{ + if ((unsigned long)irq_data_get_irq_chip_data(d) != IS_HW_CLEARED) + clear_irqs(1UL << d->irq); +} + +/* + * For per-cpu interrupts, we need to avoid unmasking any interrupts + * that we disabled via disable_percpu_irq(). + */ +static void tile_irq_chip_eoi(struct irq_data *d) +{ + if (!(__get_cpu_var(irq_disable_mask) & (1UL << d->irq))) + unmask_irqs(1UL << d->irq); +} + +static struct irq_chip tile_irq_chip = { + .name = "tile_irq_chip", + .irq_enable = tile_irq_chip_enable, + .irq_disable = tile_irq_chip_disable, + .irq_ack = tile_irq_chip_ack, + .irq_eoi = tile_irq_chip_eoi, + .irq_mask = tile_irq_chip_mask, + .irq_unmask = tile_irq_chip_unmask, +}; + +void __init init_IRQ(void) +{ + ipi_init(); +} + +void __cpuinit setup_irq_regs(void) +{ + /* Enable interrupt delivery. */ + unmask_irqs(~0UL); +#if CHIP_HAS_IPI() + arch_local_irq_unmask(INT_IPI_K); +#endif +} + +void tile_irq_activate(unsigned int irq, int tile_irq_type) +{ + /* + * We use handle_level_irq() by default because the pending + * interrupt vector (whether modeled by the HV on TILE64 and + * TILEPro or implemented in hardware on TILE-Gx) has + * level-style semantics for each bit. An interrupt fires + * whenever a bit is high, not just at edges. + */ + irq_flow_handler_t handle = handle_level_irq; + if (tile_irq_type == TILE_IRQ_PERCPU) + handle = handle_percpu_irq; + irq_set_chip_and_handler(irq, &tile_irq_chip, handle); + + /* + * Flag interrupts that are hardware-cleared so that ack() + * won't clear them. + */ + if (tile_irq_type == TILE_IRQ_HW_CLEAR) + irq_set_chip_data(irq, (void *)IS_HW_CLEARED); +} +EXPORT_SYMBOL(tile_irq_activate); + + +void ack_bad_irq(unsigned int irq) +{ + pr_err("unexpected IRQ trap at vector %02x\n", irq); +} + +/* + * Generic, controller-independent functions: + */ + +#if CHIP_HAS_IPI() +int create_irq(void) +{ + unsigned long flags; + int result; + + spin_lock_irqsave(&available_irqs_lock, flags); + if (available_irqs == 0) + result = -ENOMEM; + else { + result = __ffs(available_irqs); + available_irqs &= ~(1UL << result); + dynamic_irq_init(result); + } + spin_unlock_irqrestore(&available_irqs_lock, flags); + + return result; +} +EXPORT_SYMBOL(create_irq); + +void destroy_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&available_irqs_lock, flags); + available_irqs |= (1UL << irq); + dynamic_irq_cleanup(irq); + spin_unlock_irqrestore(&available_irqs_lock, flags); +} +EXPORT_SYMBOL(destroy_irq); +#endif diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c new file mode 100644 index 00000000..6255f2ea --- /dev/null +++ b/arch/tile/kernel/machine_kexec.c @@ -0,0 +1,282 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * based on machine_kexec.c from other architectures in linux-2.6.18 + */ + +#include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/errno.h> +#include <linux/vmalloc.h> +#include <linux/cpumask.h> +#include <linux/kernel.h> +#include <linux/elf.h> +#include <linux/highmem.h> +#include <linux/mmu_context.h> +#include <linux/io.h> +#include <linux/timex.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/cacheflush.h> +#include <asm/checksum.h> +#include <hv/hypervisor.h> + + +/* + * This stuff is not in elf.h and is not in any other kernel include. + * This stuff is needed below in the little boot notes parser to + * extract the command line so we can pass it to the hypervisor. + */ +struct Elf32_Bhdr { + Elf32_Word b_signature; + Elf32_Word b_size; + Elf32_Half b_checksum; + Elf32_Half b_records; +}; +#define ELF_BOOT_MAGIC 0x0E1FB007 +#define EBN_COMMAND_LINE 0x00000004 +#define roundupsz(X) (((X) + 3) & ~3) + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + + +void machine_shutdown(void) +{ + /* + * Normally we would stop all the other processors here, but + * the check in machine_kexec_prepare below ensures we'll only + * get this far if we've been booted with "nosmp" on the + * command line or without CONFIG_SMP so there's nothing to do + * here (for now). + */ +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + /* + * Cannot happen. This type of kexec is disabled on this + * architecture (and enforced in machine_kexec_prepare below). + */ +} + + +int machine_kexec_prepare(struct kimage *image) +{ + if (num_online_cpus() > 1) { + pr_warning("%s: detected attempt to kexec " + "with num_online_cpus() > 1\n", + __func__); + return -ENOSYS; + } + if (image->type != KEXEC_TYPE_DEFAULT) { + pr_warning("%s: detected attempt to kexec " + "with unsupported type: %d\n", + __func__, + image->type); + return -ENOSYS; + } + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ + /* + * We did nothing in machine_kexec_prepare, + * so we have nothing to do here. + */ +} + +/* + * If we can find elf boot notes on this page, return the command + * line. Otherwise, silently return null. Somewhat kludgy, but no + * good way to do this without significantly rearchitecting the + * architecture-independent kexec code. + */ + +static unsigned char *kexec_bn2cl(void *pg) +{ + struct Elf32_Bhdr *bhdrp; + Elf32_Nhdr *nhdrp; + unsigned char *desc; + unsigned char *command_line; + __sum16 csum; + + bhdrp = (struct Elf32_Bhdr *) pg; + + /* + * This routine is invoked for every source page, so make + * sure to quietly ignore every impossible page. + */ + if (bhdrp->b_signature != ELF_BOOT_MAGIC || + bhdrp->b_size > PAGE_SIZE) + return 0; + + /* + * If we get a checksum mismatch, warn with the checksum + * so we can diagnose better. + */ + csum = ip_compute_csum(pg, bhdrp->b_size); + if (csum != 0) { + pr_warning("%s: bad checksum %#x (size %d)\n", + __func__, csum, bhdrp->b_size); + return 0; + } + + nhdrp = (Elf32_Nhdr *) (bhdrp + 1); + + while (nhdrp->n_type != EBN_COMMAND_LINE) { + + desc = (unsigned char *) (nhdrp + 1); + desc += roundupsz(nhdrp->n_descsz); + + nhdrp = (Elf32_Nhdr *) desc; + + /* still in bounds? */ + if ((unsigned char *) (nhdrp + 1) > + ((unsigned char *) pg) + bhdrp->b_size) { + + pr_info("%s: out of bounds\n", __func__); + return 0; + } + } + + command_line = (unsigned char *) (nhdrp + 1); + desc = command_line; + + while (*desc != '\0') { + desc++; + if (((unsigned long)desc & PAGE_MASK) != (unsigned long)pg) { + pr_info("%s: ran off end of page\n", + __func__); + return 0; + } + } + + return command_line; +} + +static void kexec_find_and_set_command_line(struct kimage *image) +{ + kimage_entry_t *ptr, entry; + + unsigned char *command_line = 0; + unsigned char *r; + HV_Errno hverr; + + for (ptr = &image->head; + (entry = *ptr) && !(entry & IND_DONE); + ptr = (entry & IND_INDIRECTION) ? + phys_to_virt((entry & PAGE_MASK)) : ptr + 1) { + + if ((entry & IND_SOURCE)) { + void *va = + kmap_atomic_pfn(entry >> PAGE_SHIFT); + r = kexec_bn2cl(va); + if (r) { + command_line = r; + break; + } + kunmap_atomic(va); + } + } + + if (command_line != 0) { + pr_info("setting new command line to \"%s\"\n", + command_line); + + hverr = hv_set_command_line( + (HV_VirtAddr) command_line, strlen(command_line)); + kunmap_atomic(command_line); + } else { + pr_info("%s: no command line found; making empty\n", + __func__); + hverr = hv_set_command_line((HV_VirtAddr) command_line, 0); + } + if (hverr) + pr_warning("%s: hv_set_command_line returned error: %d\n", + __func__, hverr); +} + +/* + * The kexec code range-checks all its PAs, so to avoid having it run + * amok and allocate memory and then sequester it from every other + * controller, we force it to come from controller zero. We also + * disable the oom-killer since if we do end up running out of memory, + * that almost certainly won't help. + */ +struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order) +{ + gfp_mask |= __GFP_THISNODE | __GFP_NORETRY; + return alloc_pages_node(0, gfp_mask, order); +} + +static void setup_quasi_va_is_pa(void) +{ + HV_PTE *pgtable; + HV_PTE pte; + int i; + + /* + * Flush our TLB to prevent conflicts between the previous contents + * and the new stuff we're about to add. + */ + local_flush_tlb_all(); + + /* setup VA is PA, at least up to PAGE_OFFSET */ + + pgtable = (HV_PTE *)current->mm->pgd; + pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE); + pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); + + for (i = 0; i < pgd_index(PAGE_OFFSET); i++) { + unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT); + if (pfn_valid(pfn)) + __set_pte(&pgtable[i], pfn_pte(pfn, pte)); + } +} + + +void machine_kexec(struct kimage *image) +{ + void *reboot_code_buffer; + void (*rnk)(unsigned long, void *, unsigned long) + __noreturn; + + /* Mask all interrupts before starting to reboot. */ + interrupt_mask_set_mask(~0ULL); + + kexec_find_and_set_command_line(image); + + /* + * Adjust the home caching of the control page to be cached on + * this cpu, and copy the assembly helper into the control + * code page, which we map in the vmalloc area. + */ + homecache_change_page_home(image->control_code_page, 0, + smp_processor_id()); + reboot_code_buffer = vmap(&image->control_code_page, 1, 0, + __pgprot(_PAGE_KERNEL | _PAGE_EXECUTABLE)); + memcpy(reboot_code_buffer, relocate_new_kernel, + relocate_new_kernel_size); + __flush_icache_range( + (unsigned long) reboot_code_buffer, + (unsigned long) reboot_code_buffer + relocate_new_kernel_size); + + setup_quasi_va_is_pa(); + + /* now call it */ + rnk = reboot_code_buffer; + (*rnk)(image->head, reboot_code_buffer, image->start); +} diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c new file mode 100644 index 00000000..0858ee6b --- /dev/null +++ b/arch/tile/kernel/messaging.c @@ -0,0 +1,116 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/percpu.h> +#include <linux/smp.h> +#include <linux/hardirq.h> +#include <linux/ptrace.h> +#include <asm/hv_driver.h> +#include <asm/irq_regs.h> +#include <asm/traps.h> +#include <hv/hypervisor.h> +#include <arch/interrupts.h> + +/* All messages are stored here */ +static DEFINE_PER_CPU(HV_MsgState, msg_state); + +void __cpuinit init_messaging(void) +{ + /* Allocate storage for messages in kernel space */ + HV_MsgState *state = &__get_cpu_var(msg_state); + int rc = hv_register_message_state(state); + if (rc != HV_OK) + panic("hv_register_message_state: error %d", rc); + + /* Make sure downcall interrupts will be enabled. */ + arch_local_irq_unmask(INT_INTCTRL_K); +} + +void hv_message_intr(struct pt_regs *regs, int intnum) +{ + /* + * We enter with interrupts disabled and leave them disabled, + * to match expectations of called functions (e.g. + * do_ccupdate_local() in mm/slab.c). This is also consistent + * with normal call entry for device interrupts. + */ + + int message[HV_MAX_MESSAGE_SIZE/sizeof(int)]; + HV_RcvMsgInfo rmi; + int nmsgs = 0; + + /* Track time spent here in an interrupt context */ + struct pt_regs *old_regs = set_irq_regs(regs); + irq_enter(); + +#ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: less than 1/8th stack free? */ + { + long sp = stack_pointer - (long) current_thread_info(); + if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { + pr_emerg("hv_message_intr: " + "stack overflow: %ld\n", + sp - sizeof(struct thread_info)); + dump_stack(); + } + } +#endif + + while (1) { + rmi = hv_receive_message(__get_cpu_var(msg_state), + (HV_VirtAddr) message, + sizeof(message)); + if (rmi.msglen == 0) + break; + + if (rmi.msglen < 0) + panic("hv_receive_message failed: %d", rmi.msglen); + + ++nmsgs; + + if (rmi.source == HV_MSG_TILE) { + int tag; + + /* we just send tags for now */ + BUG_ON(rmi.msglen != sizeof(int)); + + tag = message[0]; +#ifdef CONFIG_SMP + evaluate_message(message[0]); +#else + panic("Received IPI message %d in UP mode", tag); +#endif + } else if (rmi.source == HV_MSG_INTR) { + HV_IntrMsg *him = (HV_IntrMsg *)message; + struct hv_driver_cb *cb = + (struct hv_driver_cb *)him->intarg; + cb->callback(cb, him->intdata); + __get_cpu_var(irq_stat).irq_hv_msg_count++; + } + } + + /* + * We shouldn't have gotten a message downcall with no + * messages available. + */ + if (nmsgs == 0) + panic("Message downcall invoked with no messages!"); + + /* + * Track time spent against the current process again and + * process any softirqs if they are waiting. + */ + irq_exit(); + set_irq_regs(old_regs); +} diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c new file mode 100644 index 00000000..98d47692 --- /dev/null +++ b/arch/tile/kernel/module.c @@ -0,0 +1,234 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Based on i386 version, copyright (C) 2001 Rusty Russell. + */ + +#include <linux/moduleloader.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <asm/pgtable.h> +#include <asm/homecache.h> +#include <arch/opcode.h> + +#ifdef __tilegx__ +# define Elf_Rela Elf64_Rela +# define ELF_R_SYM ELF64_R_SYM +# define ELF_R_TYPE ELF64_R_TYPE +#else +# define Elf_Rela Elf32_Rela +# define ELF_R_SYM ELF32_R_SYM +# define ELF_R_TYPE ELF32_R_TYPE +#endif + +#ifdef MODULE_DEBUG +#define DEBUGP printk +#else +#define DEBUGP(fmt...) +#endif + +/* + * Allocate some address space in the range MEM_MODULE_START to + * MEM_MODULE_END and populate it with memory. + */ +void *module_alloc(unsigned long size) +{ + struct page **pages; + pgprot_t prot_rwx = __pgprot(_PAGE_KERNEL | _PAGE_KERNEL_EXEC); + struct vm_struct *area; + int i = 0; + int npages; + + if (size == 0) + return NULL; + npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + pages = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); + if (pages == NULL) + return NULL; + for (; i < npages; ++i) { + pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!pages[i]) + goto error; + } + + area = __get_vm_area(size, VM_ALLOC, MEM_MODULE_START, MEM_MODULE_END); + if (!area) + goto error; + area->nr_pages = npages; + area->pages = pages; + + if (map_vm_area(area, prot_rwx, &pages)) { + vunmap(area->addr); + goto error; + } + + return area->addr; + +error: + while (--i >= 0) + __free_page(pages[i]); + kfree(pages); + return NULL; +} + + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); + + /* Globally flush the L1 icache. */ + flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask, + 0, 0, 0, NULL, NULL, 0); + + /* + * FIXME: If module_region == mod->module_init, trim exception + * table entries. + */ +} + +#ifdef __tilegx__ +/* + * Validate that the high 16 bits of "value" is just the sign-extension of + * the low 48 bits. + */ +static int validate_hw2_last(long value, struct module *me) +{ + if (((value << 16) >> 16) != value) { + pr_warning("module %s: Out of range HW2_LAST value %#lx\n", + me->name, value); + return 0; + } + return 1; +} + +/* + * Validate that "value" isn't too big to hold in a JumpOff relocation. + */ +static int validate_jumpoff(long value) +{ + /* Determine size of jump offset. */ + int shift = __builtin_clzl(get_JumpOff_X1(create_JumpOff_X1(-1))); + + /* Check to see if it fits into the relocation slot. */ + long f = get_JumpOff_X1(create_JumpOff_X1(value)); + f = (f << shift) >> shift; + + return f == value; +} +#endif + +int apply_relocate_add(Elf_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + unsigned int i; + Elf_Rela *rel = (void *)sechdrs[relsec].sh_addr; + Elf_Sym *sym; + u64 *location; + unsigned long value; + + DEBUGP("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rel[i].r_offset; + /* + * This is the symbol it is referring to. + * Note that all undefined symbols have been resolved. + */ + sym = (Elf_Sym *)sechdrs[symindex].sh_addr + + ELF_R_SYM(rel[i].r_info); + value = sym->st_value + rel[i].r_addend; + + switch (ELF_R_TYPE(rel[i].r_info)) { + +#define MUNGE(func) (*location = ((*location & ~func(-1)) | func(value))) + +#ifndef __tilegx__ + case R_TILE_32: + *(uint32_t *)location = value; + break; + case R_TILE_IMM16_X0_HA: + value = (value + 0x8000) >> 16; + /*FALLTHROUGH*/ + case R_TILE_IMM16_X0_LO: + MUNGE(create_Imm16_X0); + break; + case R_TILE_IMM16_X1_HA: + value = (value + 0x8000) >> 16; + /*FALLTHROUGH*/ + case R_TILE_IMM16_X1_LO: + MUNGE(create_Imm16_X1); + break; + case R_TILE_JOFFLONG_X1: + value -= (unsigned long) location; /* pc-relative */ + value = (long) value >> 3; /* count by instrs */ + MUNGE(create_JOffLong_X1); + break; +#else + case R_TILEGX_64: + *location = value; + break; + case R_TILEGX_IMM16_X0_HW2_LAST: + if (!validate_hw2_last(value, me)) + return -ENOEXEC; + value >>= 16; + /*FALLTHROUGH*/ + case R_TILEGX_IMM16_X0_HW1: + value >>= 16; + /*FALLTHROUGH*/ + case R_TILEGX_IMM16_X0_HW0: + MUNGE(create_Imm16_X0); + break; + case R_TILEGX_IMM16_X1_HW2_LAST: + if (!validate_hw2_last(value, me)) + return -ENOEXEC; + value >>= 16; + /*FALLTHROUGH*/ + case R_TILEGX_IMM16_X1_HW1: + value >>= 16; + /*FALLTHROUGH*/ + case R_TILEGX_IMM16_X1_HW0: + MUNGE(create_Imm16_X1); + break; + case R_TILEGX_JUMPOFF_X1: + value -= (unsigned long) location; /* pc-relative */ + value = (long) value >> 3; /* count by instrs */ + if (!validate_jumpoff(value)) { + pr_warning("module %s: Out of range jump to" + " %#llx at %#llx (%p)\n", me->name, + sym->st_value + rel[i].r_addend, + rel[i].r_offset, location); + return -ENOEXEC; + } + MUNGE(create_JumpOff_X1); + break; +#endif + +#undef MUNGE + + default: + pr_err("module %s: Unknown relocation: %d\n", + me->name, (int) ELF_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } + } + return 0; +} diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c new file mode 100644 index 00000000..b3ed19f8 --- /dev/null +++ b/arch/tile/kernel/pci-dma.c @@ -0,0 +1,252 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/mm.h> +#include <linux/dma-mapping.h> +#include <linux/vmalloc.h> +#include <linux/export.h> +#include <asm/tlbflush.h> +#include <asm/homecache.h> + +/* Generic DMA mapping functions: */ + +/* + * Allocate what Linux calls "coherent" memory, which for us just + * means uncached. + */ +void *dma_alloc_coherent(struct device *dev, + size_t size, + dma_addr_t *dma_handle, + gfp_t gfp) +{ + u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32); + int node = dev_to_node(dev); + int order = get_order(size); + struct page *pg; + dma_addr_t addr; + + gfp |= __GFP_ZERO; + + /* + * By forcing NUMA node 0 for 32-bit masks we ensure that the + * high 32 bits of the resulting PA will be zero. If the mask + * size is, e.g., 24, we may still not be able to guarantee a + * suitable memory address, in which case we will return NULL. + * But such devices are uncommon. + */ + if (dma_mask <= DMA_BIT_MASK(32)) + node = 0; + + pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_UNCACHED); + if (pg == NULL) + return NULL; + + addr = page_to_phys(pg); + if (addr + size > dma_mask) { + homecache_free_pages(addr, order); + return NULL; + } + + *dma_handle = addr; + return page_address(pg); +} +EXPORT_SYMBOL(dma_alloc_coherent); + +/* + * Free memory that was allocated with dma_alloc_coherent. + */ +void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + homecache_free_pages((unsigned long)vaddr, get_order(size)); +} +EXPORT_SYMBOL(dma_free_coherent); + +/* + * The map routines "map" the specified address range for DMA + * accesses. The memory belongs to the device after this call is + * issued, until it is unmapped with dma_unmap_single. + * + * We don't need to do any mapping, we just flush the address range + * out of the cache and return a DMA address. + * + * The unmap routines do whatever is necessary before the processor + * accesses the memory again, and must be called before the driver + * touches the memory. We can get away with a cache invalidate if we + * can count on nothing having been touched. + */ + +/* Flush a PA range from cache page by page. */ +static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size) +{ + struct page *page = pfn_to_page(PFN_DOWN(dma_addr)); + size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1)); + + while ((ssize_t)size > 0) { + /* Flush the page. */ + homecache_flush_cache(page++, 0); + + /* Figure out if we need to continue on the next page. */ + size -= bytesleft; + bytesleft = PAGE_SIZE; + } +} + +/* + * dma_map_single can be passed any memory address, and there appear + * to be no alignment constraints. + * + * There is a chance that the start of the buffer will share a cache + * line with some other data that has been touched in the meantime. + */ +dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction direction) +{ + dma_addr_t dma_addr = __pa(ptr); + + BUG_ON(!valid_dma_direction(direction)); + WARN_ON(size == 0); + + __dma_map_pa_range(dma_addr, size); + + return dma_addr; +} +EXPORT_SYMBOL(dma_map_single); + +void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} +EXPORT_SYMBOL(dma_unmap_single); + +int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, + enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + BUG_ON(!valid_dma_direction(direction)); + + WARN_ON(nents == 0 || sglist->length == 0); + + for_each_sg(sglist, sg, nents, i) { + sg->dma_address = sg_phys(sg); + __dma_map_pa_range(sg->dma_address, sg->length); + } + + return nents; +} +EXPORT_SYMBOL(dma_map_sg); + +void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} +EXPORT_SYMBOL(dma_unmap_sg); + +dma_addr_t dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); + + BUG_ON(offset + size > PAGE_SIZE); + homecache_flush_cache(page, 0); + + return page_to_pa(page) + offset; +} +EXPORT_SYMBOL(dma_map_page); + +void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} +EXPORT_SYMBOL(dma_unmap_page); + +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} +EXPORT_SYMBOL(dma_sync_single_for_cpu); + +void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + unsigned long start = PFN_DOWN(dma_handle); + unsigned long end = PFN_DOWN(dma_handle + size - 1); + unsigned long i; + + BUG_ON(!valid_dma_direction(direction)); + for (i = start; i <= end; ++i) + homecache_flush_cache(pfn_to_page(i), 0); +} +EXPORT_SYMBOL(dma_sync_single_for_device); + +void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); + WARN_ON(nelems == 0 || sg[0].length == 0); +} +EXPORT_SYMBOL(dma_sync_sg_for_cpu); + +/* + * Flush and invalidate cache for scatterlist. + */ +void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + BUG_ON(!valid_dma_direction(direction)); + WARN_ON(nelems == 0 || sglist->length == 0); + + for_each_sg(sglist, sg, nelems, i) { + dma_sync_single_for_device(dev, sg->dma_address, + sg_dma_len(sg), direction); + } +} +EXPORT_SYMBOL(dma_sync_sg_for_device); + +void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + dma_sync_single_for_cpu(dev, dma_handle + offset, size, direction); +} +EXPORT_SYMBOL(dma_sync_single_range_for_cpu); + +void dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + dma_sync_single_for_device(dev, dma_handle + offset, size, direction); +} +EXPORT_SYMBOL(dma_sync_single_range_for_device); + +/* + * dma_alloc_noncoherent() returns non-cacheable memory, so there's no + * need to do any flushing here. + */ +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) +{ +} +EXPORT_SYMBOL(dma_cache_sync); diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c new file mode 100644 index 00000000..b56d12bf --- /dev/null +++ b/arch/tile/kernel/pci.c @@ -0,0 +1,632 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/capability.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/bootmem.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <linux/uaccess.h> +#include <linux/export.h> + +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/byteorder.h> +#include <asm/hv_driver.h> +#include <hv/drv_pcie_rc_intf.h> + + +/* + * Initialization flow and process + * ------------------------------- + * + * This files contains the routines to search for PCI buses, + * enumerate the buses, and configure any attached devices. + * + * There are two entry points here: + * 1) tile_pci_init + * This sets up the pci_controller structs, and opens the + * FDs to the hypervisor. This is called from setup_arch() early + * in the boot process. + * 2) pcibios_init + * This probes the PCI bus(es) for any attached hardware. It's + * called by subsys_initcall. All of the real work is done by the + * generic Linux PCI layer. + * + */ + +/* + * This flag tells if the platform is TILEmpower that needs + * special configuration for the PLX switch chip. + */ +int __write_once tile_plx_gen1; + +static struct pci_controller controllers[TILE_NUM_PCIE]; +static int num_controllers; +static int pci_scan_flags[TILE_NUM_PCIE]; + +static struct pci_ops tile_cfg_ops; + + +/* + * We don't need to worry about the alignment of resources. + */ +resource_size_t pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, resource_size_t align) +{ + return res->start; +} +EXPORT_SYMBOL(pcibios_align_resource); + +/* + * Open a FD to the hypervisor PCI device. + * + * controller_id is the controller number, config type is 0 or 1 for + * config0 or config1 operations. + */ +static int __devinit tile_pcie_open(int controller_id, int config_type) +{ + char filename[32]; + int fd; + + sprintf(filename, "pcie/%d/config%d", controller_id, config_type); + + fd = hv_dev_open((HV_VirtAddr)filename, 0); + + return fd; +} + + +/* + * Get the IRQ numbers from the HV and set up the handlers for them. + */ +static int __devinit tile_init_irqs(int controller_id, + struct pci_controller *controller) +{ + char filename[32]; + int fd; + int ret; + int x; + struct pcie_rc_config rc_config; + + sprintf(filename, "pcie/%d/ctl", controller_id); + fd = hv_dev_open((HV_VirtAddr)filename, 0); + if (fd < 0) { + pr_err("PCI: hv_dev_open(%s) failed\n", filename); + return -1; + } + ret = hv_dev_pread(fd, 0, (HV_VirtAddr)(&rc_config), + sizeof(rc_config), PCIE_RC_CONFIG_MASK_OFF); + hv_dev_close(fd); + if (ret != sizeof(rc_config)) { + pr_err("PCI: wanted %zd bytes, got %d\n", + sizeof(rc_config), ret); + return -1; + } + /* Record irq_base so that we can map INTx to IRQ # later. */ + controller->irq_base = rc_config.intr; + + for (x = 0; x < 4; x++) + tile_irq_activate(rc_config.intr + x, + TILE_IRQ_HW_CLEAR); + + if (rc_config.plx_gen1) + controller->plx_gen1 = 1; + + return 0; +} + +/* + * First initialization entry point, called from setup_arch(). + * + * Find valid controllers and fill in pci_controller structs for each + * of them. + * + * Returns the number of controllers discovered. + */ +int __init tile_pci_init(void) +{ + int i; + + pr_info("PCI: Searching for controllers...\n"); + + /* Re-init number of PCIe controllers to support hot-plug feature. */ + num_controllers = 0; + + /* Do any configuration we need before using the PCIe */ + + for (i = 0; i < TILE_NUM_PCIE; i++) { + /* + * To see whether we need a real config op based on + * the results of pcibios_init(), to support PCIe hot-plug. + */ + if (pci_scan_flags[i] == 0) { + int hv_cfg_fd0 = -1; + int hv_cfg_fd1 = -1; + int hv_mem_fd = -1; + char name[32]; + struct pci_controller *controller; + + /* + * Open the fd to the HV. If it fails then this + * device doesn't exist. + */ + hv_cfg_fd0 = tile_pcie_open(i, 0); + if (hv_cfg_fd0 < 0) + continue; + hv_cfg_fd1 = tile_pcie_open(i, 1); + if (hv_cfg_fd1 < 0) { + pr_err("PCI: Couldn't open config fd to HV " + "for controller %d\n", i); + goto err_cont; + } + + sprintf(name, "pcie/%d/mem", i); + hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0); + if (hv_mem_fd < 0) { + pr_err("PCI: Could not open mem fd to HV!\n"); + goto err_cont; + } + + pr_info("PCI: Found PCI controller #%d\n", i); + + controller = &controllers[i]; + + controller->index = i; + controller->hv_cfg_fd[0] = hv_cfg_fd0; + controller->hv_cfg_fd[1] = hv_cfg_fd1; + controller->hv_mem_fd = hv_mem_fd; + controller->first_busno = 0; + controller->last_busno = 0xff; + controller->ops = &tile_cfg_ops; + + num_controllers++; + continue; + +err_cont: + if (hv_cfg_fd0 >= 0) + hv_dev_close(hv_cfg_fd0); + if (hv_cfg_fd1 >= 0) + hv_dev_close(hv_cfg_fd1); + if (hv_mem_fd >= 0) + hv_dev_close(hv_mem_fd); + continue; + } + } + + /* + * Before using the PCIe, see if we need to do any platform-specific + * configuration, such as the PLX switch Gen 1 issue on TILEmpower. + */ + for (i = 0; i < num_controllers; i++) { + struct pci_controller *controller = &controllers[i]; + + if (controller->plx_gen1) + tile_plx_gen1 = 1; + } + + return num_controllers; +} + +/* + * (pin - 1) converts from the PCI standard's [1:4] convention to + * a normal [0:3] range. + */ +static int tile_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + struct pci_controller *controller = + (struct pci_controller *)dev->sysdata; + return (pin - 1) + controller->irq_base; +} + + +static void __devinit fixup_read_and_payload_sizes(void) +{ + struct pci_dev *dev = NULL; + int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */ + int max_read_size = 0x2; /* Limit to 512 byte reads. */ + u16 new_values; + + /* Scan for the smallest maximum payload size. */ + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + int pcie_caps_offset; + u32 devcap; + int max_payload; + + pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (pcie_caps_offset == 0) + continue; + + pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP, + &devcap); + max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD; + if (max_payload < smallest_max_payload) + smallest_max_payload = max_payload; + } + + /* Now, set the max_payload_size for all devices to that value. */ + new_values = (max_read_size << 12) | (smallest_max_payload << 5); + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + int pcie_caps_offset; + u16 devctl; + + pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (pcie_caps_offset == 0) + continue; + + pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, + &devctl); + devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ); + devctl |= new_values; + pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, + devctl); + } +} + + +/* + * Second PCI initialization entry point, called by subsys_initcall. + * + * The controllers have been set up by the time we get here, by a call to + * tile_pci_init. + */ +int __init pcibios_init(void) +{ + int i; + + pr_info("PCI: Probing PCI hardware\n"); + + /* + * Delay a bit in case devices aren't ready. Some devices are + * known to require at least 20ms here, but we use a more + * conservative value. + */ + mdelay(250); + + /* Scan all of the recorded PCI controllers. */ + for (i = 0; i < TILE_NUM_PCIE; i++) { + /* + * Do real pcibios init ops if the controller is initialized + * by tile_pci_init() successfully and not initialized by + * pcibios_init() yet to support PCIe hot-plug. + */ + if (pci_scan_flags[i] == 0 && controllers[i].ops != NULL) { + struct pci_controller *controller = &controllers[i]; + struct pci_bus *bus; + + if (tile_init_irqs(i, controller)) { + pr_err("PCI: Could not initialize IRQs\n"); + continue; + } + + pr_info("PCI: initializing controller #%d\n", i); + + /* + * This comes from the generic Linux PCI driver. + * + * It reads the PCI tree for this bus into the Linux + * data structures. + * + * This is inlined in linux/pci.h and calls into + * pci_scan_bus_parented() in probe.c. + */ + bus = pci_scan_bus(0, controller->ops, controller); + controller->root_bus = bus; + controller->last_busno = bus->subordinate; + } + } + + /* Do machine dependent PCI interrupt routing */ + pci_fixup_irqs(pci_common_swizzle, tile_map_irq); + + /* + * This comes from the generic Linux PCI driver. + * + * It allocates all of the resources (I/O memory, etc) + * associated with the devices read in above. + */ + pci_assign_unassigned_resources(); + + /* Configure the max_read_size and max_payload_size values. */ + fixup_read_and_payload_sizes(); + + /* Record the I/O resources in the PCI controller structure. */ + for (i = 0; i < TILE_NUM_PCIE; i++) { + /* + * Do real pcibios init ops if the controller is initialized + * by tile_pci_init() successfully and not initialized by + * pcibios_init() yet to support PCIe hot-plug. + */ + if (pci_scan_flags[i] == 0 && controllers[i].ops != NULL) { + struct pci_bus *root_bus = controllers[i].root_bus; + struct pci_bus *next_bus; + struct pci_dev *dev; + + list_for_each_entry(dev, &root_bus->devices, bus_list) { + /* + * Find the PCI host controller, ie. the 1st + * bridge. + */ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && + (PCI_SLOT(dev->devfn) == 0)) { + next_bus = dev->subordinate; + controllers[i].mem_resources[0] = + *next_bus->resource[0]; + controllers[i].mem_resources[1] = + *next_bus->resource[1]; + controllers[i].mem_resources[2] = + *next_bus->resource[2]; + + /* Setup flags. */ + pci_scan_flags[i] = 1; + + break; + } + } + } + } + + return 0; +} +subsys_initcall(pcibios_init); + +/* + * No bus fixups needed. + */ +void __devinit pcibios_fixup_bus(struct pci_bus *bus) +{ + /* Nothing needs to be done. */ +} + +void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling. */ +} + +/* + * This can be called from the generic PCI layer, but doesn't need to + * do anything. + */ +char __devinit *pcibios_setup(char *str) +{ + /* Nothing needs to be done. */ + return str; +} + +/* + * This is called from the generic Linux layer. + */ +void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} + +/* + * Enable memory and/or address decoding, as appropriate, for the + * device described by the 'dev' struct. + * + * This is called from the generic PCI layer, and can be called + * for bridges or endpoints. + */ +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + u8 header_type; + int i; + struct resource *r; + + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { + /* + * For bridges, we enable both memory and I/O decoding + * in call cases. + */ + cmd |= PCI_COMMAND_IO; + cmd |= PCI_COMMAND_MEMORY; + } else { + /* + * For endpoints, we enable memory and/or I/O decoding + * only if they have a memory resource of that type. + */ + for (i = 0; i < 6; i++) { + r = &dev->resource[i]; + if (r->flags & IORESOURCE_UNSET) { + pr_err("PCI: Device %s not available " + "because of resource collisions\n", + pci_name(dev)); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + } + + /* + * We only write the command if it changed. + */ + if (cmd != old_cmd) + pci_write_config_word(dev, PCI_COMMAND, cmd); + return 0; +} + +/**************************************************************** + * + * Tile PCI config space read/write routines + * + ****************************************************************/ + +/* + * These are the normal read and write ops + * These are expanded with macros from pci_bus_read_config_byte() etc. + * + * devfn is the combined PCI slot & function. + * + * offset is in bytes, from the start of config space for the + * specified bus & slot. + */ + +static int __devinit tile_cfg_read(struct pci_bus *bus, + unsigned int devfn, + int offset, + int size, + u32 *val) +{ + struct pci_controller *controller = bus->sysdata; + int busnum = bus->number & 0xff; + int slot = (devfn >> 3) & 0x1f; + int function = devfn & 0x7; + u32 addr; + int config_mode = 1; + + /* + * There is no bridge between the Tile and bus 0, so we + * use config0 to talk to bus 0. + * + * If we're talking to a bus other than zero then we + * must have found a bridge. + */ + if (busnum == 0) { + /* + * We fake an empty slot for (busnum == 0) && (slot > 0), + * since there is only one slot on bus 0. + */ + if (slot) { + *val = 0xFFFFFFFF; + return 0; + } + config_mode = 0; + } + + addr = busnum << 20; /* Bus in 27:20 */ + addr |= slot << 15; /* Slot (device) in 19:15 */ + addr |= function << 12; /* Function is in 14:12 */ + addr |= (offset & 0xFFF); /* byte address in 0:11 */ + + return hv_dev_pread(controller->hv_cfg_fd[config_mode], 0, + (HV_VirtAddr)(val), size, addr); +} + + +/* + * See tile_cfg_read() for relevant comments. + * Note that "val" is the value to write, not a pointer to that value. + */ +static int __devinit tile_cfg_write(struct pci_bus *bus, + unsigned int devfn, + int offset, + int size, + u32 val) +{ + struct pci_controller *controller = bus->sysdata; + int busnum = bus->number & 0xff; + int slot = (devfn >> 3) & 0x1f; + int function = devfn & 0x7; + u32 addr; + int config_mode = 1; + HV_VirtAddr valp = (HV_VirtAddr)&val; + + /* + * For bus 0 slot 0 we use config 0 accesses. + */ + if (busnum == 0) { + /* + * We fake an empty slot for (busnum == 0) && (slot > 0), + * since there is only one slot on bus 0. + */ + if (slot) + return 0; + config_mode = 0; + } + + addr = busnum << 20; /* Bus in 27:20 */ + addr |= slot << 15; /* Slot (device) in 19:15 */ + addr |= function << 12; /* Function is in 14:12 */ + addr |= (offset & 0xFFF); /* byte address in 0:11 */ + +#ifdef __BIG_ENDIAN + /* Point to the correct part of the 32-bit "val". */ + valp += 4 - size; +#endif + + return hv_dev_pwrite(controller->hv_cfg_fd[config_mode], 0, + valp, size, addr); +} + + +static struct pci_ops tile_cfg_ops = { + .read = tile_cfg_read, + .write = tile_cfg_write, +}; + + +/* + * In the following, each PCI controller's mem_resources[1] + * represents its (non-prefetchable) PCI memory resource. + * mem_resources[0] and mem_resources[2] refer to its PCI I/O and + * prefetchable PCI memory resources, respectively. + * For more details, see pci_setup_bridge() in setup-bus.c. + * By comparing the target PCI memory address against the + * end address of controller 0, we can determine the controller + * that should accept the PCI memory access. + */ +#define TILE_READ(size, type) \ +type _tile_read##size(unsigned long addr) \ +{ \ + type val; \ + int idx = 0; \ + if (addr > controllers[0].mem_resources[1].end && \ + addr > controllers[0].mem_resources[2].end) \ + idx = 1; \ + if (hv_dev_pread(controllers[idx].hv_mem_fd, 0, \ + (HV_VirtAddr)(&val), sizeof(type), addr)) \ + pr_err("PCI: read %zd bytes at 0x%lX failed\n", \ + sizeof(type), addr); \ + return val; \ +} \ +EXPORT_SYMBOL(_tile_read##size) + +TILE_READ(b, u8); +TILE_READ(w, u16); +TILE_READ(l, u32); +TILE_READ(q, u64); + +#define TILE_WRITE(size, type) \ +void _tile_write##size(type val, unsigned long addr) \ +{ \ + int idx = 0; \ + if (addr > controllers[0].mem_resources[1].end && \ + addr > controllers[0].mem_resources[2].end) \ + idx = 1; \ + if (hv_dev_pwrite(controllers[idx].hv_mem_fd, 0, \ + (HV_VirtAddr)(&val), sizeof(type), addr)) \ + pr_err("PCI: write %zd bytes at 0x%lX failed\n", \ + sizeof(type), addr); \ +} \ +EXPORT_SYMBOL(_tile_write##size) + +TILE_WRITE(b, u8); +TILE_WRITE(w, u16); +TILE_WRITE(l, u32); +TILE_WRITE(q, u64); diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c new file mode 100644 index 00000000..446a7f52 --- /dev/null +++ b/arch/tile/kernel/proc.c @@ -0,0 +1,162 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/timex.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/proc_fs.h> +#include <linux/sysctl.h> +#include <linux/hardirq.h> +#include <linux/mman.h> +#include <asm/unaligned.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/homecache.h> +#include <asm/hardwall.h> +#include <arch/chip.h> + + +/* + * Support /proc/cpuinfo + */ + +#define cpu_to_ptr(n) ((void *)((long)(n)+1)) +#define ptr_to_cpu(p) ((long)(p) - 1) + +static int show_cpuinfo(struct seq_file *m, void *v) +{ + int n = ptr_to_cpu(v); + + if (n == 0) { + char buf[NR_CPUS*5]; + cpulist_scnprintf(buf, sizeof(buf), cpu_online_mask); + seq_printf(m, "cpu count\t: %d\n", num_online_cpus()); + seq_printf(m, "cpu list\t: %s\n", buf); + seq_printf(m, "model name\t: %s\n", chip_model); + seq_printf(m, "flags\t\t:\n"); /* nothing for now */ + seq_printf(m, "cpu MHz\t\t: %llu.%06llu\n", + get_clock_rate() / 1000000, + (get_clock_rate() % 1000000)); + seq_printf(m, "bogomips\t: %lu.%02lu\n\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); + } + +#ifdef CONFIG_SMP + if (!cpu_online(n)) + return 0; +#endif + + seq_printf(m, "processor\t: %d\n", n); + + /* Print only num_online_cpus() blank lines total. */ + if (cpumask_next(n, cpu_online_mask) < nr_cpu_ids) + seq_printf(m, "\n"); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < nr_cpu_ids ? cpu_to_ptr(*pos) : NULL; +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} +static void c_stop(struct seq_file *m, void *v) +{ +} +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +/* + * Support /proc/tile directory + */ + +static int __init proc_tile_init(void) +{ + struct proc_dir_entry *root = proc_mkdir("tile", NULL); + if (root == NULL) + return 0; + + proc_tile_hardwall_init(root); + + return 0; +} + +arch_initcall(proc_tile_init); + +/* + * Support /proc/sys/tile directory + */ + +#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */ +static ctl_table unaligned_subtable[] = { + { + .procname = "enabled", + .data = &unaligned_fixup, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "printk", + .data = &unaligned_printk, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "count", + .data = &unaligned_fixup_count, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + {} +}; + +static ctl_table unaligned_table[] = { + { + .procname = "unaligned_fixup", + .mode = 0555, + .child = unaligned_subtable + }, + {} +}; + +static struct ctl_path tile_path[] = { + { .procname = "tile" }, + { } +}; + +static int __init proc_sys_tile_init(void) +{ + register_sysctl_paths(tile_path, unaligned_table); + return 0; +} + +arch_initcall(proc_sys_tile_init); +#endif diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c new file mode 100644 index 00000000..54e6c64b --- /dev/null +++ b/arch/tile/kernel/process.c @@ -0,0 +1,749 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/preempt.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/kprobes.h> +#include <linux/elfcore.h> +#include <linux/tick.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/compat.h> +#include <linux/hardirq.h> +#include <linux/syscalls.h> +#include <linux/kernel.h> +#include <linux/tracehook.h> +#include <linux/signal.h> +#include <asm/stack.h> +#include <asm/switch_to.h> +#include <asm/homecache.h> +#include <asm/syscalls.h> +#include <asm/traps.h> +#include <asm/setup.h> +#ifdef CONFIG_HARDWALL +#include <asm/hardwall.h> +#endif +#include <arch/chip.h> +#include <arch/abi.h> +#include <arch/sim_def.h> + + +/* + * Use the (x86) "idle=poll" option to prefer low latency when leaving the + * idle loop over low power while in the idle loop, e.g. if we have + * one thread per core and we want to get threads out of futex waits fast. + */ +static int no_idle_nap; +static int __init idle_setup(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "poll")) { + pr_info("using polling idle threads.\n"); + no_idle_nap = 1; + } else if (!strcmp(str, "halt")) + no_idle_nap = 0; + else + return -1; + + return 0; +} +early_param("idle", idle_setup); + +/* + * The idle thread. There's no useful work to be + * done, so just try to conserve power and have a + * low exit latency (ie sit in a loop waiting for + * somebody to say that they'd like to reschedule) + */ +void cpu_idle(void) +{ + int cpu = smp_processor_id(); + + + current_thread_info()->status |= TS_POLLING; + + if (no_idle_nap) { + while (1) { + while (!need_resched()) + cpu_relax(); + schedule(); + } + } + + /* endless idle loop with no priority at all */ + while (1) { + tick_nohz_idle_enter(); + rcu_idle_enter(); + while (!need_resched()) { + if (cpu_is_offline(cpu)) + BUG(); /* no HOTPLUG_CPU */ + + local_irq_disable(); + __get_cpu_var(irq_stat).idle_timestamp = jiffies; + current_thread_info()->status &= ~TS_POLLING; + /* + * TS_POLLING-cleared state must be visible before we + * test NEED_RESCHED: + */ + smp_mb(); + + if (!need_resched()) + _cpu_idle(); + else + local_irq_enable(); + current_thread_info()->status |= TS_POLLING; + } + rcu_idle_exit(); + tick_nohz_idle_exit(); + schedule_preempt_disabled(); + } +} + +struct thread_info *alloc_thread_info_node(struct task_struct *task, int node) +{ + struct page *page; + gfp_t flags = GFP_KERNEL; + +#ifdef CONFIG_DEBUG_STACK_USAGE + flags |= __GFP_ZERO; +#endif + + page = alloc_pages_node(node, flags, THREAD_SIZE_ORDER); + if (!page) + return NULL; + + return (struct thread_info *)page_address(page); +} + +/* + * Free a thread_info node, and all of its derivative + * data structures. + */ +void free_thread_info(struct thread_info *info) +{ + struct single_step_state *step_state = info->step_state; + +#ifdef CONFIG_HARDWALL + /* + * We free a thread_info from the context of the task that has + * been scheduled next, so the original task is already dead. + * Calling deactivate here just frees up the data structures. + * If the task we're freeing held the last reference to a + * hardwall fd, it would have been released prior to this point + * anyway via exit_files(), and "hardwall" would be NULL by now. + */ + if (info->task->thread.hardwall) + hardwall_deactivate(info->task); +#endif + + if (step_state) { + + /* + * FIXME: we don't munmap step_state->buffer + * because the mm_struct for this process (info->task->mm) + * has already been zeroed in exit_mm(). Keeping a + * reference to it here seems like a bad move, so this + * means we can't munmap() the buffer, and therefore if we + * ptrace multiple threads in a process, we will slowly + * leak user memory. (Note that as soon as the last + * thread in a process dies, we will reclaim all user + * memory including single-step buffers in the usual way.) + * We should either assign a kernel VA to this buffer + * somehow, or we should associate the buffer(s) with the + * mm itself so we can clean them up that way. + */ + kfree(step_state); + } + + free_pages((unsigned long)info, THREAD_SIZE_ORDER); +} + +static void save_arch_state(struct thread_struct *t); + +int copy_thread(unsigned long clone_flags, unsigned long sp, + unsigned long stack_size, + struct task_struct *p, struct pt_regs *regs) +{ + struct pt_regs *childregs; + unsigned long ksp; + + /* + * When creating a new kernel thread we pass sp as zero. + * Assign it to a reasonable value now that we have the stack. + */ + if (sp == 0 && regs->ex1 == PL_ICS_EX1(KERNEL_PL, 0)) + sp = KSTK_TOP(p); + + /* + * Do not clone step state from the parent; each thread + * must make its own lazily. + */ + task_thread_info(p)->step_state = NULL; + + /* + * Start new thread in ret_from_fork so it schedules properly + * and then return from interrupt like the parent. + */ + p->thread.pc = (unsigned long) ret_from_fork; + + /* Save user stack top pointer so we can ID the stack vm area later. */ + p->thread.usp0 = sp; + + /* Record the pid of the process that created this one. */ + p->thread.creator_pid = current->pid; + + /* + * Copy the registers onto the kernel stack so the + * return-from-interrupt code will reload it into registers. + */ + childregs = task_pt_regs(p); + *childregs = *regs; + childregs->regs[0] = 0; /* return value is zero */ + childregs->sp = sp; /* override with new user stack pointer */ + + /* + * If CLONE_SETTLS is set, set "tp" in the new task to "r4", + * which is passed in as arg #5 to sys_clone(). + */ + if (clone_flags & CLONE_SETTLS) + childregs->tp = regs->regs[4]; + + /* + * Copy the callee-saved registers from the passed pt_regs struct + * into the context-switch callee-saved registers area. + * This way when we start the interrupt-return sequence, the + * callee-save registers will be correctly in registers, which + * is how we assume the compiler leaves them as we start doing + * the normal return-from-interrupt path after calling C code. + * Zero out the C ABI save area to mark the top of the stack. + */ + ksp = (unsigned long) childregs; + ksp -= C_ABI_SAVE_AREA_SIZE; /* interrupt-entry save area */ + ((long *)ksp)[0] = ((long *)ksp)[1] = 0; + ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long); + memcpy((void *)ksp, ®s->regs[CALLEE_SAVED_FIRST_REG], + CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long)); + ksp -= C_ABI_SAVE_AREA_SIZE; /* __switch_to() save area */ + ((long *)ksp)[0] = ((long *)ksp)[1] = 0; + p->thread.ksp = ksp; + +#if CHIP_HAS_TILE_DMA() + /* + * No DMA in the new thread. We model this on the fact that + * fork() clears the pending signals, alarms, and aio for the child. + */ + memset(&p->thread.tile_dma_state, 0, sizeof(struct tile_dma_state)); + memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb)); +#endif + +#if CHIP_HAS_SN_PROC() + /* Likewise, the new thread is not running static processor code. */ + p->thread.sn_proc_running = 0; + memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb)); +#endif + +#if CHIP_HAS_PROC_STATUS_SPR() + /* New thread has its miscellaneous processor state bits clear. */ + p->thread.proc_status = 0; +#endif + +#ifdef CONFIG_HARDWALL + /* New thread does not own any networks. */ + p->thread.hardwall = NULL; +#endif + + + /* + * Start the new thread with the current architecture state + * (user interrupt masks, etc.). + */ + save_arch_state(&p->thread); + + return 0; +} + +/* + * Return "current" if it looks plausible, or else a pointer to a dummy. + * This can be helpful if we are just trying to emit a clean panic. + */ +struct task_struct *validate_current(void) +{ + static struct task_struct corrupt = { .comm = "<corrupt>" }; + struct task_struct *tsk = current; + if (unlikely((unsigned long)tsk < PAGE_OFFSET || + (high_memory && (void *)tsk > high_memory) || + ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) { + pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer); + tsk = &corrupt; + } + return tsk; +} + +/* Take and return the pointer to the previous task, for schedule_tail(). */ +struct task_struct *sim_notify_fork(struct task_struct *prev) +{ + struct task_struct *tsk = current; + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_FORK_PARENT | + (tsk->thread.creator_pid << _SIM_CONTROL_OPERATOR_BITS)); + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_FORK | + (tsk->pid << _SIM_CONTROL_OPERATOR_BITS)); + return prev; +} + +int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) +{ + struct pt_regs *ptregs = task_pt_regs(tsk); + elf_core_copy_regs(regs, ptregs); + return 1; +} + +#if CHIP_HAS_TILE_DMA() + +/* Allow user processes to access the DMA SPRs */ +void grant_dma_mpls(void) +{ +#if CONFIG_KERNEL_PL == 2 + __insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1); + __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1); +#else + __insn_mtspr(SPR_MPL_DMA_CPL_SET_0, 1); + __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_0, 1); +#endif +} + +/* Forbid user processes from accessing the DMA SPRs */ +void restrict_dma_mpls(void) +{ +#if CONFIG_KERNEL_PL == 2 + __insn_mtspr(SPR_MPL_DMA_CPL_SET_2, 1); + __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_2, 1); +#else + __insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1); + __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1); +#endif +} + +/* Pause the DMA engine, then save off its state registers. */ +static void save_tile_dma_state(struct tile_dma_state *dma) +{ + unsigned long state = __insn_mfspr(SPR_DMA_USER_STATUS); + unsigned long post_suspend_state; + + /* If we're running, suspend the engine. */ + if ((state & DMA_STATUS_MASK) == SPR_DMA_STATUS__RUNNING_MASK) + __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK); + + /* + * Wait for the engine to idle, then save regs. Note that we + * want to record the "running" bit from before suspension, + * and the "done" bit from after, so that we can properly + * distinguish a case where the user suspended the engine from + * the case where the kernel suspended as part of the context + * swap. + */ + do { + post_suspend_state = __insn_mfspr(SPR_DMA_USER_STATUS); + } while (post_suspend_state & SPR_DMA_STATUS__BUSY_MASK); + + dma->src = __insn_mfspr(SPR_DMA_SRC_ADDR); + dma->src_chunk = __insn_mfspr(SPR_DMA_SRC_CHUNK_ADDR); + dma->dest = __insn_mfspr(SPR_DMA_DST_ADDR); + dma->dest_chunk = __insn_mfspr(SPR_DMA_DST_CHUNK_ADDR); + dma->strides = __insn_mfspr(SPR_DMA_STRIDE); + dma->chunk_size = __insn_mfspr(SPR_DMA_CHUNK_SIZE); + dma->byte = __insn_mfspr(SPR_DMA_BYTE); + dma->status = (state & SPR_DMA_STATUS__RUNNING_MASK) | + (post_suspend_state & SPR_DMA_STATUS__DONE_MASK); +} + +/* Restart a DMA that was running before we were context-switched out. */ +static void restore_tile_dma_state(struct thread_struct *t) +{ + const struct tile_dma_state *dma = &t->tile_dma_state; + + /* + * The only way to restore the done bit is to run a zero + * length transaction. + */ + if ((dma->status & SPR_DMA_STATUS__DONE_MASK) && + !(__insn_mfspr(SPR_DMA_USER_STATUS) & SPR_DMA_STATUS__DONE_MASK)) { + __insn_mtspr(SPR_DMA_BYTE, 0); + __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); + while (__insn_mfspr(SPR_DMA_USER_STATUS) & + SPR_DMA_STATUS__BUSY_MASK) + ; + } + + __insn_mtspr(SPR_DMA_SRC_ADDR, dma->src); + __insn_mtspr(SPR_DMA_SRC_CHUNK_ADDR, dma->src_chunk); + __insn_mtspr(SPR_DMA_DST_ADDR, dma->dest); + __insn_mtspr(SPR_DMA_DST_CHUNK_ADDR, dma->dest_chunk); + __insn_mtspr(SPR_DMA_STRIDE, dma->strides); + __insn_mtspr(SPR_DMA_CHUNK_SIZE, dma->chunk_size); + __insn_mtspr(SPR_DMA_BYTE, dma->byte); + + /* + * Restart the engine if we were running and not done. + * Clear a pending async DMA fault that we were waiting on return + * to user space to execute, since we expect the DMA engine + * to regenerate those faults for us now. Note that we don't + * try to clear the TIF_ASYNC_TLB flag, since it's relatively + * harmless if set, and it covers both DMA and the SN processor. + */ + if ((dma->status & DMA_STATUS_MASK) == SPR_DMA_STATUS__RUNNING_MASK) { + t->dma_async_tlb.fault_num = 0; + __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); + } +} + +#endif + +static void save_arch_state(struct thread_struct *t) +{ +#if CHIP_HAS_SPLIT_INTR_MASK() + t->interrupt_mask = __insn_mfspr(SPR_INTERRUPT_MASK_0_0) | + ((u64)__insn_mfspr(SPR_INTERRUPT_MASK_0_1) << 32); +#else + t->interrupt_mask = __insn_mfspr(SPR_INTERRUPT_MASK_0); +#endif + t->ex_context[0] = __insn_mfspr(SPR_EX_CONTEXT_0_0); + t->ex_context[1] = __insn_mfspr(SPR_EX_CONTEXT_0_1); + t->system_save[0] = __insn_mfspr(SPR_SYSTEM_SAVE_0_0); + t->system_save[1] = __insn_mfspr(SPR_SYSTEM_SAVE_0_1); + t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2); + t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3); + t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS); +#if CHIP_HAS_PROC_STATUS_SPR() + t->proc_status = __insn_mfspr(SPR_PROC_STATUS); +#endif +#if !CHIP_HAS_FIXED_INTVEC_BASE() + t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0); +#endif +#if CHIP_HAS_TILE_RTF_HWM() + t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM); +#endif +#if CHIP_HAS_DSTREAM_PF() + t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF); +#endif +} + +static void restore_arch_state(const struct thread_struct *t) +{ +#if CHIP_HAS_SPLIT_INTR_MASK() + __insn_mtspr(SPR_INTERRUPT_MASK_0_0, (u32) t->interrupt_mask); + __insn_mtspr(SPR_INTERRUPT_MASK_0_1, t->interrupt_mask >> 32); +#else + __insn_mtspr(SPR_INTERRUPT_MASK_0, t->interrupt_mask); +#endif + __insn_mtspr(SPR_EX_CONTEXT_0_0, t->ex_context[0]); + __insn_mtspr(SPR_EX_CONTEXT_0_1, t->ex_context[1]); + __insn_mtspr(SPR_SYSTEM_SAVE_0_0, t->system_save[0]); + __insn_mtspr(SPR_SYSTEM_SAVE_0_1, t->system_save[1]); + __insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]); + __insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]); + __insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0); +#if CHIP_HAS_PROC_STATUS_SPR() + __insn_mtspr(SPR_PROC_STATUS, t->proc_status); +#endif +#if !CHIP_HAS_FIXED_INTVEC_BASE() + __insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base); +#endif +#if CHIP_HAS_TILE_RTF_HWM() + __insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm); +#endif +#if CHIP_HAS_DSTREAM_PF() + __insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf); +#endif +} + + +void _prepare_arch_switch(struct task_struct *next) +{ +#if CHIP_HAS_SN_PROC() + int snctl; +#endif +#if CHIP_HAS_TILE_DMA() + struct tile_dma_state *dma = ¤t->thread.tile_dma_state; + if (dma->enabled) + save_tile_dma_state(dma); +#endif +#if CHIP_HAS_SN_PROC() + /* + * Suspend the static network processor if it was running. + * We do not suspend the fabric itself, just like we don't + * try to suspend the UDN. + */ + snctl = __insn_mfspr(SPR_SNCTL); + current->thread.sn_proc_running = + (snctl & SPR_SNCTL__FRZPROC_MASK) == 0; + if (current->thread.sn_proc_running) + __insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK); +#endif +} + + +struct task_struct *__sched _switch_to(struct task_struct *prev, + struct task_struct *next) +{ + /* DMA state is already saved; save off other arch state. */ + save_arch_state(&prev->thread); + +#if CHIP_HAS_TILE_DMA() + /* + * Restore DMA in new task if desired. + * Note that it is only safe to restart here since interrupts + * are disabled, so we can't take any DMATLB miss or access + * interrupts before we have finished switching stacks. + */ + if (next->thread.tile_dma_state.enabled) { + restore_tile_dma_state(&next->thread); + grant_dma_mpls(); + } else { + restrict_dma_mpls(); + } +#endif + + /* Restore other arch state. */ + restore_arch_state(&next->thread); + +#if CHIP_HAS_SN_PROC() + /* + * Restart static network processor in the new process + * if it was running before. + */ + if (next->thread.sn_proc_running) { + int snctl = __insn_mfspr(SPR_SNCTL); + __insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK); + } +#endif + +#ifdef CONFIG_HARDWALL + /* Enable or disable access to the network registers appropriately. */ + if (prev->thread.hardwall != NULL) { + if (next->thread.hardwall == NULL) + restrict_network_mpls(); + } else if (next->thread.hardwall != NULL) { + grant_network_mpls(); + } +#endif + + /* + * Switch kernel SP, PC, and callee-saved registers. + * In the context of the new task, return the old task pointer + * (i.e. the task that actually called __switch_to). + * Pass the value to use for SYSTEM_SAVE_K_0 when we reset our sp. + */ + return __switch_to(prev, next, next_current_ksp0(next)); +} + +/* + * This routine is called on return from interrupt if any of the + * TIF_WORK_MASK flags are set in thread_info->flags. It is + * entered with interrupts disabled so we don't miss an event + * that modified the thread_info flags. If any flag is set, we + * handle it and return, and the calling assembly code will + * re-disable interrupts, reload the thread flags, and call back + * if more flags need to be handled. + * + * We return whether we need to check the thread_info flags again + * or not. Note that we don't clear TIF_SINGLESTEP here, so it's + * important that it be tested last, and then claim that we don't + * need to recheck the flags. + */ +int do_work_pending(struct pt_regs *regs, u32 thread_info_flags) +{ + /* If we enter in kernel mode, do nothing and exit the caller loop. */ + if (!user_mode(regs)) + return 0; + + if (thread_info_flags & _TIF_NEED_RESCHED) { + schedule(); + return 1; + } +#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() + if (thread_info_flags & _TIF_ASYNC_TLB) { + do_async_page_fault(regs); + return 1; + } +#endif + if (thread_info_flags & _TIF_SIGPENDING) { + do_signal(regs); + return 1; + } + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); + return 1; + } + if (thread_info_flags & _TIF_SINGLESTEP) { + single_step_once(regs); + return 0; + } + panic("work_pending: bad flags %#x\n", thread_info_flags); +} + +/* Note there is an implicit fifth argument if (clone_flags & CLONE_SETTLS). */ +SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, + void __user *, parent_tidptr, void __user *, child_tidptr, + struct pt_regs *, regs) +{ + if (!newsp) + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, + parent_tidptr, child_tidptr); +} + +/* + * sys_execve() executes a new program. + */ +SYSCALL_DEFINE4(execve, const char __user *, path, + const char __user *const __user *, argv, + const char __user *const __user *, envp, + struct pt_regs *, regs) +{ + long error; + char *filename; + + filename = getname(path); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, argv, envp, regs); + putname(filename); + if (error == 0) + single_step_execve(); +out: + return error; +} + +#ifdef CONFIG_COMPAT +long compat_sys_execve(const char __user *path, + compat_uptr_t __user *argv, + compat_uptr_t __user *envp, + struct pt_regs *regs) +{ + long error; + char *filename; + + filename = getname(path); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = compat_do_execve(filename, argv, envp, regs); + putname(filename); + if (error == 0) + single_step_execve(); +out: + return error; +} +#endif + +unsigned long get_wchan(struct task_struct *p) +{ + struct KBacktraceIterator kbt; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + for (KBacktraceIterator_init(&kbt, p, NULL); + !KBacktraceIterator_end(&kbt); + KBacktraceIterator_next(&kbt)) { + if (!in_sched_functions(kbt.it.pc)) + return kbt.it.pc; + } + + return 0; +} + +/* + * We pass in lr as zero (cleared in kernel_thread) and the caller + * part of the backtrace ABI on the stack also zeroed (in copy_thread) + * so that backtraces will stop with this function. + * Note that we don't use r0, since copy_thread() clears it. + */ +static void start_kernel_thread(int dummy, int (*fn)(int), int arg) +{ + do_exit(fn(arg)); +} + +/* + * Create a kernel thread + */ +int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + regs.ex1 = PL_ICS_EX1(KERNEL_PL, 0); /* run at kernel PL, no ICS */ + regs.pc = (long) start_kernel_thread; + regs.flags = PT_FLAGS_CALLER_SAVES; /* need to restore r1 and r2 */ + regs.regs[1] = (long) fn; /* function pointer */ + regs.regs[2] = (long) arg; /* parameter register */ + + /* Ok, create the new process.. */ + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, + 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); + +/* Flush thread state. */ +void flush_thread(void) +{ + /* Nothing */ +} + +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ + /* Nothing */ +} + +void show_regs(struct pt_regs *regs) +{ + struct task_struct *tsk = validate_current(); + int i; + + pr_err("\n"); + pr_err(" Pid: %d, comm: %20s, CPU: %d\n", + tsk->pid, tsk->comm, smp_processor_id()); +#ifdef __tilegx__ + for (i = 0; i < 51; i += 3) + pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n", + i, regs->regs[i], i+1, regs->regs[i+1], + i+2, regs->regs[i+2]); + pr_err(" r51: "REGFMT" r52: "REGFMT" tp : "REGFMT"\n", + regs->regs[51], regs->regs[52], regs->tp); + pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr); +#else + for (i = 0; i < 52; i += 4) + pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT + " r%-2d: "REGFMT" r%-2d: "REGFMT"\n", + i, regs->regs[i], i+1, regs->regs[i+1], + i+2, regs->regs[i+2], i+3, regs->regs[i+3]); + pr_err(" r52: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n", + regs->regs[52], regs->tp, regs->sp, regs->lr); +#endif + pr_err(" pc : "REGFMT" ex1: %ld faultnum: %ld\n", + regs->pc, regs->ex1, regs->faultnum); + + dump_stack_regs(regs); +} diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c new file mode 100644 index 00000000..e92e4052 --- /dev/null +++ b/arch/tile/kernel/ptrace.c @@ -0,0 +1,205 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Copied from i386: Ross Biro 1/23/92 + */ + +#include <linux/kernel.h> +#include <linux/ptrace.h> +#include <linux/kprobes.h> +#include <linux/compat.h> +#include <linux/uaccess.h> +#include <asm/traps.h> + +void user_enable_single_step(struct task_struct *child) +{ + set_tsk_thread_flag(child, TIF_SINGLESTEP); +} + +void user_disable_single_step(struct task_struct *child) +{ + clear_tsk_thread_flag(child, TIF_SINGLESTEP); +} + +/* + * Called by kernel/ptrace.c when detaching.. + */ +void ptrace_disable(struct task_struct *child) +{ + clear_tsk_thread_flag(child, TIF_SINGLESTEP); + + /* + * These two are currently unused, but will be set by arch_ptrace() + * and used in the syscall assembly when we do support them. + */ + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + unsigned long __user *datap = (long __user __force *)data; + unsigned long tmp; + long ret = -EIO; + char *childreg; + struct pt_regs copyregs; + int ex1_offset; + + switch (request) { + + case PTRACE_PEEKUSR: /* Read register from pt_regs. */ + if (addr >= PTREGS_SIZE) + break; + childreg = (char *)task_pt_regs(child) + addr; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + if (addr & (sizeof(compat_long_t)-1)) + break; + ret = put_user(*(compat_long_t *)childreg, + (compat_long_t __user *)datap); + } else +#endif + { + if (addr & (sizeof(long)-1)) + break; + ret = put_user(*(long *)childreg, datap); + } + break; + + case PTRACE_POKEUSR: /* Write register in pt_regs. */ + if (addr >= PTREGS_SIZE) + break; + childreg = (char *)task_pt_regs(child) + addr; + + /* Guard against overwrites of the privilege level. */ + ex1_offset = PTREGS_OFFSET_EX1; +#if defined(CONFIG_COMPAT) && defined(__BIG_ENDIAN) + if (is_compat_task()) /* point at low word */ + ex1_offset += sizeof(compat_long_t); +#endif + if (addr == ex1_offset) + data = PL_ICS_EX1(USER_PL, EX1_ICS(data)); + +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + if (addr & (sizeof(compat_long_t)-1)) + break; + *(compat_long_t *)childreg = data; + } else +#endif + { + if (addr & (sizeof(long)-1)) + break; + *(long *)childreg = data; + } + ret = 0; + break; + + case PTRACE_GETREGS: /* Get all registers from the child. */ + if (copy_to_user(datap, task_pt_regs(child), + sizeof(struct pt_regs)) == 0) { + ret = 0; + } + break; + + case PTRACE_SETREGS: /* Set all registers in the child. */ + if (copy_from_user(©regs, datap, + sizeof(struct pt_regs)) == 0) { + copyregs.ex1 = + PL_ICS_EX1(USER_PL, EX1_ICS(copyregs.ex1)); + *task_pt_regs(child) = copyregs; + ret = 0; + } + break; + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + break; + + case PTRACE_SETOPTIONS: + /* Support TILE-specific ptrace options. */ + child->ptrace &= ~PT_TRACE_MASK_TILE; + tmp = data & PTRACE_O_MASK_TILE; + data &= ~PTRACE_O_MASK_TILE; + ret = ptrace_request(child, request, addr, data); + if (tmp & PTRACE_O_TRACEMIGRATE) + child->ptrace |= PT_TRACE_MIGRATE; + break; + + default: +#ifdef CONFIG_COMPAT + if (task_thread_info(current)->status & TS_COMPAT) { + ret = compat_ptrace_request(child, request, + addr, data); + break; + } +#endif + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + +#ifdef CONFIG_COMPAT +/* Not used; we handle compat issues in arch_ptrace() directly. */ +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t addr, compat_ulong_t data) +{ + BUG(); +} +#endif + +void do_syscall_trace(void) +{ + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + + if (!(current->ptrace & PT_PTRACED)) + return; + + /* + * The 0x80 provides a way for the tracing parent to distinguish + * between a syscall stop and SIGTRAP delivery + */ + ptrace_notify(SIGTRAP|((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); + + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} + +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) +{ + struct siginfo info; + + memset(&info, 0, sizeof(info)); + info.si_signo = SIGTRAP; + info.si_code = TRAP_BRKPT; + info.si_addr = (void __user *) regs->pc; + + /* Send us the fakey SIGTRAP */ + force_sig_info(SIGTRAP, &info, tsk); +} + +/* Handle synthetic interrupt delivered only by the simulator. */ +void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num) +{ + send_sigtrap(current, regs, fault_num); +} diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c new file mode 100644 index 00000000..baa3d905 --- /dev/null +++ b/arch/tile/kernel/reboot.c @@ -0,0 +1,51 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/stddef.h> +#include <linux/reboot.h> +#include <linux/smp.h> +#include <linux/pm.h> +#include <asm/page.h> +#include <asm/setup.h> +#include <hv/hypervisor.h> + +#ifndef CONFIG_SMP +#define smp_send_stop() +#endif + +void machine_halt(void) +{ + warn_early_printk(); + arch_local_irq_disable_all(); + smp_send_stop(); + hv_halt(); +} + +void machine_power_off(void) +{ + warn_early_printk(); + arch_local_irq_disable_all(); + smp_send_stop(); + hv_power_off(); +} + +void machine_restart(char *cmd) +{ + arch_local_irq_disable_all(); + smp_send_stop(); + hv_restart((HV_VirtAddr) "vmlinux", (HV_VirtAddr) cmd); +} + +/* No interesting distinction to be made here. */ +void (*pm_power_off)(void) = NULL; diff --git a/arch/tile/kernel/regs_32.S b/arch/tile/kernel/regs_32.S new file mode 100644 index 00000000..c12280c2 --- /dev/null +++ b/arch/tile/kernel/regs_32.S @@ -0,0 +1,145 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/linkage.h> +#include <asm/ptrace.h> +#include <asm/asm-offsets.h> +#include <arch/spr_def.h> +#include <asm/processor.h> +#include <asm/switch_to.h> + +/* + * See <asm/system.h>; called with prev and next task_struct pointers. + * "prev" is returned in r0 for _switch_to and also for ret_from_fork. + * + * We want to save pc/sp in "prev", and get the new pc/sp from "next". + * We also need to save all the callee-saved registers on the stack. + * + * Intel enables/disables access to the hardware cycle counter in + * seccomp (secure computing) environments if necessary, based on + * has_secure_computing(). We might want to do this at some point, + * though it would require virtualizing the other SPRs under WORLD_ACCESS. + * + * Since we're saving to the stack, we omit sp from this list. + * And for parallels with other architectures, we save lr separately, + * in the thread_struct itself (as the "pc" field). + * + * This code also needs to be aligned with process.c copy_thread() + */ + +#if CALLEE_SAVED_REGS_COUNT != 24 +# error Mismatch between <asm/system.h> and kernel/entry.S +#endif +#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 4) + +#define SAVE_REG(r) { sw r12, r; addi r12, r12, 4 } +#define LOAD_REG(r) { lw r, r12; addi r12, r12, 4 } +#define FOR_EACH_CALLEE_SAVED_REG(f) \ + f(r30); f(r31); \ + f(r32); f(r33); f(r34); f(r35); f(r36); f(r37); f(r38); f(r39); \ + f(r40); f(r41); f(r42); f(r43); f(r44); f(r45); f(r46); f(r47); \ + f(r48); f(r49); f(r50); f(r51); f(r52); + +STD_ENTRY_SECTION(__switch_to, .sched.text) + { + move r10, sp + sw sp, lr + addi sp, sp, -FRAME_SIZE + } + { + addi r11, sp, 4 + addi r12, sp, 8 + } + { + sw r11, r10 + addli r4, r1, TASK_STRUCT_THREAD_KSP_OFFSET + } + { + lw r13, r4 /* Load new sp to a temp register early. */ + addli r3, r0, TASK_STRUCT_THREAD_KSP_OFFSET + } + FOR_EACH_CALLEE_SAVED_REG(SAVE_REG) + { + sw r3, sp + addli r3, r0, TASK_STRUCT_THREAD_PC_OFFSET + } + { + sw r3, lr + addli r4, r1, TASK_STRUCT_THREAD_PC_OFFSET + } + { + lw lr, r4 + addi r12, r13, 8 + } + { + /* Update sp and ksp0 simultaneously to avoid backtracer warnings. */ + move sp, r13 + mtspr SPR_SYSTEM_SAVE_K_0, r2 + } + FOR_EACH_CALLEE_SAVED_REG(LOAD_REG) +.L__switch_to_pc: + { + addi sp, sp, FRAME_SIZE + jrp lr /* r0 is still valid here, so return it */ + } + STD_ENDPROC(__switch_to) + +/* Return a suitable address for the backtracer for suspended threads */ +STD_ENTRY_SECTION(get_switch_to_pc, .sched.text) + lnk r0 + { + addli r0, r0, .L__switch_to_pc - . + jrp lr + } + STD_ENDPROC(get_switch_to_pc) + +STD_ENTRY(get_pt_regs) + .irp reg, r0, r1, r2, r3, r4, r5, r6, r7, \ + r8, r9, r10, r11, r12, r13, r14, r15, \ + r16, r17, r18, r19, r20, r21, r22, r23, \ + r24, r25, r26, r27, r28, r29, r30, r31, \ + r32, r33, r34, r35, r36, r37, r38, r39, \ + r40, r41, r42, r43, r44, r45, r46, r47, \ + r48, r49, r50, r51, r52, tp, sp + { + sw r0, \reg + addi r0, r0, 4 + } + .endr + { + sw r0, lr + addi r0, r0, PTREGS_OFFSET_PC - PTREGS_OFFSET_LR + } + lnk r1 + { + sw r0, r1 + addi r0, r0, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC + } + mfspr r1, INTERRUPT_CRITICAL_SECTION + shli r1, r1, SPR_EX_CONTEXT_1_1__ICS_SHIFT + ori r1, r1, KERNEL_PL + { + sw r0, r1 + addi r0, r0, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1 + } + { + sw r0, zero /* clear faultnum */ + addi r0, r0, PTREGS_OFFSET_ORIG_R0 - PTREGS_OFFSET_FAULTNUM + } + { + sw r0, zero /* clear orig_r0 */ + addli r0, r0, -PTREGS_OFFSET_ORIG_R0 /* restore r0 to base */ + } + jrp lr + STD_ENDPROC(get_pt_regs) diff --git a/arch/tile/kernel/regs_64.S b/arch/tile/kernel/regs_64.S new file mode 100644 index 00000000..0829fd01 --- /dev/null +++ b/arch/tile/kernel/regs_64.S @@ -0,0 +1,145 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/linkage.h> +#include <asm/ptrace.h> +#include <asm/asm-offsets.h> +#include <arch/spr_def.h> +#include <asm/processor.h> +#include <asm/switch_to.h> + +/* + * See <asm/system.h>; called with prev and next task_struct pointers. + * "prev" is returned in r0 for _switch_to and also for ret_from_fork. + * + * We want to save pc/sp in "prev", and get the new pc/sp from "next". + * We also need to save all the callee-saved registers on the stack. + * + * Intel enables/disables access to the hardware cycle counter in + * seccomp (secure computing) environments if necessary, based on + * has_secure_computing(). We might want to do this at some point, + * though it would require virtualizing the other SPRs under WORLD_ACCESS. + * + * Since we're saving to the stack, we omit sp from this list. + * And for parallels with other architectures, we save lr separately, + * in the thread_struct itself (as the "pc" field). + * + * This code also needs to be aligned with process.c copy_thread() + */ + +#if CALLEE_SAVED_REGS_COUNT != 24 +# error Mismatch between <asm/system.h> and kernel/entry.S +#endif +#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 8) + +#define SAVE_REG(r) { st r12, r; addi r12, r12, 8 } +#define LOAD_REG(r) { ld r, r12; addi r12, r12, 8 } +#define FOR_EACH_CALLEE_SAVED_REG(f) \ + f(r30); f(r31); \ + f(r32); f(r33); f(r34); f(r35); f(r36); f(r37); f(r38); f(r39); \ + f(r40); f(r41); f(r42); f(r43); f(r44); f(r45); f(r46); f(r47); \ + f(r48); f(r49); f(r50); f(r51); f(r52); + +STD_ENTRY_SECTION(__switch_to, .sched.text) + { + move r10, sp + st sp, lr + } + { + addli r11, sp, -FRAME_SIZE + 8 + addli sp, sp, -FRAME_SIZE + } + { + st r11, r10 + addli r4, r1, TASK_STRUCT_THREAD_KSP_OFFSET + } + { + ld r13, r4 /* Load new sp to a temp register early. */ + addi r12, sp, 16 + } + FOR_EACH_CALLEE_SAVED_REG(SAVE_REG) + addli r3, r0, TASK_STRUCT_THREAD_KSP_OFFSET + { + st r3, sp + addli r3, r0, TASK_STRUCT_THREAD_PC_OFFSET + } + { + st r3, lr + addli r4, r1, TASK_STRUCT_THREAD_PC_OFFSET + } + { + ld lr, r4 + addi r12, r13, 16 + } + { + /* Update sp and ksp0 simultaneously to avoid backtracer warnings. */ + move sp, r13 + mtspr SPR_SYSTEM_SAVE_K_0, r2 + } + FOR_EACH_CALLEE_SAVED_REG(LOAD_REG) +.L__switch_to_pc: + { + addli sp, sp, FRAME_SIZE + jrp lr /* r0 is still valid here, so return it */ + } + STD_ENDPROC(__switch_to) + +/* Return a suitable address for the backtracer for suspended threads */ +STD_ENTRY_SECTION(get_switch_to_pc, .sched.text) + lnk r0 + { + addli r0, r0, .L__switch_to_pc - . + jrp lr + } + STD_ENDPROC(get_switch_to_pc) + +STD_ENTRY(get_pt_regs) + .irp reg, r0, r1, r2, r3, r4, r5, r6, r7, \ + r8, r9, r10, r11, r12, r13, r14, r15, \ + r16, r17, r18, r19, r20, r21, r22, r23, \ + r24, r25, r26, r27, r28, r29, r30, r31, \ + r32, r33, r34, r35, r36, r37, r38, r39, \ + r40, r41, r42, r43, r44, r45, r46, r47, \ + r48, r49, r50, r51, r52, tp, sp + { + st r0, \reg + addi r0, r0, 8 + } + .endr + { + st r0, lr + addi r0, r0, PTREGS_OFFSET_PC - PTREGS_OFFSET_LR + } + lnk r1 + { + st r0, r1 + addi r0, r0, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC + } + mfspr r1, INTERRUPT_CRITICAL_SECTION + shli r1, r1, SPR_EX_CONTEXT_1_1__ICS_SHIFT + ori r1, r1, KERNEL_PL + { + st r0, r1 + addi r0, r0, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1 + } + { + st r0, zero /* clear faultnum */ + addi r0, r0, PTREGS_OFFSET_ORIG_R0 - PTREGS_OFFSET_FAULTNUM + } + { + st r0, zero /* clear orig_r0 */ + addli r0, r0, -PTREGS_OFFSET_ORIG_R0 /* restore r0 to base */ + } + jrp lr + STD_ENDPROC(get_pt_regs) diff --git a/arch/tile/kernel/relocate_kernel.S b/arch/tile/kernel/relocate_kernel.S new file mode 100644 index 00000000..010b4185 --- /dev/null +++ b/arch/tile/kernel/relocate_kernel.S @@ -0,0 +1,280 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * copy new kernel into place and then call hv_reexec + * + */ + +#include <linux/linkage.h> +#include <arch/chip.h> +#include <asm/page.h> +#include <hv/hypervisor.h> + +#define ___hvb MEM_SV_INTRPT + HV_GLUE_START_CPA + +#define ___hv_dispatch(f) (___hvb + (HV_DISPATCH_ENTRY_SIZE * f)) + +#define ___hv_console_putc ___hv_dispatch(HV_DISPATCH_CONSOLE_PUTC) +#define ___hv_halt ___hv_dispatch(HV_DISPATCH_HALT) +#define ___hv_reexec ___hv_dispatch(HV_DISPATCH_REEXEC) +#define ___hv_flush_remote ___hv_dispatch(HV_DISPATCH_FLUSH_REMOTE) + +#undef RELOCATE_NEW_KERNEL_VERBOSE + +STD_ENTRY(relocate_new_kernel) + + move r30, r0 /* page list */ + move r31, r1 /* address of page we are on */ + move r32, r2 /* start address of new kernel */ + + shri r1, r1, PAGE_SHIFT + addi r1, r1, 1 + shli sp, r1, PAGE_SHIFT + addi sp, sp, -8 + /* we now have a stack (whether we need one or not) */ + + moveli r40, lo16(___hv_console_putc) + auli r40, r40, ha16(___hv_console_putc) + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'r' + jalr r40 + + moveli r0, '_' + jalr r40 + + moveli r0, 'n' + jalr r40 + + moveli r0, '_' + jalr r40 + + moveli r0, 'k' + jalr r40 + + moveli r0, '\n' + jalr r40 +#endif + + /* + * Throughout this code r30 is pointer to the element of page + * list we are working on. + * + * Normally we get to the next element of the page list by + * incrementing r30 by four. The exception is if the element + * on the page list is an IND_INDIRECTION in which case we use + * the element with the low bits masked off as the new value + * of r30. + * + * To get this started, we need the value passed to us (which + * will always be an IND_INDIRECTION) in memory somewhere with + * r30 pointing at it. To do that, we push the value passed + * to us on the stack and make r30 point to it. + */ + + sw sp, r30 + move r30, sp + addi sp, sp, -8 + +#if CHIP_HAS_CBOX_HOME_MAP() + /* + * On TILEPro, we need to flush all tiles' caches, since we may + * have been doing hash-for-home caching there. Note that we + * must do this _after_ we're completely done modifying any memory + * other than our output buffer (which we know is locally cached). + * We want the caches to be fully clean when we do the reexec, + * because the hypervisor is going to do this flush again at that + * point, and we don't want that second flush to overwrite any memory. + */ + { + move r0, zero /* cache_pa */ + move r1, zero + } + { + auli r2, zero, ha16(HV_FLUSH_EVICT_L2) /* cache_control */ + movei r3, -1 /* cache_cpumask; -1 means all client tiles */ + } + { + move r4, zero /* tlb_va */ + move r5, zero /* tlb_length */ + } + { + move r6, zero /* tlb_pgsize */ + move r7, zero /* tlb_cpumask */ + } + { + move r8, zero /* asids */ + moveli r20, lo16(___hv_flush_remote) + } + { + move r9, zero /* asidcount */ + auli r20, r20, ha16(___hv_flush_remote) + } + + jalr r20 +#endif + + /* r33 is destination pointer, default to zero */ + + moveli r33, 0 + +.Lloop: lw r10, r30 + + andi r9, r10, 0xf /* low 4 bits tell us what type it is */ + xor r10, r10, r9 /* r10 is now value with low 4 bits stripped */ + + seqi r0, r9, 0x1 /* IND_DESTINATION */ + bzt r0, .Ltry2 + + move r33, r10 + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'd' + jalr r40 +#endif + + addi r30, r30, 4 + j .Lloop + +.Ltry2: + seqi r0, r9, 0x2 /* IND_INDIRECTION */ + bzt r0, .Ltry4 + + move r30, r10 + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'i' + jalr r40 +#endif + + j .Lloop + +.Ltry4: + seqi r0, r9, 0x4 /* IND_DONE */ + bzt r0, .Ltry8 + + mf + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'D' + jalr r40 + moveli r0, '\n' + jalr r40 +#endif + + move r0, r32 + moveli r1, 0 /* arg to hv_reexec is 64 bits */ + + moveli r41, lo16(___hv_reexec) + auli r41, r41, ha16(___hv_reexec) + + jalr r41 + + /* we should not get here */ + + moveli r0, '?' + jalr r40 + moveli r0, '\n' + jalr r40 + + j .Lhalt + +.Ltry8: seqi r0, r9, 0x8 /* IND_SOURCE */ + bz r0, .Lerr /* unknown type */ + + /* copy page at r10 to page at r33 */ + + move r11, r33 + + moveli r0, lo16(PAGE_SIZE) + auli r0, r0, ha16(PAGE_SIZE) + add r33, r33, r0 + + /* copy word at r10 to word at r11 until r11 equals r33 */ + + /* We know page size must be multiple of 16, so we can unroll + * 16 times safely without any edge case checking. + * + * Issue a flush of the destination every 16 words to avoid + * incoherence when starting the new kernel. (Now this is + * just good paranoia because the hv_reexec call will also + * take care of this.) + */ + +1: + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0 } + { flush r11 ; addi r11, r11, 4 } + + seq r0, r33, r11 + bzt r0, 1b + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 's' + jalr r40 +#endif + + addi r30, r30, 4 + j .Lloop + + +.Lerr: moveli r0, 'e' + jalr r40 + moveli r0, 'r' + jalr r40 + moveli r0, 'r' + jalr r40 + moveli r0, '\n' + jalr r40 +.Lhalt: + moveli r41, lo16(___hv_halt) + auli r41, r41, ha16(___hv_halt) + + jalr r41 + STD_ENDPROC(relocate_new_kernel) + + .section .rodata,"a" + + .globl relocate_new_kernel_size +relocate_new_kernel_size: + .long .Lend_relocate_new_kernel - relocate_new_kernel diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c new file mode 100644 index 00000000..bff23f47 --- /dev/null +++ b/arch/tile/kernel/setup.c @@ -0,0 +1,1532 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mmzone.h> +#include <linux/bootmem.h> +#include <linux/module.h> +#include <linux/node.h> +#include <linux/cpu.h> +#include <linux/ioport.h> +#include <linux/irq.h> +#include <linux/kexec.h> +#include <linux/pci.h> +#include <linux/initrd.h> +#include <linux/io.h> +#include <linux/highmem.h> +#include <linux/smp.h> +#include <linux/timex.h> +#include <asm/setup.h> +#include <asm/sections.h> +#include <asm/cacheflush.h> +#include <asm/pgalloc.h> +#include <asm/mmu_context.h> +#include <hv/hypervisor.h> +#include <arch/interrupts.h> + +/* <linux/smp.h> doesn't provide this definition. */ +#ifndef CONFIG_SMP +#define setup_max_cpus 1 +#endif + +static inline int ABS(int x) { return x >= 0 ? x : -x; } + +/* Chip information */ +char chip_model[64] __write_once; + +struct pglist_data node_data[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_data); + +/* We only create bootmem data on node 0. */ +static bootmem_data_t __initdata node0_bdata; + +/* Information on the NUMA nodes that we compute early */ +unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES]; +unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES]; +unsigned long __initdata node_memmap_pfn[MAX_NUMNODES]; +unsigned long __initdata node_percpu_pfn[MAX_NUMNODES]; +unsigned long __initdata node_free_pfn[MAX_NUMNODES]; + +static unsigned long __initdata node_percpu[MAX_NUMNODES]; + +#ifdef CONFIG_HIGHMEM +/* Page frame index of end of lowmem on each controller. */ +unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES]; + +/* Number of pages that can be mapped into lowmem. */ +static unsigned long __initdata mappable_physpages; +#endif + +/* Data on which physical memory controller corresponds to which NUMA node */ +int node_controller[MAX_NUMNODES] = { [0 ... MAX_NUMNODES-1] = -1 }; + +#ifdef CONFIG_HIGHMEM +/* Map information from VAs to PAs */ +unsigned long pbase_map[1 << (32 - HPAGE_SHIFT)] + __write_once __attribute__((aligned(L2_CACHE_BYTES))); +EXPORT_SYMBOL(pbase_map); + +/* Map information from PAs to VAs */ +void *vbase_map[NR_PA_HIGHBIT_VALUES] + __write_once __attribute__((aligned(L2_CACHE_BYTES))); +EXPORT_SYMBOL(vbase_map); +#endif + +/* Node number as a function of the high PA bits */ +int highbits_to_node[NR_PA_HIGHBIT_VALUES] __write_once; +EXPORT_SYMBOL(highbits_to_node); + +static unsigned int __initdata maxmem_pfn = -1U; +static unsigned int __initdata maxnodemem_pfn[MAX_NUMNODES] = { + [0 ... MAX_NUMNODES-1] = -1U +}; +static nodemask_t __initdata isolnodes; + +#ifdef CONFIG_PCI +enum { DEFAULT_PCI_RESERVE_MB = 64 }; +static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB; +unsigned long __initdata pci_reserve_start_pfn = -1U; +unsigned long __initdata pci_reserve_end_pfn = -1U; +#endif + +static int __init setup_maxmem(char *str) +{ + unsigned long long maxmem; + if (str == NULL || (maxmem = memparse(str, NULL)) == 0) + return -EINVAL; + + maxmem_pfn = (maxmem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); + pr_info("Forcing RAM used to no more than %dMB\n", + maxmem_pfn >> (20 - PAGE_SHIFT)); + return 0; +} +early_param("maxmem", setup_maxmem); + +static int __init setup_maxnodemem(char *str) +{ + char *endp; + unsigned long long maxnodemem; + long node; + + node = str ? simple_strtoul(str, &endp, 0) : INT_MAX; + if (node >= MAX_NUMNODES || *endp != ':') + return -EINVAL; + + maxnodemem = memparse(endp+1, NULL); + maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) << + (HPAGE_SHIFT - PAGE_SHIFT); + pr_info("Forcing RAM used on node %ld to no more than %dMB\n", + node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT)); + return 0; +} +early_param("maxnodemem", setup_maxnodemem); + +static int __init setup_isolnodes(char *str) +{ + char buf[MAX_NUMNODES * 5]; + if (str == NULL || nodelist_parse(str, isolnodes) != 0) + return -EINVAL; + + nodelist_scnprintf(buf, sizeof(buf), isolnodes); + pr_info("Set isolnodes value to '%s'\n", buf); + return 0; +} +early_param("isolnodes", setup_isolnodes); + +#ifdef CONFIG_PCI +static int __init setup_pci_reserve(char* str) +{ + unsigned long mb; + + if (str == NULL || strict_strtoul(str, 0, &mb) != 0 || + mb > 3 * 1024) + return -EINVAL; + + pci_reserve_mb = mb; + pr_info("Reserving %dMB for PCIE root complex mappings\n", + pci_reserve_mb); + return 0; +} +early_param("pci_reserve", setup_pci_reserve); +#endif + +#ifndef __tilegx__ +/* + * vmalloc=size forces the vmalloc area to be exactly 'size' bytes. + * This can be used to increase (or decrease) the vmalloc area. + */ +static int __init parse_vmalloc(char *arg) +{ + if (!arg) + return -EINVAL; + + VMALLOC_RESERVE = (memparse(arg, &arg) + PGDIR_SIZE - 1) & PGDIR_MASK; + + /* See validate_va() for more on this test. */ + if ((long)_VMALLOC_START >= 0) + early_panic("\"vmalloc=%#lx\" value too large: maximum %#lx\n", + VMALLOC_RESERVE, _VMALLOC_END - 0x80000000UL); + + return 0; +} +early_param("vmalloc", parse_vmalloc); +#endif + +#ifdef CONFIG_HIGHMEM +/* + * Determine for each controller where its lowmem is mapped and how much of + * it is mapped there. On controller zero, the first few megabytes are + * already mapped in as code at MEM_SV_INTRPT, so in principle we could + * start our data mappings higher up, but for now we don't bother, to avoid + * additional confusion. + * + * One question is whether, on systems with more than 768 Mb and + * controllers of different sizes, to map in a proportionate amount of + * each one, or to try to map the same amount from each controller. + * (E.g. if we have three controllers with 256MB, 1GB, and 256MB + * respectively, do we map 256MB from each, or do we map 128 MB, 512 + * MB, and 128 MB respectively?) For now we use a proportionate + * solution like the latter. + * + * The VA/PA mapping demands that we align our decisions at 16 MB + * boundaries so that we can rapidly convert VA to PA. + */ +static void *__init setup_pa_va_mapping(void) +{ + unsigned long curr_pages = 0; + unsigned long vaddr = PAGE_OFFSET; + nodemask_t highonlynodes = isolnodes; + int i, j; + + memset(pbase_map, -1, sizeof(pbase_map)); + memset(vbase_map, -1, sizeof(vbase_map)); + + /* Node zero cannot be isolated for LOWMEM purposes. */ + node_clear(0, highonlynodes); + + /* Count up the number of pages on non-highonlynodes controllers. */ + mappable_physpages = 0; + for_each_online_node(i) { + if (!node_isset(i, highonlynodes)) + mappable_physpages += + node_end_pfn[i] - node_start_pfn[i]; + } + + for_each_online_node(i) { + unsigned long start = node_start_pfn[i]; + unsigned long end = node_end_pfn[i]; + unsigned long size = end - start; + unsigned long vaddr_end; + + if (node_isset(i, highonlynodes)) { + /* Mark this controller as having no lowmem. */ + node_lowmem_end_pfn[i] = start; + continue; + } + + curr_pages += size; + if (mappable_physpages > MAXMEM_PFN) { + vaddr_end = PAGE_OFFSET + + (((u64)curr_pages * MAXMEM_PFN / + mappable_physpages) + << PAGE_SHIFT); + } else { + vaddr_end = PAGE_OFFSET + (curr_pages << PAGE_SHIFT); + } + for (j = 0; vaddr < vaddr_end; vaddr += HPAGE_SIZE, ++j) { + unsigned long this_pfn = + start + (j << HUGETLB_PAGE_ORDER); + pbase_map[vaddr >> HPAGE_SHIFT] = this_pfn; + if (vbase_map[__pfn_to_highbits(this_pfn)] == + (void *)-1) + vbase_map[__pfn_to_highbits(this_pfn)] = + (void *)(vaddr & HPAGE_MASK); + } + node_lowmem_end_pfn[i] = start + (j << HUGETLB_PAGE_ORDER); + BUG_ON(node_lowmem_end_pfn[i] > end); + } + + /* Return highest address of any mapped memory. */ + return (void *)vaddr; +} +#endif /* CONFIG_HIGHMEM */ + +/* + * Register our most important memory mappings with the debug stub. + * + * This is up to 4 mappings for lowmem, one mapping per memory + * controller, plus one for our text segment. + */ +static void __cpuinit store_permanent_mappings(void) +{ + int i; + + for_each_online_node(i) { + HV_PhysAddr pa = ((HV_PhysAddr)node_start_pfn[i]) << PAGE_SHIFT; +#ifdef CONFIG_HIGHMEM + HV_PhysAddr high_mapped_pa = node_lowmem_end_pfn[i]; +#else + HV_PhysAddr high_mapped_pa = node_end_pfn[i]; +#endif + + unsigned long pages = high_mapped_pa - node_start_pfn[i]; + HV_VirtAddr addr = (HV_VirtAddr) __va(pa); + hv_store_mapping(addr, pages << PAGE_SHIFT, pa); + } + + hv_store_mapping((HV_VirtAddr)_stext, + (uint32_t)(_einittext - _stext), 0); +} + +/* + * Use hv_inquire_physical() to populate node_{start,end}_pfn[] + * and node_online_map, doing suitable sanity-checking. + * Also set min_low_pfn, max_low_pfn, and max_pfn. + */ +static void __init setup_memory(void) +{ + int i, j; + int highbits_seen[NR_PA_HIGHBIT_VALUES] = { 0 }; +#ifdef CONFIG_HIGHMEM + long highmem_pages; +#endif +#ifndef __tilegx__ + int cap; +#endif +#if defined(CONFIG_HIGHMEM) || defined(__tilegx__) + long lowmem_pages; +#endif + + /* We are using a char to hold the cpu_2_node[] mapping */ + BUILD_BUG_ON(MAX_NUMNODES > 127); + + /* Discover the ranges of memory available to us */ + for (i = 0; ; ++i) { + unsigned long start, size, end, highbits; + HV_PhysAddrRange range = hv_inquire_physical(i); + if (range.size == 0) + break; +#ifdef CONFIG_FLATMEM + if (i > 0) { + pr_err("Can't use discontiguous PAs: %#llx..%#llx\n", + range.size, range.start + range.size); + continue; + } +#endif +#ifndef __tilegx__ + if ((unsigned long)range.start) { + pr_err("Range not at 4GB multiple: %#llx..%#llx\n", + range.start, range.start + range.size); + continue; + } +#endif + if ((range.start & (HPAGE_SIZE-1)) != 0 || + (range.size & (HPAGE_SIZE-1)) != 0) { + unsigned long long start_pa = range.start; + unsigned long long orig_size = range.size; + range.start = (start_pa + HPAGE_SIZE - 1) & HPAGE_MASK; + range.size -= (range.start - start_pa); + range.size &= HPAGE_MASK; + pr_err("Range not hugepage-aligned: %#llx..%#llx:" + " now %#llx-%#llx\n", + start_pa, start_pa + orig_size, + range.start, range.start + range.size); + } + highbits = __pa_to_highbits(range.start); + if (highbits >= NR_PA_HIGHBIT_VALUES) { + pr_err("PA high bits too high: %#llx..%#llx\n", + range.start, range.start + range.size); + continue; + } + if (highbits_seen[highbits]) { + pr_err("Range overlaps in high bits: %#llx..%#llx\n", + range.start, range.start + range.size); + continue; + } + highbits_seen[highbits] = 1; + if (PFN_DOWN(range.size) > maxnodemem_pfn[i]) { + int max_size = maxnodemem_pfn[i]; + if (max_size > 0) { + pr_err("Maxnodemem reduced node %d to" + " %d pages\n", i, max_size); + range.size = PFN_PHYS(max_size); + } else { + pr_err("Maxnodemem disabled node %d\n", i); + continue; + } + } + if (num_physpages + PFN_DOWN(range.size) > maxmem_pfn) { + int max_size = maxmem_pfn - num_physpages; + if (max_size > 0) { + pr_err("Maxmem reduced node %d to %d pages\n", + i, max_size); + range.size = PFN_PHYS(max_size); + } else { + pr_err("Maxmem disabled node %d\n", i); + continue; + } + } + if (i >= MAX_NUMNODES) { + pr_err("Too many PA nodes (#%d): %#llx...%#llx\n", + i, range.size, range.size + range.start); + continue; + } + + start = range.start >> PAGE_SHIFT; + size = range.size >> PAGE_SHIFT; + end = start + size; + +#ifndef __tilegx__ + if (((HV_PhysAddr)end << PAGE_SHIFT) != + (range.start + range.size)) { + pr_err("PAs too high to represent: %#llx..%#llx\n", + range.start, range.start + range.size); + continue; + } +#endif +#ifdef CONFIG_PCI + /* + * Blocks that overlap the pci reserved region must + * have enough space to hold the maximum percpu data + * region at the top of the range. If there isn't + * enough space above the reserved region, just + * truncate the node. + */ + if (start <= pci_reserve_start_pfn && + end > pci_reserve_start_pfn) { + unsigned int per_cpu_size = + __per_cpu_end - __per_cpu_start; + unsigned int percpu_pages = + NR_CPUS * (PFN_UP(per_cpu_size) >> PAGE_SHIFT); + if (end < pci_reserve_end_pfn + percpu_pages) { + end = pci_reserve_start_pfn; + pr_err("PCI mapping region reduced node %d to" + " %ld pages\n", i, end - start); + } + } +#endif + + for (j = __pfn_to_highbits(start); + j <= __pfn_to_highbits(end - 1); j++) + highbits_to_node[j] = i; + + node_start_pfn[i] = start; + node_end_pfn[i] = end; + node_controller[i] = range.controller; + num_physpages += size; + max_pfn = end; + + /* Mark node as online */ + node_set(i, node_online_map); + node_set(i, node_possible_map); + } + +#ifndef __tilegx__ + /* + * For 4KB pages, mem_map "struct page" data is 1% of the size + * of the physical memory, so can be quite big (640 MB for + * four 16G zones). These structures must be mapped in + * lowmem, and since we currently cap out at about 768 MB, + * it's impractical to try to use this much address space. + * For now, arbitrarily cap the amount of physical memory + * we're willing to use at 8 million pages (32GB of 4KB pages). + */ + cap = 8 * 1024 * 1024; /* 8 million pages */ + if (num_physpages > cap) { + int num_nodes = num_online_nodes(); + int cap_each = cap / num_nodes; + unsigned long dropped_pages = 0; + for (i = 0; i < num_nodes; ++i) { + int size = node_end_pfn[i] - node_start_pfn[i]; + if (size > cap_each) { + dropped_pages += (size - cap_each); + node_end_pfn[i] = node_start_pfn[i] + cap_each; + } + } + num_physpages -= dropped_pages; + pr_warning("Only using %ldMB memory;" + " ignoring %ldMB.\n", + num_physpages >> (20 - PAGE_SHIFT), + dropped_pages >> (20 - PAGE_SHIFT)); + pr_warning("Consider using a larger page size.\n"); + } +#endif + + /* Heap starts just above the last loaded address. */ + min_low_pfn = PFN_UP((unsigned long)_end - PAGE_OFFSET); + +#ifdef CONFIG_HIGHMEM + /* Find where we map lowmem from each controller. */ + high_memory = setup_pa_va_mapping(); + + /* Set max_low_pfn based on what node 0 can directly address. */ + max_low_pfn = node_lowmem_end_pfn[0]; + + lowmem_pages = (mappable_physpages > MAXMEM_PFN) ? + MAXMEM_PFN : mappable_physpages; + highmem_pages = (long) (num_physpages - lowmem_pages); + + pr_notice("%ldMB HIGHMEM available.\n", + pages_to_mb(highmem_pages > 0 ? highmem_pages : 0)); + pr_notice("%ldMB LOWMEM available.\n", + pages_to_mb(lowmem_pages)); +#else + /* Set max_low_pfn based on what node 0 can directly address. */ + max_low_pfn = node_end_pfn[0]; + +#ifndef __tilegx__ + if (node_end_pfn[0] > MAXMEM_PFN) { + pr_warning("Only using %ldMB LOWMEM.\n", + MAXMEM>>20); + pr_warning("Use a HIGHMEM enabled kernel.\n"); + max_low_pfn = MAXMEM_PFN; + max_pfn = MAXMEM_PFN; + num_physpages = MAXMEM_PFN; + node_end_pfn[0] = MAXMEM_PFN; + } else { + pr_notice("%ldMB memory available.\n", + pages_to_mb(node_end_pfn[0])); + } + for (i = 1; i < MAX_NUMNODES; ++i) { + node_start_pfn[i] = 0; + node_end_pfn[i] = 0; + } + high_memory = __va(node_end_pfn[0]); +#else + lowmem_pages = 0; + for (i = 0; i < MAX_NUMNODES; ++i) { + int pages = node_end_pfn[i] - node_start_pfn[i]; + lowmem_pages += pages; + if (pages) + high_memory = pfn_to_kaddr(node_end_pfn[i]); + } + pr_notice("%ldMB memory available.\n", + pages_to_mb(lowmem_pages)); +#endif +#endif +} + +static void __init setup_bootmem_allocator(void) +{ + unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn; + + /* Provide a node 0 bdata. */ + NODE_DATA(0)->bdata = &node0_bdata; + +#ifdef CONFIG_PCI + /* Don't let boot memory alias the PCI region. */ + last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn); +#else + last_alloc_pfn = max_low_pfn; +#endif + + /* + * Initialize the boot-time allocator (with low memory only): + * The first argument says where to put the bitmap, and the + * second says where the end of allocatable memory is. + */ + bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn); + + /* + * Let the bootmem allocator use all the space we've given it + * except for its own bitmap. + */ + first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size); + if (first_alloc_pfn >= last_alloc_pfn) + early_panic("Not enough memory on controller 0 for bootmem\n"); + + free_bootmem(PFN_PHYS(first_alloc_pfn), + PFN_PHYS(last_alloc_pfn - first_alloc_pfn)); + +#ifdef CONFIG_KEXEC + if (crashk_res.start != crashk_res.end) + reserve_bootmem(crashk_res.start, resource_size(&crashk_res), 0); +#endif +} + +void *__init alloc_remap(int nid, unsigned long size) +{ + int pages = node_end_pfn[nid] - node_start_pfn[nid]; + void *map = pfn_to_kaddr(node_memmap_pfn[nid]); + BUG_ON(size != pages * sizeof(struct page)); + memset(map, 0, size); + return map; +} + +static int __init percpu_size(void) +{ + int size = __per_cpu_end - __per_cpu_start; + size += PERCPU_MODULE_RESERVE; + size += PERCPU_DYNAMIC_EARLY_SIZE; + if (size < PCPU_MIN_UNIT_SIZE) + size = PCPU_MIN_UNIT_SIZE; + size = roundup(size, PAGE_SIZE); + + /* In several places we assume the per-cpu data fits on a huge page. */ + BUG_ON(kdata_huge && size > HPAGE_SIZE); + return size; +} + +static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal) +{ + void *kva = __alloc_bootmem(size, PAGE_SIZE, goal); + unsigned long pfn = kaddr_to_pfn(kva); + BUG_ON(goal && PFN_PHYS(pfn) != goal); + return pfn; +} + +static void __init zone_sizes_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES] = { 0 }; + int size = percpu_size(); + int num_cpus = smp_height * smp_width; + int i; + + for (i = 0; i < num_cpus; ++i) + node_percpu[cpu_to_node(i)] += size; + + for_each_online_node(i) { + unsigned long start = node_start_pfn[i]; + unsigned long end = node_end_pfn[i]; +#ifdef CONFIG_HIGHMEM + unsigned long lowmem_end = node_lowmem_end_pfn[i]; +#else + unsigned long lowmem_end = end; +#endif + int memmap_size = (end - start) * sizeof(struct page); + node_free_pfn[i] = start; + + /* + * Set aside pages for per-cpu data and the mem_map array. + * + * Since the per-cpu data requires special homecaching, + * if we are in kdata_huge mode, we put it at the end of + * the lowmem region. If we're not in kdata_huge mode, + * we take the per-cpu pages from the bottom of the + * controller, since that avoids fragmenting a huge page + * that users might want. We always take the memmap + * from the bottom of the controller, since with + * kdata_huge that lets it be under a huge TLB entry. + * + * If the user has requested isolnodes for a controller, + * though, there'll be no lowmem, so we just alloc_bootmem + * the memmap. There will be no percpu memory either. + */ + if (__pfn_to_highbits(start) == 0) { + /* In low PAs, allocate via bootmem. */ + unsigned long goal = 0; + node_memmap_pfn[i] = + alloc_bootmem_pfn(memmap_size, goal); + if (kdata_huge) + goal = PFN_PHYS(lowmem_end) - node_percpu[i]; + if (node_percpu[i]) + node_percpu_pfn[i] = + alloc_bootmem_pfn(node_percpu[i], goal); + } else if (cpu_isset(i, isolnodes)) { + node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0); + BUG_ON(node_percpu[i] != 0); + } else { + /* In high PAs, just reserve some pages. */ + node_memmap_pfn[i] = node_free_pfn[i]; + node_free_pfn[i] += PFN_UP(memmap_size); + if (!kdata_huge) { + node_percpu_pfn[i] = node_free_pfn[i]; + node_free_pfn[i] += PFN_UP(node_percpu[i]); + } else { + node_percpu_pfn[i] = + lowmem_end - PFN_UP(node_percpu[i]); + } + } + +#ifdef CONFIG_HIGHMEM + if (start > lowmem_end) { + zones_size[ZONE_NORMAL] = 0; + zones_size[ZONE_HIGHMEM] = end - start; + } else { + zones_size[ZONE_NORMAL] = lowmem_end - start; + zones_size[ZONE_HIGHMEM] = end - lowmem_end; + } +#else + zones_size[ZONE_NORMAL] = end - start; +#endif + + /* + * Everyone shares node 0's bootmem allocator, but + * we use alloc_remap(), above, to put the actual + * struct page array on the individual controllers, + * which is most of the data that we actually care about. + * We can't place bootmem allocators on the other + * controllers since the bootmem allocator can only + * operate on 32-bit physical addresses. + */ + NODE_DATA(i)->bdata = NODE_DATA(0)->bdata; + + free_area_init_node(i, zones_size, start, NULL); + printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n", + PFN_UP(node_percpu[i])); + + /* Track the type of memory on each node */ + if (zones_size[ZONE_NORMAL]) + node_set_state(i, N_NORMAL_MEMORY); +#ifdef CONFIG_HIGHMEM + if (end != start) + node_set_state(i, N_HIGH_MEMORY); +#endif + + node_set_online(i); + } +} + +#ifdef CONFIG_NUMA + +/* which logical CPUs are on which nodes */ +struct cpumask node_2_cpu_mask[MAX_NUMNODES] __write_once; +EXPORT_SYMBOL(node_2_cpu_mask); + +/* which node each logical CPU is on */ +char cpu_2_node[NR_CPUS] __write_once __attribute__((aligned(L2_CACHE_BYTES))); +EXPORT_SYMBOL(cpu_2_node); + +/* Return cpu_to_node() except for cpus not yet assigned, which return -1 */ +static int __init cpu_to_bound_node(int cpu, struct cpumask* unbound_cpus) +{ + if (!cpu_possible(cpu) || cpumask_test_cpu(cpu, unbound_cpus)) + return -1; + else + return cpu_to_node(cpu); +} + +/* Return number of immediately-adjacent tiles sharing the same NUMA node. */ +static int __init node_neighbors(int node, int cpu, + struct cpumask *unbound_cpus) +{ + int neighbors = 0; + int w = smp_width; + int h = smp_height; + int x = cpu % w; + int y = cpu / w; + if (x > 0 && cpu_to_bound_node(cpu-1, unbound_cpus) == node) + ++neighbors; + if (x < w-1 && cpu_to_bound_node(cpu+1, unbound_cpus) == node) + ++neighbors; + if (y > 0 && cpu_to_bound_node(cpu-w, unbound_cpus) == node) + ++neighbors; + if (y < h-1 && cpu_to_bound_node(cpu+w, unbound_cpus) == node) + ++neighbors; + return neighbors; +} + +static void __init setup_numa_mapping(void) +{ + int distance[MAX_NUMNODES][NR_CPUS]; + HV_Coord coord; + int cpu, node, cpus, i, x, y; + int num_nodes = num_online_nodes(); + struct cpumask unbound_cpus; + nodemask_t default_nodes; + + cpumask_clear(&unbound_cpus); + + /* Get set of nodes we will use for defaults */ + nodes_andnot(default_nodes, node_online_map, isolnodes); + if (nodes_empty(default_nodes)) { + BUG_ON(!node_isset(0, node_online_map)); + pr_err("Forcing NUMA node zero available as a default node\n"); + node_set(0, default_nodes); + } + + /* Populate the distance[] array */ + memset(distance, -1, sizeof(distance)); + cpu = 0; + for (coord.y = 0; coord.y < smp_height; ++coord.y) { + for (coord.x = 0; coord.x < smp_width; + ++coord.x, ++cpu) { + BUG_ON(cpu >= nr_cpu_ids); + if (!cpu_possible(cpu)) { + cpu_2_node[cpu] = -1; + continue; + } + for_each_node_mask(node, default_nodes) { + HV_MemoryControllerInfo info = + hv_inquire_memory_controller( + coord, node_controller[node]); + distance[node][cpu] = + ABS(info.coord.x) + ABS(info.coord.y); + } + cpumask_set_cpu(cpu, &unbound_cpus); + } + } + cpus = cpu; + + /* + * Round-robin through the NUMA nodes until all the cpus are + * assigned. We could be more clever here (e.g. create four + * sorted linked lists on the same set of cpu nodes, and pull + * off them in round-robin sequence, removing from all four + * lists each time) but given the relatively small numbers + * involved, O(n^2) seem OK for a one-time cost. + */ + node = first_node(default_nodes); + while (!cpumask_empty(&unbound_cpus)) { + int best_cpu = -1; + int best_distance = INT_MAX; + for (cpu = 0; cpu < cpus; ++cpu) { + if (cpumask_test_cpu(cpu, &unbound_cpus)) { + /* + * Compute metric, which is how much + * closer the cpu is to this memory + * controller than the others, shifted + * up, and then the number of + * neighbors already in the node as an + * epsilon adjustment to try to keep + * the nodes compact. + */ + int d = distance[node][cpu] * num_nodes; + for_each_node_mask(i, default_nodes) { + if (i != node) + d -= distance[i][cpu]; + } + d *= 8; /* allow space for epsilon */ + d -= node_neighbors(node, cpu, &unbound_cpus); + if (d < best_distance) { + best_cpu = cpu; + best_distance = d; + } + } + } + BUG_ON(best_cpu < 0); + cpumask_set_cpu(best_cpu, &node_2_cpu_mask[node]); + cpu_2_node[best_cpu] = node; + cpumask_clear_cpu(best_cpu, &unbound_cpus); + node = next_node(node, default_nodes); + if (node == MAX_NUMNODES) + node = first_node(default_nodes); + } + + /* Print out node assignments and set defaults for disabled cpus */ + cpu = 0; + for (y = 0; y < smp_height; ++y) { + printk(KERN_DEBUG "NUMA cpu-to-node row %d:", y); + for (x = 0; x < smp_width; ++x, ++cpu) { + if (cpu_to_node(cpu) < 0) { + pr_cont(" -"); + cpu_2_node[cpu] = first_node(default_nodes); + } else { + pr_cont(" %d", cpu_to_node(cpu)); + } + } + pr_cont("\n"); + } +} + +static struct cpu cpu_devices[NR_CPUS]; + +static int __init topology_init(void) +{ + int i; + + for_each_online_node(i) + register_one_node(i); + + for (i = 0; i < smp_height * smp_width; ++i) + register_cpu(&cpu_devices[i], i); + + return 0; +} + +subsys_initcall(topology_init); + +#else /* !CONFIG_NUMA */ + +#define setup_numa_mapping() do { } while (0) + +#endif /* CONFIG_NUMA */ + +/** + * setup_cpu() - Do all necessary per-cpu, tile-specific initialization. + * @boot: Is this the boot cpu? + * + * Called from setup_arch() on the boot cpu, or online_secondary(). + */ +void __cpuinit setup_cpu(int boot) +{ + /* The boot cpu sets up its permanent mappings much earlier. */ + if (!boot) + store_permanent_mappings(); + + /* Allow asynchronous TLB interrupts. */ +#if CHIP_HAS_TILE_DMA() + arch_local_irq_unmask(INT_DMATLB_MISS); + arch_local_irq_unmask(INT_DMATLB_ACCESS); +#endif +#if CHIP_HAS_SN_PROC() + arch_local_irq_unmask(INT_SNITLB_MISS); +#endif +#ifdef __tilegx__ + arch_local_irq_unmask(INT_SINGLE_STEP_K); +#endif + + /* + * Allow user access to many generic SPRs, like the cycle + * counter, PASS/FAIL/DONE, INTERRUPT_CRITICAL_SECTION, etc. + */ + __insn_mtspr(SPR_MPL_WORLD_ACCESS_SET_0, 1); + +#if CHIP_HAS_SN() + /* Static network is not restricted. */ + __insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1); +#endif +#if CHIP_HAS_SN_PROC() + __insn_mtspr(SPR_MPL_SN_NOTIFY_SET_0, 1); + __insn_mtspr(SPR_MPL_SN_CPL_SET_0, 1); +#endif + + /* + * Set the MPL for interrupt control 0 & 1 to the corresponding + * values. This includes access to the SYSTEM_SAVE and EX_CONTEXT + * SPRs, as well as the interrupt mask. + */ + __insn_mtspr(SPR_MPL_INTCTRL_0_SET_0, 1); + __insn_mtspr(SPR_MPL_INTCTRL_1_SET_1, 1); + + /* Initialize IRQ support for this cpu. */ + setup_irq_regs(); + +#ifdef CONFIG_HARDWALL + /* Reset the network state on this cpu. */ + reset_network_state(); +#endif +} + +#ifdef CONFIG_BLK_DEV_INITRD + +/* + * Note that the kernel can potentially support other compression + * techniques than gz, though we don't do so by default. If we ever + * decide to do so we can either look for other filename extensions, + * or just allow a file with this name to be compressed with an + * arbitrary compressor (somewhat counterintuitively). + */ +static int __initdata set_initramfs_file; +static char __initdata initramfs_file[128] = "initramfs.cpio.gz"; + +static int __init setup_initramfs_file(char *str) +{ + if (str == NULL) + return -EINVAL; + strncpy(initramfs_file, str, sizeof(initramfs_file) - 1); + set_initramfs_file = 1; + + return 0; +} +early_param("initramfs_file", setup_initramfs_file); + +/* + * We look for an "initramfs.cpio.gz" file in the hvfs. + * If there is one, we allocate some memory for it and it will be + * unpacked to the initramfs. + */ +static void __init load_hv_initrd(void) +{ + HV_FS_StatInfo stat; + int fd, rc; + void *initrd; + + fd = hv_fs_findfile((HV_VirtAddr) initramfs_file); + if (fd == HV_ENOENT) { + if (set_initramfs_file) + pr_warning("No such hvfs initramfs file '%s'\n", + initramfs_file); + return; + } + BUG_ON(fd < 0); + stat = hv_fs_fstat(fd); + BUG_ON(stat.size < 0); + if (stat.flags & HV_FS_ISDIR) { + pr_warning("Ignoring hvfs file '%s': it's a directory.\n", + initramfs_file); + return; + } + initrd = alloc_bootmem_pages(stat.size); + rc = hv_fs_pread(fd, (HV_VirtAddr) initrd, stat.size, 0); + if (rc != stat.size) { + pr_err("Error reading %d bytes from hvfs file '%s': %d\n", + stat.size, initramfs_file, rc); + free_initrd_mem((unsigned long) initrd, stat.size); + return; + } + initrd_start = (unsigned long) initrd; + initrd_end = initrd_start + stat.size; +} + +void __init free_initrd_mem(unsigned long begin, unsigned long end) +{ + free_bootmem(__pa(begin), end - begin); +} + +#else +static inline void load_hv_initrd(void) {} +#endif /* CONFIG_BLK_DEV_INITRD */ + +static void __init validate_hv(void) +{ + /* + * It may already be too late, but let's check our built-in + * configuration against what the hypervisor is providing. + */ + unsigned long glue_size = hv_sysconf(HV_SYSCONF_GLUE_SIZE); + int hv_page_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL); + int hv_hpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE); + HV_ASIDRange asid_range; + +#ifndef CONFIG_SMP + HV_Topology topology = hv_inquire_topology(); + BUG_ON(topology.coord.x != 0 || topology.coord.y != 0); + if (topology.width != 1 || topology.height != 1) { + pr_warning("Warning: booting UP kernel on %dx%d grid;" + " will ignore all but first tile.\n", + topology.width, topology.height); + } +#endif + + if (PAGE_OFFSET + HV_GLUE_START_CPA + glue_size > (unsigned long)_text) + early_panic("Hypervisor glue size %ld is too big!\n", + glue_size); + if (hv_page_size != PAGE_SIZE) + early_panic("Hypervisor page size %#x != our %#lx\n", + hv_page_size, PAGE_SIZE); + if (hv_hpage_size != HPAGE_SIZE) + early_panic("Hypervisor huge page size %#x != our %#lx\n", + hv_hpage_size, HPAGE_SIZE); + +#ifdef CONFIG_SMP + /* + * Some hypervisor APIs take a pointer to a bitmap array + * whose size is at least the number of cpus on the chip. + * We use a struct cpumask for this, so it must be big enough. + */ + if ((smp_height * smp_width) > nr_cpu_ids) + early_panic("Hypervisor %d x %d grid too big for Linux" + " NR_CPUS %d\n", smp_height, smp_width, + nr_cpu_ids); +#endif + + /* + * Check that we're using allowed ASIDs, and initialize the + * various asid variables to their appropriate initial states. + */ + asid_range = hv_inquire_asid(0); + __get_cpu_var(current_asid) = min_asid = asid_range.start; + max_asid = asid_range.start + asid_range.size - 1; + + if (hv_confstr(HV_CONFSTR_CHIP_MODEL, (HV_VirtAddr)chip_model, + sizeof(chip_model)) < 0) { + pr_err("Warning: HV_CONFSTR_CHIP_MODEL not available\n"); + strlcpy(chip_model, "unknown", sizeof(chip_model)); + } +} + +static void __init validate_va(void) +{ +#ifndef __tilegx__ /* FIXME: GX: probably some validation relevant here */ + /* + * Similarly, make sure we're only using allowed VAs. + * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_INTRPT, + * and 0 .. KERNEL_HIGH_VADDR. + * In addition, make sure we CAN'T use the end of memory, since + * we use the last chunk of each pgd for the pgd_list. + */ + int i, user_kernel_ok = 0; + unsigned long max_va = 0; + unsigned long list_va = + ((PGD_LIST_OFFSET / sizeof(pgd_t)) << PGDIR_SHIFT); + + for (i = 0; ; ++i) { + HV_VirtAddrRange range = hv_inquire_virtual(i); + if (range.size == 0) + break; + if (range.start <= MEM_USER_INTRPT && + range.start + range.size >= MEM_HV_INTRPT) + user_kernel_ok = 1; + if (range.start == 0) + max_va = range.size; + BUG_ON(range.start + range.size > list_va); + } + if (!user_kernel_ok) + early_panic("Hypervisor not configured for user/kernel VAs\n"); + if (max_va == 0) + early_panic("Hypervisor not configured for low VAs\n"); + if (max_va < KERNEL_HIGH_VADDR) + early_panic("Hypervisor max VA %#lx smaller than %#lx\n", + max_va, KERNEL_HIGH_VADDR); + + /* Kernel PCs must have their high bit set; see intvec.S. */ + if ((long)VMALLOC_START >= 0) + early_panic( + "Linux VMALLOC region below the 2GB line (%#lx)!\n" + "Reconfigure the kernel with fewer NR_HUGE_VMAPS\n" + "or smaller VMALLOC_RESERVE.\n", + VMALLOC_START); +#endif +} + +/* + * cpu_lotar_map lists all the cpus that are valid for the supervisor + * to cache data on at a page level, i.e. what cpus can be placed in + * the LOTAR field of a PTE. It is equivalent to the set of possible + * cpus plus any other cpus that are willing to share their cache. + * It is set by hv_inquire_tiles(HV_INQ_TILES_LOTAR). + */ +struct cpumask __write_once cpu_lotar_map; +EXPORT_SYMBOL(cpu_lotar_map); + +#if CHIP_HAS_CBOX_HOME_MAP() +/* + * hash_for_home_map lists all the tiles that hash-for-home data + * will be cached on. Note that this may includes tiles that are not + * valid for this supervisor to use otherwise (e.g. if a hypervisor + * device is being shared between multiple supervisors). + * It is set by hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE). + */ +struct cpumask hash_for_home_map; +EXPORT_SYMBOL(hash_for_home_map); +#endif + +/* + * cpu_cacheable_map lists all the cpus whose caches the hypervisor can + * flush on our behalf. It is set to cpu_possible_mask OR'ed with + * hash_for_home_map, and it is what should be passed to + * hv_flush_remote() to flush all caches. Note that if there are + * dedicated hypervisor driver tiles that have authorized use of their + * cache, those tiles will only appear in cpu_lotar_map, NOT in + * cpu_cacheable_map, as they are a special case. + */ +struct cpumask __write_once cpu_cacheable_map; +EXPORT_SYMBOL(cpu_cacheable_map); + +static __initdata struct cpumask disabled_map; + +static int __init disabled_cpus(char *str) +{ + int boot_cpu = smp_processor_id(); + + if (str == NULL || cpulist_parse_crop(str, &disabled_map) != 0) + return -EINVAL; + if (cpumask_test_cpu(boot_cpu, &disabled_map)) { + pr_err("disabled_cpus: can't disable boot cpu %d\n", boot_cpu); + cpumask_clear_cpu(boot_cpu, &disabled_map); + } + return 0; +} + +early_param("disabled_cpus", disabled_cpus); + +void __init print_disabled_cpus(void) +{ + if (!cpumask_empty(&disabled_map)) { + char buf[100]; + cpulist_scnprintf(buf, sizeof(buf), &disabled_map); + pr_info("CPUs not available for Linux: %s\n", buf); + } +} + +static void __init setup_cpu_maps(void) +{ + struct cpumask hv_disabled_map, cpu_possible_init; + int boot_cpu = smp_processor_id(); + int cpus, i, rc; + + /* Learn which cpus are allowed by the hypervisor. */ + rc = hv_inquire_tiles(HV_INQ_TILES_AVAIL, + (HV_VirtAddr) cpumask_bits(&cpu_possible_init), + sizeof(cpu_cacheable_map)); + if (rc < 0) + early_panic("hv_inquire_tiles(AVAIL) failed: rc %d\n", rc); + if (!cpumask_test_cpu(boot_cpu, &cpu_possible_init)) + early_panic("Boot CPU %d disabled by hypervisor!\n", boot_cpu); + + /* Compute the cpus disabled by the hvconfig file. */ + cpumask_complement(&hv_disabled_map, &cpu_possible_init); + + /* Include them with the cpus disabled by "disabled_cpus". */ + cpumask_or(&disabled_map, &disabled_map, &hv_disabled_map); + + /* + * Disable every cpu after "setup_max_cpus". But don't mark + * as disabled the cpus that are outside of our initial rectangle, + * since that turns out to be confusing. + */ + cpus = 1; /* this cpu */ + cpumask_set_cpu(boot_cpu, &disabled_map); /* ignore this cpu */ + for (i = 0; cpus < setup_max_cpus; ++i) + if (!cpumask_test_cpu(i, &disabled_map)) + ++cpus; + for (; i < smp_height * smp_width; ++i) + cpumask_set_cpu(i, &disabled_map); + cpumask_clear_cpu(boot_cpu, &disabled_map); /* reset this cpu */ + for (i = smp_height * smp_width; i < NR_CPUS; ++i) + cpumask_clear_cpu(i, &disabled_map); + + /* + * Setup cpu_possible map as every cpu allocated to us, minus + * the results of any "disabled_cpus" settings. + */ + cpumask_andnot(&cpu_possible_init, &cpu_possible_init, &disabled_map); + init_cpu_possible(&cpu_possible_init); + + /* Learn which cpus are valid for LOTAR caching. */ + rc = hv_inquire_tiles(HV_INQ_TILES_LOTAR, + (HV_VirtAddr) cpumask_bits(&cpu_lotar_map), + sizeof(cpu_lotar_map)); + if (rc < 0) { + pr_err("warning: no HV_INQ_TILES_LOTAR; using AVAIL\n"); + cpu_lotar_map = *cpu_possible_mask; + } + +#if CHIP_HAS_CBOX_HOME_MAP() + /* Retrieve set of CPUs used for hash-for-home caching */ + rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE, + (HV_VirtAddr) hash_for_home_map.bits, + sizeof(hash_for_home_map)); + if (rc < 0) + early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc); + cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map); +#else + cpu_cacheable_map = *cpu_possible_mask; +#endif +} + + +static int __init dataplane(char *str) +{ + pr_warning("WARNING: dataplane support disabled in this kernel\n"); + return 0; +} + +early_param("dataplane", dataplane); + +#ifdef CONFIG_CMDLINE_BOOL +static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +#endif + +void __init setup_arch(char **cmdline_p) +{ + int len; + +#if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE) + len = hv_get_command_line((HV_VirtAddr) boot_command_line, + COMMAND_LINE_SIZE); + if (boot_command_line[0]) + pr_warning("WARNING: ignoring dynamic command line \"%s\"\n", + boot_command_line); + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + char *hv_cmdline; +#if defined(CONFIG_CMDLINE_BOOL) + if (builtin_cmdline[0]) { + int builtin_len = strlcpy(boot_command_line, builtin_cmdline, + COMMAND_LINE_SIZE); + if (builtin_len < COMMAND_LINE_SIZE-1) + boot_command_line[builtin_len++] = ' '; + hv_cmdline = &boot_command_line[builtin_len]; + len = COMMAND_LINE_SIZE - builtin_len; + } else +#endif + { + hv_cmdline = boot_command_line; + len = COMMAND_LINE_SIZE; + } + len = hv_get_command_line((HV_VirtAddr) hv_cmdline, len); + if (len < 0 || len > COMMAND_LINE_SIZE) + early_panic("hv_get_command_line failed: %d\n", len); +#endif + + *cmdline_p = boot_command_line; + + /* Set disabled_map and setup_max_cpus very early */ + parse_early_param(); + + /* Make sure the kernel is compatible with the hypervisor. */ + validate_hv(); + validate_va(); + + setup_cpu_maps(); + + +#ifdef CONFIG_PCI + /* + * Initialize the PCI structures. This is done before memory + * setup so that we know whether or not a pci_reserve region + * is necessary. + */ + if (tile_pci_init() == 0) + pci_reserve_mb = 0; + + /* PCI systems reserve a region just below 4GB for mapping iomem. */ + pci_reserve_end_pfn = (1 << (32 - PAGE_SHIFT)); + pci_reserve_start_pfn = pci_reserve_end_pfn - + (pci_reserve_mb << (20 - PAGE_SHIFT)); +#endif + + init_mm.start_code = (unsigned long) _text; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = (unsigned long) _end; + + setup_memory(); + store_permanent_mappings(); + setup_bootmem_allocator(); + + /* + * NOTE: before this point _nobody_ is allowed to allocate + * any memory using the bootmem allocator. + */ + + paging_init(); + setup_numa_mapping(); + zone_sizes_init(); + set_page_homes(); + setup_cpu(1); + setup_clock(); + load_hv_initrd(); +} + + +/* + * Set up per-cpu memory. + */ + +unsigned long __per_cpu_offset[NR_CPUS] __write_once; +EXPORT_SYMBOL(__per_cpu_offset); + +static size_t __initdata pfn_offset[MAX_NUMNODES] = { 0 }; +static unsigned long __initdata percpu_pfn[NR_CPUS] = { 0 }; + +/* + * As the percpu code allocates pages, we return the pages from the + * end of the node for the specified cpu. + */ +static void *__init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) +{ + int nid = cpu_to_node(cpu); + unsigned long pfn = node_percpu_pfn[nid] + pfn_offset[nid]; + + BUG_ON(size % PAGE_SIZE != 0); + pfn_offset[nid] += size / PAGE_SIZE; + BUG_ON(node_percpu[nid] < size); + node_percpu[nid] -= size; + if (percpu_pfn[cpu] == 0) + percpu_pfn[cpu] = pfn; + return pfn_to_kaddr(pfn); +} + +/* + * Pages reserved for percpu memory are not freeable, and in any case we are + * on a short path to panic() in setup_per_cpu_area() at this point anyway. + */ +static void __init pcpu_fc_free(void *ptr, size_t size) +{ +} + +/* + * Set up vmalloc page tables using bootmem for the percpu code. + */ +static void __init pcpu_fc_populate_pte(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + BUG_ON(pgd_addr_invalid(addr)); + if (addr < VMALLOC_START || addr >= VMALLOC_END) + panic("PCPU addr %#lx outside vmalloc range %#lx..%#lx;" + " try increasing CONFIG_VMALLOC_RESERVE\n", + addr, VMALLOC_START, VMALLOC_END); + + pgd = swapper_pg_dir + pgd_index(addr); + pud = pud_offset(pgd, addr); + BUG_ON(!pud_present(*pud)); + pmd = pmd_offset(pud, addr); + if (pmd_present(*pmd)) { + BUG_ON(pmd_huge_page(*pmd)); + } else { + pte = __alloc_bootmem(L2_KERNEL_PGTABLE_SIZE, + HV_PAGE_TABLE_ALIGN, 0); + pmd_populate_kernel(&init_mm, pmd, pte); + } +} + +void __init setup_per_cpu_areas(void) +{ + struct page *pg; + unsigned long delta, pfn, lowmem_va; + unsigned long size = percpu_size(); + char *ptr; + int rc, cpu, i; + + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_fc_alloc, + pcpu_fc_free, pcpu_fc_populate_pte); + if (rc < 0) + panic("Cannot initialize percpu area (err=%d)", rc); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) { + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; + + /* finv the copy out of cache so we can change homecache */ + ptr = pcpu_base_addr + pcpu_unit_offsets[cpu]; + __finv_buffer(ptr, size); + pfn = percpu_pfn[cpu]; + + /* Rewrite the page tables to cache on that cpu */ + pg = pfn_to_page(pfn); + for (i = 0; i < size; i += PAGE_SIZE, ++pfn, ++pg) { + + /* Update the vmalloc mapping and page home. */ + pte_t *ptep = + virt_to_pte(NULL, (unsigned long)ptr + i); + pte_t pte = *ptep; + BUG_ON(pfn != pte_pfn(pte)); + pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3); + pte = set_remote_cache_cpu(pte, cpu); + set_pte(ptep, pte); + + /* Update the lowmem mapping for consistency. */ + lowmem_va = (unsigned long)pfn_to_kaddr(pfn); + ptep = virt_to_pte(NULL, lowmem_va); + if (pte_huge(*ptep)) { + printk(KERN_DEBUG "early shatter of huge page" + " at %#lx\n", lowmem_va); + shatter_pmd((pmd_t *)ptep); + ptep = virt_to_pte(NULL, lowmem_va); + BUG_ON(pte_huge(*ptep)); + } + BUG_ON(pfn != pte_pfn(*ptep)); + set_pte(ptep, pte); + } + } + + /* Set our thread pointer appropriately. */ + set_my_cpu_offset(__per_cpu_offset[smp_processor_id()]); + + /* Make sure the finv's have completed. */ + mb_incoherent(); + + /* Flush the TLB so we reference it properly from here on out. */ + local_flush_tlb_all(); +} + +static struct resource data_resource = { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +/* + * We reserve all resources above 4GB so that PCI won't try to put + * mappings above 4GB; the standard allows that for some devices but + * the probing code trunates values to 32 bits. + */ +#ifdef CONFIG_PCI +static struct resource* __init +insert_non_bus_resource(void) +{ + struct resource *res = + kzalloc(sizeof(struct resource), GFP_ATOMIC); + res->name = "Non-Bus Physical Address Space"; + res->start = (1ULL << 32); + res->end = -1LL; + res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + if (insert_resource(&iomem_resource, res)) { + kfree(res); + return NULL; + } + return res; +} +#endif + +static struct resource* __init +insert_ram_resource(u64 start_pfn, u64 end_pfn) +{ + struct resource *res = + kzalloc(sizeof(struct resource), GFP_ATOMIC); + res->name = "System RAM"; + res->start = start_pfn << PAGE_SHIFT; + res->end = (end_pfn << PAGE_SHIFT) - 1; + res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + if (insert_resource(&iomem_resource, res)) { + kfree(res); + return NULL; + } + return res; +} + +/* + * Request address space for all standard resources + * + * If the system includes PCI root complex drivers, we need to create + * a window just below 4GB where PCI BARs can be mapped. + */ +static int __init request_standard_resources(void) +{ + int i; + enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET }; + + iomem_resource.end = -1LL; +#ifdef CONFIG_PCI + insert_non_bus_resource(); +#endif + + for_each_online_node(i) { + u64 start_pfn = node_start_pfn[i]; + u64 end_pfn = node_end_pfn[i]; + +#ifdef CONFIG_PCI + if (start_pfn <= pci_reserve_start_pfn && + end_pfn > pci_reserve_start_pfn) { + if (end_pfn > pci_reserve_end_pfn) + insert_ram_resource(pci_reserve_end_pfn, + end_pfn); + end_pfn = pci_reserve_start_pfn; + } +#endif + insert_ram_resource(start_pfn, end_pfn); + } + + code_resource.start = __pa(_text - CODE_DELTA); + code_resource.end = __pa(_etext - CODE_DELTA)-1; + data_resource.start = __pa(_sdata); + data_resource.end = __pa(_end)-1; + + insert_resource(&iomem_resource, &code_resource); + insert_resource(&iomem_resource, &data_resource); + +#ifdef CONFIG_KEXEC + insert_resource(&iomem_resource, &crashk_res); +#endif + + return 0; +} + +subsys_initcall(request_standard_resources); diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c new file mode 100644 index 00000000..f79d4b88 --- /dev/null +++ b/arch/tile/kernel/signal.c @@ -0,0 +1,476 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/personality.h> +#include <linux/suspend.h> +#include <linux/ptrace.h> +#include <linux/elf.h> +#include <linux/compat.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <asm/processor.h> +#include <asm/ucontext.h> +#include <asm/sigframe.h> +#include <asm/syscalls.h> +#include <arch/interrupts.h> + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss, + stack_t __user *, uoss, struct pt_regs *, regs) +{ + return do_sigaltstack(uss, uoss, regs->sp); +} + + +/* + * Do a signal return; undo the signal stack. + */ + +int restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc) +{ + int err = 0; + int i; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + /* + * Enforce that sigcontext is like pt_regs, and doesn't mess + * up our stack alignment rules. + */ + BUILD_BUG_ON(sizeof(struct sigcontext) != sizeof(struct pt_regs)); + BUILD_BUG_ON(sizeof(struct sigcontext) % 8 != 0); + + for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) + err |= __get_user(regs->regs[i], &sc->gregs[i]); + + /* Ensure that the PL is always set to USER_PL. */ + regs->ex1 = PL_ICS_EX1(USER_PL, EX1_ICS(regs->ex1)); + + regs->faultnum = INT_SWINT_1_SIGRETURN; + + return err; +} + +void signal_fault(const char *type, struct pt_regs *regs, + void __user *frame, int sig) +{ + trace_unhandled_signal(type, regs, (unsigned long)frame, SIGSEGV); + force_sigsegv(sig, current); +} + +/* The assembly shim for this function arranges to ignore the return value. */ +SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) +{ + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *)(regs->sp); + sigset_t set; + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + goto badframe; + + if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) + goto badframe; + + return 0; + +badframe: + signal_fault("bad sigreturn frame", regs, frame, 0); + return 0; +} + +/* + * Set up a signal frame. + */ + +int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) +{ + int i, err = 0; + + for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) + err |= __put_user(regs->regs[i], &sc->gregs[i]); + + return err; +} + +/* + * Determine which stack to use.. + */ +static inline void __user *get_sigframe(struct k_sigaction *ka, + struct pt_regs *regs, + size_t frame_size) +{ + unsigned long sp; + + /* Default to using normal stack */ + sp = regs->sp; + + /* + * If we are on the alternate signal stack and would overflow + * it, don't. Return an always-bogus address instead so we + * will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) + return (void __user __force *)-1UL; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(sp) == 0) + sp = current->sas_ss_sp + current->sas_ss_size; + } + + sp -= frame_size; + /* + * Align the stack pointer according to the TILE ABI, + * i.e. so that on function entry (sp & 15) == 0. + */ + sp &= -16UL; + return (void __user *) sp; +} + +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + unsigned long restorer; + struct rt_sigframe __user *frame; + int err = 0; + int usig; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + usig = current_thread_info()->exec_domain + && current_thread_info()->exec_domain->signal_invmap + && sig < 32 + ? current_thread_info()->exec_domain->signal_invmap[sig] + : sig; + + /* Always write at least the signal number for the stack backtracer. */ + if (ka->sa.sa_flags & SA_SIGINFO) { + /* At sigreturn time, restore the callee-save registers too. */ + err |= copy_siginfo_to_user(&frame->info, info); + regs->flags |= PT_FLAGS_RESTORE_REGS; + } else { + err |= __put_user(info->si_signo, &frame->info.si_signo); + } + + /* Create the ucontext. */ + err |= __clear_user(&frame->save_area, sizeof(frame->save_area)); + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(NULL, &frame->uc.uc_link); + err |= __put_user((void __user *)(current->sas_ss_sp), + &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + restorer = VDSO_BASE; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = (unsigned long) ka->sa.sa_restorer; + + /* + * Set up registers for signal handler. + * Registers that we don't modify keep the value they had from + * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). + */ + regs->pc = (unsigned long) ka->sa.sa_handler; + regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */ + regs->sp = (unsigned long) frame; + regs->lr = restorer; + regs->regs[0] = (unsigned long) usig; + regs->regs[1] = (unsigned long) &frame->info; + regs->regs[2] = (unsigned long) &frame->uc; + regs->flags |= PT_FLAGS_CALLER_SAVES; + + /* + * Notify any tracer that was single-stepping it. + * The tracer may want to single-step inside the + * handler too. + */ + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); + + return 0; + +give_sigsegv: + signal_fault("bad setup frame", regs, frame, sig); + return -EFAULT; +} + +/* + * OK, we're invoking a handler + */ + +static int handle_signal(unsigned long sig, siginfo_t *info, + struct k_sigaction *ka, sigset_t *oldset, + struct pt_regs *regs) +{ + int ret; + + /* Are we from a system call? */ + if (regs->faultnum == INT_SWINT_1) { + /* If so, check system call restarting.. */ + switch (regs->regs[0]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->regs[0] = -EINTR; + break; + + case -ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + regs->regs[0] = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + /* Reload caller-saves to restore r0..r5 and r10. */ + regs->flags |= PT_FLAGS_CALLER_SAVES; + regs->regs[0] = regs->orig_r0; + regs->pc -= 8; + } + } + + /* Set up the stack frame */ +#ifdef CONFIG_COMPAT + if (is_compat_task()) + ret = compat_setup_rt_frame(sig, ka, info, oldset, regs); + else +#endif + ret = setup_rt_frame(sig, ka, info, oldset, regs); + if (ret == 0) { + /* This code is only called from system calls or from + * the work_pending path in the return-to-user code, and + * either way we can re-enable interrupts unconditionally. + */ + block_sigmask(ka, sig); + } + + return ret; +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ +void do_signal(struct pt_regs *regs) +{ + siginfo_t info; + int signr; + struct k_sigaction ka; + sigset_t *oldset; + + /* + * i386 will check if we're coming from kernel mode and bail out + * here. In my experience this just turns weird crashes into + * weird spin-hangs. But if we find a case where this seems + * helpful, we can reinstate the check on "!user_mode(regs)". + */ + + if (current_thread_info()->status & TS_RESTORE_SIGMASK) + oldset = ¤t->saved_sigmask; + else + oldset = ¤t->blocked; + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + if (signr > 0) { + /* Whee! Actually deliver the signal. */ + if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { + /* + * A signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TS_RESTORE_SIGMASK flag. + */ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; + } + + goto done; + } + + /* Did we come from a system call? */ + if (regs->faultnum == INT_SWINT_1) { + /* Restart the system call - no handlers present */ + switch (regs->regs[0]) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + regs->flags |= PT_FLAGS_CALLER_SAVES; + regs->regs[0] = regs->orig_r0; + regs->pc -= 8; + break; + + case -ERESTART_RESTARTBLOCK: + regs->flags |= PT_FLAGS_CALLER_SAVES; + regs->regs[TREG_SYSCALL_NR] = __NR_restart_syscall; + regs->pc -= 8; + break; + } + } + + /* If there's no signal to deliver, just put the saved sigmask back. */ + if (current_thread_info()->status & TS_RESTORE_SIGMASK) { + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } + +done: + /* Avoid double syscall restart if there are nested signals. */ + regs->faultnum = INT_SWINT_1_SIGRETURN; +} + +int show_unhandled_signals = 1; + +static int __init crashinfo(char *str) +{ + unsigned long val; + const char *word; + + if (*str == '\0') + val = 2; + else if (*str != '=' || strict_strtoul(++str, 0, &val) != 0) + return 0; + show_unhandled_signals = val; + switch (show_unhandled_signals) { + case 0: + word = "No"; + break; + case 1: + word = "One-line"; + break; + default: + word = "Detailed"; + break; + } + pr_info("%s crash reports will be generated on the console\n", word); + return 1; +} +__setup("crashinfo", crashinfo); + +static void dump_mem(void __user *address) +{ + void __user *addr; + enum { region_size = 256, bytes_per_line = 16 }; + int i, j, k; + int found_readable_mem = 0; + + pr_err("\n"); + if (!access_ok(VERIFY_READ, address, 1)) { + pr_err("Not dumping at address 0x%lx (kernel address)\n", + (unsigned long)address); + return; + } + + addr = (void __user *) + (((unsigned long)address & -bytes_per_line) - region_size/2); + if (addr > address) + addr = NULL; + for (i = 0; i < region_size; + addr += bytes_per_line, i += bytes_per_line) { + unsigned char buf[bytes_per_line]; + char line[100]; + if (copy_from_user(buf, addr, bytes_per_line)) + continue; + if (!found_readable_mem) { + pr_err("Dumping memory around address 0x%lx:\n", + (unsigned long)address); + found_readable_mem = 1; + } + j = sprintf(line, REGFMT":", (unsigned long)addr); + for (k = 0; k < bytes_per_line; ++k) + j += sprintf(&line[j], " %02x", buf[k]); + pr_err("%s\n", line); + } + if (!found_readable_mem) + pr_err("No readable memory around address 0x%lx\n", + (unsigned long)address); +} + +void trace_unhandled_signal(const char *type, struct pt_regs *regs, + unsigned long address, int sig) +{ + struct task_struct *tsk = current; + + if (show_unhandled_signals == 0) + return; + + /* If the signal is handled, don't show it here. */ + if (!is_global_init(tsk)) { + void __user *handler = + tsk->sighand->action[sig-1].sa.sa_handler; + if (handler != SIG_IGN && handler != SIG_DFL) + return; + } + + /* Rate-limit the one-line output, not the detailed output. */ + if (show_unhandled_signals <= 1 && !printk_ratelimit()) + return; + + printk("%s%s[%d]: %s at %lx pc "REGFMT" signal %d", + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), type, address, regs->pc, sig); + + print_vma_addr(KERN_CONT " in ", regs->pc); + + printk(KERN_CONT "\n"); + + if (show_unhandled_signals > 1) { + switch (sig) { + case SIGILL: + case SIGFPE: + case SIGSEGV: + case SIGBUS: + pr_err("User crash: signal %d," + " trap %ld, address 0x%lx\n", + sig, regs->faultnum, address); + show_regs(regs); + dump_mem((void __user *)address); + break; + default: + pr_err("User crash: signal %d, trap %ld\n", + sig, regs->faultnum); + break; + } + } +} diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c new file mode 100644 index 00000000..89529c9f --- /dev/null +++ b/arch/tile/kernel/single_step.c @@ -0,0 +1,768 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * A code-rewriter that enables instruction single-stepping. + * Derived from iLib's single-stepping code. + */ + +#ifndef __tilegx__ /* Hardware support for single step unavailable. */ + +/* These functions are only used on the TILE platform */ +#include <linux/slab.h> +#include <linux/thread_info.h> +#include <linux/uaccess.h> +#include <linux/mman.h> +#include <linux/types.h> +#include <linux/err.h> +#include <asm/cacheflush.h> +#include <asm/unaligned.h> +#include <arch/abi.h> +#include <arch/opcode.h> + +#define signExtend17(val) sign_extend((val), 17) +#define TILE_X1_MASK (0xffffffffULL << 31) + +int unaligned_printk; + +static int __init setup_unaligned_printk(char *str) +{ + long val; + if (strict_strtol(str, 0, &val) != 0) + return 0; + unaligned_printk = val; + pr_info("Printk for each unaligned data accesses is %s\n", + unaligned_printk ? "enabled" : "disabled"); + return 1; +} +__setup("unaligned_printk=", setup_unaligned_printk); + +unsigned int unaligned_fixup_count; + +enum mem_op { + MEMOP_NONE, + MEMOP_LOAD, + MEMOP_STORE, + MEMOP_LOAD_POSTINCR, + MEMOP_STORE_POSTINCR +}; + +static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset) +{ + tile_bundle_bits result; + + /* mask out the old offset */ + tile_bundle_bits mask = create_BrOff_X1(-1); + result = n & (~mask); + + /* or in the new offset */ + result |= create_BrOff_X1(offset); + + return result; +} + +static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src) +{ + tile_bundle_bits result; + tile_bundle_bits op; + + result = n & (~TILE_X1_MASK); + + op = create_Opcode_X1(SPECIAL_0_OPCODE_X1) | + create_RRROpcodeExtension_X1(OR_SPECIAL_0_OPCODE_X1) | + create_Dest_X1(dest) | + create_SrcB_X1(TREG_ZERO) | + create_SrcA_X1(src) ; + + result |= op; + return result; +} + +static inline tile_bundle_bits nop_X1(tile_bundle_bits n) +{ + return move_X1(n, TREG_ZERO, TREG_ZERO); +} + +static inline tile_bundle_bits addi_X1( + tile_bundle_bits n, int dest, int src, int imm) +{ + n &= ~TILE_X1_MASK; + + n |= (create_SrcA_X1(src) | + create_Dest_X1(dest) | + create_Imm8_X1(imm) | + create_S_X1(0) | + create_Opcode_X1(IMM_0_OPCODE_X1) | + create_ImmOpcodeExtension_X1(ADDI_IMM_0_OPCODE_X1)); + + return n; +} + +static tile_bundle_bits rewrite_load_store_unaligned( + struct single_step_state *state, + tile_bundle_bits bundle, + struct pt_regs *regs, + enum mem_op mem_op, + int size, int sign_ext) +{ + unsigned char __user *addr; + int val_reg, addr_reg, err, val; + + /* Get address and value registers */ + if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) { + addr_reg = get_SrcA_Y2(bundle); + val_reg = get_SrcBDest_Y2(bundle); + } else if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) { + addr_reg = get_SrcA_X1(bundle); + val_reg = get_Dest_X1(bundle); + } else { + addr_reg = get_SrcA_X1(bundle); + val_reg = get_SrcB_X1(bundle); + } + + /* + * If registers are not GPRs, don't try to handle it. + * + * FIXME: we could handle non-GPR loads by getting the real value + * from memory, writing it to the single step buffer, using a + * temp_reg to hold a pointer to that memory, then executing that + * instruction and resetting temp_reg. For non-GPR stores, it's a + * little trickier; we could use the single step buffer for that + * too, but we'd have to add some more state bits so that we could + * call back in here to copy that value to the real target. For + * now, we just handle the simple case. + */ + if ((val_reg >= PTREGS_NR_GPRS && + (val_reg != TREG_ZERO || + mem_op == MEMOP_LOAD || + mem_op == MEMOP_LOAD_POSTINCR)) || + addr_reg >= PTREGS_NR_GPRS) + return bundle; + + /* If it's aligned, don't handle it specially */ + addr = (void __user *)regs->regs[addr_reg]; + if (((unsigned long)addr % size) == 0) + return bundle; + + /* + * Return SIGBUS with the unaligned address, if requested. + * Note that we return SIGBUS even for completely invalid addresses + * as long as they are in fact unaligned; this matches what the + * tilepro hardware would be doing, if it could provide us with the + * actual bad address in an SPR, which it doesn't. + */ + if (unaligned_fixup == 0) { + siginfo_t info = { + .si_signo = SIGBUS, + .si_code = BUS_ADRALN, + .si_addr = addr + }; + trace_unhandled_signal("unaligned trap", regs, + (unsigned long)addr, SIGBUS); + force_sig_info(info.si_signo, &info, current); + return (tilepro_bundle_bits) 0; + } + +#ifndef __LITTLE_ENDIAN +# error We assume little-endian representation with copy_xx_user size 2 here +#endif + /* Handle unaligned load/store */ + if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) { + unsigned short val_16; + switch (size) { + case 2: + err = copy_from_user(&val_16, addr, sizeof(val_16)); + val = sign_ext ? ((short)val_16) : val_16; + break; + case 4: + err = copy_from_user(&val, addr, sizeof(val)); + break; + default: + BUG(); + } + if (err == 0) { + state->update_reg = val_reg; + state->update_value = val; + state->update = 1; + } + } else { + val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg]; + err = copy_to_user(addr, &val, size); + } + + if (err) { + siginfo_t info = { + .si_signo = SIGSEGV, + .si_code = SEGV_MAPERR, + .si_addr = addr + }; + trace_unhandled_signal("segfault", regs, + (unsigned long)addr, SIGSEGV); + force_sig_info(info.si_signo, &info, current); + return (tile_bundle_bits) 0; + } + + if (unaligned_printk || unaligned_fixup_count == 0) { + pr_info("Process %d/%s: PC %#lx: Fixup of" + " unaligned %s at %#lx.\n", + current->pid, current->comm, regs->pc, + (mem_op == MEMOP_LOAD || + mem_op == MEMOP_LOAD_POSTINCR) ? + "load" : "store", + (unsigned long)addr); + if (!unaligned_printk) { +#define P pr_info +P("\n"); +P("Unaligned fixups in the kernel will slow your application considerably.\n"); +P("To find them, write a \"1\" to /proc/sys/tile/unaligned_fixup/printk,\n"); +P("which requests the kernel show all unaligned fixups, or write a \"0\"\n"); +P("to /proc/sys/tile/unaligned_fixup/enabled, in which case each unaligned\n"); +P("access will become a SIGBUS you can debug. No further warnings will be\n"); +P("shown so as to avoid additional slowdown, but you can track the number\n"); +P("of fixups performed via /proc/sys/tile/unaligned_fixup/count.\n"); +P("Use the tile-addr2line command (see \"info addr2line\") to decode PCs.\n"); +P("\n"); +#undef P + } + } + ++unaligned_fixup_count; + + if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) { + /* Convert the Y2 instruction to a prefetch. */ + bundle &= ~(create_SrcBDest_Y2(-1) | + create_Opcode_Y2(-1)); + bundle |= (create_SrcBDest_Y2(TREG_ZERO) | + create_Opcode_Y2(LW_OPCODE_Y2)); + /* Replace the load postincr with an addi */ + } else if (mem_op == MEMOP_LOAD_POSTINCR) { + bundle = addi_X1(bundle, addr_reg, addr_reg, + get_Imm8_X1(bundle)); + /* Replace the store postincr with an addi */ + } else if (mem_op == MEMOP_STORE_POSTINCR) { + bundle = addi_X1(bundle, addr_reg, addr_reg, + get_Dest_Imm8_X1(bundle)); + } else { + /* Convert the X1 instruction to a nop. */ + bundle &= ~(create_Opcode_X1(-1) | + create_UnShOpcodeExtension_X1(-1) | + create_UnOpcodeExtension_X1(-1)); + bundle |= (create_Opcode_X1(SHUN_0_OPCODE_X1) | + create_UnShOpcodeExtension_X1( + UN_0_SHUN_0_OPCODE_X1) | + create_UnOpcodeExtension_X1( + NOP_UN_0_SHUN_0_OPCODE_X1)); + } + + return bundle; +} + +/* + * Called after execve() has started the new image. This allows us + * to reset the info state. Note that the the mmap'ed memory, if there + * was any, has already been unmapped by the exec. + */ +void single_step_execve(void) +{ + struct thread_info *ti = current_thread_info(); + kfree(ti->step_state); + ti->step_state = NULL; +} + +/** + * single_step_once() - entry point when single stepping has been triggered. + * @regs: The machine register state + * + * When we arrive at this routine via a trampoline, the single step + * engine copies the executing bundle to the single step buffer. + * If the instruction is a condition branch, then the target is + * reset to one past the next instruction. If the instruction + * sets the lr, then that is noted. If the instruction is a jump + * or call, then the new target pc is preserved and the current + * bundle instruction set to null. + * + * The necessary post-single-step rewriting information is stored in + * single_step_state-> We use data segment values because the + * stack will be rewound when we run the rewritten single-stepped + * instruction. + */ +void single_step_once(struct pt_regs *regs) +{ + extern tile_bundle_bits __single_step_ill_insn; + extern tile_bundle_bits __single_step_j_insn; + extern tile_bundle_bits __single_step_addli_insn; + extern tile_bundle_bits __single_step_auli_insn; + struct thread_info *info = (void *)current_thread_info(); + struct single_step_state *state = info->step_state; + int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP); + tile_bundle_bits __user *buffer, *pc; + tile_bundle_bits bundle; + int temp_reg; + int target_reg = TREG_LR; + int err; + enum mem_op mem_op = MEMOP_NONE; + int size = 0, sign_ext = 0; /* happy compiler */ + + asm( +" .pushsection .rodata.single_step\n" +" .align 8\n" +" .globl __single_step_ill_insn\n" +"__single_step_ill_insn:\n" +" ill\n" +" .globl __single_step_addli_insn\n" +"__single_step_addli_insn:\n" +" { nop; addli r0, zero, 0 }\n" +" .globl __single_step_auli_insn\n" +"__single_step_auli_insn:\n" +" { nop; auli r0, r0, 0 }\n" +" .globl __single_step_j_insn\n" +"__single_step_j_insn:\n" +" j .\n" +" .popsection\n" + ); + + /* + * Enable interrupts here to allow touching userspace and the like. + * The callers expect this: do_trap() already has interrupts + * enabled, and do_work_pending() handles functions that enable + * interrupts internally. + */ + local_irq_enable(); + + if (state == NULL) { + /* allocate a page of writable, executable memory */ + state = kmalloc(sizeof(struct single_step_state), GFP_KERNEL); + if (state == NULL) { + pr_err("Out of kernel memory trying to single-step\n"); + return; + } + + /* allocate a cache line of writable, executable memory */ + buffer = (void __user *) vm_mmap(NULL, 0, 64, + PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + 0); + + if (IS_ERR((void __force *)buffer)) { + kfree(state); + pr_err("Out of kernel pages trying to single-step\n"); + return; + } + + state->buffer = buffer; + state->is_enabled = 0; + + info->step_state = state; + + /* Validate our stored instruction patterns */ + BUG_ON(get_Opcode_X1(__single_step_addli_insn) != + ADDLI_OPCODE_X1); + BUG_ON(get_Opcode_X1(__single_step_auli_insn) != + AULI_OPCODE_X1); + BUG_ON(get_SrcA_X1(__single_step_addli_insn) != TREG_ZERO); + BUG_ON(get_Dest_X1(__single_step_addli_insn) != 0); + BUG_ON(get_JOffLong_X1(__single_step_j_insn) != 0); + } + + /* + * If we are returning from a syscall, we still haven't hit the + * "ill" for the swint1 instruction. So back the PC up to be + * pointing at the swint1, but we'll actually return directly + * back to the "ill" so we come back in via SIGILL as if we + * had "executed" the swint1 without ever being in kernel space. + */ + if (regs->faultnum == INT_SWINT_1) + regs->pc -= 8; + + pc = (tile_bundle_bits __user *)(regs->pc); + if (get_user(bundle, pc) != 0) { + pr_err("Couldn't read instruction at %p trying to step\n", pc); + return; + } + + /* We'll follow the instruction with 2 ill op bundles */ + state->orig_pc = (unsigned long)pc; + state->next_pc = (unsigned long)(pc + 1); + state->branch_next_pc = 0; + state->update = 0; + + if (!(bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK)) { + /* two wide, check for control flow */ + int opcode = get_Opcode_X1(bundle); + + switch (opcode) { + /* branches */ + case BRANCH_OPCODE_X1: + { + s32 offset = signExtend17(get_BrOff_X1(bundle)); + + /* + * For branches, we use a rewriting trick to let the + * hardware evaluate whether the branch is taken or + * untaken. We record the target offset and then + * rewrite the branch instruction to target 1 insn + * ahead if the branch is taken. We then follow the + * rewritten branch with two bundles, each containing + * an "ill" instruction. The supervisor examines the + * pc after the single step code is executed, and if + * the pc is the first ill instruction, then the + * branch (if any) was not taken. If the pc is the + * second ill instruction, then the branch was + * taken. The new pc is computed for these cases, and + * inserted into the registers for the thread. If + * the pc is the start of the single step code, then + * an exception or interrupt was taken before the + * code started processing, and the same "original" + * pc is restored. This change, different from the + * original implementation, has the advantage of + * executing a single user instruction. + */ + state->branch_next_pc = (unsigned long)(pc + offset); + + /* rewrite branch offset to go forward one bundle */ + bundle = set_BrOff_X1(bundle, 2); + } + break; + + /* jumps */ + case JALB_OPCODE_X1: + case JALF_OPCODE_X1: + state->update = 1; + state->next_pc = + (unsigned long) (pc + get_JOffLong_X1(bundle)); + break; + + case JB_OPCODE_X1: + case JF_OPCODE_X1: + state->next_pc = + (unsigned long) (pc + get_JOffLong_X1(bundle)); + bundle = nop_X1(bundle); + break; + + case SPECIAL_0_OPCODE_X1: + switch (get_RRROpcodeExtension_X1(bundle)) { + /* jump-register */ + case JALRP_SPECIAL_0_OPCODE_X1: + case JALR_SPECIAL_0_OPCODE_X1: + state->update = 1; + state->next_pc = + regs->regs[get_SrcA_X1(bundle)]; + break; + + case JRP_SPECIAL_0_OPCODE_X1: + case JR_SPECIAL_0_OPCODE_X1: + state->next_pc = + regs->regs[get_SrcA_X1(bundle)]; + bundle = nop_X1(bundle); + break; + + case LNK_SPECIAL_0_OPCODE_X1: + state->update = 1; + target_reg = get_Dest_X1(bundle); + break; + + /* stores */ + case SH_SPECIAL_0_OPCODE_X1: + mem_op = MEMOP_STORE; + size = 2; + break; + + case SW_SPECIAL_0_OPCODE_X1: + mem_op = MEMOP_STORE; + size = 4; + break; + } + break; + + /* loads and iret */ + case SHUN_0_OPCODE_X1: + if (get_UnShOpcodeExtension_X1(bundle) == + UN_0_SHUN_0_OPCODE_X1) { + switch (get_UnOpcodeExtension_X1(bundle)) { + case LH_UN_0_SHUN_0_OPCODE_X1: + mem_op = MEMOP_LOAD; + size = 2; + sign_ext = 1; + break; + + case LH_U_UN_0_SHUN_0_OPCODE_X1: + mem_op = MEMOP_LOAD; + size = 2; + sign_ext = 0; + break; + + case LW_UN_0_SHUN_0_OPCODE_X1: + mem_op = MEMOP_LOAD; + size = 4; + break; + + case IRET_UN_0_SHUN_0_OPCODE_X1: + { + unsigned long ex0_0 = __insn_mfspr( + SPR_EX_CONTEXT_0_0); + unsigned long ex0_1 = __insn_mfspr( + SPR_EX_CONTEXT_0_1); + /* + * Special-case it if we're iret'ing + * to PL0 again. Otherwise just let + * it run and it will generate SIGILL. + */ + if (EX1_PL(ex0_1) == USER_PL) { + state->next_pc = ex0_0; + regs->ex1 = ex0_1; + bundle = nop_X1(bundle); + } + } + } + } + break; + +#if CHIP_HAS_WH64() + /* postincrement operations */ + case IMM_0_OPCODE_X1: + switch (get_ImmOpcodeExtension_X1(bundle)) { + case LWADD_IMM_0_OPCODE_X1: + mem_op = MEMOP_LOAD_POSTINCR; + size = 4; + break; + + case LHADD_IMM_0_OPCODE_X1: + mem_op = MEMOP_LOAD_POSTINCR; + size = 2; + sign_ext = 1; + break; + + case LHADD_U_IMM_0_OPCODE_X1: + mem_op = MEMOP_LOAD_POSTINCR; + size = 2; + sign_ext = 0; + break; + + case SWADD_IMM_0_OPCODE_X1: + mem_op = MEMOP_STORE_POSTINCR; + size = 4; + break; + + case SHADD_IMM_0_OPCODE_X1: + mem_op = MEMOP_STORE_POSTINCR; + size = 2; + break; + + default: + break; + } + break; +#endif /* CHIP_HAS_WH64() */ + } + + if (state->update) { + /* + * Get an available register. We start with a + * bitmask with 1's for available registers. + * We truncate to the low 32 registers since + * we are guaranteed to have set bits in the + * low 32 bits, then use ctz to pick the first. + */ + u32 mask = (u32) ~((1ULL << get_Dest_X0(bundle)) | + (1ULL << get_SrcA_X0(bundle)) | + (1ULL << get_SrcB_X0(bundle)) | + (1ULL << target_reg)); + temp_reg = __builtin_ctz(mask); + state->update_reg = temp_reg; + state->update_value = regs->regs[temp_reg]; + regs->regs[temp_reg] = (unsigned long) (pc+1); + regs->flags |= PT_FLAGS_RESTORE_REGS; + bundle = move_X1(bundle, target_reg, temp_reg); + } + } else { + int opcode = get_Opcode_Y2(bundle); + + switch (opcode) { + /* loads */ + case LH_OPCODE_Y2: + mem_op = MEMOP_LOAD; + size = 2; + sign_ext = 1; + break; + + case LH_U_OPCODE_Y2: + mem_op = MEMOP_LOAD; + size = 2; + sign_ext = 0; + break; + + case LW_OPCODE_Y2: + mem_op = MEMOP_LOAD; + size = 4; + break; + + /* stores */ + case SH_OPCODE_Y2: + mem_op = MEMOP_STORE; + size = 2; + break; + + case SW_OPCODE_Y2: + mem_op = MEMOP_STORE; + size = 4; + break; + } + } + + /* + * Check if we need to rewrite an unaligned load/store. + * Returning zero is a special value meaning we need to SIGSEGV. + */ + if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) { + bundle = rewrite_load_store_unaligned(state, bundle, regs, + mem_op, size, sign_ext); + if (bundle == 0) + return; + } + + /* write the bundle to our execution area */ + buffer = state->buffer; + err = __put_user(bundle, buffer++); + + /* + * If we're really single-stepping, we take an INT_ILL after. + * If we're just handling an unaligned access, we can just + * jump directly back to where we were in user code. + */ + if (is_single_step) { + err |= __put_user(__single_step_ill_insn, buffer++); + err |= __put_user(__single_step_ill_insn, buffer++); + } else { + long delta; + + if (state->update) { + /* We have some state to update; do it inline */ + int ha16; + bundle = __single_step_addli_insn; + bundle |= create_Dest_X1(state->update_reg); + bundle |= create_Imm16_X1(state->update_value); + err |= __put_user(bundle, buffer++); + bundle = __single_step_auli_insn; + bundle |= create_Dest_X1(state->update_reg); + bundle |= create_SrcA_X1(state->update_reg); + ha16 = (state->update_value + 0x8000) >> 16; + bundle |= create_Imm16_X1(ha16); + err |= __put_user(bundle, buffer++); + state->update = 0; + } + + /* End with a jump back to the next instruction */ + delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) - + (unsigned long)buffer) >> + TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES; + bundle = __single_step_j_insn; + bundle |= create_JOffLong_X1(delta); + err |= __put_user(bundle, buffer++); + } + + if (err) { + pr_err("Fault when writing to single-step buffer\n"); + return; + } + + /* + * Flush the buffer. + * We do a local flush only, since this is a thread-specific buffer. + */ + __flush_icache_range((unsigned long)state->buffer, + (unsigned long)buffer); + + /* Indicate enabled */ + state->is_enabled = is_single_step; + regs->pc = (unsigned long)state->buffer; + + /* Fault immediately if we are coming back from a syscall. */ + if (regs->faultnum == INT_SWINT_1) + regs->pc += 8; +} + +#else +#include <linux/smp.h> +#include <linux/ptrace.h> +#include <arch/spr_def.h> + +static DEFINE_PER_CPU(unsigned long, ss_saved_pc); + + +/* + * Called directly on the occasion of an interrupt. + * + * If the process doesn't have single step set, then we use this as an + * opportunity to turn single step off. + * + * It has been mentioned that we could conditionally turn off single stepping + * on each entry into the kernel and rely on single_step_once to turn it + * on for the processes that matter (as we already do), but this + * implementation is somewhat more efficient in that we muck with registers + * once on a bum interrupt rather than on every entry into the kernel. + * + * If SINGLE_STEP_CONTROL_K has CANCELED set, then an interrupt occurred, + * so we have to run through this process again before we can say that an + * instruction has executed. + * + * swint will set CANCELED, but it's a legitimate instruction. Fortunately + * it changes the PC. If it hasn't changed, then we know that the interrupt + * wasn't generated by swint and we'll need to run this process again before + * we can say an instruction has executed. + * + * If either CANCELED == 0 or the PC's changed, we send out SIGTRAPs and get + * on with our lives. + */ + +void gx_singlestep_handle(struct pt_regs *regs, int fault_num) +{ + unsigned long *ss_pc = &__get_cpu_var(ss_saved_pc); + struct thread_info *info = (void *)current_thread_info(); + int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP); + unsigned long control = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K); + + if (is_single_step == 0) { + __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 0); + + } else if ((*ss_pc != regs->pc) || + (!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) { + + ptrace_notify(SIGTRAP); + control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK; + control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK; + __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control); + } +} + + +/* + * Called from need_singlestep. Set up the control registers and the enable + * register, then return back. + */ + +void single_step_once(struct pt_regs *regs) +{ + unsigned long *ss_pc = &__get_cpu_var(ss_saved_pc); + unsigned long control = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K); + + *ss_pc = regs->pc; + control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK; + control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK; + __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control); + __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 1 << USER_PL); +} + +void single_step_execve(void) +{ + /* Nothing */ +} + +#endif /* !__tilegx__ */ diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c new file mode 100644 index 00000000..91da0f72 --- /dev/null +++ b/arch/tile/kernel/smp.c @@ -0,0 +1,244 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE SMP support routines. + */ + +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/module.h> +#include <asm/cacheflush.h> + +HV_Topology smp_topology __write_once; +EXPORT_SYMBOL(smp_topology); + +#if CHIP_HAS_IPI() +static unsigned long __iomem *ipi_mappings[NR_CPUS]; +#endif + + +/* + * Top-level send_IPI*() functions to send messages to other cpus. + */ + +/* Set by smp_send_stop() to avoid recursive panics. */ +static int stopping_cpus; + +static void __send_IPI_many(HV_Recipient *recip, int nrecip, int tag) +{ + int sent = 0; + while (sent < nrecip) { + int rc = hv_send_message(recip, nrecip, + (HV_VirtAddr)&tag, sizeof(tag)); + if (rc < 0) { + if (!stopping_cpus) /* avoid recursive panic */ + panic("hv_send_message returned %d", rc); + break; + } + WARN_ONCE(rc == 0, "hv_send_message() returned zero\n"); + sent += rc; + } +} + +void send_IPI_single(int cpu, int tag) +{ + HV_Recipient recip = { + .y = cpu / smp_width, + .x = cpu % smp_width, + .state = HV_TO_BE_SENT + }; + __send_IPI_many(&recip, 1, tag); +} + +void send_IPI_many(const struct cpumask *mask, int tag) +{ + HV_Recipient recip[NR_CPUS]; + int cpu; + int nrecip = 0; + int my_cpu = smp_processor_id(); + for_each_cpu(cpu, mask) { + HV_Recipient *r; + BUG_ON(cpu == my_cpu); + r = &recip[nrecip++]; + r->y = cpu / smp_width; + r->x = cpu % smp_width; + r->state = HV_TO_BE_SENT; + } + __send_IPI_many(recip, nrecip, tag); +} + +void send_IPI_allbutself(int tag) +{ + struct cpumask mask; + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + send_IPI_many(&mask, tag); +} + +/* + * Functions related to starting/stopping cpus. + */ + +/* Handler to start the current cpu. */ +static void smp_start_cpu_interrupt(void) +{ + get_irq_regs()->pc = start_cpu_function_addr; +} + +/* Handler to stop the current cpu. */ +static void smp_stop_cpu_interrupt(void) +{ + set_cpu_online(smp_processor_id(), 0); + arch_local_irq_disable_all(); + for (;;) + asm("nap; nop"); +} + +/* This function calls the 'stop' function on all other CPUs in the system. */ +void smp_send_stop(void) +{ + stopping_cpus = 1; + send_IPI_allbutself(MSG_TAG_STOP_CPU); +} + +/* On panic, just wait; we may get an smp_send_stop() later on. */ +void panic_smp_self_stop(void) +{ + while (1) + asm("nap; nop"); +} + +/* + * Dispatch code called from hv_message_intr() for HV_MSG_TILE hv messages. + */ +void evaluate_message(int tag) +{ + switch (tag) { + case MSG_TAG_START_CPU: /* Start up a cpu */ + smp_start_cpu_interrupt(); + break; + + case MSG_TAG_STOP_CPU: /* Sent to shut down slave CPU's */ + smp_stop_cpu_interrupt(); + break; + + case MSG_TAG_CALL_FUNCTION_MANY: /* Call function on cpumask */ + generic_smp_call_function_interrupt(); + break; + + case MSG_TAG_CALL_FUNCTION_SINGLE: /* Call function on one other CPU */ + generic_smp_call_function_single_interrupt(); + break; + + default: + panic("Unknown IPI message tag %d", tag); + break; + } +} + + +/* + * flush_icache_range() code uses smp_call_function(). + */ + +struct ipi_flush { + unsigned long start; + unsigned long end; +}; + +static void ipi_flush_icache_range(void *info) +{ + struct ipi_flush *flush = (struct ipi_flush *) info; + __flush_icache_range(flush->start, flush->end); +} + +void flush_icache_range(unsigned long start, unsigned long end) +{ + struct ipi_flush flush = { start, end }; + preempt_disable(); + on_each_cpu(ipi_flush_icache_range, &flush, 1); + preempt_enable(); +} + + +/* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */ +static irqreturn_t handle_reschedule_ipi(int irq, void *token) +{ + __get_cpu_var(irq_stat).irq_resched_count++; + scheduler_ipi(); + + return IRQ_HANDLED; +} + +static struct irqaction resched_action = { + .handler = handle_reschedule_ipi, + .name = "resched", + .dev_id = handle_reschedule_ipi /* unique token */, +}; + +void __init ipi_init(void) +{ +#if CHIP_HAS_IPI() + int cpu; + /* Map IPI trigger MMIO addresses. */ + for_each_possible_cpu(cpu) { + HV_Coord tile; + HV_PTE pte; + unsigned long offset; + + tile.x = cpu_x(cpu); + tile.y = cpu_y(cpu); + if (hv_get_ipi_pte(tile, KERNEL_PL, &pte) != 0) + panic("Failed to initialize IPI for cpu %d\n", cpu); + + offset = hv_pte_get_pfn(pte) << PAGE_SHIFT; + ipi_mappings[cpu] = ioremap_prot(offset, PAGE_SIZE, pte); + } +#endif + + /* Bind handle_reschedule_ipi() to IRQ_RESCHEDULE. */ + tile_irq_activate(IRQ_RESCHEDULE, TILE_IRQ_PERCPU); + BUG_ON(setup_irq(IRQ_RESCHEDULE, &resched_action)); +} + +#if CHIP_HAS_IPI() + +void smp_send_reschedule(int cpu) +{ + WARN_ON(cpu_is_offline(cpu)); + + /* + * We just want to do an MMIO store. The traditional writeq() + * functions aren't really correct here, since they're always + * directed at the PCI shim. For now, just do a raw store, + * casting away the __iomem attribute. + */ + ((unsigned long __force *)ipi_mappings[cpu])[IRQ_RESCHEDULE] = 0; +} + +#else + +void smp_send_reschedule(int cpu) +{ + HV_Coord coord; + + WARN_ON(cpu_is_offline(cpu)); + + coord.y = cpu_y(cpu); + coord.x = cpu_x(cpu); + hv_trigger_ipi(coord, IRQ_RESCHEDULE); +} + +#endif /* CHIP_HAS_IPI() */ diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c new file mode 100644 index 00000000..172aef7d --- /dev/null +++ b/arch/tile/kernel/smpboot.c @@ -0,0 +1,279 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/kernel_stat.h> +#include <linux/bootmem.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/percpu.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/irq.h> +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> +#include <asm/sections.h> + +/* State of each CPU. */ +static DEFINE_PER_CPU(int, cpu_state) = { 0 }; + +/* The messaging code jumps to this pointer during boot-up */ +unsigned long start_cpu_function_addr; + +/* Called very early during startup to mark boot cpu as online */ +void __init smp_prepare_boot_cpu(void) +{ + int cpu = smp_processor_id(); + set_cpu_online(cpu, 1); + set_cpu_present(cpu, 1); + __get_cpu_var(cpu_state) = CPU_ONLINE; + + init_messaging(); +} + +static void start_secondary(void); + +/* + * Called at the top of init() to launch all the other CPUs. + * They run free to complete their initialization and then wait + * until they get an IPI from the boot cpu to come online. + */ +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + long rc; + int cpu, cpu_count; + int boot_cpu = smp_processor_id(); + + current_thread_info()->cpu = boot_cpu; + + /* + * Pin this task to the boot CPU while we bring up the others, + * just to make sure we don't uselessly migrate as they come up. + */ + rc = sched_setaffinity(current->pid, cpumask_of(boot_cpu)); + if (rc != 0) + pr_err("Couldn't set init affinity to boot cpu (%ld)\n", rc); + + /* Print information about disabled and dataplane cpus. */ + print_disabled_cpus(); + + /* + * Tell the messaging subsystem how to respond to the + * startup message. We use a level of indirection to avoid + * confusing the linker with the fact that the messaging + * subsystem is calling __init code. + */ + start_cpu_function_addr = (unsigned long) &online_secondary; + + /* Set up thread context for all new processors. */ + cpu_count = 1; + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + struct task_struct *idle; + + if (cpu == boot_cpu) + continue; + + if (!cpu_possible(cpu)) { + /* + * Make this processor do nothing on boot. + * Note that we don't give the boot_pc function + * a stack, so it has to be assembly code. + */ + per_cpu(boot_sp, cpu) = 0; + per_cpu(boot_pc, cpu) = (unsigned long) smp_nap; + continue; + } + + /* Create a new idle thread to run start_secondary() */ + idle = fork_idle(cpu); + if (IS_ERR(idle)) + panic("failed fork for CPU %d", cpu); + idle->thread.pc = (unsigned long) start_secondary; + + /* Make this thread the boot thread for this processor */ + per_cpu(boot_sp, cpu) = task_ksp0(idle); + per_cpu(boot_pc, cpu) = idle->thread.pc; + + ++cpu_count; + } + BUG_ON(cpu_count > (max_cpus ? max_cpus : 1)); + + /* Fire up the other tiles, if any */ + init_cpu_present(cpu_possible_mask); + if (cpumask_weight(cpu_present_mask) > 1) { + mb(); /* make sure all data is visible to new processors */ + hv_start_all_tiles(); + } +} + +static __initdata struct cpumask init_affinity; + +static __init int reset_init_affinity(void) +{ + long rc = sched_setaffinity(current->pid, &init_affinity); + if (rc != 0) + pr_warning("couldn't reset init affinity (%ld)\n", + rc); + return 0; +} +late_initcall(reset_init_affinity); + +static struct cpumask cpu_started __cpuinitdata; + +/* + * Activate a secondary processor. Very minimal; don't add anything + * to this path without knowing what you're doing, since SMP booting + * is pretty fragile. + */ +static void __cpuinit start_secondary(void) +{ + int cpuid = smp_processor_id(); + + /* Set our thread pointer appropriately. */ + set_my_cpu_offset(__per_cpu_offset[cpuid]); + + preempt_disable(); + + /* + * In large machines even this will slow us down, since we + * will be contending for for the printk spinlock. + */ + /* printk(KERN_DEBUG "Initializing CPU#%d\n", cpuid); */ + + /* Initialize the current asid for our first page table. */ + __get_cpu_var(current_asid) = min_asid; + + /* Set up this thread as another owner of the init_mm */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + if (current->mm) + BUG(); + enter_lazy_tlb(&init_mm, current); + + /* Allow hypervisor messages to be received */ + init_messaging(); + local_irq_enable(); + + /* Indicate that we're ready to come up. */ + /* Must not do this before we're ready to receive messages */ + if (cpumask_test_and_set_cpu(cpuid, &cpu_started)) { + pr_warning("CPU#%d already started!\n", cpuid); + for (;;) + local_irq_enable(); + } + + smp_nap(); +} + +/* + * Bring a secondary processor online. + */ +void __cpuinit online_secondary(void) +{ + /* + * low-memory mappings have been cleared, flush them from + * the local TLBs too. + */ + local_flush_tlb(); + + BUG_ON(in_interrupt()); + + /* This must be done before setting cpu_online_mask */ + wmb(); + + notify_cpu_starting(smp_processor_id()); + + /* + * We need to hold call_lock, so there is no inconsistency + * between the time smp_call_function() determines number of + * IPI recipients, and the time when the determination is made + * for which cpus receive the IPI. Holding this + * lock helps us to not include this cpu in a currently in progress + * smp_call_function(). + */ + ipi_call_lock(); + set_cpu_online(smp_processor_id(), 1); + ipi_call_unlock(); + __get_cpu_var(cpu_state) = CPU_ONLINE; + + /* Set up tile-specific state for this cpu. */ + setup_cpu(0); + + /* Set up tile-timer clock-event device on this cpu */ + setup_tile_timer(); + + preempt_enable(); + + cpu_idle(); +} + +int __cpuinit __cpu_up(unsigned int cpu) +{ + /* Wait 5s total for all CPUs for them to come online */ + static int timeout; + for (; !cpumask_test_cpu(cpu, &cpu_started); timeout++) { + if (timeout >= 50000) { + pr_info("skipping unresponsive cpu%d\n", cpu); + local_irq_enable(); + return -EIO; + } + udelay(100); + } + + local_irq_enable(); + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + + /* Unleash the CPU! */ + send_IPI_single(cpu, MSG_TAG_START_CPU); + while (!cpumask_test_cpu(cpu, cpu_online_mask)) + cpu_relax(); + return 0; +} + +static void panic_start_cpu(void) +{ + panic("Received a MSG_START_CPU IPI after boot finished."); +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ + int cpu, next, rc; + + /* Reset the response to a (now illegal) MSG_START_CPU IPI. */ + start_cpu_function_addr = (unsigned long) &panic_start_cpu; + + cpumask_copy(&init_affinity, cpu_online_mask); + + /* + * Pin ourselves to a single cpu in the initial affinity set + * so that kernel mappings for the rootfs are not in the dataplane, + * if set, and to avoid unnecessary migrating during bringup. + * Use the last cpu just in case the whole chip has been + * isolated from the scheduler, to keep init away from likely + * more useful user code. This also ensures that work scheduled + * via schedule_delayed_work() in the init routines will land + * on this cpu. + */ + for (cpu = cpumask_first(&init_affinity); + (next = cpumask_next(cpu, &init_affinity)) < nr_cpu_ids; + cpu = next) + ; + rc = sched_setaffinity(current->pid, cpumask_of(cpu)); + if (rc != 0) + pr_err("Couldn't set init affinity to cpu %d (%d)\n", cpu, rc); +} diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c new file mode 100644 index 00000000..b2f44c28 --- /dev/null +++ b/arch/tile/kernel/stack.c @@ -0,0 +1,491 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/pfn.h> +#include <linux/kallsyms.h> +#include <linux/stacktrace.h> +#include <linux/uaccess.h> +#include <linux/mmzone.h> +#include <linux/dcache.h> +#include <linux/fs.h> +#include <asm/backtrace.h> +#include <asm/page.h> +#include <asm/ucontext.h> +#include <asm/switch_to.h> +#include <asm/sigframe.h> +#include <asm/stack.h> +#include <arch/abi.h> +#include <arch/interrupts.h> + +#define KBT_ONGOING 0 /* Backtrace still ongoing */ +#define KBT_DONE 1 /* Backtrace cleanly completed */ +#define KBT_RUNNING 2 /* Can't run backtrace on a running task */ +#define KBT_LOOP 3 /* Backtrace entered a loop */ + +/* Is address on the specified kernel stack? */ +static int in_kernel_stack(struct KBacktraceIterator *kbt, unsigned long sp) +{ + ulong kstack_base = (ulong) kbt->task->stack; + if (kstack_base == 0) /* corrupt task pointer; just follow stack... */ + return sp >= PAGE_OFFSET && sp < (unsigned long)high_memory; + return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; +} + +/* Callback for backtracer; basically a glorified memcpy */ +static bool read_memory_func(void *result, unsigned long address, + unsigned int size, void *vkbt) +{ + int retval; + struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; + + if (address == 0) + return 0; + if (__kernel_text_address(address)) { + /* OK to read kernel code. */ + } else if (address >= PAGE_OFFSET) { + /* We only tolerate kernel-space reads of this task's stack */ + if (!in_kernel_stack(kbt, address)) + return 0; + } else if (!kbt->is_current) { + return 0; /* can't read from other user address spaces */ + } + pagefault_disable(); + retval = __copy_from_user_inatomic(result, + (void __user __force *)address, + size); + pagefault_enable(); + return (retval == 0); +} + +/* Return a pt_regs pointer for a valid fault handler frame */ +static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) +{ + const char *fault = NULL; /* happy compiler */ + char fault_buf[64]; + unsigned long sp = kbt->it.sp; + struct pt_regs *p; + + if (sp % sizeof(long) != 0) + return NULL; + if (!in_kernel_stack(kbt, sp)) + return NULL; + if (!in_kernel_stack(kbt, sp + C_ABI_SAVE_AREA_SIZE + PTREGS_SIZE-1)) + return NULL; + p = (struct pt_regs *)(sp + C_ABI_SAVE_AREA_SIZE); + if (p->faultnum == INT_SWINT_1 || p->faultnum == INT_SWINT_1_SIGRETURN) + fault = "syscall"; + else { + if (kbt->verbose) { /* else we aren't going to use it */ + snprintf(fault_buf, sizeof(fault_buf), + "interrupt %ld", p->faultnum); + fault = fault_buf; + } + } + if (EX1_PL(p->ex1) == KERNEL_PL && + __kernel_text_address(p->pc) && + in_kernel_stack(kbt, p->sp) && + p->sp >= sp) { + if (kbt->verbose) + pr_err(" <%s while in kernel mode>\n", fault); + } else if (EX1_PL(p->ex1) == USER_PL && + p->pc < PAGE_OFFSET && + p->sp < PAGE_OFFSET) { + if (kbt->verbose) + pr_err(" <%s while in user mode>\n", fault); + } else if (kbt->verbose) { + pr_err(" (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n", + p->pc, p->sp, p->ex1); + p = NULL; + } + if (!kbt->profile || (INT_MASK(p->faultnum) & QUEUED_INTERRUPTS) == 0) + return p; + return NULL; +} + +/* Is the pc pointing to a sigreturn trampoline? */ +static int is_sigreturn(unsigned long pc) +{ + return (pc == VDSO_BASE); +} + +/* Return a pt_regs pointer for a valid signal handler frame */ +static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt, + struct rt_sigframe* kframe) +{ + BacktraceIterator *b = &kbt->it; + + if (b->pc == VDSO_BASE && b->sp < PAGE_OFFSET && + b->sp % sizeof(long) == 0) { + int retval; + pagefault_disable(); + retval = __copy_from_user_inatomic( + kframe, (void __user __force *)b->sp, + sizeof(*kframe)); + pagefault_enable(); + if (retval != 0 || + (unsigned int)(kframe->info.si_signo) >= _NSIG) + return NULL; + if (kbt->verbose) { + pr_err(" <received signal %d>\n", + kframe->info.si_signo); + } + return (struct pt_regs *)&kframe->uc.uc_mcontext; + } + return NULL; +} + +static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt) +{ + return is_sigreturn(kbt->it.pc); +} + +static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt) +{ + struct pt_regs *p; + struct rt_sigframe kframe; + + p = valid_fault_handler(kbt); + if (p == NULL) + p = valid_sigframe(kbt, &kframe); + if (p == NULL) + return 0; + backtrace_init(&kbt->it, read_memory_func, kbt, + p->pc, p->lr, p->sp, p->regs[52]); + kbt->new_context = 1; + return 1; +} + +/* Find a frame that isn't a sigreturn, if there is one. */ +static int KBacktraceIterator_next_item_inclusive( + struct KBacktraceIterator *kbt) +{ + for (;;) { + do { + if (!KBacktraceIterator_is_sigreturn(kbt)) + return KBT_ONGOING; + } while (backtrace_next(&kbt->it)); + + if (!KBacktraceIterator_restart(kbt)) + return KBT_DONE; + } +} + +/* + * If the current sp is on a page different than what we recorded + * as the top-of-kernel-stack last time we context switched, we have + * probably blown the stack, and nothing is going to work out well. + * If we can at least get out a warning, that may help the debug, + * though we probably won't be able to backtrace into the code that + * actually did the recursive damage. + */ +static void validate_stack(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + unsigned long ksp0 = get_current_ksp0(); + unsigned long ksp0_base = ksp0 - THREAD_SIZE; + unsigned long sp = stack_pointer; + + if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) { + pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n" + " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", + cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); + } + + else if (sp < ksp0_base + sizeof(struct thread_info)) { + pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n" + " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", + cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); + } +} + +void KBacktraceIterator_init(struct KBacktraceIterator *kbt, + struct task_struct *t, struct pt_regs *regs) +{ + unsigned long pc, lr, sp, r52; + int is_current; + + /* + * Set up callback information. We grab the kernel stack base + * so we will allow reads of that address range. + */ + is_current = (t == NULL || t == current); + kbt->is_current = is_current; + if (is_current) + t = validate_current(); + kbt->task = t; + kbt->verbose = 0; /* override in caller if desired */ + kbt->profile = 0; /* override in caller if desired */ + kbt->end = KBT_ONGOING; + kbt->new_context = 1; + if (is_current) + validate_stack(regs); + + if (regs == NULL) { + if (is_current || t->state == TASK_RUNNING) { + /* Can't do this; we need registers */ + kbt->end = KBT_RUNNING; + return; + } + pc = get_switch_to_pc(); + lr = t->thread.pc; + sp = t->thread.ksp; + r52 = 0; + } else { + pc = regs->pc; + lr = regs->lr; + sp = regs->sp; + r52 = regs->regs[52]; + } + + backtrace_init(&kbt->it, read_memory_func, kbt, pc, lr, sp, r52); + kbt->end = KBacktraceIterator_next_item_inclusive(kbt); +} +EXPORT_SYMBOL(KBacktraceIterator_init); + +int KBacktraceIterator_end(struct KBacktraceIterator *kbt) +{ + return kbt->end != KBT_ONGOING; +} +EXPORT_SYMBOL(KBacktraceIterator_end); + +void KBacktraceIterator_next(struct KBacktraceIterator *kbt) +{ + unsigned long old_pc = kbt->it.pc, old_sp = kbt->it.sp; + kbt->new_context = 0; + if (!backtrace_next(&kbt->it) && !KBacktraceIterator_restart(kbt)) { + kbt->end = KBT_DONE; + return; + } + kbt->end = KBacktraceIterator_next_item_inclusive(kbt); + if (old_pc == kbt->it.pc && old_sp == kbt->it.sp) { + /* Trapped in a loop; give up. */ + kbt->end = KBT_LOOP; + } +} +EXPORT_SYMBOL(KBacktraceIterator_next); + +static void describe_addr(struct KBacktraceIterator *kbt, + unsigned long address, + int have_mmap_sem, char *buf, size_t bufsize) +{ + struct vm_area_struct *vma; + size_t namelen, remaining; + unsigned long size, offset, adjust; + char *p, *modname; + const char *name; + int rc; + + /* + * Look one byte back for every caller frame (i.e. those that + * aren't a new context) so we look up symbol data for the + * call itself, not the following instruction, which may be on + * a different line (or in a different function). + */ + adjust = !kbt->new_context; + address -= adjust; + + if (address >= PAGE_OFFSET) { + /* Handle kernel symbols. */ + BUG_ON(bufsize < KSYM_NAME_LEN); + name = kallsyms_lookup(address, &size, &offset, + &modname, buf); + if (name == NULL) { + buf[0] = '\0'; + return; + } + namelen = strlen(buf); + remaining = (bufsize - 1) - namelen; + p = buf + namelen; + rc = snprintf(p, remaining, "+%#lx/%#lx ", + offset + adjust, size); + if (modname && rc < remaining) + snprintf(p + rc, remaining - rc, "[%s] ", modname); + buf[bufsize-1] = '\0'; + return; + } + + /* If we don't have the mmap_sem, we can't show any more info. */ + buf[0] = '\0'; + if (!have_mmap_sem) + return; + + /* Find vma info. */ + vma = find_vma(kbt->task->mm, address); + if (vma == NULL || address < vma->vm_start) { + snprintf(buf, bufsize, "[unmapped address] "); + return; + } + + if (vma->vm_file) { + char *s; + p = d_path(&vma->vm_file->f_path, buf, bufsize); + if (IS_ERR(p)) + p = "?"; + s = strrchr(p, '/'); + if (s) + p = s+1; + } else { + p = "anon"; + } + + /* Generate a string description of the vma info. */ + namelen = strlen(p); + remaining = (bufsize - 1) - namelen; + memmove(buf, p, namelen); + snprintf(buf + namelen, remaining, "[%lx+%lx] ", + vma->vm_start, vma->vm_end - vma->vm_start); +} + +/* + * This method wraps the backtracer's more generic support. + * It is only invoked from the architecture-specific code; show_stack() + * and dump_stack() (in entry.S) are architecture-independent entry points. + */ +void tile_show_stack(struct KBacktraceIterator *kbt, int headers) +{ + int i; + int have_mmap_sem = 0; + + if (headers) { + /* + * Add a blank line since if we are called from panic(), + * then bust_spinlocks() spit out a space in front of us + * and it will mess up our KERN_ERR. + */ + pr_err("\n"); + pr_err("Starting stack dump of tid %d, pid %d (%s)" + " on cpu %d at cycle %lld\n", + kbt->task->pid, kbt->task->tgid, kbt->task->comm, + smp_processor_id(), get_cycles()); + } + kbt->verbose = 1; + i = 0; + for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) { + char namebuf[KSYM_NAME_LEN+100]; + unsigned long address = kbt->it.pc; + + /* Try to acquire the mmap_sem as we pass into userspace. */ + if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm) + have_mmap_sem = + down_read_trylock(&kbt->task->mm->mmap_sem); + + describe_addr(kbt, address, have_mmap_sem, + namebuf, sizeof(namebuf)); + + pr_err(" frame %d: 0x%lx %s(sp 0x%lx)\n", + i++, address, namebuf, (unsigned long)(kbt->it.sp)); + + if (i >= 100) { + pr_err("Stack dump truncated" + " (%d frames)\n", i); + break; + } + } + if (kbt->end == KBT_LOOP) + pr_err("Stack dump stopped; next frame identical to this one\n"); + if (headers) + pr_err("Stack dump complete\n"); + if (have_mmap_sem) + up_read(&kbt->task->mm->mmap_sem); +} +EXPORT_SYMBOL(tile_show_stack); + + +/* This is called from show_regs() and _dump_stack() */ +void dump_stack_regs(struct pt_regs *regs) +{ + struct KBacktraceIterator kbt; + KBacktraceIterator_init(&kbt, NULL, regs); + tile_show_stack(&kbt, 1); +} +EXPORT_SYMBOL(dump_stack_regs); + +static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs, + ulong pc, ulong lr, ulong sp, ulong r52) +{ + memset(regs, 0, sizeof(struct pt_regs)); + regs->pc = pc; + regs->lr = lr; + regs->sp = sp; + regs->regs[52] = r52; + return regs; +} + +/* This is called from dump_stack() and just converts to pt_regs */ +void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52) +{ + struct pt_regs regs; + dump_stack_regs(regs_to_pt_regs(®s, pc, lr, sp, r52)); +} + +/* This is called from KBacktraceIterator_init_current() */ +void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc, + ulong lr, ulong sp, ulong r52) +{ + struct pt_regs regs; + KBacktraceIterator_init(kbt, NULL, + regs_to_pt_regs(®s, pc, lr, sp, r52)); +} + +/* This is called only from kernel/sched.c, with esp == NULL */ +void show_stack(struct task_struct *task, unsigned long *esp) +{ + struct KBacktraceIterator kbt; + if (task == NULL || task == current) + KBacktraceIterator_init_current(&kbt); + else + KBacktraceIterator_init(&kbt, task, NULL); + tile_show_stack(&kbt, 0); +} + +#ifdef CONFIG_STACKTRACE + +/* Support generic Linux stack API too */ + +void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace) +{ + struct KBacktraceIterator kbt; + int skip = trace->skip; + int i = 0; + + if (task == NULL || task == current) + KBacktraceIterator_init_current(&kbt); + else + KBacktraceIterator_init(&kbt, task, NULL); + for (; !KBacktraceIterator_end(&kbt); KBacktraceIterator_next(&kbt)) { + if (skip) { + --skip; + continue; + } + if (i >= trace->max_entries || kbt.it.pc < PAGE_OFFSET) + break; + trace->entries[i++] = kbt.it.pc; + } + trace->nr_entries = i; +} +EXPORT_SYMBOL(save_stack_trace_tsk); + +void save_stack_trace(struct stack_trace *trace) +{ + save_stack_trace_tsk(NULL, trace); +} + +#endif + +/* In entry.S */ +EXPORT_SYMBOL(KBacktraceIterator_init_current); diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c new file mode 100644 index 00000000..cb44ba7c --- /dev/null +++ b/arch/tile/kernel/sys.c @@ -0,0 +1,119 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/TILE + * platform. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/syscalls.h> +#include <linux/mman.h> +#include <linux/file.h> +#include <linux/mempolicy.h> +#include <linux/binfmts.h> +#include <linux/fs.h> +#include <linux/compat.h> +#include <linux/uaccess.h> +#include <linux/signal.h> +#include <asm/syscalls.h> +#include <asm/pgtable.h> +#include <asm/homecache.h> +#include <arch/chip.h> + +SYSCALL_DEFINE0(flush_cache) +{ + homecache_evict(cpumask_of(smp_processor_id())); + return 0; +} + +/* + * Syscalls that pass 64-bit values on 32-bit systems normally + * pass them as (low,high) word packed into the immediately adjacent + * registers. If the low word naturally falls on an even register, + * our ABI makes it work correctly; if not, we adjust it here. + * Handling it here means we don't have to fix uclibc AND glibc AND + * any other standard libcs we want to support. + */ + +#if !defined(__tilegx__) || defined(CONFIG_COMPAT) + +ssize_t sys32_readahead(int fd, u32 offset_lo, u32 offset_hi, u32 count) +{ + return sys_readahead(fd, ((loff_t)offset_hi << 32) | offset_lo, count); +} + +int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi, + u32 len_lo, u32 len_hi, int advice) +{ + return sys_fadvise64_64(fd, ((loff_t)offset_hi << 32) | offset_lo, + ((loff_t)len_hi << 32) | len_lo, advice); +} + +#endif /* 32-bit syscall wrappers */ + +/* Note: used by the compat code even in 64-bit Linux. */ +SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, off_4k) +{ +#define PAGE_ADJUST (PAGE_SHIFT - 12) + if (off_4k & ((1 << PAGE_ADJUST) - 1)) + return -EINVAL; + return sys_mmap_pgoff(addr, len, prot, flags, fd, + off_4k >> PAGE_ADJUST); +} + +#ifdef __tilegx__ +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, off_t, offset) +{ + if (offset & ((1 << PAGE_SHIFT) - 1)) + return -EINVAL; + return sys_mmap_pgoff(addr, len, prot, flags, fd, + offset >> PAGE_SHIFT); +} +#endif + + +/* Provide the actual syscall number to call mapping. */ +#undef __SYSCALL +#define __SYSCALL(nr, call) [nr] = (call), + +#ifndef __tilegx__ +/* See comments at the top of the file. */ +#define sys_fadvise64_64 sys32_fadvise64_64 +#define sys_readahead sys32_readahead +#endif + +/* Call the trampolines to manage pt_regs where necessary. */ +#define sys_execve _sys_execve +#define sys_sigaltstack _sys_sigaltstack +#define sys_rt_sigreturn _sys_rt_sigreturn +#define sys_clone _sys_clone +#ifndef __tilegx__ +#define sys_cmpxchg_badaddr _sys_cmpxchg_badaddr +#endif + +/* + * Note that we can't include <linux/unistd.h> here since the header + * guard will defeat us; <asm/unistd.h> checks for __SYSCALL as well. + */ +void *sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls-1] = sys_ni_syscall, +#include <asm/unistd.h> +}; diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c new file mode 100644 index 00000000..71ae728e --- /dev/null +++ b/arch/tile/kernel/sysfs.c @@ -0,0 +1,185 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * /sys entry support. + */ + +#include <linux/device.h> +#include <linux/cpu.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/stat.h> +#include <hv/hypervisor.h> + +/* Return a string queried from the hypervisor, truncated to page size. */ +static ssize_t get_hv_confstr(char *page, int query) +{ + ssize_t n = hv_confstr(query, (unsigned long)page, PAGE_SIZE - 1); + n = n < 0 ? 0 : min(n, (ssize_t)PAGE_SIZE - 1) - 1; + if (n) + page[n++] = '\n'; + page[n] = '\0'; + return n; +} + +static ssize_t chip_width_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return sprintf(page, "%u\n", smp_width); +} +static DEVICE_ATTR(chip_width, 0444, chip_width_show, NULL); + +static ssize_t chip_height_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return sprintf(page, "%u\n", smp_height); +} +static DEVICE_ATTR(chip_height, 0444, chip_height_show, NULL); + +static ssize_t chip_serial_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return get_hv_confstr(page, HV_CONFSTR_CHIP_SERIAL_NUM); +} +static DEVICE_ATTR(chip_serial, 0444, chip_serial_show, NULL); + +static ssize_t chip_revision_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return get_hv_confstr(page, HV_CONFSTR_CHIP_REV); +} +static DEVICE_ATTR(chip_revision, 0444, chip_revision_show, NULL); + + +static ssize_t type_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return sprintf(page, "tilera\n"); +} +static DEVICE_ATTR(type, 0444, type_show, NULL); + +#define HV_CONF_ATTR(name, conf) \ + static ssize_t name ## _show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ + { \ + return get_hv_confstr(page, conf); \ + } \ + static DEVICE_ATTR(name, 0444, name ## _show, NULL); + +HV_CONF_ATTR(version, HV_CONFSTR_HV_SW_VER) +HV_CONF_ATTR(config_version, HV_CONFSTR_HV_CONFIG_VER) + +HV_CONF_ATTR(board_part, HV_CONFSTR_BOARD_PART_NUM) +HV_CONF_ATTR(board_serial, HV_CONFSTR_BOARD_SERIAL_NUM) +HV_CONF_ATTR(board_revision, HV_CONFSTR_BOARD_REV) +HV_CONF_ATTR(board_description, HV_CONFSTR_BOARD_DESC) +HV_CONF_ATTR(mezz_part, HV_CONFSTR_MEZZ_PART_NUM) +HV_CONF_ATTR(mezz_serial, HV_CONFSTR_MEZZ_SERIAL_NUM) +HV_CONF_ATTR(mezz_revision, HV_CONFSTR_MEZZ_REV) +HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC) +HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL) + +static struct attribute *board_attrs[] = { + &dev_attr_board_part.attr, + &dev_attr_board_serial.attr, + &dev_attr_board_revision.attr, + &dev_attr_board_description.attr, + &dev_attr_mezz_part.attr, + &dev_attr_mezz_serial.attr, + &dev_attr_mezz_revision.attr, + &dev_attr_mezz_description.attr, + &dev_attr_switch_control.attr, + NULL +}; + +static struct attribute_group board_attr_group = { + .name = "board", + .attrs = board_attrs, +}; + + +static struct bin_attribute hvconfig_bin; + +static ssize_t +hvconfig_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + static size_t size; + + /* Lazily learn the true size (minus the trailing NUL). */ + if (size == 0) + size = hv_confstr(HV_CONFSTR_HV_CONFIG, 0, 0) - 1; + + /* Check and adjust input parameters. */ + if (off > size) + return -EINVAL; + if (count > size - off) + count = size - off; + + if (count) { + /* Get a copy of the hvc and copy out the relevant portion. */ + char *hvc; + + size = off + count; + hvc = kmalloc(size, GFP_KERNEL); + if (hvc == NULL) + return -ENOMEM; + hv_confstr(HV_CONFSTR_HV_CONFIG, (unsigned long)hvc, size); + memcpy(buf, hvc + off, count); + kfree(hvc); + } + + return count; +} + +static int __init create_sysfs_entries(void) +{ + int err = 0; + +#define create_cpu_attr(name) \ + if (!err) \ + err = device_create_file(cpu_subsys.dev_root, &dev_attr_##name); + create_cpu_attr(chip_width); + create_cpu_attr(chip_height); + create_cpu_attr(chip_serial); + create_cpu_attr(chip_revision); + +#define create_hv_attr(name) \ + if (!err) \ + err = sysfs_create_file(hypervisor_kobj, &dev_attr_##name.attr); + create_hv_attr(type); + create_hv_attr(version); + create_hv_attr(config_version); + + if (!err) + err = sysfs_create_group(hypervisor_kobj, &board_attr_group); + + if (!err) { + sysfs_bin_attr_init(&hvconfig_bin); + hvconfig_bin.attr.name = "hvconfig"; + hvconfig_bin.attr.mode = S_IRUGO; + hvconfig_bin.read = hvconfig_bin_read; + hvconfig_bin.size = PAGE_SIZE; + err = sysfs_create_bin_file(hypervisor_kobj, &hvconfig_bin); + } + + return err; +} +subsys_initcall(create_sysfs_entries); diff --git a/arch/tile/kernel/tile-desc_32.c b/arch/tile/kernel/tile-desc_32.c new file mode 100644 index 00000000..dd7bd1d8 --- /dev/null +++ b/arch/tile/kernel/tile-desc_32.c @@ -0,0 +1,2605 @@ +/* TILEPro opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * + * + * + */ + +/* This define is BFD_RELOC_##x for real bfd, or -1 for everyone else. */ +#define BFD_RELOC(x) -1 + +/* Special registers. */ +#define TREG_LR 55 +#define TREG_SN 56 +#define TREG_ZERO 63 + +#include <linux/stddef.h> +#include <asm/tile-desc.h> + +const struct tilepro_opcode tilepro_opcodes[395] = +{ + { "bpt", TILEPRO_OPC_BPT, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "info", TILEPRO_OPC_INFO, 0xf, 1, TREG_ZERO, 1, + { { 0 }, { 1 }, { 2 }, { 3 }, { 0, } }, + }, + { "infol", TILEPRO_OPC_INFOL, 0x3, 1, TREG_ZERO, 1, + { { 4 }, { 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "j", TILEPRO_OPC_J, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 6 }, { 0, }, { 0, }, { 0, } }, + }, + { "jal", TILEPRO_OPC_JAL, 0x2, 1, TREG_LR, 1, + { { 0, }, { 6 }, { 0, }, { 0, }, { 0, } }, + }, + { "move", TILEPRO_OPC_MOVE, 0xf, 2, TREG_ZERO, 1, + { { 7, 8 }, { 9, 10 }, { 11, 12 }, { 13, 14 }, { 0, } }, + }, + { "move.sn", TILEPRO_OPC_MOVE_SN, 0x3, 2, TREG_SN, 1, + { { 7, 8 }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "movei", TILEPRO_OPC_MOVEI, 0xf, 2, TREG_ZERO, 1, + { { 7, 0 }, { 9, 1 }, { 11, 2 }, { 13, 3 }, { 0, } }, + }, + { "movei.sn", TILEPRO_OPC_MOVEI_SN, 0x3, 2, TREG_SN, 1, + { { 7, 0 }, { 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "moveli", TILEPRO_OPC_MOVELI, 0x3, 2, TREG_ZERO, 1, + { { 7, 4 }, { 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "moveli.sn", TILEPRO_OPC_MOVELI_SN, 0x3, 2, TREG_SN, 1, + { { 7, 4 }, { 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "movelis", TILEPRO_OPC_MOVELIS, 0x3, 2, TREG_SN, 1, + { { 7, 4 }, { 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch", TILEPRO_OPC_PREFETCH, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 15 } }, + }, + { "raise", TILEPRO_OPC_RAISE, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "add", TILEPRO_OPC_ADD, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "add.sn", TILEPRO_OPC_ADD_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addb", TILEPRO_OPC_ADDB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addb.sn", TILEPRO_OPC_ADDB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addbs_u", TILEPRO_OPC_ADDBS_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addbs_u.sn", TILEPRO_OPC_ADDBS_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addh", TILEPRO_OPC_ADDH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addh.sn", TILEPRO_OPC_ADDH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addhs", TILEPRO_OPC_ADDHS, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addhs.sn", TILEPRO_OPC_ADDHS_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addi", TILEPRO_OPC_ADDI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, + }, + { "addi.sn", TILEPRO_OPC_ADDI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "addib", TILEPRO_OPC_ADDIB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "addib.sn", TILEPRO_OPC_ADDIB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "addih", TILEPRO_OPC_ADDIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "addih.sn", TILEPRO_OPC_ADDIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "addli", TILEPRO_OPC_ADDLI, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "addli.sn", TILEPRO_OPC_ADDLI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "addlis", TILEPRO_OPC_ADDLIS, 0x3, 3, TREG_SN, 1, + { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "adds", TILEPRO_OPC_ADDS, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "adds.sn", TILEPRO_OPC_ADDS_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "adiffb_u", TILEPRO_OPC_ADIFFB_U, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "adiffb_u.sn", TILEPRO_OPC_ADIFFB_U_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "adiffh", TILEPRO_OPC_ADIFFH, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "adiffh.sn", TILEPRO_OPC_ADIFFH_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "and", TILEPRO_OPC_AND, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "and.sn", TILEPRO_OPC_AND_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "andi", TILEPRO_OPC_ANDI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, + }, + { "andi.sn", TILEPRO_OPC_ANDI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "auli", TILEPRO_OPC_AULI, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "avgb_u", TILEPRO_OPC_AVGB_U, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "avgb_u.sn", TILEPRO_OPC_AVGB_U_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "avgh", TILEPRO_OPC_AVGH, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "avgh.sn", TILEPRO_OPC_AVGH_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "bbns", TILEPRO_OPC_BBNS, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbns.sn", TILEPRO_OPC_BBNS_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbnst", TILEPRO_OPC_BBNST, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbnst.sn", TILEPRO_OPC_BBNST_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbs", TILEPRO_OPC_BBS, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbs.sn", TILEPRO_OPC_BBS_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbst", TILEPRO_OPC_BBST, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbst.sn", TILEPRO_OPC_BBST_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgez", TILEPRO_OPC_BGEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgez.sn", TILEPRO_OPC_BGEZ_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgezt", TILEPRO_OPC_BGEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgezt.sn", TILEPRO_OPC_BGEZT_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgz", TILEPRO_OPC_BGZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgz.sn", TILEPRO_OPC_BGZ_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgzt", TILEPRO_OPC_BGZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgzt.sn", TILEPRO_OPC_BGZT_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bitx", TILEPRO_OPC_BITX, 0x5, 2, TREG_ZERO, 1, + { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, + }, + { "bitx.sn", TILEPRO_OPC_BITX_SN, 0x1, 2, TREG_SN, 1, + { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "blez", TILEPRO_OPC_BLEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blez.sn", TILEPRO_OPC_BLEZ_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blezt", TILEPRO_OPC_BLEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blezt.sn", TILEPRO_OPC_BLEZT_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blz", TILEPRO_OPC_BLZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blz.sn", TILEPRO_OPC_BLZ_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blzt", TILEPRO_OPC_BLZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blzt.sn", TILEPRO_OPC_BLZT_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnz", TILEPRO_OPC_BNZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnz.sn", TILEPRO_OPC_BNZ_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnzt", TILEPRO_OPC_BNZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnzt.sn", TILEPRO_OPC_BNZT_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bytex", TILEPRO_OPC_BYTEX, 0x5, 2, TREG_ZERO, 1, + { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, + }, + { "bytex.sn", TILEPRO_OPC_BYTEX_SN, 0x1, 2, TREG_SN, 1, + { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "bz", TILEPRO_OPC_BZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bz.sn", TILEPRO_OPC_BZ_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bzt", TILEPRO_OPC_BZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bzt.sn", TILEPRO_OPC_BZT_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "clz", TILEPRO_OPC_CLZ, 0x5, 2, TREG_ZERO, 1, + { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, + }, + { "clz.sn", TILEPRO_OPC_CLZ_SN, 0x1, 2, TREG_SN, 1, + { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_32", TILEPRO_OPC_CRC32_32, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_32.sn", TILEPRO_OPC_CRC32_32_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_8", TILEPRO_OPC_CRC32_8, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_8.sn", TILEPRO_OPC_CRC32_8_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "ctz", TILEPRO_OPC_CTZ, 0x5, 2, TREG_ZERO, 1, + { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, + }, + { "ctz.sn", TILEPRO_OPC_CTZ_SN, 0x1, 2, TREG_SN, 1, + { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "drain", TILEPRO_OPC_DRAIN, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "dtlbpr", TILEPRO_OPC_DTLBPR, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "dword_align", TILEPRO_OPC_DWORD_ALIGN, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "dword_align.sn", TILEPRO_OPC_DWORD_ALIGN_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "finv", TILEPRO_OPC_FINV, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "flush", TILEPRO_OPC_FLUSH, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "fnop", TILEPRO_OPC_FNOP, 0xf, 0, TREG_ZERO, 1, + { { }, { }, { }, { }, { 0, } }, + }, + { "icoh", TILEPRO_OPC_ICOH, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "ill", TILEPRO_OPC_ILL, 0xa, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { }, { 0, } }, + }, + { "inthb", TILEPRO_OPC_INTHB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "inthb.sn", TILEPRO_OPC_INTHB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "inthh", TILEPRO_OPC_INTHH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "inthh.sn", TILEPRO_OPC_INTHH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "intlb", TILEPRO_OPC_INTLB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "intlb.sn", TILEPRO_OPC_INTLB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "intlh", TILEPRO_OPC_INTLH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "intlh.sn", TILEPRO_OPC_INTLH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "inv", TILEPRO_OPC_INV, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "iret", TILEPRO_OPC_IRET, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "jalb", TILEPRO_OPC_JALB, 0x2, 1, TREG_LR, 1, + { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } }, + }, + { "jalf", TILEPRO_OPC_JALF, 0x2, 1, TREG_LR, 1, + { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } }, + }, + { "jalr", TILEPRO_OPC_JALR, 0x2, 1, TREG_LR, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "jalrp", TILEPRO_OPC_JALRP, 0x2, 1, TREG_LR, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "jb", TILEPRO_OPC_JB, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } }, + }, + { "jf", TILEPRO_OPC_JF, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } }, + }, + { "jr", TILEPRO_OPC_JR, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "jrp", TILEPRO_OPC_JRP, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lb", TILEPRO_OPC_LB, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, + }, + { "lb.sn", TILEPRO_OPC_LB_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lb_u", TILEPRO_OPC_LB_U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, + }, + { "lb_u.sn", TILEPRO_OPC_LB_U_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lbadd", TILEPRO_OPC_LBADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lbadd.sn", TILEPRO_OPC_LBADD_SN, 0x2, 3, TREG_SN, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lbadd_u", TILEPRO_OPC_LBADD_U, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lbadd_u.sn", TILEPRO_OPC_LBADD_U_SN, 0x2, 3, TREG_SN, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lh", TILEPRO_OPC_LH, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, + }, + { "lh.sn", TILEPRO_OPC_LH_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lh_u", TILEPRO_OPC_LH_U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, + }, + { "lh_u.sn", TILEPRO_OPC_LH_U_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lhadd", TILEPRO_OPC_LHADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lhadd.sn", TILEPRO_OPC_LHADD_SN, 0x2, 3, TREG_SN, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lhadd_u", TILEPRO_OPC_LHADD_U, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lhadd_u.sn", TILEPRO_OPC_LHADD_U_SN, 0x2, 3, TREG_SN, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lnk", TILEPRO_OPC_LNK, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "lnk.sn", TILEPRO_OPC_LNK_SN, 0x2, 1, TREG_SN, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "lw", TILEPRO_OPC_LW, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, + }, + { "lw.sn", TILEPRO_OPC_LW_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lw_na", TILEPRO_OPC_LW_NA, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lw_na.sn", TILEPRO_OPC_LW_NA_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lwadd", TILEPRO_OPC_LWADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lwadd.sn", TILEPRO_OPC_LWADD_SN, 0x2, 3, TREG_SN, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lwadd_na", TILEPRO_OPC_LWADD_NA, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lwadd_na.sn", TILEPRO_OPC_LWADD_NA_SN, 0x2, 3, TREG_SN, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxb_u", TILEPRO_OPC_MAXB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxb_u.sn", TILEPRO_OPC_MAXB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxh", TILEPRO_OPC_MAXH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxh.sn", TILEPRO_OPC_MAXH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxib_u", TILEPRO_OPC_MAXIB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxib_u.sn", TILEPRO_OPC_MAXIB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxih", TILEPRO_OPC_MAXIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxih.sn", TILEPRO_OPC_MAXIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "mf", TILEPRO_OPC_MF, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "mfspr", TILEPRO_OPC_MFSPR, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 25 }, { 0, }, { 0, }, { 0, } }, + }, + { "minb_u", TILEPRO_OPC_MINB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "minb_u.sn", TILEPRO_OPC_MINB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "minh", TILEPRO_OPC_MINH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "minh.sn", TILEPRO_OPC_MINH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "minib_u", TILEPRO_OPC_MINIB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "minib_u.sn", TILEPRO_OPC_MINIB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "minih", TILEPRO_OPC_MINIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "minih.sn", TILEPRO_OPC_MINIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "mm", TILEPRO_OPC_MM, 0x3, 5, TREG_ZERO, 1, + { { 7, 8, 16, 26, 27 }, { 9, 10, 17, 28, 29 }, { 0, }, { 0, }, { 0, } }, + }, + { "mnz", TILEPRO_OPC_MNZ, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "mnz.sn", TILEPRO_OPC_MNZ_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mnzb", TILEPRO_OPC_MNZB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mnzb.sn", TILEPRO_OPC_MNZB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mnzh", TILEPRO_OPC_MNZH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mnzh.sn", TILEPRO_OPC_MNZH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mtspr", TILEPRO_OPC_MTSPR, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 30, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhh_ss", TILEPRO_OPC_MULHH_SS, 0x5, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulhh_ss.sn", TILEPRO_OPC_MULHH_SS_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhh_su", TILEPRO_OPC_MULHH_SU, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhh_su.sn", TILEPRO_OPC_MULHH_SU_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhh_uu", TILEPRO_OPC_MULHH_UU, 0x5, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulhh_uu.sn", TILEPRO_OPC_MULHH_UU_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhha_ss", TILEPRO_OPC_MULHHA_SS, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulhha_ss.sn", TILEPRO_OPC_MULHHA_SS_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhha_su", TILEPRO_OPC_MULHHA_SU, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhha_su.sn", TILEPRO_OPC_MULHHA_SU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhha_uu", TILEPRO_OPC_MULHHA_UU, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulhha_uu.sn", TILEPRO_OPC_MULHHA_UU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhhsa_uu", TILEPRO_OPC_MULHHSA_UU, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhhsa_uu.sn", TILEPRO_OPC_MULHHSA_UU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_ss", TILEPRO_OPC_MULHL_SS, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_ss.sn", TILEPRO_OPC_MULHL_SS_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_su", TILEPRO_OPC_MULHL_SU, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_su.sn", TILEPRO_OPC_MULHL_SU_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_us", TILEPRO_OPC_MULHL_US, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_us.sn", TILEPRO_OPC_MULHL_US_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_uu", TILEPRO_OPC_MULHL_UU, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_uu.sn", TILEPRO_OPC_MULHL_UU_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_ss", TILEPRO_OPC_MULHLA_SS, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_ss.sn", TILEPRO_OPC_MULHLA_SS_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_su", TILEPRO_OPC_MULHLA_SU, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_su.sn", TILEPRO_OPC_MULHLA_SU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_us", TILEPRO_OPC_MULHLA_US, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_us.sn", TILEPRO_OPC_MULHLA_US_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_uu", TILEPRO_OPC_MULHLA_UU, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_uu.sn", TILEPRO_OPC_MULHLA_UU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhlsa_uu", TILEPRO_OPC_MULHLSA_UU, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulhlsa_uu.sn", TILEPRO_OPC_MULHLSA_UU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulll_ss", TILEPRO_OPC_MULLL_SS, 0x5, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulll_ss.sn", TILEPRO_OPC_MULLL_SS_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulll_su", TILEPRO_OPC_MULLL_SU, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulll_su.sn", TILEPRO_OPC_MULLL_SU_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulll_uu", TILEPRO_OPC_MULLL_UU, 0x5, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulll_uu.sn", TILEPRO_OPC_MULLL_UU_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mullla_ss", TILEPRO_OPC_MULLLA_SS, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mullla_ss.sn", TILEPRO_OPC_MULLLA_SS_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mullla_su", TILEPRO_OPC_MULLLA_SU, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mullla_su.sn", TILEPRO_OPC_MULLLA_SU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mullla_uu", TILEPRO_OPC_MULLLA_UU, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mullla_uu.sn", TILEPRO_OPC_MULLLA_UU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulllsa_uu", TILEPRO_OPC_MULLLSA_UU, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulllsa_uu.sn", TILEPRO_OPC_MULLLSA_UU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mvnz", TILEPRO_OPC_MVNZ, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mvnz.sn", TILEPRO_OPC_MVNZ_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mvz", TILEPRO_OPC_MVZ, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mvz.sn", TILEPRO_OPC_MVZ_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mz", TILEPRO_OPC_MZ, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "mz.sn", TILEPRO_OPC_MZ_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mzb", TILEPRO_OPC_MZB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mzb.sn", TILEPRO_OPC_MZB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mzh", TILEPRO_OPC_MZH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mzh.sn", TILEPRO_OPC_MZH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "nap", TILEPRO_OPC_NAP, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "nop", TILEPRO_OPC_NOP, 0xf, 0, TREG_ZERO, 1, + { { }, { }, { }, { }, { 0, } }, + }, + { "nor", TILEPRO_OPC_NOR, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "nor.sn", TILEPRO_OPC_NOR_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "or", TILEPRO_OPC_OR, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "or.sn", TILEPRO_OPC_OR_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "ori", TILEPRO_OPC_ORI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, + }, + { "ori.sn", TILEPRO_OPC_ORI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "packbs_u", TILEPRO_OPC_PACKBS_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packbs_u.sn", TILEPRO_OPC_PACKBS_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packhb", TILEPRO_OPC_PACKHB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packhb.sn", TILEPRO_OPC_PACKHB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packhs", TILEPRO_OPC_PACKHS, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packhs.sn", TILEPRO_OPC_PACKHS_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packlb", TILEPRO_OPC_PACKLB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packlb.sn", TILEPRO_OPC_PACKLB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "pcnt", TILEPRO_OPC_PCNT, 0x5, 2, TREG_ZERO, 1, + { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, + }, + { "pcnt.sn", TILEPRO_OPC_PCNT_SN, 0x1, 2, TREG_SN, 1, + { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "rl", TILEPRO_OPC_RL, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "rl.sn", TILEPRO_OPC_RL_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "rli", TILEPRO_OPC_RLI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } }, + }, + { "rli.sn", TILEPRO_OPC_RLI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "s1a", TILEPRO_OPC_S1A, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "s1a.sn", TILEPRO_OPC_S1A_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "s2a", TILEPRO_OPC_S2A, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "s2a.sn", TILEPRO_OPC_S2A_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "s3a", TILEPRO_OPC_S3A, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "s3a.sn", TILEPRO_OPC_S3A_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sadab_u", TILEPRO_OPC_SADAB_U, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadab_u.sn", TILEPRO_OPC_SADAB_U_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadah", TILEPRO_OPC_SADAH, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadah.sn", TILEPRO_OPC_SADAH_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadah_u", TILEPRO_OPC_SADAH_U, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadah_u.sn", TILEPRO_OPC_SADAH_U_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadb_u", TILEPRO_OPC_SADB_U, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadb_u.sn", TILEPRO_OPC_SADB_U_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadh", TILEPRO_OPC_SADH, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadh.sn", TILEPRO_OPC_SADH_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadh_u", TILEPRO_OPC_SADH_U, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadh_u.sn", TILEPRO_OPC_SADH_U_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sb", TILEPRO_OPC_SB, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 10, 17 }, { 0, }, { 0, }, { 15, 36 } }, + }, + { "sbadd", TILEPRO_OPC_SBADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 24, 17, 37 }, { 0, }, { 0, }, { 0, } }, + }, + { "seq", TILEPRO_OPC_SEQ, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "seq.sn", TILEPRO_OPC_SEQ_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqb", TILEPRO_OPC_SEQB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqb.sn", TILEPRO_OPC_SEQB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqh", TILEPRO_OPC_SEQH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqh.sn", TILEPRO_OPC_SEQH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqi", TILEPRO_OPC_SEQI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, + }, + { "seqi.sn", TILEPRO_OPC_SEQI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqib", TILEPRO_OPC_SEQIB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqib.sn", TILEPRO_OPC_SEQIB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqih", TILEPRO_OPC_SEQIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqih.sn", TILEPRO_OPC_SEQIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sh", TILEPRO_OPC_SH, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 10, 17 }, { 0, }, { 0, }, { 15, 36 } }, + }, + { "shadd", TILEPRO_OPC_SHADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 24, 17, 37 }, { 0, }, { 0, }, { 0, } }, + }, + { "shl", TILEPRO_OPC_SHL, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "shl.sn", TILEPRO_OPC_SHL_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlb", TILEPRO_OPC_SHLB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlb.sn", TILEPRO_OPC_SHLB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlh", TILEPRO_OPC_SHLH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlh.sn", TILEPRO_OPC_SHLH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shli", TILEPRO_OPC_SHLI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } }, + }, + { "shli.sn", TILEPRO_OPC_SHLI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlib", TILEPRO_OPC_SHLIB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlib.sn", TILEPRO_OPC_SHLIB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlih", TILEPRO_OPC_SHLIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlih.sn", TILEPRO_OPC_SHLIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shr", TILEPRO_OPC_SHR, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "shr.sn", TILEPRO_OPC_SHR_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrb", TILEPRO_OPC_SHRB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrb.sn", TILEPRO_OPC_SHRB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrh", TILEPRO_OPC_SHRH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrh.sn", TILEPRO_OPC_SHRH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shri", TILEPRO_OPC_SHRI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } }, + }, + { "shri.sn", TILEPRO_OPC_SHRI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrib", TILEPRO_OPC_SHRIB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrib.sn", TILEPRO_OPC_SHRIB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrih", TILEPRO_OPC_SHRIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrih.sn", TILEPRO_OPC_SHRIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "slt", TILEPRO_OPC_SLT, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "slt.sn", TILEPRO_OPC_SLT_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slt_u", TILEPRO_OPC_SLT_U, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "slt_u.sn", TILEPRO_OPC_SLT_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltb", TILEPRO_OPC_SLTB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltb.sn", TILEPRO_OPC_SLTB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltb_u", TILEPRO_OPC_SLTB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltb_u.sn", TILEPRO_OPC_SLTB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slte", TILEPRO_OPC_SLTE, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "slte.sn", TILEPRO_OPC_SLTE_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slte_u", TILEPRO_OPC_SLTE_U, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "slte_u.sn", TILEPRO_OPC_SLTE_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteb", TILEPRO_OPC_SLTEB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteb.sn", TILEPRO_OPC_SLTEB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteb_u", TILEPRO_OPC_SLTEB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteb_u.sn", TILEPRO_OPC_SLTEB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteh", TILEPRO_OPC_SLTEH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteh.sn", TILEPRO_OPC_SLTEH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteh_u", TILEPRO_OPC_SLTEH_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteh_u.sn", TILEPRO_OPC_SLTEH_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slth", TILEPRO_OPC_SLTH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slth.sn", TILEPRO_OPC_SLTH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slth_u", TILEPRO_OPC_SLTH_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slth_u.sn", TILEPRO_OPC_SLTH_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slti", TILEPRO_OPC_SLTI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, + }, + { "slti.sn", TILEPRO_OPC_SLTI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "slti_u", TILEPRO_OPC_SLTI_U, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, + }, + { "slti_u.sn", TILEPRO_OPC_SLTI_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltib", TILEPRO_OPC_SLTIB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltib.sn", TILEPRO_OPC_SLTIB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltib_u", TILEPRO_OPC_SLTIB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltib_u.sn", TILEPRO_OPC_SLTIB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltih", TILEPRO_OPC_SLTIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltih.sn", TILEPRO_OPC_SLTIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltih_u", TILEPRO_OPC_SLTIH_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltih_u.sn", TILEPRO_OPC_SLTIH_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sne", TILEPRO_OPC_SNE, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "sne.sn", TILEPRO_OPC_SNE_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sneb", TILEPRO_OPC_SNEB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sneb.sn", TILEPRO_OPC_SNEB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sneh", TILEPRO_OPC_SNEH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sneh.sn", TILEPRO_OPC_SNEH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sra", TILEPRO_OPC_SRA, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "sra.sn", TILEPRO_OPC_SRA_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "srab", TILEPRO_OPC_SRAB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "srab.sn", TILEPRO_OPC_SRAB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "srah", TILEPRO_OPC_SRAH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "srah.sn", TILEPRO_OPC_SRAH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "srai", TILEPRO_OPC_SRAI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } }, + }, + { "srai.sn", TILEPRO_OPC_SRAI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "sraib", TILEPRO_OPC_SRAIB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "sraib.sn", TILEPRO_OPC_SRAIB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "sraih", TILEPRO_OPC_SRAIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "sraih.sn", TILEPRO_OPC_SRAIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "sub", TILEPRO_OPC_SUB, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "sub.sn", TILEPRO_OPC_SUB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subb", TILEPRO_OPC_SUBB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subb.sn", TILEPRO_OPC_SUBB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subbs_u", TILEPRO_OPC_SUBBS_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subbs_u.sn", TILEPRO_OPC_SUBBS_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subh", TILEPRO_OPC_SUBH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subh.sn", TILEPRO_OPC_SUBH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subhs", TILEPRO_OPC_SUBHS, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subhs.sn", TILEPRO_OPC_SUBHS_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subs", TILEPRO_OPC_SUBS, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subs.sn", TILEPRO_OPC_SUBS_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sw", TILEPRO_OPC_SW, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 10, 17 }, { 0, }, { 0, }, { 15, 36 } }, + }, + { "swadd", TILEPRO_OPC_SWADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 24, 17, 37 }, { 0, }, { 0, }, { 0, } }, + }, + { "swint0", TILEPRO_OPC_SWINT0, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint1", TILEPRO_OPC_SWINT1, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint2", TILEPRO_OPC_SWINT2, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint3", TILEPRO_OPC_SWINT3, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "tblidxb0", TILEPRO_OPC_TBLIDXB0, 0x5, 2, TREG_ZERO, 1, + { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } }, + }, + { "tblidxb0.sn", TILEPRO_OPC_TBLIDXB0_SN, 0x1, 2, TREG_SN, 1, + { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "tblidxb1", TILEPRO_OPC_TBLIDXB1, 0x5, 2, TREG_ZERO, 1, + { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } }, + }, + { "tblidxb1.sn", TILEPRO_OPC_TBLIDXB1_SN, 0x1, 2, TREG_SN, 1, + { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "tblidxb2", TILEPRO_OPC_TBLIDXB2, 0x5, 2, TREG_ZERO, 1, + { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } }, + }, + { "tblidxb2.sn", TILEPRO_OPC_TBLIDXB2_SN, 0x1, 2, TREG_SN, 1, + { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "tblidxb3", TILEPRO_OPC_TBLIDXB3, 0x5, 2, TREG_ZERO, 1, + { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } }, + }, + { "tblidxb3.sn", TILEPRO_OPC_TBLIDXB3_SN, 0x1, 2, TREG_SN, 1, + { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "tns", TILEPRO_OPC_TNS, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "tns.sn", TILEPRO_OPC_TNS_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "wh64", TILEPRO_OPC_WH64, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "xor", TILEPRO_OPC_XOR, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "xor.sn", TILEPRO_OPC_XOR_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "xori", TILEPRO_OPC_XORI, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "xori.sn", TILEPRO_OPC_XORI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { NULL, TILEPRO_OPC_NONE, 0, 0, TREG_ZERO, 0, { { 0, } }, + } +}; +#define BITFIELD(start, size) ((start) | (((1 << (size)) - 1) << 6)) +#define CHILD(array_index) (TILEPRO_OPC_NONE + (array_index)) + +static const unsigned short decode_X0_fsm[1153] = +{ + BITFIELD(22, 9) /* index 0 */, + CHILD(513), CHILD(530), CHILD(547), CHILD(564), CHILD(596), CHILD(613), + CHILD(630), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(663), CHILD(680), CHILD(697), + CHILD(714), CHILD(746), CHILD(763), CHILD(780), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(873), CHILD(878), CHILD(883), CHILD(903), CHILD(908), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(913), + CHILD(918), CHILD(923), CHILD(943), CHILD(948), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(953), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(988), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, CHILD(993), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(1076), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(18, 4) /* index 513 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB, TILEPRO_OPC_ADDH, TILEPRO_OPC_ADD, + TILEPRO_OPC_ADIFFB_U, TILEPRO_OPC_ADIFFH, TILEPRO_OPC_AND, + TILEPRO_OPC_AVGB_U, TILEPRO_OPC_AVGH, TILEPRO_OPC_CRC32_32, + TILEPRO_OPC_CRC32_8, TILEPRO_OPC_INTHB, TILEPRO_OPC_INTHH, + TILEPRO_OPC_INTLB, TILEPRO_OPC_INTLH, TILEPRO_OPC_MAXB_U, + BITFIELD(18, 4) /* index 530 */, + TILEPRO_OPC_MAXH, TILEPRO_OPC_MINB_U, TILEPRO_OPC_MINH, TILEPRO_OPC_MNZB, + TILEPRO_OPC_MNZH, TILEPRO_OPC_MNZ, TILEPRO_OPC_MULHHA_SS, + TILEPRO_OPC_MULHHA_SU, TILEPRO_OPC_MULHHA_UU, TILEPRO_OPC_MULHHSA_UU, + TILEPRO_OPC_MULHH_SS, TILEPRO_OPC_MULHH_SU, TILEPRO_OPC_MULHH_UU, + TILEPRO_OPC_MULHLA_SS, TILEPRO_OPC_MULHLA_SU, TILEPRO_OPC_MULHLA_US, + BITFIELD(18, 4) /* index 547 */, + TILEPRO_OPC_MULHLA_UU, TILEPRO_OPC_MULHLSA_UU, TILEPRO_OPC_MULHL_SS, + TILEPRO_OPC_MULHL_SU, TILEPRO_OPC_MULHL_US, TILEPRO_OPC_MULHL_UU, + TILEPRO_OPC_MULLLA_SS, TILEPRO_OPC_MULLLA_SU, TILEPRO_OPC_MULLLA_UU, + TILEPRO_OPC_MULLLSA_UU, TILEPRO_OPC_MULLL_SS, TILEPRO_OPC_MULLL_SU, + TILEPRO_OPC_MULLL_UU, TILEPRO_OPC_MVNZ, TILEPRO_OPC_MVZ, TILEPRO_OPC_MZB, + BITFIELD(18, 4) /* index 564 */, + TILEPRO_OPC_MZH, TILEPRO_OPC_MZ, TILEPRO_OPC_NOR, CHILD(581), + TILEPRO_OPC_PACKHB, TILEPRO_OPC_PACKLB, TILEPRO_OPC_RL, TILEPRO_OPC_S1A, + TILEPRO_OPC_S2A, TILEPRO_OPC_S3A, TILEPRO_OPC_SADAB_U, TILEPRO_OPC_SADAH, + TILEPRO_OPC_SADAH_U, TILEPRO_OPC_SADB_U, TILEPRO_OPC_SADH, + TILEPRO_OPC_SADH_U, + BITFIELD(12, 2) /* index 581 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(586), + BITFIELD(14, 2) /* index 586 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(591), + BITFIELD(16, 2) /* index 591 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE, + BITFIELD(18, 4) /* index 596 */, + TILEPRO_OPC_SEQB, TILEPRO_OPC_SEQH, TILEPRO_OPC_SEQ, TILEPRO_OPC_SHLB, + TILEPRO_OPC_SHLH, TILEPRO_OPC_SHL, TILEPRO_OPC_SHRB, TILEPRO_OPC_SHRH, + TILEPRO_OPC_SHR, TILEPRO_OPC_SLTB, TILEPRO_OPC_SLTB_U, TILEPRO_OPC_SLTEB, + TILEPRO_OPC_SLTEB_U, TILEPRO_OPC_SLTEH, TILEPRO_OPC_SLTEH_U, + TILEPRO_OPC_SLTE, + BITFIELD(18, 4) /* index 613 */, + TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLTH, TILEPRO_OPC_SLTH_U, TILEPRO_OPC_SLT, + TILEPRO_OPC_SLT_U, TILEPRO_OPC_SNEB, TILEPRO_OPC_SNEH, TILEPRO_OPC_SNE, + TILEPRO_OPC_SRAB, TILEPRO_OPC_SRAH, TILEPRO_OPC_SRA, TILEPRO_OPC_SUBB, + TILEPRO_OPC_SUBH, TILEPRO_OPC_SUB, TILEPRO_OPC_XOR, TILEPRO_OPC_DWORD_ALIGN, + BITFIELD(18, 3) /* index 630 */, + CHILD(639), CHILD(642), CHILD(645), CHILD(648), CHILD(651), CHILD(654), + CHILD(657), CHILD(660), + BITFIELD(21, 1) /* index 639 */, + TILEPRO_OPC_ADDS, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 642 */, + TILEPRO_OPC_SUBS, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 645 */, + TILEPRO_OPC_ADDBS_U, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 648 */, + TILEPRO_OPC_ADDHS, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 651 */, + TILEPRO_OPC_SUBBS_U, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 654 */, + TILEPRO_OPC_SUBHS, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 657 */, + TILEPRO_OPC_PACKHS, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 660 */, + TILEPRO_OPC_PACKBS_U, TILEPRO_OPC_NONE, + BITFIELD(18, 4) /* index 663 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB_SN, TILEPRO_OPC_ADDH_SN, + TILEPRO_OPC_ADD_SN, TILEPRO_OPC_ADIFFB_U_SN, TILEPRO_OPC_ADIFFH_SN, + TILEPRO_OPC_AND_SN, TILEPRO_OPC_AVGB_U_SN, TILEPRO_OPC_AVGH_SN, + TILEPRO_OPC_CRC32_32_SN, TILEPRO_OPC_CRC32_8_SN, TILEPRO_OPC_INTHB_SN, + TILEPRO_OPC_INTHH_SN, TILEPRO_OPC_INTLB_SN, TILEPRO_OPC_INTLH_SN, + TILEPRO_OPC_MAXB_U_SN, + BITFIELD(18, 4) /* index 680 */, + TILEPRO_OPC_MAXH_SN, TILEPRO_OPC_MINB_U_SN, TILEPRO_OPC_MINH_SN, + TILEPRO_OPC_MNZB_SN, TILEPRO_OPC_MNZH_SN, TILEPRO_OPC_MNZ_SN, + TILEPRO_OPC_MULHHA_SS_SN, TILEPRO_OPC_MULHHA_SU_SN, + TILEPRO_OPC_MULHHA_UU_SN, TILEPRO_OPC_MULHHSA_UU_SN, + TILEPRO_OPC_MULHH_SS_SN, TILEPRO_OPC_MULHH_SU_SN, TILEPRO_OPC_MULHH_UU_SN, + TILEPRO_OPC_MULHLA_SS_SN, TILEPRO_OPC_MULHLA_SU_SN, + TILEPRO_OPC_MULHLA_US_SN, + BITFIELD(18, 4) /* index 697 */, + TILEPRO_OPC_MULHLA_UU_SN, TILEPRO_OPC_MULHLSA_UU_SN, + TILEPRO_OPC_MULHL_SS_SN, TILEPRO_OPC_MULHL_SU_SN, TILEPRO_OPC_MULHL_US_SN, + TILEPRO_OPC_MULHL_UU_SN, TILEPRO_OPC_MULLLA_SS_SN, TILEPRO_OPC_MULLLA_SU_SN, + TILEPRO_OPC_MULLLA_UU_SN, TILEPRO_OPC_MULLLSA_UU_SN, + TILEPRO_OPC_MULLL_SS_SN, TILEPRO_OPC_MULLL_SU_SN, TILEPRO_OPC_MULLL_UU_SN, + TILEPRO_OPC_MVNZ_SN, TILEPRO_OPC_MVZ_SN, TILEPRO_OPC_MZB_SN, + BITFIELD(18, 4) /* index 714 */, + TILEPRO_OPC_MZH_SN, TILEPRO_OPC_MZ_SN, TILEPRO_OPC_NOR_SN, CHILD(731), + TILEPRO_OPC_PACKHB_SN, TILEPRO_OPC_PACKLB_SN, TILEPRO_OPC_RL_SN, + TILEPRO_OPC_S1A_SN, TILEPRO_OPC_S2A_SN, TILEPRO_OPC_S3A_SN, + TILEPRO_OPC_SADAB_U_SN, TILEPRO_OPC_SADAH_SN, TILEPRO_OPC_SADAH_U_SN, + TILEPRO_OPC_SADB_U_SN, TILEPRO_OPC_SADH_SN, TILEPRO_OPC_SADH_U_SN, + BITFIELD(12, 2) /* index 731 */, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(736), + BITFIELD(14, 2) /* index 736 */, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(741), + BITFIELD(16, 2) /* index 741 */, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, + TILEPRO_OPC_MOVE_SN, + BITFIELD(18, 4) /* index 746 */, + TILEPRO_OPC_SEQB_SN, TILEPRO_OPC_SEQH_SN, TILEPRO_OPC_SEQ_SN, + TILEPRO_OPC_SHLB_SN, TILEPRO_OPC_SHLH_SN, TILEPRO_OPC_SHL_SN, + TILEPRO_OPC_SHRB_SN, TILEPRO_OPC_SHRH_SN, TILEPRO_OPC_SHR_SN, + TILEPRO_OPC_SLTB_SN, TILEPRO_OPC_SLTB_U_SN, TILEPRO_OPC_SLTEB_SN, + TILEPRO_OPC_SLTEB_U_SN, TILEPRO_OPC_SLTEH_SN, TILEPRO_OPC_SLTEH_U_SN, + TILEPRO_OPC_SLTE_SN, + BITFIELD(18, 4) /* index 763 */, + TILEPRO_OPC_SLTE_U_SN, TILEPRO_OPC_SLTH_SN, TILEPRO_OPC_SLTH_U_SN, + TILEPRO_OPC_SLT_SN, TILEPRO_OPC_SLT_U_SN, TILEPRO_OPC_SNEB_SN, + TILEPRO_OPC_SNEH_SN, TILEPRO_OPC_SNE_SN, TILEPRO_OPC_SRAB_SN, + TILEPRO_OPC_SRAH_SN, TILEPRO_OPC_SRA_SN, TILEPRO_OPC_SUBB_SN, + TILEPRO_OPC_SUBH_SN, TILEPRO_OPC_SUB_SN, TILEPRO_OPC_XOR_SN, + TILEPRO_OPC_DWORD_ALIGN_SN, + BITFIELD(18, 3) /* index 780 */, + CHILD(789), CHILD(792), CHILD(795), CHILD(798), CHILD(801), CHILD(804), + CHILD(807), CHILD(810), + BITFIELD(21, 1) /* index 789 */, + TILEPRO_OPC_ADDS_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 792 */, + TILEPRO_OPC_SUBS_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 795 */, + TILEPRO_OPC_ADDBS_U_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 798 */, + TILEPRO_OPC_ADDHS_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 801 */, + TILEPRO_OPC_SUBBS_U_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 804 */, + TILEPRO_OPC_SUBHS_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 807 */, + TILEPRO_OPC_PACKHS_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 810 */, + TILEPRO_OPC_PACKBS_U_SN, TILEPRO_OPC_NONE, + BITFIELD(6, 2) /* index 813 */, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + CHILD(818), + BITFIELD(8, 2) /* index 818 */, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + CHILD(823), + BITFIELD(10, 2) /* index 823 */, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + TILEPRO_OPC_MOVELI_SN, + BITFIELD(6, 2) /* index 828 */, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(833), + BITFIELD(8, 2) /* index 833 */, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(838), + BITFIELD(10, 2) /* index 838 */, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_MOVELI, + BITFIELD(0, 2) /* index 843 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(848), + BITFIELD(2, 2) /* index 848 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(853), + BITFIELD(4, 2) /* index 853 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(858), + BITFIELD(6, 2) /* index 858 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(863), + BITFIELD(8, 2) /* index 863 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(868), + BITFIELD(10, 2) /* index 868 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_INFOL, + BITFIELD(20, 2) /* index 873 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB, TILEPRO_OPC_ADDIH, TILEPRO_OPC_ADDI, + BITFIELD(20, 2) /* index 878 */, + TILEPRO_OPC_MAXIB_U, TILEPRO_OPC_MAXIH, TILEPRO_OPC_MINIB_U, + TILEPRO_OPC_MINIH, + BITFIELD(20, 2) /* index 883 */, + CHILD(888), TILEPRO_OPC_SEQIB, TILEPRO_OPC_SEQIH, TILEPRO_OPC_SEQI, + BITFIELD(6, 2) /* index 888 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(893), + BITFIELD(8, 2) /* index 893 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(898), + BITFIELD(10, 2) /* index 898 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI, + BITFIELD(20, 2) /* index 903 */, + TILEPRO_OPC_SLTIB, TILEPRO_OPC_SLTIB_U, TILEPRO_OPC_SLTIH, + TILEPRO_OPC_SLTIH_U, + BITFIELD(20, 2) /* index 908 */, + TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(20, 2) /* index 913 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB_SN, TILEPRO_OPC_ADDIH_SN, + TILEPRO_OPC_ADDI_SN, + BITFIELD(20, 2) /* index 918 */, + TILEPRO_OPC_MAXIB_U_SN, TILEPRO_OPC_MAXIH_SN, TILEPRO_OPC_MINIB_U_SN, + TILEPRO_OPC_MINIH_SN, + BITFIELD(20, 2) /* index 923 */, + CHILD(928), TILEPRO_OPC_SEQIB_SN, TILEPRO_OPC_SEQIH_SN, TILEPRO_OPC_SEQI_SN, + BITFIELD(6, 2) /* index 928 */, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(933), + BITFIELD(8, 2) /* index 933 */, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(938), + BITFIELD(10, 2) /* index 938 */, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, + TILEPRO_OPC_MOVEI_SN, + BITFIELD(20, 2) /* index 943 */, + TILEPRO_OPC_SLTIB_SN, TILEPRO_OPC_SLTIB_U_SN, TILEPRO_OPC_SLTIH_SN, + TILEPRO_OPC_SLTIH_U_SN, + BITFIELD(20, 2) /* index 948 */, + TILEPRO_OPC_SLTI_SN, TILEPRO_OPC_SLTI_U_SN, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, + BITFIELD(20, 2) /* index 953 */, + TILEPRO_OPC_NONE, CHILD(958), TILEPRO_OPC_XORI, TILEPRO_OPC_NONE, + BITFIELD(0, 2) /* index 958 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(963), + BITFIELD(2, 2) /* index 963 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(968), + BITFIELD(4, 2) /* index 968 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(973), + BITFIELD(6, 2) /* index 973 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(978), + BITFIELD(8, 2) /* index 978 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(983), + BITFIELD(10, 2) /* index 983 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO, + BITFIELD(20, 2) /* index 988 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ANDI_SN, TILEPRO_OPC_XORI_SN, + TILEPRO_OPC_NONE, + BITFIELD(17, 5) /* index 993 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_RLI, TILEPRO_OPC_SHLIB, TILEPRO_OPC_SHLIH, + TILEPRO_OPC_SHLI, TILEPRO_OPC_SHRIB, TILEPRO_OPC_SHRIH, TILEPRO_OPC_SHRI, + TILEPRO_OPC_SRAIB, TILEPRO_OPC_SRAIH, TILEPRO_OPC_SRAI, CHILD(1026), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(12, 4) /* index 1026 */, + TILEPRO_OPC_NONE, CHILD(1043), CHILD(1046), CHILD(1049), CHILD(1052), + CHILD(1055), CHILD(1058), CHILD(1061), CHILD(1064), CHILD(1067), + CHILD(1070), CHILD(1073), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1043 */, + TILEPRO_OPC_BITX, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1046 */, + TILEPRO_OPC_BYTEX, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1049 */, + TILEPRO_OPC_CLZ, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1052 */, + TILEPRO_OPC_CTZ, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1055 */, + TILEPRO_OPC_FNOP, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1058 */, + TILEPRO_OPC_NOP, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1061 */, + TILEPRO_OPC_PCNT, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1064 */, + TILEPRO_OPC_TBLIDXB0, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1067 */, + TILEPRO_OPC_TBLIDXB1, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1070 */, + TILEPRO_OPC_TBLIDXB2, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1073 */, + TILEPRO_OPC_TBLIDXB3, TILEPRO_OPC_NONE, + BITFIELD(17, 5) /* index 1076 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_RLI_SN, TILEPRO_OPC_SHLIB_SN, + TILEPRO_OPC_SHLIH_SN, TILEPRO_OPC_SHLI_SN, TILEPRO_OPC_SHRIB_SN, + TILEPRO_OPC_SHRIH_SN, TILEPRO_OPC_SHRI_SN, TILEPRO_OPC_SRAIB_SN, + TILEPRO_OPC_SRAIH_SN, TILEPRO_OPC_SRAI_SN, CHILD(1109), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(12, 4) /* index 1109 */, + TILEPRO_OPC_NONE, CHILD(1126), CHILD(1129), CHILD(1132), CHILD(1135), + CHILD(1055), CHILD(1058), CHILD(1138), CHILD(1141), CHILD(1144), + CHILD(1147), CHILD(1150), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1126 */, + TILEPRO_OPC_BITX_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1129 */, + TILEPRO_OPC_BYTEX_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1132 */, + TILEPRO_OPC_CLZ_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1135 */, + TILEPRO_OPC_CTZ_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1138 */, + TILEPRO_OPC_PCNT_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1141 */, + TILEPRO_OPC_TBLIDXB0_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1144 */, + TILEPRO_OPC_TBLIDXB1_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1147 */, + TILEPRO_OPC_TBLIDXB2_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1150 */, + TILEPRO_OPC_TBLIDXB3_SN, TILEPRO_OPC_NONE, +}; + +static const unsigned short decode_X1_fsm[1540] = +{ + BITFIELD(54, 9) /* index 0 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(513), CHILD(561), CHILD(594), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(641), + CHILD(689), CHILD(722), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(766), + CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), + CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), + CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), + CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), + CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), + CHILD(766), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), + CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), + CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), + CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), + CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), + CHILD(781), CHILD(781), CHILD(781), CHILD(796), CHILD(796), CHILD(796), + CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), + CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), + CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), + CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), + CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(826), + CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826), + CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826), + CHILD(826), CHILD(826), CHILD(826), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(860), CHILD(899), CHILD(923), CHILD(932), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(941), CHILD(950), CHILD(974), CHILD(983), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, CHILD(992), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(1334), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(49, 5) /* index 513 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB, TILEPRO_OPC_ADDH, TILEPRO_OPC_ADD, + TILEPRO_OPC_AND, TILEPRO_OPC_INTHB, TILEPRO_OPC_INTHH, TILEPRO_OPC_INTLB, + TILEPRO_OPC_INTLH, TILEPRO_OPC_JALRP, TILEPRO_OPC_JALR, TILEPRO_OPC_JRP, + TILEPRO_OPC_JR, TILEPRO_OPC_LNK, TILEPRO_OPC_MAXB_U, TILEPRO_OPC_MAXH, + TILEPRO_OPC_MINB_U, TILEPRO_OPC_MINH, TILEPRO_OPC_MNZB, TILEPRO_OPC_MNZH, + TILEPRO_OPC_MNZ, TILEPRO_OPC_MZB, TILEPRO_OPC_MZH, TILEPRO_OPC_MZ, + TILEPRO_OPC_NOR, CHILD(546), TILEPRO_OPC_PACKHB, TILEPRO_OPC_PACKLB, + TILEPRO_OPC_RL, TILEPRO_OPC_S1A, TILEPRO_OPC_S2A, TILEPRO_OPC_S3A, + BITFIELD(43, 2) /* index 546 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(551), + BITFIELD(45, 2) /* index 551 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(556), + BITFIELD(47, 2) /* index 556 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE, + BITFIELD(49, 5) /* index 561 */, + TILEPRO_OPC_SB, TILEPRO_OPC_SEQB, TILEPRO_OPC_SEQH, TILEPRO_OPC_SEQ, + TILEPRO_OPC_SHLB, TILEPRO_OPC_SHLH, TILEPRO_OPC_SHL, TILEPRO_OPC_SHRB, + TILEPRO_OPC_SHRH, TILEPRO_OPC_SHR, TILEPRO_OPC_SH, TILEPRO_OPC_SLTB, + TILEPRO_OPC_SLTB_U, TILEPRO_OPC_SLTEB, TILEPRO_OPC_SLTEB_U, + TILEPRO_OPC_SLTEH, TILEPRO_OPC_SLTEH_U, TILEPRO_OPC_SLTE, + TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLTH, TILEPRO_OPC_SLTH_U, TILEPRO_OPC_SLT, + TILEPRO_OPC_SLT_U, TILEPRO_OPC_SNEB, TILEPRO_OPC_SNEH, TILEPRO_OPC_SNE, + TILEPRO_OPC_SRAB, TILEPRO_OPC_SRAH, TILEPRO_OPC_SRA, TILEPRO_OPC_SUBB, + TILEPRO_OPC_SUBH, TILEPRO_OPC_SUB, + BITFIELD(49, 4) /* index 594 */, + CHILD(611), CHILD(614), CHILD(617), CHILD(620), CHILD(623), CHILD(626), + CHILD(629), CHILD(632), CHILD(635), CHILD(638), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 611 */, + TILEPRO_OPC_SW, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 614 */, + TILEPRO_OPC_XOR, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 617 */, + TILEPRO_OPC_ADDS, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 620 */, + TILEPRO_OPC_SUBS, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 623 */, + TILEPRO_OPC_ADDBS_U, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 626 */, + TILEPRO_OPC_ADDHS, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 629 */, + TILEPRO_OPC_SUBBS_U, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 632 */, + TILEPRO_OPC_SUBHS, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 635 */, + TILEPRO_OPC_PACKHS, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 638 */, + TILEPRO_OPC_PACKBS_U, TILEPRO_OPC_NONE, + BITFIELD(49, 5) /* index 641 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB_SN, TILEPRO_OPC_ADDH_SN, + TILEPRO_OPC_ADD_SN, TILEPRO_OPC_AND_SN, TILEPRO_OPC_INTHB_SN, + TILEPRO_OPC_INTHH_SN, TILEPRO_OPC_INTLB_SN, TILEPRO_OPC_INTLH_SN, + TILEPRO_OPC_JALRP, TILEPRO_OPC_JALR, TILEPRO_OPC_JRP, TILEPRO_OPC_JR, + TILEPRO_OPC_LNK_SN, TILEPRO_OPC_MAXB_U_SN, TILEPRO_OPC_MAXH_SN, + TILEPRO_OPC_MINB_U_SN, TILEPRO_OPC_MINH_SN, TILEPRO_OPC_MNZB_SN, + TILEPRO_OPC_MNZH_SN, TILEPRO_OPC_MNZ_SN, TILEPRO_OPC_MZB_SN, + TILEPRO_OPC_MZH_SN, TILEPRO_OPC_MZ_SN, TILEPRO_OPC_NOR_SN, CHILD(674), + TILEPRO_OPC_PACKHB_SN, TILEPRO_OPC_PACKLB_SN, TILEPRO_OPC_RL_SN, + TILEPRO_OPC_S1A_SN, TILEPRO_OPC_S2A_SN, TILEPRO_OPC_S3A_SN, + BITFIELD(43, 2) /* index 674 */, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(679), + BITFIELD(45, 2) /* index 679 */, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(684), + BITFIELD(47, 2) /* index 684 */, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, + TILEPRO_OPC_MOVE_SN, + BITFIELD(49, 5) /* index 689 */, + TILEPRO_OPC_SB, TILEPRO_OPC_SEQB_SN, TILEPRO_OPC_SEQH_SN, + TILEPRO_OPC_SEQ_SN, TILEPRO_OPC_SHLB_SN, TILEPRO_OPC_SHLH_SN, + TILEPRO_OPC_SHL_SN, TILEPRO_OPC_SHRB_SN, TILEPRO_OPC_SHRH_SN, + TILEPRO_OPC_SHR_SN, TILEPRO_OPC_SH, TILEPRO_OPC_SLTB_SN, + TILEPRO_OPC_SLTB_U_SN, TILEPRO_OPC_SLTEB_SN, TILEPRO_OPC_SLTEB_U_SN, + TILEPRO_OPC_SLTEH_SN, TILEPRO_OPC_SLTEH_U_SN, TILEPRO_OPC_SLTE_SN, + TILEPRO_OPC_SLTE_U_SN, TILEPRO_OPC_SLTH_SN, TILEPRO_OPC_SLTH_U_SN, + TILEPRO_OPC_SLT_SN, TILEPRO_OPC_SLT_U_SN, TILEPRO_OPC_SNEB_SN, + TILEPRO_OPC_SNEH_SN, TILEPRO_OPC_SNE_SN, TILEPRO_OPC_SRAB_SN, + TILEPRO_OPC_SRAH_SN, TILEPRO_OPC_SRA_SN, TILEPRO_OPC_SUBB_SN, + TILEPRO_OPC_SUBH_SN, TILEPRO_OPC_SUB_SN, + BITFIELD(49, 4) /* index 722 */, + CHILD(611), CHILD(739), CHILD(742), CHILD(745), CHILD(748), CHILD(751), + CHILD(754), CHILD(757), CHILD(760), CHILD(763), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 739 */, + TILEPRO_OPC_XOR_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 742 */, + TILEPRO_OPC_ADDS_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 745 */, + TILEPRO_OPC_SUBS_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 748 */, + TILEPRO_OPC_ADDBS_U_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 751 */, + TILEPRO_OPC_ADDHS_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 754 */, + TILEPRO_OPC_SUBBS_U_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 757 */, + TILEPRO_OPC_SUBHS_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 760 */, + TILEPRO_OPC_PACKHS_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 763 */, + TILEPRO_OPC_PACKBS_U_SN, TILEPRO_OPC_NONE, + BITFIELD(37, 2) /* index 766 */, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + CHILD(771), + BITFIELD(39, 2) /* index 771 */, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + CHILD(776), + BITFIELD(41, 2) /* index 776 */, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + TILEPRO_OPC_MOVELI_SN, + BITFIELD(37, 2) /* index 781 */, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(786), + BITFIELD(39, 2) /* index 786 */, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(791), + BITFIELD(41, 2) /* index 791 */, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_MOVELI, + BITFIELD(31, 2) /* index 796 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(801), + BITFIELD(33, 2) /* index 801 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(806), + BITFIELD(35, 2) /* index 806 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(811), + BITFIELD(37, 2) /* index 811 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(816), + BITFIELD(39, 2) /* index 816 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(821), + BITFIELD(41, 2) /* index 821 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_INFOL, + BITFIELD(31, 4) /* index 826 */, + TILEPRO_OPC_BZ, TILEPRO_OPC_BZT, TILEPRO_OPC_BNZ, TILEPRO_OPC_BNZT, + TILEPRO_OPC_BGZ, TILEPRO_OPC_BGZT, TILEPRO_OPC_BGEZ, TILEPRO_OPC_BGEZT, + TILEPRO_OPC_BLZ, TILEPRO_OPC_BLZT, TILEPRO_OPC_BLEZ, TILEPRO_OPC_BLEZT, + TILEPRO_OPC_BBS, TILEPRO_OPC_BBST, TILEPRO_OPC_BBNS, TILEPRO_OPC_BBNST, + BITFIELD(31, 4) /* index 843 */, + TILEPRO_OPC_BZ_SN, TILEPRO_OPC_BZT_SN, TILEPRO_OPC_BNZ_SN, + TILEPRO_OPC_BNZT_SN, TILEPRO_OPC_BGZ_SN, TILEPRO_OPC_BGZT_SN, + TILEPRO_OPC_BGEZ_SN, TILEPRO_OPC_BGEZT_SN, TILEPRO_OPC_BLZ_SN, + TILEPRO_OPC_BLZT_SN, TILEPRO_OPC_BLEZ_SN, TILEPRO_OPC_BLEZT_SN, + TILEPRO_OPC_BBS_SN, TILEPRO_OPC_BBST_SN, TILEPRO_OPC_BBNS_SN, + TILEPRO_OPC_BBNST_SN, + BITFIELD(51, 3) /* index 860 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB, TILEPRO_OPC_ADDIH, TILEPRO_OPC_ADDI, + CHILD(869), TILEPRO_OPC_MAXIB_U, TILEPRO_OPC_MAXIH, TILEPRO_OPC_MFSPR, + BITFIELD(31, 2) /* index 869 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(874), + BITFIELD(33, 2) /* index 874 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(879), + BITFIELD(35, 2) /* index 879 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(884), + BITFIELD(37, 2) /* index 884 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(889), + BITFIELD(39, 2) /* index 889 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(894), + BITFIELD(41, 2) /* index 894 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO, + BITFIELD(51, 3) /* index 899 */, + TILEPRO_OPC_MINIB_U, TILEPRO_OPC_MINIH, TILEPRO_OPC_MTSPR, CHILD(908), + TILEPRO_OPC_SEQIB, TILEPRO_OPC_SEQIH, TILEPRO_OPC_SEQI, TILEPRO_OPC_SLTIB, + BITFIELD(37, 2) /* index 908 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(913), + BITFIELD(39, 2) /* index 913 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(918), + BITFIELD(41, 2) /* index 918 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI, + BITFIELD(51, 3) /* index 923 */, + TILEPRO_OPC_SLTIB_U, TILEPRO_OPC_SLTIH, TILEPRO_OPC_SLTIH_U, + TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, TILEPRO_OPC_XORI, TILEPRO_OPC_LBADD, + TILEPRO_OPC_LBADD_U, + BITFIELD(51, 3) /* index 932 */, + TILEPRO_OPC_LHADD, TILEPRO_OPC_LHADD_U, TILEPRO_OPC_LWADD, + TILEPRO_OPC_LWADD_NA, TILEPRO_OPC_SBADD, TILEPRO_OPC_SHADD, + TILEPRO_OPC_SWADD, TILEPRO_OPC_NONE, + BITFIELD(51, 3) /* index 941 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB_SN, TILEPRO_OPC_ADDIH_SN, + TILEPRO_OPC_ADDI_SN, TILEPRO_OPC_ANDI_SN, TILEPRO_OPC_MAXIB_U_SN, + TILEPRO_OPC_MAXIH_SN, TILEPRO_OPC_MFSPR, + BITFIELD(51, 3) /* index 950 */, + TILEPRO_OPC_MINIB_U_SN, TILEPRO_OPC_MINIH_SN, TILEPRO_OPC_MTSPR, CHILD(959), + TILEPRO_OPC_SEQIB_SN, TILEPRO_OPC_SEQIH_SN, TILEPRO_OPC_SEQI_SN, + TILEPRO_OPC_SLTIB_SN, + BITFIELD(37, 2) /* index 959 */, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(964), + BITFIELD(39, 2) /* index 964 */, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(969), + BITFIELD(41, 2) /* index 969 */, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, + TILEPRO_OPC_MOVEI_SN, + BITFIELD(51, 3) /* index 974 */, + TILEPRO_OPC_SLTIB_U_SN, TILEPRO_OPC_SLTIH_SN, TILEPRO_OPC_SLTIH_U_SN, + TILEPRO_OPC_SLTI_SN, TILEPRO_OPC_SLTI_U_SN, TILEPRO_OPC_XORI_SN, + TILEPRO_OPC_LBADD_SN, TILEPRO_OPC_LBADD_U_SN, + BITFIELD(51, 3) /* index 983 */, + TILEPRO_OPC_LHADD_SN, TILEPRO_OPC_LHADD_U_SN, TILEPRO_OPC_LWADD_SN, + TILEPRO_OPC_LWADD_NA_SN, TILEPRO_OPC_SBADD, TILEPRO_OPC_SHADD, + TILEPRO_OPC_SWADD, TILEPRO_OPC_NONE, + BITFIELD(46, 7) /* index 992 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(1121), CHILD(1121), CHILD(1121), CHILD(1121), CHILD(1124), + CHILD(1124), CHILD(1124), CHILD(1124), CHILD(1127), CHILD(1127), + CHILD(1127), CHILD(1127), CHILD(1130), CHILD(1130), CHILD(1130), + CHILD(1130), CHILD(1133), CHILD(1133), CHILD(1133), CHILD(1133), + CHILD(1136), CHILD(1136), CHILD(1136), CHILD(1136), CHILD(1139), + CHILD(1139), CHILD(1139), CHILD(1139), CHILD(1142), CHILD(1142), + CHILD(1142), CHILD(1142), CHILD(1145), CHILD(1145), CHILD(1145), + CHILD(1145), CHILD(1148), CHILD(1148), CHILD(1148), CHILD(1148), + CHILD(1151), CHILD(1242), CHILD(1290), CHILD(1323), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1121 */, + TILEPRO_OPC_RLI, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1124 */, + TILEPRO_OPC_SHLIB, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1127 */, + TILEPRO_OPC_SHLIH, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1130 */, + TILEPRO_OPC_SHLI, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1133 */, + TILEPRO_OPC_SHRIB, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1136 */, + TILEPRO_OPC_SHRIH, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1139 */, + TILEPRO_OPC_SHRI, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1142 */, + TILEPRO_OPC_SRAIB, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1145 */, + TILEPRO_OPC_SRAIH, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1148 */, + TILEPRO_OPC_SRAI, TILEPRO_OPC_NONE, + BITFIELD(43, 3) /* index 1151 */, + TILEPRO_OPC_NONE, CHILD(1160), CHILD(1163), CHILD(1166), CHILD(1169), + CHILD(1172), CHILD(1175), CHILD(1178), + BITFIELD(53, 1) /* index 1160 */, + TILEPRO_OPC_DRAIN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1163 */, + TILEPRO_OPC_DTLBPR, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1166 */, + TILEPRO_OPC_FINV, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1169 */, + TILEPRO_OPC_FLUSH, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1172 */, + TILEPRO_OPC_FNOP, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1175 */, + TILEPRO_OPC_ICOH, TILEPRO_OPC_NONE, + BITFIELD(31, 2) /* index 1178 */, + CHILD(1183), CHILD(1211), CHILD(1239), CHILD(1239), + BITFIELD(53, 1) /* index 1183 */, + CHILD(1186), TILEPRO_OPC_NONE, + BITFIELD(33, 2) /* index 1186 */, + TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, CHILD(1191), + BITFIELD(35, 2) /* index 1191 */, + TILEPRO_OPC_ILL, CHILD(1196), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, + BITFIELD(37, 2) /* index 1196 */, + TILEPRO_OPC_ILL, CHILD(1201), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, + BITFIELD(39, 2) /* index 1201 */, + TILEPRO_OPC_ILL, CHILD(1206), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, + BITFIELD(41, 2) /* index 1206 */, + TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_BPT, TILEPRO_OPC_ILL, + BITFIELD(53, 1) /* index 1211 */, + CHILD(1214), TILEPRO_OPC_NONE, + BITFIELD(33, 2) /* index 1214 */, + TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, CHILD(1219), + BITFIELD(35, 2) /* index 1219 */, + TILEPRO_OPC_ILL, CHILD(1224), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, + BITFIELD(37, 2) /* index 1224 */, + TILEPRO_OPC_ILL, CHILD(1229), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, + BITFIELD(39, 2) /* index 1229 */, + TILEPRO_OPC_ILL, CHILD(1234), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, + BITFIELD(41, 2) /* index 1234 */, + TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_RAISE, TILEPRO_OPC_ILL, + BITFIELD(53, 1) /* index 1239 */, + TILEPRO_OPC_ILL, TILEPRO_OPC_NONE, + BITFIELD(43, 3) /* index 1242 */, + CHILD(1251), CHILD(1254), CHILD(1257), CHILD(1275), CHILD(1278), + CHILD(1281), CHILD(1284), CHILD(1287), + BITFIELD(53, 1) /* index 1251 */, + TILEPRO_OPC_INV, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1254 */, + TILEPRO_OPC_IRET, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1257 */, + CHILD(1260), TILEPRO_OPC_NONE, + BITFIELD(31, 2) /* index 1260 */, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(1265), + BITFIELD(33, 2) /* index 1265 */, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(1270), + BITFIELD(35, 2) /* index 1270 */, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_PREFETCH, + BITFIELD(53, 1) /* index 1275 */, + TILEPRO_OPC_LB_U, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1278 */, + TILEPRO_OPC_LH, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1281 */, + TILEPRO_OPC_LH_U, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1284 */, + TILEPRO_OPC_LW, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1287 */, + TILEPRO_OPC_MF, TILEPRO_OPC_NONE, + BITFIELD(43, 3) /* index 1290 */, + CHILD(1299), CHILD(1302), CHILD(1305), CHILD(1308), CHILD(1311), + CHILD(1314), CHILD(1317), CHILD(1320), + BITFIELD(53, 1) /* index 1299 */, + TILEPRO_OPC_NAP, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1302 */, + TILEPRO_OPC_NOP, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1305 */, + TILEPRO_OPC_SWINT0, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1308 */, + TILEPRO_OPC_SWINT1, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1311 */, + TILEPRO_OPC_SWINT2, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1314 */, + TILEPRO_OPC_SWINT3, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1317 */, + TILEPRO_OPC_TNS, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1320 */, + TILEPRO_OPC_WH64, TILEPRO_OPC_NONE, + BITFIELD(43, 2) /* index 1323 */, + CHILD(1328), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(45, 1) /* index 1328 */, + CHILD(1331), TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1331 */, + TILEPRO_OPC_LW_NA, TILEPRO_OPC_NONE, + BITFIELD(46, 7) /* index 1334 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(1463), CHILD(1463), CHILD(1463), CHILD(1463), CHILD(1466), + CHILD(1466), CHILD(1466), CHILD(1466), CHILD(1469), CHILD(1469), + CHILD(1469), CHILD(1469), CHILD(1472), CHILD(1472), CHILD(1472), + CHILD(1472), CHILD(1475), CHILD(1475), CHILD(1475), CHILD(1475), + CHILD(1478), CHILD(1478), CHILD(1478), CHILD(1478), CHILD(1481), + CHILD(1481), CHILD(1481), CHILD(1481), CHILD(1484), CHILD(1484), + CHILD(1484), CHILD(1484), CHILD(1487), CHILD(1487), CHILD(1487), + CHILD(1487), CHILD(1490), CHILD(1490), CHILD(1490), CHILD(1490), + CHILD(1151), CHILD(1493), CHILD(1517), CHILD(1529), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1463 */, + TILEPRO_OPC_RLI_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1466 */, + TILEPRO_OPC_SHLIB_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1469 */, + TILEPRO_OPC_SHLIH_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1472 */, + TILEPRO_OPC_SHLI_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1475 */, + TILEPRO_OPC_SHRIB_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1478 */, + TILEPRO_OPC_SHRIH_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1481 */, + TILEPRO_OPC_SHRI_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1484 */, + TILEPRO_OPC_SRAIB_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1487 */, + TILEPRO_OPC_SRAIH_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1490 */, + TILEPRO_OPC_SRAI_SN, TILEPRO_OPC_NONE, + BITFIELD(43, 3) /* index 1493 */, + CHILD(1251), CHILD(1254), CHILD(1502), CHILD(1505), CHILD(1508), + CHILD(1511), CHILD(1514), CHILD(1287), + BITFIELD(53, 1) /* index 1502 */, + TILEPRO_OPC_LB_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1505 */, + TILEPRO_OPC_LB_U_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1508 */, + TILEPRO_OPC_LH_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1511 */, + TILEPRO_OPC_LH_U_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1514 */, + TILEPRO_OPC_LW_SN, TILEPRO_OPC_NONE, + BITFIELD(43, 3) /* index 1517 */, + CHILD(1299), CHILD(1302), CHILD(1305), CHILD(1308), CHILD(1311), + CHILD(1314), CHILD(1526), CHILD(1320), + BITFIELD(53, 1) /* index 1526 */, + TILEPRO_OPC_TNS_SN, TILEPRO_OPC_NONE, + BITFIELD(43, 2) /* index 1529 */, + CHILD(1534), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(45, 1) /* index 1534 */, + CHILD(1537), TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1537 */, + TILEPRO_OPC_LW_NA_SN, TILEPRO_OPC_NONE, +}; + +static const unsigned short decode_Y0_fsm[168] = +{ + BITFIELD(27, 4) /* index 0 */, + TILEPRO_OPC_NONE, CHILD(17), CHILD(22), CHILD(27), CHILD(47), CHILD(52), + CHILD(57), CHILD(62), CHILD(67), TILEPRO_OPC_ADDI, CHILD(72), CHILD(102), + TILEPRO_OPC_SEQI, CHILD(117), TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, + BITFIELD(18, 2) /* index 17 */, + TILEPRO_OPC_ADD, TILEPRO_OPC_S1A, TILEPRO_OPC_S2A, TILEPRO_OPC_SUB, + BITFIELD(18, 2) /* index 22 */, + TILEPRO_OPC_MNZ, TILEPRO_OPC_MVNZ, TILEPRO_OPC_MVZ, TILEPRO_OPC_MZ, + BITFIELD(18, 2) /* index 27 */, + TILEPRO_OPC_AND, TILEPRO_OPC_NOR, CHILD(32), TILEPRO_OPC_XOR, + BITFIELD(12, 2) /* index 32 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(37), + BITFIELD(14, 2) /* index 37 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(42), + BITFIELD(16, 2) /* index 42 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE, + BITFIELD(18, 2) /* index 47 */, + TILEPRO_OPC_RL, TILEPRO_OPC_SHL, TILEPRO_OPC_SHR, TILEPRO_OPC_SRA, + BITFIELD(18, 2) /* index 52 */, + TILEPRO_OPC_SLTE, TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLT, TILEPRO_OPC_SLT_U, + BITFIELD(18, 2) /* index 57 */, + TILEPRO_OPC_MULHLSA_UU, TILEPRO_OPC_S3A, TILEPRO_OPC_SEQ, TILEPRO_OPC_SNE, + BITFIELD(18, 2) /* index 62 */, + TILEPRO_OPC_MULHH_SS, TILEPRO_OPC_MULHH_UU, TILEPRO_OPC_MULLL_SS, + TILEPRO_OPC_MULLL_UU, + BITFIELD(18, 2) /* index 67 */, + TILEPRO_OPC_MULHHA_SS, TILEPRO_OPC_MULHHA_UU, TILEPRO_OPC_MULLLA_SS, + TILEPRO_OPC_MULLLA_UU, + BITFIELD(0, 2) /* index 72 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(77), + BITFIELD(2, 2) /* index 77 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(82), + BITFIELD(4, 2) /* index 82 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(87), + BITFIELD(6, 2) /* index 87 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(92), + BITFIELD(8, 2) /* index 92 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(97), + BITFIELD(10, 2) /* index 97 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO, + BITFIELD(6, 2) /* index 102 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(107), + BITFIELD(8, 2) /* index 107 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(112), + BITFIELD(10, 2) /* index 112 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI, + BITFIELD(15, 5) /* index 117 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_RLI, TILEPRO_OPC_RLI, TILEPRO_OPC_RLI, TILEPRO_OPC_RLI, + TILEPRO_OPC_SHLI, TILEPRO_OPC_SHLI, TILEPRO_OPC_SHLI, TILEPRO_OPC_SHLI, + TILEPRO_OPC_SHRI, TILEPRO_OPC_SHRI, TILEPRO_OPC_SHRI, TILEPRO_OPC_SHRI, + TILEPRO_OPC_SRAI, TILEPRO_OPC_SRAI, TILEPRO_OPC_SRAI, TILEPRO_OPC_SRAI, + CHILD(150), CHILD(159), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(12, 3) /* index 150 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_BITX, TILEPRO_OPC_BYTEX, TILEPRO_OPC_CLZ, + TILEPRO_OPC_CTZ, TILEPRO_OPC_FNOP, TILEPRO_OPC_NOP, TILEPRO_OPC_PCNT, + BITFIELD(12, 3) /* index 159 */, + TILEPRO_OPC_TBLIDXB0, TILEPRO_OPC_TBLIDXB1, TILEPRO_OPC_TBLIDXB2, + TILEPRO_OPC_TBLIDXB3, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, +}; + +static const unsigned short decode_Y1_fsm[140] = +{ + BITFIELD(59, 4) /* index 0 */, + TILEPRO_OPC_NONE, CHILD(17), CHILD(22), CHILD(27), CHILD(47), CHILD(52), + CHILD(57), TILEPRO_OPC_ADDI, CHILD(62), CHILD(92), TILEPRO_OPC_SEQI, + CHILD(107), TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, + BITFIELD(49, 2) /* index 17 */, + TILEPRO_OPC_ADD, TILEPRO_OPC_S1A, TILEPRO_OPC_S2A, TILEPRO_OPC_SUB, + BITFIELD(49, 2) /* index 22 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_MNZ, TILEPRO_OPC_MZ, TILEPRO_OPC_NONE, + BITFIELD(49, 2) /* index 27 */, + TILEPRO_OPC_AND, TILEPRO_OPC_NOR, CHILD(32), TILEPRO_OPC_XOR, + BITFIELD(43, 2) /* index 32 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(37), + BITFIELD(45, 2) /* index 37 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(42), + BITFIELD(47, 2) /* index 42 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE, + BITFIELD(49, 2) /* index 47 */, + TILEPRO_OPC_RL, TILEPRO_OPC_SHL, TILEPRO_OPC_SHR, TILEPRO_OPC_SRA, + BITFIELD(49, 2) /* index 52 */, + TILEPRO_OPC_SLTE, TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLT, TILEPRO_OPC_SLT_U, + BITFIELD(49, 2) /* index 57 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_S3A, TILEPRO_OPC_SEQ, TILEPRO_OPC_SNE, + BITFIELD(31, 2) /* index 62 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(67), + BITFIELD(33, 2) /* index 67 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(72), + BITFIELD(35, 2) /* index 72 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(77), + BITFIELD(37, 2) /* index 77 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(82), + BITFIELD(39, 2) /* index 82 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(87), + BITFIELD(41, 2) /* index 87 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO, + BITFIELD(37, 2) /* index 92 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(97), + BITFIELD(39, 2) /* index 97 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(102), + BITFIELD(41, 2) /* index 102 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI, + BITFIELD(48, 3) /* index 107 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_RLI, TILEPRO_OPC_SHLI, TILEPRO_OPC_SHRI, + TILEPRO_OPC_SRAI, CHILD(116), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(43, 3) /* index 116 */, + TILEPRO_OPC_NONE, CHILD(125), CHILD(130), CHILD(135), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(46, 2) /* index 125 */, + TILEPRO_OPC_FNOP, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(46, 2) /* index 130 */, + TILEPRO_OPC_ILL, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(46, 2) /* index 135 */, + TILEPRO_OPC_NOP, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, +}; + +static const unsigned short decode_Y2_fsm[24] = +{ + BITFIELD(56, 3) /* index 0 */, + CHILD(9), TILEPRO_OPC_LB_U, TILEPRO_OPC_LH, TILEPRO_OPC_LH_U, + TILEPRO_OPC_LW, TILEPRO_OPC_SB, TILEPRO_OPC_SH, TILEPRO_OPC_SW, + BITFIELD(20, 2) /* index 9 */, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(14), + BITFIELD(22, 2) /* index 14 */, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(19), + BITFIELD(24, 2) /* index 19 */, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_PREFETCH, +}; + +#undef BITFIELD +#undef CHILD +const unsigned short * const +tilepro_bundle_decoder_fsms[TILEPRO_NUM_PIPELINE_ENCODINGS] = +{ + decode_X0_fsm, + decode_X1_fsm, + decode_Y0_fsm, + decode_Y1_fsm, + decode_Y2_fsm +}; +const struct tilepro_operand tilepro_operands[43] = +{ + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_X0), + 8, 1, 0, 0, 0, 0, + create_Imm8_X0, get_Imm8_X0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_X1), + 8, 1, 0, 0, 0, 0, + create_Imm8_X1, get_Imm8_X1 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_Y0), + 8, 1, 0, 0, 0, 0, + create_Imm8_Y0, get_Imm8_Y0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_Y1), + 8, 1, 0, 0, 0, 0, + create_Imm8_Y1, get_Imm8_Y1 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM16_X0), + 16, 1, 0, 0, 0, 0, + create_Imm16_X0, get_Imm16_X0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM16_X1), + 16, 1, 0, 0, 0, 0, + create_Imm16_X1, get_Imm16_X1 + }, + { + TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(TILEPRO_JOFFLONG_X1), + 29, 1, 0, 0, 1, TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_JOffLong_X1, get_JOffLong_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_X0, get_Dest_X0 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_X0, get_SrcA_X0 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_X1, get_Dest_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_X1, get_SrcA_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_Y0, get_Dest_Y0 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y0, get_SrcA_Y0 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_Y1, get_Dest_Y1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y1, get_SrcA_Y1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y2, get_SrcA_Y2 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_X0, get_SrcB_X0 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_X1, get_SrcB_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_Y0, get_SrcB_Y0 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_Y1, get_SrcB_Y1 + }, + { + TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(TILEPRO_BROFF_X1), + 17, 1, 0, 0, 1, TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_BrOff_X1, get_BrOff_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_Dest_X0, get_Dest_X0 + }, + { + TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(NONE), + 28, 1, 0, 0, 1, TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_JOff_X1, get_JOff_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_SrcBDest_Y2, get_SrcBDest_Y2 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_SrcA_X1, get_SrcA_X1 + }, + { + TILEPRO_OP_TYPE_SPR, BFD_RELOC(TILEPRO_MF_IMM15_X1), + 15, 0, 0, 0, 0, 0, + create_MF_Imm15_X1, get_MF_Imm15_X1 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMSTART_X0), + 5, 0, 0, 0, 0, 0, + create_MMStart_X0, get_MMStart_X0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMEND_X0), + 5, 0, 0, 0, 0, 0, + create_MMEnd_X0, get_MMEnd_X0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMSTART_X1), + 5, 0, 0, 0, 0, 0, + create_MMStart_X1, get_MMStart_X1 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMEND_X1), + 5, 0, 0, 0, 0, 0, + create_MMEnd_X1, get_MMEnd_X1 + }, + { + TILEPRO_OP_TYPE_SPR, BFD_RELOC(TILEPRO_MT_IMM15_X1), + 15, 0, 0, 0, 0, 0, + create_MT_Imm15_X1, get_MT_Imm15_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_Dest_Y0, get_Dest_Y0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_X0), + 5, 0, 0, 0, 0, 0, + create_ShAmt_X0, get_ShAmt_X0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_X1), + 5, 0, 0, 0, 0, 0, + create_ShAmt_X1, get_ShAmt_X1 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_Y0), + 5, 0, 0, 0, 0, 0, + create_ShAmt_Y0, get_ShAmt_Y0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_Y1), + 5, 0, 0, 0, 0, 0, + create_ShAmt_Y1, get_ShAmt_Y1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcBDest_Y2, get_SrcBDest_Y2 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_DEST_IMM8_X1), + 8, 1, 0, 0, 0, 0, + create_Dest_Imm8_X1, get_Dest_Imm8_X1 + }, + { + TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(NONE), + 10, 1, 0, 0, 1, TILEPRO_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES, + create_BrOff_SN, get_BrOff_SN + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(NONE), + 8, 0, 0, 0, 0, 0, + create_Imm8_SN, get_Imm8_SN + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(NONE), + 8, 1, 0, 0, 0, 0, + create_Imm8_SN, get_Imm8_SN + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 2, 0, 0, 1, 0, 0, + create_Dest_SN, get_Dest_SN + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 2, 0, 1, 0, 0, 0, + create_Src_SN, get_Src_SN + } +}; + + + + +/* Given a set of bundle bits and a specific pipe, returns which + * instruction the bundle contains in that pipe. + */ +const struct tilepro_opcode * +find_opcode(tilepro_bundle_bits bits, tilepro_pipeline pipe) +{ + const unsigned short *table = tilepro_bundle_decoder_fsms[pipe]; + int index = 0; + + while (1) + { + unsigned short bitspec = table[index]; + unsigned int bitfield = + ((unsigned int)(bits >> (bitspec & 63))) & (bitspec >> 6); + + unsigned short next = table[index + 1 + bitfield]; + if (next <= TILEPRO_OPC_NONE) + return &tilepro_opcodes[next]; + + index = next - TILEPRO_OPC_NONE; + } +} + + +int +parse_insn_tilepro(tilepro_bundle_bits bits, + unsigned int pc, + struct tilepro_decoded_instruction + decoded[TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE]) +{ + int num_instructions = 0; + int pipe; + + int min_pipe, max_pipe; + if ((bits & TILEPRO_BUNDLE_Y_ENCODING_MASK) == 0) + { + min_pipe = TILEPRO_PIPELINE_X0; + max_pipe = TILEPRO_PIPELINE_X1; + } + else + { + min_pipe = TILEPRO_PIPELINE_Y0; + max_pipe = TILEPRO_PIPELINE_Y2; + } + + /* For each pipe, find an instruction that fits. */ + for (pipe = min_pipe; pipe <= max_pipe; pipe++) + { + const struct tilepro_opcode *opc; + struct tilepro_decoded_instruction *d; + int i; + + d = &decoded[num_instructions++]; + opc = find_opcode (bits, (tilepro_pipeline)pipe); + d->opcode = opc; + + /* Decode each operand, sign extending, etc. as appropriate. */ + for (i = 0; i < opc->num_operands; i++) + { + const struct tilepro_operand *op = + &tilepro_operands[opc->operands[pipe][i]]; + int opval = op->extract (bits); + if (op->is_signed) + { + /* Sign-extend the operand. */ + int shift = (int)((sizeof(int) * 8) - op->num_bits); + opval = (opval << shift) >> shift; + } + + /* Adjust PC-relative scaled branch offsets. */ + if (op->type == TILEPRO_OP_TYPE_ADDRESS) + { + opval *= TILEPRO_BUNDLE_SIZE_IN_BYTES; + opval += (int)pc; + } + + /* Record the final value. */ + d->operands[i] = op; + d->operand_values[i] = opval; + } + } + + return num_instructions; +} diff --git a/arch/tile/kernel/tile-desc_64.c b/arch/tile/kernel/tile-desc_64.c new file mode 100644 index 00000000..65b5f8ac --- /dev/null +++ b/arch/tile/kernel/tile-desc_64.c @@ -0,0 +1,2218 @@ +/* TILE-Gx opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * + * + * + */ + +/* This define is BFD_RELOC_##x for real bfd, or -1 for everyone else. */ +#define BFD_RELOC(x) -1 + +/* Special registers. */ +#define TREG_LR 55 +#define TREG_SN 56 +#define TREG_ZERO 63 + +#include <linux/stddef.h> +#include <asm/tile-desc.h> + +const struct tilegx_opcode tilegx_opcodes[334] = +{ + { "bpt", TILEGX_OPC_BPT, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "info", TILEGX_OPC_INFO, 0xf, 1, TREG_ZERO, 1, + { { 0 }, { 1 }, { 2 }, { 3 }, { 0, } }, + }, + { "infol", TILEGX_OPC_INFOL, 0x3, 1, TREG_ZERO, 1, + { { 4 }, { 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "move", TILEGX_OPC_MOVE, 0xf, 2, TREG_ZERO, 1, + { { 6, 7 }, { 8, 9 }, { 10, 11 }, { 12, 13 }, { 0, } }, + }, + { "movei", TILEGX_OPC_MOVEI, 0xf, 2, TREG_ZERO, 1, + { { 6, 0 }, { 8, 1 }, { 10, 2 }, { 12, 3 }, { 0, } }, + }, + { "moveli", TILEGX_OPC_MOVELI, 0x3, 2, TREG_ZERO, 1, + { { 6, 4 }, { 8, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch", TILEGX_OPC_PREFETCH, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_add_l1", TILEGX_OPC_PREFETCH_ADD_L1, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l1_fault", TILEGX_OPC_PREFETCH_ADD_L1_FAULT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l2", TILEGX_OPC_PREFETCH_ADD_L2, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l2_fault", TILEGX_OPC_PREFETCH_ADD_L2_FAULT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l3", TILEGX_OPC_PREFETCH_ADD_L3, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l3_fault", TILEGX_OPC_PREFETCH_ADD_L3_FAULT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_l1", TILEGX_OPC_PREFETCH_L1, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l1_fault", TILEGX_OPC_PREFETCH_L1_FAULT, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l2", TILEGX_OPC_PREFETCH_L2, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l2_fault", TILEGX_OPC_PREFETCH_L2_FAULT, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l3", TILEGX_OPC_PREFETCH_L3, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l3_fault", TILEGX_OPC_PREFETCH_L3_FAULT, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "raise", TILEGX_OPC_RAISE, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "add", TILEGX_OPC_ADD, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "addi", TILEGX_OPC_ADDI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "addli", TILEGX_OPC_ADDLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 4 }, { 8, 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "addx", TILEGX_OPC_ADDX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "addxi", TILEGX_OPC_ADDXI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "addxli", TILEGX_OPC_ADDXLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 4 }, { 8, 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "addxsc", TILEGX_OPC_ADDXSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "and", TILEGX_OPC_AND, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "andi", TILEGX_OPC_ANDI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "beqz", TILEGX_OPC_BEQZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "beqzt", TILEGX_OPC_BEQZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bfexts", TILEGX_OPC_BFEXTS, 0x1, 4, TREG_ZERO, 1, + { { 6, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "bfextu", TILEGX_OPC_BFEXTU, 0x1, 4, TREG_ZERO, 1, + { { 6, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "bfins", TILEGX_OPC_BFINS, 0x1, 4, TREG_ZERO, 1, + { { 23, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "bgez", TILEGX_OPC_BGEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgezt", TILEGX_OPC_BGEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgtz", TILEGX_OPC_BGTZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgtzt", TILEGX_OPC_BGTZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blbc", TILEGX_OPC_BLBC, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blbct", TILEGX_OPC_BLBCT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blbs", TILEGX_OPC_BLBS, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blbst", TILEGX_OPC_BLBST, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blez", TILEGX_OPC_BLEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blezt", TILEGX_OPC_BLEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bltz", TILEGX_OPC_BLTZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bltzt", TILEGX_OPC_BLTZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnez", TILEGX_OPC_BNEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnezt", TILEGX_OPC_BNEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "clz", TILEGX_OPC_CLZ, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "cmoveqz", TILEGX_OPC_CMOVEQZ, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "cmovnez", TILEGX_OPC_CMOVNEZ, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "cmpeq", TILEGX_OPC_CMPEQ, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmpeqi", TILEGX_OPC_CMPEQI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "cmpexch", TILEGX_OPC_CMPEXCH, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "cmpexch4", TILEGX_OPC_CMPEXCH4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "cmples", TILEGX_OPC_CMPLES, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmpleu", TILEGX_OPC_CMPLEU, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmplts", TILEGX_OPC_CMPLTS, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmpltsi", TILEGX_OPC_CMPLTSI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "cmpltu", TILEGX_OPC_CMPLTU, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmpltui", TILEGX_OPC_CMPLTUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "cmpne", TILEGX_OPC_CMPNE, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmul", TILEGX_OPC_CMUL, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmula", TILEGX_OPC_CMULA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulaf", TILEGX_OPC_CMULAF, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulf", TILEGX_OPC_CMULF, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulfr", TILEGX_OPC_CMULFR, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulh", TILEGX_OPC_CMULH, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulhr", TILEGX_OPC_CMULHR, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_32", TILEGX_OPC_CRC32_32, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_8", TILEGX_OPC_CRC32_8, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "ctz", TILEGX_OPC_CTZ, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "dblalign", TILEGX_OPC_DBLALIGN, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "dblalign2", TILEGX_OPC_DBLALIGN2, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "dblalign4", TILEGX_OPC_DBLALIGN4, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "dblalign6", TILEGX_OPC_DBLALIGN6, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "drain", TILEGX_OPC_DRAIN, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "dtlbpr", TILEGX_OPC_DTLBPR, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "exch", TILEGX_OPC_EXCH, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "exch4", TILEGX_OPC_EXCH4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_add_flags", TILEGX_OPC_FDOUBLE_ADD_FLAGS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_addsub", TILEGX_OPC_FDOUBLE_ADDSUB, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_mul_flags", TILEGX_OPC_FDOUBLE_MUL_FLAGS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_pack1", TILEGX_OPC_FDOUBLE_PACK1, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_pack2", TILEGX_OPC_FDOUBLE_PACK2, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_sub_flags", TILEGX_OPC_FDOUBLE_SUB_FLAGS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_unpack_max", TILEGX_OPC_FDOUBLE_UNPACK_MAX, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_unpack_min", TILEGX_OPC_FDOUBLE_UNPACK_MIN, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchadd", TILEGX_OPC_FETCHADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchadd4", TILEGX_OPC_FETCHADD4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchaddgez", TILEGX_OPC_FETCHADDGEZ, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchaddgez4", TILEGX_OPC_FETCHADDGEZ4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchand", TILEGX_OPC_FETCHAND, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchand4", TILEGX_OPC_FETCHAND4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchor", TILEGX_OPC_FETCHOR, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchor4", TILEGX_OPC_FETCHOR4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "finv", TILEGX_OPC_FINV, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "flush", TILEGX_OPC_FLUSH, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "flushwb", TILEGX_OPC_FLUSHWB, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "fnop", TILEGX_OPC_FNOP, 0xf, 0, TREG_ZERO, 1, + { { }, { }, { }, { }, { 0, } }, + }, + { "fsingle_add1", TILEGX_OPC_FSINGLE_ADD1, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_addsub2", TILEGX_OPC_FSINGLE_ADDSUB2, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_mul1", TILEGX_OPC_FSINGLE_MUL1, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_mul2", TILEGX_OPC_FSINGLE_MUL2, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_pack1", TILEGX_OPC_FSINGLE_PACK1, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "fsingle_pack2", TILEGX_OPC_FSINGLE_PACK2, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_sub1", TILEGX_OPC_FSINGLE_SUB1, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "icoh", TILEGX_OPC_ICOH, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ill", TILEGX_OPC_ILL, 0xa, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { }, { 0, } }, + }, + { "inv", TILEGX_OPC_INV, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "iret", TILEGX_OPC_IRET, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "j", TILEGX_OPC_J, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } }, + }, + { "jal", TILEGX_OPC_JAL, 0x2, 1, TREG_LR, 1, + { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } }, + }, + { "jalr", TILEGX_OPC_JALR, 0xa, 1, TREG_LR, 1, + { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } }, + }, + { "jalrp", TILEGX_OPC_JALRP, 0xa, 1, TREG_LR, 1, + { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } }, + }, + { "jr", TILEGX_OPC_JR, 0xa, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } }, + }, + { "jrp", TILEGX_OPC_JRP, 0xa, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } }, + }, + { "ld", TILEGX_OPC_LD, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld1s", TILEGX_OPC_LD1S, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld1s_add", TILEGX_OPC_LD1S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld1u", TILEGX_OPC_LD1U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld1u_add", TILEGX_OPC_LD1U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld2s", TILEGX_OPC_LD2S, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld2s_add", TILEGX_OPC_LD2S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld2u", TILEGX_OPC_LD2U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld2u_add", TILEGX_OPC_LD2U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld4s", TILEGX_OPC_LD4S, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld4s_add", TILEGX_OPC_LD4S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld4u", TILEGX_OPC_LD4U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld4u_add", TILEGX_OPC_LD4U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld_add", TILEGX_OPC_LD_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldna", TILEGX_OPC_LDNA, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldna_add", TILEGX_OPC_LDNA_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt", TILEGX_OPC_LDNT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt1s", TILEGX_OPC_LDNT1S, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt1s_add", TILEGX_OPC_LDNT1S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt1u", TILEGX_OPC_LDNT1U, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt1u_add", TILEGX_OPC_LDNT1U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt2s", TILEGX_OPC_LDNT2S, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt2s_add", TILEGX_OPC_LDNT2S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt2u", TILEGX_OPC_LDNT2U, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt2u_add", TILEGX_OPC_LDNT2U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt4s", TILEGX_OPC_LDNT4S, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt4s_add", TILEGX_OPC_LDNT4S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt4u", TILEGX_OPC_LDNT4U, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt4u_add", TILEGX_OPC_LDNT4U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt_add", TILEGX_OPC_LDNT_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lnk", TILEGX_OPC_LNK, 0xa, 1, TREG_ZERO, 1, + { { 0, }, { 8 }, { 0, }, { 12 }, { 0, } }, + }, + { "mf", TILEGX_OPC_MF, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "mfspr", TILEGX_OPC_MFSPR, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 27 }, { 0, }, { 0, }, { 0, } }, + }, + { "mm", TILEGX_OPC_MM, 0x1, 4, TREG_ZERO, 1, + { { 23, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mnz", TILEGX_OPC_MNZ, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "mtspr", TILEGX_OPC_MTSPR, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 28, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hs_hs", TILEGX_OPC_MUL_HS_HS, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mul_hs_hu", TILEGX_OPC_MUL_HS_HU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hs_ls", TILEGX_OPC_MUL_HS_LS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hs_lu", TILEGX_OPC_MUL_HS_LU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hu_hu", TILEGX_OPC_MUL_HU_HU, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mul_hu_ls", TILEGX_OPC_MUL_HU_LS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hu_lu", TILEGX_OPC_MUL_HU_LU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_ls_ls", TILEGX_OPC_MUL_LS_LS, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mul_ls_lu", TILEGX_OPC_MUL_LS_LU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_lu_lu", TILEGX_OPC_MUL_LU_LU, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mula_hs_hs", TILEGX_OPC_MULA_HS_HS, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mula_hs_hu", TILEGX_OPC_MULA_HS_HU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_hs_ls", TILEGX_OPC_MULA_HS_LS, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_hs_lu", TILEGX_OPC_MULA_HS_LU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_hu_hu", TILEGX_OPC_MULA_HU_HU, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mula_hu_ls", TILEGX_OPC_MULA_HU_LS, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_hu_lu", TILEGX_OPC_MULA_HU_LU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_ls_ls", TILEGX_OPC_MULA_LS_LS, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mula_ls_lu", TILEGX_OPC_MULA_LS_LU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_lu_lu", TILEGX_OPC_MULA_LU_LU, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mulax", TILEGX_OPC_MULAX, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mulx", TILEGX_OPC_MULX, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mz", TILEGX_OPC_MZ, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "nap", TILEGX_OPC_NAP, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "nop", TILEGX_OPC_NOP, 0xf, 0, TREG_ZERO, 1, + { { }, { }, { }, { }, { 0, } }, + }, + { "nor", TILEGX_OPC_NOR, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "or", TILEGX_OPC_OR, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "ori", TILEGX_OPC_ORI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "pcnt", TILEGX_OPC_PCNT, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "revbits", TILEGX_OPC_REVBITS, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "revbytes", TILEGX_OPC_REVBYTES, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "rotl", TILEGX_OPC_ROTL, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "rotli", TILEGX_OPC_ROTLI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, + }, + { "shl", TILEGX_OPC_SHL, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl16insli", TILEGX_OPC_SHL16INSLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 4 }, { 8, 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "shl1add", TILEGX_OPC_SHL1ADD, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl1addx", TILEGX_OPC_SHL1ADDX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl2add", TILEGX_OPC_SHL2ADD, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl2addx", TILEGX_OPC_SHL2ADDX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl3add", TILEGX_OPC_SHL3ADD, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl3addx", TILEGX_OPC_SHL3ADDX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shli", TILEGX_OPC_SHLI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, + }, + { "shlx", TILEGX_OPC_SHLX, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlxi", TILEGX_OPC_SHLXI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrs", TILEGX_OPC_SHRS, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shrsi", TILEGX_OPC_SHRSI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, + }, + { "shru", TILEGX_OPC_SHRU, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shrui", TILEGX_OPC_SHRUI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, + }, + { "shrux", TILEGX_OPC_SHRUX, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shruxi", TILEGX_OPC_SHRUXI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "shufflebytes", TILEGX_OPC_SHUFFLEBYTES, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "st", TILEGX_OPC_ST, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } }, + }, + { "st1", TILEGX_OPC_ST1, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } }, + }, + { "st1_add", TILEGX_OPC_ST1_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "st2", TILEGX_OPC_ST2, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } }, + }, + { "st2_add", TILEGX_OPC_ST2_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "st4", TILEGX_OPC_ST4, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } }, + }, + { "st4_add", TILEGX_OPC_ST4_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "st_add", TILEGX_OPC_ST_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt", TILEGX_OPC_STNT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt1", TILEGX_OPC_STNT1, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt1_add", TILEGX_OPC_STNT1_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt2", TILEGX_OPC_STNT2, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt2_add", TILEGX_OPC_STNT2_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt4", TILEGX_OPC_STNT4, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt4_add", TILEGX_OPC_STNT4_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt_add", TILEGX_OPC_STNT_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "sub", TILEGX_OPC_SUB, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "subx", TILEGX_OPC_SUBX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "subxsc", TILEGX_OPC_SUBXSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "swint0", TILEGX_OPC_SWINT0, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint1", TILEGX_OPC_SWINT1, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint2", TILEGX_OPC_SWINT2, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint3", TILEGX_OPC_SWINT3, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "tblidxb0", TILEGX_OPC_TBLIDXB0, 0x5, 2, TREG_ZERO, 1, + { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, + }, + { "tblidxb1", TILEGX_OPC_TBLIDXB1, 0x5, 2, TREG_ZERO, 1, + { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, + }, + { "tblidxb2", TILEGX_OPC_TBLIDXB2, 0x5, 2, TREG_ZERO, 1, + { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, + }, + { "tblidxb3", TILEGX_OPC_TBLIDXB3, 0x5, 2, TREG_ZERO, 1, + { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, + }, + { "v1add", TILEGX_OPC_V1ADD, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1addi", TILEGX_OPC_V1ADDI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1adduc", TILEGX_OPC_V1ADDUC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1adiffu", TILEGX_OPC_V1ADIFFU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1avgu", TILEGX_OPC_V1AVGU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpeq", TILEGX_OPC_V1CMPEQ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpeqi", TILEGX_OPC_V1CMPEQI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmples", TILEGX_OPC_V1CMPLES, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpleu", TILEGX_OPC_V1CMPLEU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmplts", TILEGX_OPC_V1CMPLTS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpltsi", TILEGX_OPC_V1CMPLTSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpltu", TILEGX_OPC_V1CMPLTU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpltui", TILEGX_OPC_V1CMPLTUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpne", TILEGX_OPC_V1CMPNE, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1ddotpu", TILEGX_OPC_V1DDOTPU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1ddotpua", TILEGX_OPC_V1DDOTPUA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1ddotpus", TILEGX_OPC_V1DDOTPUS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1ddotpusa", TILEGX_OPC_V1DDOTPUSA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotp", TILEGX_OPC_V1DOTP, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpa", TILEGX_OPC_V1DOTPA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpu", TILEGX_OPC_V1DOTPU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpua", TILEGX_OPC_V1DOTPUA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpus", TILEGX_OPC_V1DOTPUS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpusa", TILEGX_OPC_V1DOTPUSA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1int_h", TILEGX_OPC_V1INT_H, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1int_l", TILEGX_OPC_V1INT_L, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1maxu", TILEGX_OPC_V1MAXU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1maxui", TILEGX_OPC_V1MAXUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1minu", TILEGX_OPC_V1MINU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1minui", TILEGX_OPC_V1MINUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1mnz", TILEGX_OPC_V1MNZ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1multu", TILEGX_OPC_V1MULTU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1mulu", TILEGX_OPC_V1MULU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1mulus", TILEGX_OPC_V1MULUS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1mz", TILEGX_OPC_V1MZ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1sadau", TILEGX_OPC_V1SADAU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1sadu", TILEGX_OPC_V1SADU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shl", TILEGX_OPC_V1SHL, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shli", TILEGX_OPC_V1SHLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shrs", TILEGX_OPC_V1SHRS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shrsi", TILEGX_OPC_V1SHRSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shru", TILEGX_OPC_V1SHRU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shrui", TILEGX_OPC_V1SHRUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1sub", TILEGX_OPC_V1SUB, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1subuc", TILEGX_OPC_V1SUBUC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2add", TILEGX_OPC_V2ADD, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2addi", TILEGX_OPC_V2ADDI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2addsc", TILEGX_OPC_V2ADDSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2adiffs", TILEGX_OPC_V2ADIFFS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2avgs", TILEGX_OPC_V2AVGS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpeq", TILEGX_OPC_V2CMPEQ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpeqi", TILEGX_OPC_V2CMPEQI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmples", TILEGX_OPC_V2CMPLES, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpleu", TILEGX_OPC_V2CMPLEU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmplts", TILEGX_OPC_V2CMPLTS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpltsi", TILEGX_OPC_V2CMPLTSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpltu", TILEGX_OPC_V2CMPLTU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpltui", TILEGX_OPC_V2CMPLTUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpne", TILEGX_OPC_V2CMPNE, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2dotp", TILEGX_OPC_V2DOTP, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2dotpa", TILEGX_OPC_V2DOTPA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2int_h", TILEGX_OPC_V2INT_H, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2int_l", TILEGX_OPC_V2INT_L, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2maxs", TILEGX_OPC_V2MAXS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2maxsi", TILEGX_OPC_V2MAXSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mins", TILEGX_OPC_V2MINS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2minsi", TILEGX_OPC_V2MINSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mnz", TILEGX_OPC_V2MNZ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mulfsc", TILEGX_OPC_V2MULFSC, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2muls", TILEGX_OPC_V2MULS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mults", TILEGX_OPC_V2MULTS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mz", TILEGX_OPC_V2MZ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2packh", TILEGX_OPC_V2PACKH, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2packl", TILEGX_OPC_V2PACKL, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2packuc", TILEGX_OPC_V2PACKUC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sadas", TILEGX_OPC_V2SADAS, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sadau", TILEGX_OPC_V2SADAU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sads", TILEGX_OPC_V2SADS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sadu", TILEGX_OPC_V2SADU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shl", TILEGX_OPC_V2SHL, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shli", TILEGX_OPC_V2SHLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shlsc", TILEGX_OPC_V2SHLSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shrs", TILEGX_OPC_V2SHRS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shrsi", TILEGX_OPC_V2SHRSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shru", TILEGX_OPC_V2SHRU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shrui", TILEGX_OPC_V2SHRUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sub", TILEGX_OPC_V2SUB, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2subsc", TILEGX_OPC_V2SUBSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4add", TILEGX_OPC_V4ADD, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4addsc", TILEGX_OPC_V4ADDSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4int_h", TILEGX_OPC_V4INT_H, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4int_l", TILEGX_OPC_V4INT_L, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4packsc", TILEGX_OPC_V4PACKSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4shl", TILEGX_OPC_V4SHL, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4shlsc", TILEGX_OPC_V4SHLSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4shrs", TILEGX_OPC_V4SHRS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4shru", TILEGX_OPC_V4SHRU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4sub", TILEGX_OPC_V4SUB, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4subsc", TILEGX_OPC_V4SUBSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "wh64", TILEGX_OPC_WH64, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "xor", TILEGX_OPC_XOR, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "xori", TILEGX_OPC_XORI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { NULL, TILEGX_OPC_NONE, 0, 0, TREG_ZERO, 0, { { 0, } }, + } +}; +#define BITFIELD(start, size) ((start) | (((1 << (size)) - 1) << 6)) +#define CHILD(array_index) (TILEGX_OPC_NONE + (array_index)) + +static const unsigned short decode_X0_fsm[936] = +{ + BITFIELD(22, 9) /* index 0 */, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BFEXTS, + TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTU, + TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFINS, + TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_MM, + TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(528), CHILD(578), + CHILD(583), CHILD(588), CHILD(593), CHILD(598), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, CHILD(603), CHILD(620), CHILD(637), CHILD(654), CHILD(671), + CHILD(703), CHILD(797), CHILD(814), CHILD(831), CHILD(848), CHILD(865), + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, CHILD(889), TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + BITFIELD(6, 2) /* index 513 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518), + BITFIELD(8, 2) /* index 518 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523), + BITFIELD(10, 2) /* index 523 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI, + BITFIELD(20, 2) /* index 528 */, + TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548), + BITFIELD(6, 2) /* index 533 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538), + BITFIELD(8, 2) /* index 538 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543), + BITFIELD(10, 2) /* index 543 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(0, 2) /* index 548 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553), + BITFIELD(2, 2) /* index 553 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558), + BITFIELD(4, 2) /* index 558 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563), + BITFIELD(6, 2) /* index 563 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568), + BITFIELD(8, 2) /* index 568 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573), + BITFIELD(10, 2) /* index 573 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(20, 2) /* index 578 */, + TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, TILEGX_OPC_ORI, + BITFIELD(20, 2) /* index 583 */, + TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI, TILEGX_OPC_V1CMPLTSI, + TILEGX_OPC_V1CMPLTUI, + BITFIELD(20, 2) /* index 588 */, + TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI, TILEGX_OPC_V2ADDI, + TILEGX_OPC_V2CMPEQI, + BITFIELD(20, 2) /* index 593 */, + TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI, TILEGX_OPC_V2MAXSI, + TILEGX_OPC_V2MINSI, + BITFIELD(20, 2) /* index 598 */, + TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(18, 4) /* index 603 */, + TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD, + TILEGX_OPC_AND, TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_CMPEQ, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + TILEGX_OPC_CMPNE, TILEGX_OPC_CMULAF, TILEGX_OPC_CMULA, TILEGX_OPC_CMULFR, + BITFIELD(18, 4) /* index 620 */, + TILEGX_OPC_CMULF, TILEGX_OPC_CMULHR, TILEGX_OPC_CMULH, TILEGX_OPC_CMUL, + TILEGX_OPC_CRC32_32, TILEGX_OPC_CRC32_8, TILEGX_OPC_DBLALIGN2, + TILEGX_OPC_DBLALIGN4, TILEGX_OPC_DBLALIGN6, TILEGX_OPC_DBLALIGN, + TILEGX_OPC_FDOUBLE_ADDSUB, TILEGX_OPC_FDOUBLE_ADD_FLAGS, + TILEGX_OPC_FDOUBLE_MUL_FLAGS, TILEGX_OPC_FDOUBLE_PACK1, + TILEGX_OPC_FDOUBLE_PACK2, TILEGX_OPC_FDOUBLE_SUB_FLAGS, + BITFIELD(18, 4) /* index 637 */, + TILEGX_OPC_FDOUBLE_UNPACK_MAX, TILEGX_OPC_FDOUBLE_UNPACK_MIN, + TILEGX_OPC_FSINGLE_ADD1, TILEGX_OPC_FSINGLE_ADDSUB2, + TILEGX_OPC_FSINGLE_MUL1, TILEGX_OPC_FSINGLE_MUL2, TILEGX_OPC_FSINGLE_PACK2, + TILEGX_OPC_FSINGLE_SUB1, TILEGX_OPC_MNZ, TILEGX_OPC_MULAX, + TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HS_HU, TILEGX_OPC_MULA_HS_LS, + TILEGX_OPC_MULA_HS_LU, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_HU_LS, + BITFIELD(18, 4) /* index 654 */, + TILEGX_OPC_MULA_HU_LU, TILEGX_OPC_MULA_LS_LS, TILEGX_OPC_MULA_LS_LU, + TILEGX_OPC_MULA_LU_LU, TILEGX_OPC_MULX, TILEGX_OPC_MUL_HS_HS, + TILEGX_OPC_MUL_HS_HU, TILEGX_OPC_MUL_HS_LS, TILEGX_OPC_MUL_HS_LU, + TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_HU_LS, TILEGX_OPC_MUL_HU_LU, + TILEGX_OPC_MUL_LS_LS, TILEGX_OPC_MUL_LS_LU, TILEGX_OPC_MUL_LU_LU, + TILEGX_OPC_MZ, + BITFIELD(18, 4) /* index 671 */, + TILEGX_OPC_NOR, CHILD(688), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL, + TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_SHUFFLEBYTES, + TILEGX_OPC_SUBXSC, + BITFIELD(12, 2) /* index 688 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(693), + BITFIELD(14, 2) /* index 693 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(698), + BITFIELD(16, 2) /* index 698 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(18, 4) /* index 703 */, + TILEGX_OPC_SUBX, TILEGX_OPC_SUB, CHILD(720), TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADIFFU, TILEGX_OPC_V1AVGU, + TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1DDOTPUSA, TILEGX_OPC_V1DDOTPUS, TILEGX_OPC_V1DOTPA, + BITFIELD(12, 4) /* index 720 */, + TILEGX_OPC_NONE, CHILD(737), CHILD(742), CHILD(747), CHILD(752), CHILD(757), + CHILD(762), CHILD(767), CHILD(772), CHILD(777), CHILD(782), CHILD(787), + CHILD(792), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 737 */, + TILEGX_OPC_CLZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 742 */, + TILEGX_OPC_CTZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 747 */, + TILEGX_OPC_FNOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 752 */, + TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 757 */, + TILEGX_OPC_NOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 762 */, + TILEGX_OPC_PCNT, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 767 */, + TILEGX_OPC_REVBITS, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 772 */, + TILEGX_OPC_REVBYTES, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 777 */, + TILEGX_OPC_TBLIDXB0, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 782 */, + TILEGX_OPC_TBLIDXB1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 787 */, + TILEGX_OPC_TBLIDXB2, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 792 */, + TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(18, 4) /* index 797 */, + TILEGX_OPC_V1DOTPUSA, TILEGX_OPC_V1DOTPUS, TILEGX_OPC_V1DOTP, + TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1MAXU, + TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MULTU, TILEGX_OPC_V1MULUS, + TILEGX_OPC_V1MULU, TILEGX_OPC_V1MZ, TILEGX_OPC_V1SADAU, TILEGX_OPC_V1SADU, + TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS, + BITFIELD(18, 4) /* index 814 */, + TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC, TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC, + TILEGX_OPC_V2ADD, TILEGX_OPC_V2ADIFFS, TILEGX_OPC_V2AVGS, + TILEGX_OPC_V2CMPEQ, TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU, + TILEGX_OPC_V2CMPLTS, TILEGX_OPC_V2CMPLTU, TILEGX_OPC_V2CMPNE, + TILEGX_OPC_V2DOTPA, TILEGX_OPC_V2DOTP, TILEGX_OPC_V2INT_H, + BITFIELD(18, 4) /* index 831 */, + TILEGX_OPC_V2INT_L, TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ, + TILEGX_OPC_V2MULFSC, TILEGX_OPC_V2MULS, TILEGX_OPC_V2MULTS, TILEGX_OPC_V2MZ, + TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC, + TILEGX_OPC_V2SADAS, TILEGX_OPC_V2SADAU, TILEGX_OPC_V2SADS, + TILEGX_OPC_V2SADU, TILEGX_OPC_V2SHLSC, + BITFIELD(18, 4) /* index 848 */, + TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU, TILEGX_OPC_V2SUBSC, + TILEGX_OPC_V2SUB, TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H, + TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC, + TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC, + TILEGX_OPC_V4SUB, + BITFIELD(18, 3) /* index 865 */, + CHILD(874), CHILD(877), CHILD(880), CHILD(883), CHILD(886), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 874 */, + TILEGX_OPC_XOR, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 877 */, + TILEGX_OPC_V1DDOTPUA, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 880 */, + TILEGX_OPC_V1DDOTPU, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 883 */, + TILEGX_OPC_V1DOTPUA, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 886 */, + TILEGX_OPC_V1DOTPU, TILEGX_OPC_NONE, + BITFIELD(18, 4) /* index 889 */, + TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI, + TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI, + TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI, + TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, + BITFIELD(0, 2) /* index 906 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(911), + BITFIELD(2, 2) /* index 911 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(916), + BITFIELD(4, 2) /* index 916 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(921), + BITFIELD(6, 2) /* index 921 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(926), + BITFIELD(8, 2) /* index 926 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(931), + BITFIELD(10, 2) /* index 931 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + TILEGX_OPC_INFOL, +}; + +static const unsigned short decode_X1_fsm[1206] = +{ + BITFIELD(53, 9) /* index 0 */, + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BEQZT, + TILEGX_OPC_BEQZT, TILEGX_OPC_BEQZ, TILEGX_OPC_BEQZ, TILEGX_OPC_BGEZT, + TILEGX_OPC_BGEZT, TILEGX_OPC_BGEZ, TILEGX_OPC_BGEZ, TILEGX_OPC_BGTZT, + TILEGX_OPC_BGTZT, TILEGX_OPC_BGTZ, TILEGX_OPC_BGTZ, TILEGX_OPC_BLBCT, + TILEGX_OPC_BLBCT, TILEGX_OPC_BLBC, TILEGX_OPC_BLBC, TILEGX_OPC_BLBST, + TILEGX_OPC_BLBST, TILEGX_OPC_BLBS, TILEGX_OPC_BLBS, TILEGX_OPC_BLEZT, + TILEGX_OPC_BLEZT, TILEGX_OPC_BLEZ, TILEGX_OPC_BLEZ, TILEGX_OPC_BLTZT, + TILEGX_OPC_BLTZT, TILEGX_OPC_BLTZ, TILEGX_OPC_BLTZ, TILEGX_OPC_BNEZT, + TILEGX_OPC_BNEZT, TILEGX_OPC_BNEZ, TILEGX_OPC_BNEZ, CHILD(528), CHILD(578), + CHILD(598), CHILD(663), CHILD(683), CHILD(688), CHILD(693), CHILD(698), + CHILD(703), CHILD(708), CHILD(713), CHILD(718), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + CHILD(723), CHILD(740), CHILD(772), CHILD(789), CHILD(1108), CHILD(1125), + CHILD(1142), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1159), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), + BITFIELD(37, 2) /* index 513 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518), + BITFIELD(39, 2) /* index 518 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523), + BITFIELD(41, 2) /* index 523 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI, + BITFIELD(51, 2) /* index 528 */, + TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548), + BITFIELD(37, 2) /* index 533 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538), + BITFIELD(39, 2) /* index 538 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543), + BITFIELD(41, 2) /* index 543 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(31, 2) /* index 548 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553), + BITFIELD(33, 2) /* index 553 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558), + BITFIELD(35, 2) /* index 558 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563), + BITFIELD(37, 2) /* index 563 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568), + BITFIELD(39, 2) /* index 568 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573), + BITFIELD(41, 2) /* index 573 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(51, 2) /* index 578 */, + TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, CHILD(583), + BITFIELD(31, 2) /* index 583 */, + TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(588), + BITFIELD(33, 2) /* index 588 */, + TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(593), + BITFIELD(35, 2) /* index 593 */, + TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, + TILEGX_OPC_PREFETCH_ADD_L1_FAULT, + BITFIELD(51, 2) /* index 598 */, + CHILD(603), CHILD(618), CHILD(633), CHILD(648), + BITFIELD(31, 2) /* index 603 */, + TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(608), + BITFIELD(33, 2) /* index 608 */, + TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(613), + BITFIELD(35, 2) /* index 613 */, + TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, + TILEGX_OPC_PREFETCH_ADD_L1, + BITFIELD(31, 2) /* index 618 */, + TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(623), + BITFIELD(33, 2) /* index 623 */, + TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(628), + BITFIELD(35, 2) /* index 628 */, + TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, + TILEGX_OPC_PREFETCH_ADD_L2_FAULT, + BITFIELD(31, 2) /* index 633 */, + TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(638), + BITFIELD(33, 2) /* index 638 */, + TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(643), + BITFIELD(35, 2) /* index 643 */, + TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, + TILEGX_OPC_PREFETCH_ADD_L2, + BITFIELD(31, 2) /* index 648 */, + TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, CHILD(653), + BITFIELD(33, 2) /* index 653 */, + TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, CHILD(658), + BITFIELD(35, 2) /* index 658 */, + TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, + TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + BITFIELD(51, 2) /* index 663 */, + CHILD(668), TILEGX_OPC_LDNT1S_ADD, TILEGX_OPC_LDNT1U_ADD, + TILEGX_OPC_LDNT2S_ADD, + BITFIELD(31, 2) /* index 668 */, + TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(673), + BITFIELD(33, 2) /* index 673 */, + TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(678), + BITFIELD(35, 2) /* index 678 */, + TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, + TILEGX_OPC_PREFETCH_ADD_L3, + BITFIELD(51, 2) /* index 683 */, + TILEGX_OPC_LDNT2U_ADD, TILEGX_OPC_LDNT4S_ADD, TILEGX_OPC_LDNT4U_ADD, + TILEGX_OPC_LDNT_ADD, + BITFIELD(51, 2) /* index 688 */, + TILEGX_OPC_LD_ADD, TILEGX_OPC_LDNA_ADD, TILEGX_OPC_MFSPR, TILEGX_OPC_MTSPR, + BITFIELD(51, 2) /* index 693 */, + TILEGX_OPC_ORI, TILEGX_OPC_ST1_ADD, TILEGX_OPC_ST2_ADD, TILEGX_OPC_ST4_ADD, + BITFIELD(51, 2) /* index 698 */, + TILEGX_OPC_STNT1_ADD, TILEGX_OPC_STNT2_ADD, TILEGX_OPC_STNT4_ADD, + TILEGX_OPC_STNT_ADD, + BITFIELD(51, 2) /* index 703 */, + TILEGX_OPC_ST_ADD, TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI, + TILEGX_OPC_V1CMPLTSI, + BITFIELD(51, 2) /* index 708 */, + TILEGX_OPC_V1CMPLTUI, TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI, + TILEGX_OPC_V2ADDI, + BITFIELD(51, 2) /* index 713 */, + TILEGX_OPC_V2CMPEQI, TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI, + TILEGX_OPC_V2MAXSI, + BITFIELD(51, 2) /* index 718 */, + TILEGX_OPC_V2MINSI, TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(49, 4) /* index 723 */, + TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD, + TILEGX_OPC_AND, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPEXCH4, TILEGX_OPC_CMPEXCH, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + TILEGX_OPC_CMPNE, TILEGX_OPC_DBLALIGN2, TILEGX_OPC_DBLALIGN4, + TILEGX_OPC_DBLALIGN6, + BITFIELD(49, 4) /* index 740 */, + TILEGX_OPC_EXCH4, TILEGX_OPC_EXCH, TILEGX_OPC_FETCHADD4, + TILEGX_OPC_FETCHADDGEZ4, TILEGX_OPC_FETCHADDGEZ, TILEGX_OPC_FETCHADD, + TILEGX_OPC_FETCHAND4, TILEGX_OPC_FETCHAND, TILEGX_OPC_FETCHOR4, + TILEGX_OPC_FETCHOR, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, TILEGX_OPC_NOR, + CHILD(757), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX, + BITFIELD(43, 2) /* index 757 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(762), + BITFIELD(45, 2) /* index 762 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(767), + BITFIELD(47, 2) /* index 767 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(49, 4) /* index 772 */, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL, + TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_ST1, + TILEGX_OPC_ST2, TILEGX_OPC_ST4, TILEGX_OPC_STNT1, TILEGX_OPC_STNT2, + TILEGX_OPC_STNT4, + BITFIELD(46, 7) /* index 789 */, + TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, + TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, + TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, + TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_SUBXSC, + TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, + TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBX, + TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, + TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUB, + TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, + TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, CHILD(918), CHILD(927), + CHILD(1006), CHILD(1090), CHILD(1099), TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, + TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, + TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, + TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, + TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, + TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, + TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, + TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, + TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, + BITFIELD(43, 3) /* index 918 */, + TILEGX_OPC_NONE, TILEGX_OPC_DRAIN, TILEGX_OPC_DTLBPR, TILEGX_OPC_FINV, + TILEGX_OPC_FLUSHWB, TILEGX_OPC_FLUSH, TILEGX_OPC_FNOP, TILEGX_OPC_ICOH, + BITFIELD(43, 3) /* index 927 */, + CHILD(936), TILEGX_OPC_INV, TILEGX_OPC_IRET, TILEGX_OPC_JALRP, + TILEGX_OPC_JALR, TILEGX_OPC_JRP, TILEGX_OPC_JR, CHILD(991), + BITFIELD(31, 2) /* index 936 */, + CHILD(941), CHILD(966), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(33, 2) /* index 941 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(946), + BITFIELD(35, 2) /* index 946 */, + TILEGX_OPC_ILL, CHILD(951), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(37, 2) /* index 951 */, + TILEGX_OPC_ILL, CHILD(956), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(39, 2) /* index 956 */, + TILEGX_OPC_ILL, CHILD(961), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(41, 2) /* index 961 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_BPT, TILEGX_OPC_ILL, + BITFIELD(33, 2) /* index 966 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(971), + BITFIELD(35, 2) /* index 971 */, + TILEGX_OPC_ILL, CHILD(976), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(37, 2) /* index 976 */, + TILEGX_OPC_ILL, CHILD(981), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(39, 2) /* index 981 */, + TILEGX_OPC_ILL, CHILD(986), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(41, 2) /* index 986 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_RAISE, TILEGX_OPC_ILL, + BITFIELD(31, 2) /* index 991 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(996), + BITFIELD(33, 2) /* index 996 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(1001), + BITFIELD(35, 2) /* index 1001 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, + TILEGX_OPC_PREFETCH_L1_FAULT, + BITFIELD(43, 3) /* index 1006 */, + CHILD(1015), CHILD(1030), CHILD(1045), CHILD(1060), CHILD(1075), + TILEGX_OPC_LDNA, TILEGX_OPC_LDNT1S, TILEGX_OPC_LDNT1U, + BITFIELD(31, 2) /* index 1015 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1020), + BITFIELD(33, 2) /* index 1020 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1025), + BITFIELD(35, 2) /* index 1025 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH, + BITFIELD(31, 2) /* index 1030 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1035), + BITFIELD(33, 2) /* index 1035 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1040), + BITFIELD(35, 2) /* index 1040 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, + TILEGX_OPC_PREFETCH_L2_FAULT, + BITFIELD(31, 2) /* index 1045 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1050), + BITFIELD(33, 2) /* index 1050 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1055), + BITFIELD(35, 2) /* index 1055 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2, + BITFIELD(31, 2) /* index 1060 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1065), + BITFIELD(33, 2) /* index 1065 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1070), + BITFIELD(35, 2) /* index 1070 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, + TILEGX_OPC_PREFETCH_L3_FAULT, + BITFIELD(31, 2) /* index 1075 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1080), + BITFIELD(33, 2) /* index 1080 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1085), + BITFIELD(35, 2) /* index 1085 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3, + BITFIELD(43, 3) /* index 1090 */, + TILEGX_OPC_LDNT2S, TILEGX_OPC_LDNT2U, TILEGX_OPC_LDNT4S, TILEGX_OPC_LDNT4U, + TILEGX_OPC_LDNT, TILEGX_OPC_LD, TILEGX_OPC_LNK, TILEGX_OPC_MF, + BITFIELD(43, 3) /* index 1099 */, + TILEGX_OPC_NAP, TILEGX_OPC_NOP, TILEGX_OPC_SWINT0, TILEGX_OPC_SWINT1, + TILEGX_OPC_SWINT2, TILEGX_OPC_SWINT3, TILEGX_OPC_WH64, TILEGX_OPC_NONE, + BITFIELD(49, 4) /* index 1108 */, + TILEGX_OPC_V1MAXU, TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MZ, + TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS, TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC, + TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC, TILEGX_OPC_V2ADD, TILEGX_OPC_V2CMPEQ, + TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU, TILEGX_OPC_V2CMPLTS, + TILEGX_OPC_V2CMPLTU, + BITFIELD(49, 4) /* index 1125 */, + TILEGX_OPC_V2CMPNE, TILEGX_OPC_V2INT_H, TILEGX_OPC_V2INT_L, + TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ, TILEGX_OPC_V2MZ, + TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC, + TILEGX_OPC_V2SHLSC, TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU, + TILEGX_OPC_V2SUBSC, TILEGX_OPC_V2SUB, + BITFIELD(49, 4) /* index 1142 */, + TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H, + TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC, + TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC, + TILEGX_OPC_V4SUB, TILEGX_OPC_XOR, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(49, 4) /* index 1159 */, + TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI, + TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI, + TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI, + TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, + BITFIELD(31, 2) /* index 1176 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1181), + BITFIELD(33, 2) /* index 1181 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1186), + BITFIELD(35, 2) /* index 1186 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1191), + BITFIELD(37, 2) /* index 1191 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1196), + BITFIELD(39, 2) /* index 1196 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1201), + BITFIELD(41, 2) /* index 1201 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + TILEGX_OPC_INFOL, +}; + +static const unsigned short decode_Y0_fsm[178] = +{ + BITFIELD(27, 4) /* index 0 */, + CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI, + TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(118), CHILD(123), + CHILD(128), CHILD(133), CHILD(153), CHILD(158), CHILD(163), CHILD(168), + CHILD(173), + BITFIELD(6, 2) /* index 17 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22), + BITFIELD(8, 2) /* index 22 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27), + BITFIELD(10, 2) /* index 27 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(0, 2) /* index 32 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37), + BITFIELD(2, 2) /* index 37 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42), + BITFIELD(4, 2) /* index 42 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47), + BITFIELD(6, 2) /* index 47 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52), + BITFIELD(8, 2) /* index 52 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57), + BITFIELD(10, 2) /* index 57 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(18, 2) /* index 62 */, + TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB, + BITFIELD(15, 5) /* index 67 */, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(100), + CHILD(109), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(12, 3) /* index 100 */, + TILEGX_OPC_NONE, TILEGX_OPC_CLZ, TILEGX_OPC_CTZ, TILEGX_OPC_FNOP, + TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NOP, TILEGX_OPC_PCNT, + TILEGX_OPC_REVBITS, + BITFIELD(12, 3) /* index 109 */, + TILEGX_OPC_REVBYTES, TILEGX_OPC_TBLIDXB0, TILEGX_OPC_TBLIDXB1, + TILEGX_OPC_TBLIDXB2, TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, + BITFIELD(18, 2) /* index 118 */, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + BITFIELD(18, 2) /* index 123 */, + TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE, TILEGX_OPC_MULAX, TILEGX_OPC_MULX, + BITFIELD(18, 2) /* index 128 */, + TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, + BITFIELD(18, 2) /* index 133 */, + TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(138), TILEGX_OPC_XOR, + BITFIELD(12, 2) /* index 138 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(143), + BITFIELD(14, 2) /* index 143 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(148), + BITFIELD(16, 2) /* index 148 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(18, 2) /* index 153 */, + TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU, + BITFIELD(18, 2) /* index 158 */, + TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX, + TILEGX_OPC_SHL3ADDX, + BITFIELD(18, 2) /* index 163 */, + TILEGX_OPC_MUL_HS_HS, TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_LS_LS, + TILEGX_OPC_MUL_LU_LU, + BITFIELD(18, 2) /* index 168 */, + TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_LS_LS, + TILEGX_OPC_MULA_LU_LU, + BITFIELD(18, 2) /* index 173 */, + TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, +}; + +static const unsigned short decode_Y1_fsm[167] = +{ + BITFIELD(58, 4) /* index 0 */, + TILEGX_OPC_NONE, CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI, + TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(117), CHILD(122), + CHILD(127), CHILD(132), CHILD(152), CHILD(157), CHILD(162), TILEGX_OPC_NONE, + BITFIELD(37, 2) /* index 17 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22), + BITFIELD(39, 2) /* index 22 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27), + BITFIELD(41, 2) /* index 27 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(31, 2) /* index 32 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37), + BITFIELD(33, 2) /* index 37 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42), + BITFIELD(35, 2) /* index 42 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47), + BITFIELD(37, 2) /* index 47 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52), + BITFIELD(39, 2) /* index 52 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57), + BITFIELD(41, 2) /* index 57 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(49, 2) /* index 62 */, + TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB, + BITFIELD(47, 4) /* index 67 */, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(84), + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(43, 3) /* index 84 */, + CHILD(93), CHILD(96), CHILD(99), CHILD(102), CHILD(105), CHILD(108), + CHILD(111), CHILD(114), + BITFIELD(46, 1) /* index 93 */, + TILEGX_OPC_NONE, TILEGX_OPC_FNOP, + BITFIELD(46, 1) /* index 96 */, + TILEGX_OPC_NONE, TILEGX_OPC_ILL, + BITFIELD(46, 1) /* index 99 */, + TILEGX_OPC_NONE, TILEGX_OPC_JALRP, + BITFIELD(46, 1) /* index 102 */, + TILEGX_OPC_NONE, TILEGX_OPC_JALR, + BITFIELD(46, 1) /* index 105 */, + TILEGX_OPC_NONE, TILEGX_OPC_JRP, + BITFIELD(46, 1) /* index 108 */, + TILEGX_OPC_NONE, TILEGX_OPC_JR, + BITFIELD(46, 1) /* index 111 */, + TILEGX_OPC_NONE, TILEGX_OPC_LNK, + BITFIELD(46, 1) /* index 114 */, + TILEGX_OPC_NONE, TILEGX_OPC_NOP, + BITFIELD(49, 2) /* index 117 */, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + BITFIELD(49, 2) /* index 122 */, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE, + BITFIELD(49, 2) /* index 127 */, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, + BITFIELD(49, 2) /* index 132 */, + TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(137), TILEGX_OPC_XOR, + BITFIELD(43, 2) /* index 137 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(142), + BITFIELD(45, 2) /* index 142 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(147), + BITFIELD(47, 2) /* index 147 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(49, 2) /* index 152 */, + TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU, + BITFIELD(49, 2) /* index 157 */, + TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX, + TILEGX_OPC_SHL3ADDX, + BITFIELD(49, 2) /* index 162 */, + TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, +}; + +static const unsigned short decode_Y2_fsm[118] = +{ + BITFIELD(62, 2) /* index 0 */, + TILEGX_OPC_NONE, CHILD(5), CHILD(66), CHILD(109), + BITFIELD(55, 3) /* index 5 */, + CHILD(14), CHILD(14), CHILD(14), CHILD(17), CHILD(40), CHILD(40), CHILD(40), + CHILD(43), + BITFIELD(26, 1) /* index 14 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1U, + BITFIELD(26, 1) /* index 17 */, + CHILD(20), CHILD(30), + BITFIELD(51, 2) /* index 20 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(25), + BITFIELD(53, 2) /* index 25 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, + TILEGX_OPC_PREFETCH_L1_FAULT, + BITFIELD(51, 2) /* index 30 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(35), + BITFIELD(53, 2) /* index 35 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH, + BITFIELD(26, 1) /* index 40 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2U, + BITFIELD(26, 1) /* index 43 */, + CHILD(46), CHILD(56), + BITFIELD(51, 2) /* index 46 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(51), + BITFIELD(53, 2) /* index 51 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, + TILEGX_OPC_PREFETCH_L2_FAULT, + BITFIELD(51, 2) /* index 56 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(61), + BITFIELD(53, 2) /* index 61 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2, + BITFIELD(56, 2) /* index 66 */, + CHILD(71), CHILD(74), CHILD(90), CHILD(93), + BITFIELD(26, 1) /* index 71 */, + TILEGX_OPC_NONE, TILEGX_OPC_LD4S, + BITFIELD(26, 1) /* index 74 */, + TILEGX_OPC_NONE, CHILD(77), + BITFIELD(51, 2) /* index 77 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(82), + BITFIELD(53, 2) /* index 82 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(87), + BITFIELD(55, 1) /* index 87 */, + TILEGX_OPC_LD4S, TILEGX_OPC_PREFETCH_L3_FAULT, + BITFIELD(26, 1) /* index 90 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD, + BITFIELD(26, 1) /* index 93 */, + CHILD(96), TILEGX_OPC_LD, + BITFIELD(51, 2) /* index 96 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(101), + BITFIELD(53, 2) /* index 101 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(106), + BITFIELD(55, 1) /* index 106 */, + TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3, + BITFIELD(26, 1) /* index 109 */, + CHILD(112), CHILD(115), + BITFIELD(57, 1) /* index 112 */, + TILEGX_OPC_ST1, TILEGX_OPC_ST4, + BITFIELD(57, 1) /* index 115 */, + TILEGX_OPC_ST2, TILEGX_OPC_ST, +}; + +#undef BITFIELD +#undef CHILD +const unsigned short * const +tilegx_bundle_decoder_fsms[TILEGX_NUM_PIPELINE_ENCODINGS] = +{ + decode_X0_fsm, + decode_X1_fsm, + decode_Y0_fsm, + decode_Y1_fsm, + decode_Y2_fsm +}; +const struct tilegx_operand tilegx_operands[35] = +{ + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X0), + 8, 1, 0, 0, 0, 0, + create_Imm8_X0, get_Imm8_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X1), + 8, 1, 0, 0, 0, 0, + create_Imm8_X1, get_Imm8_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y0), + 8, 1, 0, 0, 0, 0, + create_Imm8_Y0, get_Imm8_Y0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y1), + 8, 1, 0, 0, 0, 0, + create_Imm8_Y1, get_Imm8_Y1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X0_HW0_LAST), + 16, 1, 0, 0, 0, 0, + create_Imm16_X0, get_Imm16_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X1_HW0_LAST), + 16, 1, 0, 0, 0, 0, + create_Imm16_X1, get_Imm16_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_X0, get_Dest_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_X0, get_SrcA_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_X1, get_Dest_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_X1, get_SrcA_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_Y0, get_Dest_Y0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y0, get_SrcA_Y0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_Y1, get_Dest_Y1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y1, get_SrcA_Y1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y2, get_SrcA_Y2 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_SrcA_X1, get_SrcA_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_X0, get_SrcB_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_X1, get_SrcB_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_Y0, get_SrcB_Y0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_Y1, get_SrcB_Y1 + }, + { + TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_BROFF_X1), + 17, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_BrOff_X1, get_BrOff_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMSTART_X0), + 6, 0, 0, 0, 0, 0, + create_BFStart_X0, get_BFStart_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMEND_X0), + 6, 0, 0, 0, 0, 0, + create_BFEnd_X0, get_BFEnd_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_Dest_X0, get_Dest_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_Dest_Y0, get_Dest_Y0 + }, + { + TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_JUMPOFF_X1), + 27, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_JumpOff_X1, get_JumpOff_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_SrcBDest_Y2, get_SrcBDest_Y2 + }, + { + TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MF_IMM14_X1), + 14, 0, 0, 0, 0, 0, + create_MF_Imm14_X1, get_MF_Imm14_X1 + }, + { + TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MT_IMM14_X1), + 14, 0, 0, 0, 0, 0, + create_MT_Imm14_X1, get_MT_Imm14_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X0), + 6, 0, 0, 0, 0, 0, + create_ShAmt_X0, get_ShAmt_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X1), + 6, 0, 0, 0, 0, 0, + create_ShAmt_X1, get_ShAmt_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y0), + 6, 0, 0, 0, 0, 0, + create_ShAmt_Y0, get_ShAmt_Y0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y1), + 6, 0, 0, 0, 0, 0, + create_ShAmt_Y1, get_ShAmt_Y1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcBDest_Y2, get_SrcBDest_Y2 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_DEST_IMM8_X1), + 8, 1, 0, 0, 0, 0, + create_Dest_Imm8_X1, get_Dest_Imm8_X1 + } +}; + + + + +/* Given a set of bundle bits and the lookup FSM for a specific pipe, + * returns which instruction the bundle contains in that pipe. + */ +static const struct tilegx_opcode * +find_opcode(tilegx_bundle_bits bits, const unsigned short *table) +{ + int index = 0; + + while (1) + { + unsigned short bitspec = table[index]; + unsigned int bitfield = + ((unsigned int)(bits >> (bitspec & 63))) & (bitspec >> 6); + + unsigned short next = table[index + 1 + bitfield]; + if (next <= TILEGX_OPC_NONE) + return &tilegx_opcodes[next]; + + index = next - TILEGX_OPC_NONE; + } +} + + +int +parse_insn_tilegx(tilegx_bundle_bits bits, + unsigned long long pc, + struct tilegx_decoded_instruction + decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]) +{ + int num_instructions = 0; + int pipe; + + int min_pipe, max_pipe; + if ((bits & TILEGX_BUNDLE_MODE_MASK) == 0) + { + min_pipe = TILEGX_PIPELINE_X0; + max_pipe = TILEGX_PIPELINE_X1; + } + else + { + min_pipe = TILEGX_PIPELINE_Y0; + max_pipe = TILEGX_PIPELINE_Y2; + } + + /* For each pipe, find an instruction that fits. */ + for (pipe = min_pipe; pipe <= max_pipe; pipe++) + { + const struct tilegx_opcode *opc; + struct tilegx_decoded_instruction *d; + int i; + + d = &decoded[num_instructions++]; + opc = find_opcode (bits, tilegx_bundle_decoder_fsms[pipe]); + d->opcode = opc; + + /* Decode each operand, sign extending, etc. as appropriate. */ + for (i = 0; i < opc->num_operands; i++) + { + const struct tilegx_operand *op = + &tilegx_operands[opc->operands[pipe][i]]; + int raw_opval = op->extract (bits); + long long opval; + + if (op->is_signed) + { + /* Sign-extend the operand. */ + int shift = (int)((sizeof(int) * 8) - op->num_bits); + raw_opval = (raw_opval << shift) >> shift; + } + + /* Adjust PC-relative scaled branch offsets. */ + if (op->type == TILEGX_OP_TYPE_ADDRESS) + opval = (raw_opval * TILEGX_BUNDLE_SIZE_IN_BYTES) + pc; + else + opval = raw_opval; + + /* Record the final value. */ + d->operands[i] = op; + d->operand_values[i] = opval; + } + } + + return num_instructions; +} diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c new file mode 100644 index 00000000..f6f50f2a --- /dev/null +++ b/arch/tile/kernel/time.c @@ -0,0 +1,235 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Support the cycle counter clocksource and tile timer clock event device. + */ + +#include <linux/time.h> +#include <linux/timex.h> +#include <linux/clocksource.h> +#include <linux/clockchips.h> +#include <linux/hardirq.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <asm/irq_regs.h> +#include <asm/traps.h> +#include <hv/hypervisor.h> +#include <arch/interrupts.h> +#include <arch/spr_def.h> + + +/* + * Define the cycle counter clock source. + */ + +/* How many cycles per second we are running at. */ +static cycles_t cycles_per_sec __write_once; + +cycles_t get_clock_rate(void) +{ + return cycles_per_sec; +} + +#if CHIP_HAS_SPLIT_CYCLE() +cycles_t get_cycles(void) +{ + unsigned int high = __insn_mfspr(SPR_CYCLE_HIGH); + unsigned int low = __insn_mfspr(SPR_CYCLE_LOW); + unsigned int high2 = __insn_mfspr(SPR_CYCLE_HIGH); + + while (unlikely(high != high2)) { + low = __insn_mfspr(SPR_CYCLE_LOW); + high = high2; + high2 = __insn_mfspr(SPR_CYCLE_HIGH); + } + + return (((cycles_t)high) << 32) | low; +} +EXPORT_SYMBOL(get_cycles); +#endif + +/* + * We use a relatively small shift value so that sched_clock() + * won't wrap around very often. + */ +#define SCHED_CLOCK_SHIFT 10 + +static unsigned long sched_clock_mult __write_once; + +static cycles_t clocksource_get_cycles(struct clocksource *cs) +{ + return get_cycles(); +} + +static struct clocksource cycle_counter_cs = { + .name = "cycle counter", + .rating = 300, + .read = clocksource_get_cycles, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +/* + * Called very early from setup_arch() to set cycles_per_sec. + * We initialize it early so we can use it to set up loops_per_jiffy. + */ +void __init setup_clock(void) +{ + cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED); + sched_clock_mult = + clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT); +} + +void __init calibrate_delay(void) +{ + loops_per_jiffy = get_clock_rate() / HZ; + pr_info("Clock rate yields %lu.%02lu BogoMIPS (lpj=%lu)\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy); +} + +/* Called fairly late in init/main.c, but before we go smp. */ +void __init time_init(void) +{ + /* Initialize and register the clock source. */ + clocksource_register_hz(&cycle_counter_cs, cycles_per_sec); + + /* Start up the tile-timer interrupt source on the boot cpu. */ + setup_tile_timer(); +} + + +/* + * Define the tile timer clock event device. The timer is driven by + * the TILE_TIMER_CONTROL register, which consists of a 31-bit down + * counter, plus bit 31, which signifies that the counter has wrapped + * from zero to (2**31) - 1. The INT_TILE_TIMER interrupt will be + * raised as long as bit 31 is set. + * + * The TILE_MINSEC value represents the largest range of real-time + * we can possibly cover with the timer, based on MAX_TICK combined + * with the slowest reasonable clock rate we might run at. + */ + +#define MAX_TICK 0x7fffffff /* we have 31 bits of countdown timer */ +#define TILE_MINSEC 5 /* timer covers no more than 5 seconds */ + +static int tile_timer_set_next_event(unsigned long ticks, + struct clock_event_device *evt) +{ + BUG_ON(ticks > MAX_TICK); + __insn_mtspr(SPR_TILE_TIMER_CONTROL, ticks); + arch_local_irq_unmask_now(INT_TILE_TIMER); + return 0; +} + +/* + * Whenever anyone tries to change modes, we just mask interrupts + * and wait for the next event to get set. + */ +static void tile_timer_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + arch_local_irq_mask_now(INT_TILE_TIMER); +} + +/* + * Set min_delta_ns to 1 microsecond, since it takes about + * that long to fire the interrupt. + */ +static DEFINE_PER_CPU(struct clock_event_device, tile_timer) = { + .name = "tile timer", + .features = CLOCK_EVT_FEAT_ONESHOT, + .min_delta_ns = 1000, + .rating = 100, + .irq = -1, + .set_next_event = tile_timer_set_next_event, + .set_mode = tile_timer_set_mode, +}; + +void __cpuinit setup_tile_timer(void) +{ + struct clock_event_device *evt = &__get_cpu_var(tile_timer); + + /* Fill in fields that are speed-specific. */ + clockevents_calc_mult_shift(evt, cycles_per_sec, TILE_MINSEC); + evt->max_delta_ns = clockevent_delta2ns(MAX_TICK, evt); + + /* Mark as being for this cpu only. */ + evt->cpumask = cpumask_of(smp_processor_id()); + + /* Start out with timer not firing. */ + arch_local_irq_mask_now(INT_TILE_TIMER); + + /* Register tile timer. */ + clockevents_register_device(evt); +} + +/* Called from the interrupt vector. */ +void do_timer_interrupt(struct pt_regs *regs, int fault_num) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct clock_event_device *evt = &__get_cpu_var(tile_timer); + + /* + * Mask the timer interrupt here, since we are a oneshot timer + * and there are now by definition no events pending. + */ + arch_local_irq_mask(INT_TILE_TIMER); + + /* Track time spent here in an interrupt context */ + irq_enter(); + + /* Track interrupt count. */ + __get_cpu_var(irq_stat).irq_timer_count++; + + /* Call the generic timer handler */ + evt->event_handler(evt); + + /* + * Track time spent against the current process again and + * process any softirqs if they are waiting. + */ + irq_exit(); + + set_irq_regs(old_regs); +} + +/* + * Scheduler clock - returns current time in nanosec units. + * Note that with LOCKDEP, this is called during lockdep_init(), and + * we will claim that sched_clock() is zero for a little while, until + * we run setup_clock(), above. + */ +unsigned long long sched_clock(void) +{ + return clocksource_cyc2ns(get_cycles(), + sched_clock_mult, SCHED_CLOCK_SHIFT); +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +/* + * Use the tile timer to convert nsecs to core clock cycles, relying + * on it having the same frequency as SPR_CYCLE. + */ +cycles_t ns2cycles(unsigned long nsecs) +{ + struct clock_event_device *dev = &__get_cpu_var(tile_timer); + return ((u64)nsecs * dev->mult) >> dev->shift; +} diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c new file mode 100644 index 00000000..a5f241c2 --- /dev/null +++ b/arch/tile/kernel/tlb.c @@ -0,0 +1,97 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +#include <linux/cpumask.h> +#include <linux/module.h> +#include <asm/tlbflush.h> +#include <asm/homecache.h> +#include <hv/hypervisor.h> + +/* From tlbflush.h */ +DEFINE_PER_CPU(int, current_asid); +int min_asid, max_asid; + +/* + * Note that we flush the L1I (for VM_EXEC pages) as well as the TLB + * so that when we are unmapping an executable page, we also flush it. + * Combined with flushing the L1I at context switch time, this means + * we don't have to do any other icache flushes. + */ + +void flush_tlb_mm(struct mm_struct *mm) +{ + HV_Remote_ASID asids[NR_CPUS]; + int i = 0, cpu; + for_each_cpu(cpu, mm_cpumask(mm)) { + HV_Remote_ASID *asid = &asids[i++]; + asid->y = cpu / smp_topology.width; + asid->x = cpu % smp_topology.width; + asid->asid = per_cpu(current_asid, cpu); + } + flush_remote(0, HV_FLUSH_EVICT_L1I, mm_cpumask(mm), + 0, 0, 0, NULL, asids, i); +} + +void flush_tlb_current_task(void) +{ + flush_tlb_mm(current->mm); +} + +void flush_tlb_page_mm(const struct vm_area_struct *vma, struct mm_struct *mm, + unsigned long va) +{ + unsigned long size = hv_page_size(vma); + int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; + flush_remote(0, cache, mm_cpumask(mm), + va, size, size, mm_cpumask(mm), NULL, 0); +} + +void flush_tlb_page(const struct vm_area_struct *vma, unsigned long va) +{ + flush_tlb_page_mm(vma, vma->vm_mm, va); +} +EXPORT_SYMBOL(flush_tlb_page); + +void flush_tlb_range(const struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + unsigned long size = hv_page_size(vma); + struct mm_struct *mm = vma->vm_mm; + int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; + flush_remote(0, cache, mm_cpumask(mm), start, end - start, size, + mm_cpumask(mm), NULL, 0); +} + +void flush_tlb_all(void) +{ + int i; + for (i = 0; ; ++i) { + HV_VirtAddrRange r = hv_inquire_virtual(i); + if (r.size == 0) + break; + flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask, + r.start, r.size, PAGE_SIZE, cpu_online_mask, + NULL, 0); + flush_remote(0, 0, NULL, + r.start, r.size, HPAGE_SIZE, cpu_online_mask, + NULL, 0); + } +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask, + start, end - start, PAGE_SIZE, cpu_online_mask, NULL, 0); +} diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c new file mode 100644 index 00000000..73cff814 --- /dev/null +++ b/arch/tile/kernel/traps.c @@ -0,0 +1,329 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/reboot.h> +#include <linux/uaccess.h> +#include <linux/ptrace.h> +#include <asm/stack.h> +#include <asm/traps.h> +#include <asm/setup.h> + +#include <arch/interrupts.h> +#include <arch/spr_def.h> +#include <arch/opcode.h> + +void __init trap_init(void) +{ + /* Nothing needed here since we link code at .intrpt1 */ +} + +int unaligned_fixup = 1; + +static int __init setup_unaligned_fixup(char *str) +{ + /* + * Say "=-1" to completely disable it. If you just do "=0", we + * will still parse the instruction, then fire a SIGBUS with + * the correct address from inside the single_step code. + */ + long val; + if (strict_strtol(str, 0, &val) != 0) + return 0; + unaligned_fixup = val; + pr_info("Fixups for unaligned data accesses are %s\n", + unaligned_fixup >= 0 ? + (unaligned_fixup ? "enabled" : "disabled") : + "completely disabled"); + return 1; +} +__setup("unaligned_fixup=", setup_unaligned_fixup); + +#if CHIP_HAS_TILE_DMA() + +static int dma_disabled; + +static int __init nodma(char *str) +{ + pr_info("User-space DMA is disabled\n"); + dma_disabled = 1; + return 1; +} +__setup("nodma", nodma); + +/* How to decode SPR_GPV_REASON */ +#define IRET_ERROR (1U << 31) +#define MT_ERROR (1U << 30) +#define MF_ERROR (1U << 29) +#define SPR_INDEX ((1U << 15) - 1) +#define SPR_MPL_SHIFT 9 /* starting bit position for MPL encoded in SPR */ + +/* + * See if this GPV is just to notify the kernel of SPR use and we can + * retry the user instruction after adjusting some MPLs suitably. + */ +static int retry_gpv(unsigned int gpv_reason) +{ + int mpl; + + if (gpv_reason & IRET_ERROR) + return 0; + + BUG_ON((gpv_reason & (MT_ERROR|MF_ERROR)) == 0); + mpl = (gpv_reason & SPR_INDEX) >> SPR_MPL_SHIFT; + if (mpl == INT_DMA_NOTIFY && !dma_disabled) { + /* User is turning on DMA. Allow it and retry. */ + printk(KERN_DEBUG "Process %d/%s is now enabled for DMA\n", + current->pid, current->comm); + BUG_ON(current->thread.tile_dma_state.enabled); + current->thread.tile_dma_state.enabled = 1; + grant_dma_mpls(); + return 1; + } + + return 0; +} + +#endif /* CHIP_HAS_TILE_DMA() */ + +#ifdef __tilegx__ +#define bundle_bits tilegx_bundle_bits +#else +#define bundle_bits tile_bundle_bits +#endif + +extern bundle_bits bpt_code; + +asm(".pushsection .rodata.bpt_code,\"a\";" + ".align 8;" + "bpt_code: bpt;" + ".size bpt_code,.-bpt_code;" + ".popsection"); + +static int special_ill(bundle_bits bundle, int *sigp, int *codep) +{ + int sig, code, maxcode; + + if (bundle == bpt_code) { + *sigp = SIGTRAP; + *codep = TRAP_BRKPT; + return 1; + } + + /* If it's a "raise" bundle, then "ill" must be in pipe X1. */ +#ifdef __tilegx__ + if ((bundle & TILEGX_BUNDLE_MODE_MASK) != 0) + return 0; + if (get_Opcode_X1(bundle) != RRR_0_OPCODE_X1) + return 0; + if (get_RRROpcodeExtension_X1(bundle) != UNARY_RRR_0_OPCODE_X1) + return 0; + if (get_UnaryOpcodeExtension_X1(bundle) != ILL_UNARY_OPCODE_X1) + return 0; +#else + if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) + return 0; + if (get_Opcode_X1(bundle) != SHUN_0_OPCODE_X1) + return 0; + if (get_UnShOpcodeExtension_X1(bundle) != UN_0_SHUN_0_OPCODE_X1) + return 0; + if (get_UnOpcodeExtension_X1(bundle) != ILL_UN_0_SHUN_0_OPCODE_X1) + return 0; +#endif + + /* Check that the magic distinguishers are set to mean "raise". */ + if (get_Dest_X1(bundle) != 29 || get_SrcA_X1(bundle) != 37) + return 0; + + /* There must be an "addli zero, zero, VAL" in X0. */ + if (get_Opcode_X0(bundle) != ADDLI_OPCODE_X0) + return 0; + if (get_Dest_X0(bundle) != TREG_ZERO) + return 0; + if (get_SrcA_X0(bundle) != TREG_ZERO) + return 0; + + /* + * Validate the proposed signal number and si_code value. + * Note that we embed these in the static instruction itself + * so that we perturb the register state as little as possible + * at the time of the actual fault; it's unlikely you'd ever + * need to dynamically choose which kind of fault to raise + * from user space. + */ + sig = get_Imm16_X0(bundle) & 0x3f; + switch (sig) { + case SIGILL: + maxcode = NSIGILL; + break; + case SIGFPE: + maxcode = NSIGFPE; + break; + case SIGSEGV: + maxcode = NSIGSEGV; + break; + case SIGBUS: + maxcode = NSIGBUS; + break; + case SIGTRAP: + maxcode = NSIGTRAP; + break; + default: + return 0; + } + code = (get_Imm16_X0(bundle) >> 6) & 0xf; + if (code <= 0 || code > maxcode) + return 0; + + /* Make it the requested signal. */ + *sigp = sig; + *codep = code | __SI_FAULT; + return 1; +} + +void __kprobes do_trap(struct pt_regs *regs, int fault_num, + unsigned long reason) +{ + siginfo_t info = { 0 }; + int signo, code; + unsigned long address = 0; + bundle_bits instr; + + /* Re-enable interrupts. */ + local_irq_enable(); + + /* + * If it hits in kernel mode and we can't fix it up, just exit the + * current process and hope for the best. + */ + if (!user_mode(regs)) { + if (fixup_exception(regs)) /* only UNALIGN_DATA in practice */ + return; + pr_alert("Kernel took bad trap %d at PC %#lx\n", + fault_num, regs->pc); + if (fault_num == INT_GPV) + pr_alert("GPV_REASON is %#lx\n", reason); + show_regs(regs); + do_exit(SIGKILL); /* FIXME: implement i386 die() */ + return; + } + + switch (fault_num) { + case INT_MEM_ERROR: + signo = SIGBUS; + code = BUS_OBJERR; + break; + case INT_ILL: + if (copy_from_user(&instr, (void __user *)regs->pc, + sizeof(instr))) { + pr_err("Unreadable instruction for INT_ILL:" + " %#lx\n", regs->pc); + do_exit(SIGKILL); + return; + } + if (!special_ill(instr, &signo, &code)) { + signo = SIGILL; + code = ILL_ILLOPC; + } + address = regs->pc; + break; + case INT_GPV: +#if CHIP_HAS_TILE_DMA() + if (retry_gpv(reason)) + return; +#endif + /*FALLTHROUGH*/ + case INT_UDN_ACCESS: + case INT_IDN_ACCESS: +#if CHIP_HAS_SN() + case INT_SN_ACCESS: +#endif + signo = SIGILL; + code = ILL_PRVREG; + address = regs->pc; + break; + case INT_SWINT_3: + case INT_SWINT_2: + case INT_SWINT_0: + signo = SIGILL; + code = ILL_ILLTRP; + address = regs->pc; + break; + case INT_UNALIGN_DATA: +#ifndef __tilegx__ /* Emulated support for single step debugging */ + if (unaligned_fixup >= 0) { + struct single_step_state *state = + current_thread_info()->step_state; + if (!state || + (void __user *)(regs->pc) != state->buffer) { + single_step_once(regs); + return; + } + } +#endif + signo = SIGBUS; + code = BUS_ADRALN; + address = 0; + break; + case INT_DOUBLE_FAULT: + /* + * For double fault, "reason" is actually passed as + * SYSTEM_SAVE_K_2, the hypervisor's double-fault info, so + * we can provide the original fault number rather than + * the uninteresting "INT_DOUBLE_FAULT" so the user can + * learn what actually struck while PL0 ICS was set. + */ + fault_num = reason; + signo = SIGILL; + code = ILL_DBLFLT; + address = regs->pc; + break; +#ifdef __tilegx__ + case INT_ILL_TRANS: { + /* Avoid a hardware erratum with the return address stack. */ + fill_ra_stack(); + + signo = SIGSEGV; + code = SEGV_MAPERR; + if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK) + address = regs->pc; + else + address = 0; /* FIXME: GX: single-step for address */ + break; + } +#endif + default: + panic("Unexpected do_trap interrupt number %d", fault_num); + return; + } + + info.si_signo = signo; + info.si_code = code; + info.si_addr = (void __user *)address; + if (signo == SIGILL) + info.si_trapno = fault_num; + if (signo != SIGTRAP) + trace_unhandled_signal("trap", regs, address, signo); + force_sig_info(signo, &info, current); +} + +void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52) +{ + _dump_stack(dummy, pc, lr, sp, r52); + pr_emerg("Double fault: exiting\n"); + machine_halt(); +} diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S new file mode 100644 index 00000000..631f10de --- /dev/null +++ b/arch/tile/kernel/vmlinux.lds.S @@ -0,0 +1,95 @@ +#include <asm-generic/vmlinux.lds.h> +#include <asm/page.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <hv/hypervisor.h> + +/* Text loads starting from the supervisor interrupt vector address. */ +#define TEXT_OFFSET MEM_SV_INTRPT + +OUTPUT_ARCH(tile) +ENTRY(_start) +jiffies = jiffies_64; + +PHDRS +{ + intrpt1 PT_LOAD ; + text PT_LOAD ; + data PT_LOAD ; +} +SECTIONS +{ + /* Text is loaded with a different VA than data; start with text. */ + #undef LOAD_OFFSET + #define LOAD_OFFSET TEXT_OFFSET + + /* Interrupt vectors */ + .intrpt1 (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */ + { + _text = .; + _stext = .; + *(.intrpt1) + } :intrpt1 =0 + + /* Hypervisor call vectors */ + #include "hvglue.lds" + + /* Now the real code */ + . = ALIGN(0x20000); + .text : AT (ADDR(.text) - LOAD_OFFSET) { + HEAD_TEXT + SCHED_TEXT + LOCK_TEXT + __fix_text_end = .; /* tile-cpack won't rearrange before this */ + TEXT_TEXT + *(.text.*) + *(.coldtext*) + *(.fixup) + *(.gnu.warning) + } :text =0 + _etext = .; + + /* "Init" is divided into two areas with very different virtual addresses. */ + INIT_TEXT_SECTION(PAGE_SIZE) + + /* Now we skip back to PAGE_OFFSET for the data. */ + . = (. - TEXT_OFFSET + PAGE_OFFSET); + #undef LOAD_OFFSET + #define LOAD_OFFSET PAGE_OFFSET + + . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(_sinitdata) = .; + INIT_DATA_SECTION(16) :data =0 + PERCPU_SECTION(L2_CACHE_BYTES) + . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(_einitdata) = .; + + _sdata = .; /* Start of data section */ + + RO_DATA_SECTION(PAGE_SIZE) + + /* initially writeable, then read-only */ + . = ALIGN(PAGE_SIZE); + __w1data_begin = .; + .w1data : AT(ADDR(.w1data) - LOAD_OFFSET) { + VMLINUX_SYMBOL(__w1data_begin) = .; + *(.w1data) + VMLINUX_SYMBOL(__w1data_end) = .; + } + + RW_DATA_SECTION(L2_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + + _edata = .; + + EXCEPTION_TABLE(L2_CACHE_BYTES) + NOTES + + + BSS_SECTION(8, PAGE_SIZE, 1) + _end = . ; + + STABS_DEBUG + DWARF_DEBUG + + DISCARDS +} diff --git a/arch/tile/kvm/Kconfig b/arch/tile/kvm/Kconfig new file mode 100644 index 00000000..669fcdba --- /dev/null +++ b/arch/tile/kvm/Kconfig @@ -0,0 +1,37 @@ +# +# KVM configuration +# + +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + bool "Virtualization" + ---help--- + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM && MODULES && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + ---help--- + Support hosting paravirtualized guest machines. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +source drivers/vhost/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile new file mode 100644 index 00000000..985f5985 --- /dev/null +++ b/arch/tile/lib/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for TILE-specific library files.. +# + +lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ + memmove.o memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \ + strchr_$(BITS).o strlen_$(BITS).o + +ifeq ($(CONFIG_TILEGX),y) +CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer +lib-y += memcpy_user_64.o +else +lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o +endif + +lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o + +obj-$(CONFIG_MODULES) += exports.o diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c new file mode 100644 index 00000000..771b251b --- /dev/null +++ b/arch/tile/lib/atomic_32.c @@ -0,0 +1,329 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/cache.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/atomic.h> +#include <asm/futex.h> +#include <arch/chip.h> + +/* See <asm/atomic_32.h> */ +#if ATOMIC_LOCKS_FOUND_VIA_TABLE() + +/* + * A block of memory containing locks for atomic ops. Each instance of this + * struct will be homed on a different CPU. + */ +struct atomic_locks_on_cpu { + int lock[ATOMIC_HASH_L2_SIZE]; +} __attribute__((aligned(ATOMIC_HASH_L2_SIZE * 4))); + +static DEFINE_PER_CPU(struct atomic_locks_on_cpu, atomic_lock_pool); + +/* The locks we'll use until __init_atomic_per_cpu is called. */ +static struct atomic_locks_on_cpu __initdata initial_atomic_locks; + +/* Hash into this vector to get a pointer to lock for the given atomic. */ +struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE] + __write_once = { + [0 ... ATOMIC_HASH_L1_SIZE-1] (&initial_atomic_locks) +}; + +#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ + +/* This page is remapped on startup to be hash-for-home. */ +int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss; + +#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ + +static inline int *__atomic_hashed_lock(volatile void *v) +{ + /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */ +#if ATOMIC_LOCKS_FOUND_VIA_TABLE() + unsigned long i = + (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long)); + unsigned long n = __insn_crc32_32(0, i); + + /* Grab high bits for L1 index. */ + unsigned long l1_index = n >> ((sizeof(n) * 8) - ATOMIC_HASH_L1_SHIFT); + /* Grab low bits for L2 index. */ + unsigned long l2_index = n & (ATOMIC_HASH_L2_SIZE - 1); + + return &atomic_lock_ptr[l1_index]->lock[l2_index]; +#else + /* + * Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index. + * Using mm works here because atomic_locks is page aligned. + */ + unsigned long ptr = __insn_mm((unsigned long)v >> 1, + (unsigned long)atomic_locks, + 2, (ATOMIC_HASH_SHIFT + 2) - 1); + return (int *)ptr; +#endif +} + +#ifdef CONFIG_SMP +/* Return whether the passed pointer is a valid atomic lock pointer. */ +static int is_atomic_lock(int *p) +{ +#if ATOMIC_LOCKS_FOUND_VIA_TABLE() + int i; + for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) { + + if (p >= &atomic_lock_ptr[i]->lock[0] && + p < &atomic_lock_ptr[i]->lock[ATOMIC_HASH_L2_SIZE]) { + return 1; + } + } + return 0; +#else + return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE]; +#endif +} + +void __atomic_fault_unlock(int *irqlock_word) +{ + BUG_ON(!is_atomic_lock(irqlock_word)); + BUG_ON(*irqlock_word != 1); + *irqlock_word = 0; +} + +#endif /* CONFIG_SMP */ + +static inline int *__atomic_setup(volatile void *v) +{ + /* Issue a load to the target to bring it into cache. */ + *(volatile int *)v; + return __atomic_hashed_lock(v); +} + +int _atomic_xchg(atomic_t *v, int n) +{ + return __atomic_xchg(&v->counter, __atomic_setup(v), n).val; +} +EXPORT_SYMBOL(_atomic_xchg); + +int _atomic_xchg_add(atomic_t *v, int i) +{ + return __atomic_xchg_add(&v->counter, __atomic_setup(v), i).val; +} +EXPORT_SYMBOL(_atomic_xchg_add); + +int _atomic_xchg_add_unless(atomic_t *v, int a, int u) +{ + /* + * Note: argument order is switched here since it is easier + * to use the first argument consistently as the "old value" + * in the assembly, as is done for _atomic_cmpxchg(). + */ + return __atomic_xchg_add_unless(&v->counter, __atomic_setup(v), u, a) + .val; +} +EXPORT_SYMBOL(_atomic_xchg_add_unless); + +int _atomic_cmpxchg(atomic_t *v, int o, int n) +{ + return __atomic_cmpxchg(&v->counter, __atomic_setup(v), o, n).val; +} +EXPORT_SYMBOL(_atomic_cmpxchg); + +unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask) +{ + return __atomic_or((int *)p, __atomic_setup(p), mask).val; +} +EXPORT_SYMBOL(_atomic_or); + +unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask) +{ + return __atomic_andn((int *)p, __atomic_setup(p), mask).val; +} +EXPORT_SYMBOL(_atomic_andn); + +unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask) +{ + return __atomic_xor((int *)p, __atomic_setup(p), mask).val; +} +EXPORT_SYMBOL(_atomic_xor); + + +u64 _atomic64_xchg(atomic64_t *v, u64 n) +{ + return __atomic64_xchg(&v->counter, __atomic_setup(v), n); +} +EXPORT_SYMBOL(_atomic64_xchg); + +u64 _atomic64_xchg_add(atomic64_t *v, u64 i) +{ + return __atomic64_xchg_add(&v->counter, __atomic_setup(v), i); +} +EXPORT_SYMBOL(_atomic64_xchg_add); + +u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u) +{ + /* + * Note: argument order is switched here since it is easier + * to use the first argument consistently as the "old value" + * in the assembly, as is done for _atomic_cmpxchg(). + */ + return __atomic64_xchg_add_unless(&v->counter, __atomic_setup(v), + u, a); +} +EXPORT_SYMBOL(_atomic64_xchg_add_unless); + +u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) +{ + return __atomic64_cmpxchg(&v->counter, __atomic_setup(v), o, n); +} +EXPORT_SYMBOL(_atomic64_cmpxchg); + + +static inline int *__futex_setup(int __user *v) +{ + /* + * Issue a prefetch to the counter to bring it into cache. + * As for __atomic_setup, but we can't do a read into the L1 + * since it might fault; instead we do a prefetch into the L2. + */ + __insn_prefetch(v); + return __atomic_hashed_lock((int __force *)v); +} + +struct __get_user futex_set(u32 __user *v, int i) +{ + return __atomic_xchg((int __force *)v, __futex_setup(v), i); +} + +struct __get_user futex_add(u32 __user *v, int n) +{ + return __atomic_xchg_add((int __force *)v, __futex_setup(v), n); +} + +struct __get_user futex_or(u32 __user *v, int n) +{ + return __atomic_or((int __force *)v, __futex_setup(v), n); +} + +struct __get_user futex_andn(u32 __user *v, int n) +{ + return __atomic_andn((int __force *)v, __futex_setup(v), n); +} + +struct __get_user futex_xor(u32 __user *v, int n) +{ + return __atomic_xor((int __force *)v, __futex_setup(v), n); +} + +struct __get_user futex_cmpxchg(u32 __user *v, int o, int n) +{ + return __atomic_cmpxchg((int __force *)v, __futex_setup(v), o, n); +} + +/* + * If any of the atomic or futex routines hit a bad address (not in + * the page tables at kernel PL) this routine is called. The futex + * routines are never used on kernel space, and the normal atomics and + * bitops are never used on user space. So a fault on kernel space + * must be fatal, but a fault on userspace is a futex fault and we + * need to return -EFAULT. Note that the context this routine is + * invoked in is the context of the "_atomic_xxx()" routines called + * by the functions in this file. + */ +struct __get_user __atomic_bad_address(int __user *addr) +{ + if (unlikely(!access_ok(VERIFY_WRITE, addr, sizeof(int)))) + panic("Bad address used for kernel atomic op: %p\n", addr); + return (struct __get_user) { .err = -EFAULT }; +} + + +#if CHIP_HAS_CBOX_HOME_MAP() +static int __init noatomichash(char *str) +{ + pr_warning("noatomichash is deprecated.\n"); + return 1; +} +__setup("noatomichash", noatomichash); +#endif + +void __init __init_atomic_per_cpu(void) +{ +#if ATOMIC_LOCKS_FOUND_VIA_TABLE() + + unsigned int i; + int actual_cpu; + + /* + * Before this is called from setup, we just have one lock for + * all atomic objects/operations. Here we replace the + * elements of atomic_lock_ptr so that they point at per_cpu + * integers. This seemingly over-complex approach stems from + * the fact that DEFINE_PER_CPU defines an entry for each cpu + * in the grid, not each cpu from 0..ATOMIC_HASH_SIZE-1. But + * for efficient hashing of atomics to their locks we want a + * compile time constant power of 2 for the size of this + * table, so we use ATOMIC_HASH_SIZE. + * + * Here we populate atomic_lock_ptr from the per cpu + * atomic_lock_pool, interspersing by actual cpu so that + * subsequent elements are homed on consecutive cpus. + */ + + actual_cpu = cpumask_first(cpu_possible_mask); + + for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) { + /* + * Preincrement to slightly bias against using cpu 0, + * which has plenty of stuff homed on it already. + */ + actual_cpu = cpumask_next(actual_cpu, cpu_possible_mask); + if (actual_cpu >= nr_cpu_ids) + actual_cpu = cpumask_first(cpu_possible_mask); + + atomic_lock_ptr[i] = &per_cpu(atomic_lock_pool, actual_cpu); + } + +#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ + + /* Validate power-of-two and "bigger than cpus" assumption */ + BUILD_BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1)); + BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids); + + /* + * On TILEPro we prefer to use a single hash-for-home + * page, since this means atomic operations are less + * likely to encounter a TLB fault and thus should + * in general perform faster. You may wish to disable + * this in situations where few hash-for-home tiles + * are configured. + */ + BUG_ON((unsigned long)atomic_locks % PAGE_SIZE != 0); + + /* The locks must all fit on one page. */ + BUILD_BUG_ON(ATOMIC_HASH_SIZE * sizeof(int) > PAGE_SIZE); + + /* + * We use the page offset of the atomic value's address as + * an index into atomic_locks, excluding the low 3 bits. + * That should not produce more indices than ATOMIC_HASH_SIZE. + */ + BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE); + +#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ + + /* The futex code makes this assumption, so we validate it here. */ + BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int)); +} diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S new file mode 100644 index 00000000..30638042 --- /dev/null +++ b/arch/tile/lib/atomic_asm_32.S @@ -0,0 +1,196 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Support routines for atomic operations. Each function takes: + * + * r0: address to manipulate + * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG) + * r2: new value to write, or for cmpxchg/add_unless, value to compare against + * r3: (cmpxchg/xchg_add_unless) new value to write or add; + * (atomic64 ops) high word of value to write + * r4/r5: (cmpxchg64/add_unless64) new value to write or add + * + * The 32-bit routines return a "struct __get_user" so that the futex code + * has an opportunity to return -EFAULT to the user if needed. + * The 64-bit routines just return a "long long" with the value, + * since they are only used from kernel space and don't expect to fault. + * Support for 16-bit ops is included in the framework but we don't provide + * any (x86_64 has an atomic_inc_short(), so we might want to some day). + * + * Note that the caller is advised to issue a suitable L1 or L2 + * prefetch on the address being manipulated to avoid extra stalls. + * In addition, the hot path is on two icache lines, and we start with + * a jump to the second line to make sure they are both in cache so + * that we never stall waiting on icache fill while holding the lock. + * (This doesn't work out with most 64-bit ops, since they consume + * too many bundles, so may take an extra i-cache stall.) + * + * These routines set the INTERRUPT_CRITICAL_SECTION bit, just + * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt + * the code, just page faults. + * + * If the load or store faults in a way that can be directly fixed in + * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it + * directly, return to the instruction that faulted, and retry it. + * + * If the load or store faults in a way that potentially requires us + * to release the atomic lock, then retry (e.g. a migrating PTE), we + * reset the PC in do_page_fault_ics() to the "tns" instruction so + * that on return we will reacquire the lock and restart the op. We + * are somewhat overloading the exception_table_entry notion by doing + * this, since those entries are not normally used for migrating PTEs. + * + * If the main page fault handler discovers a bad address, it will see + * the PC pointing to the "tns" instruction (due to the earlier + * exception_table_entry processing in do_page_fault_ics), and + * re-reset the PC to the fault handler, atomic_bad_address(), which + * effectively takes over from the atomic op and can either return a + * bad "struct __get_user" (for user addresses) or can just panic (for + * bad kernel addresses). + * + * Note that if the value we would store is the same as what we + * loaded, we bypass the store. Other platforms with true atomics can + * make the guarantee that a non-atomic __clear_bit(), for example, + * can safely race with an atomic test_and_set_bit(); this example is + * from bit_spinlock.h in slub_lock() / slub_unlock(). We can't do + * that on Tile since the "atomic" op is really just a + * read/modify/write, and can race with the non-atomic + * read/modify/write. However, if we can short-circuit the write when + * it is not needed, in the atomic case, we avoid the race. + */ + +#include <linux/linkage.h> +#include <asm/atomic_32.h> +#include <asm/page.h> +#include <asm/processor.h> + + .section .text.atomic,"ax" +ENTRY(__start_atomic_asm_code) + + .macro atomic_op, name, bitwidth, body + .align 64 +STD_ENTRY_SECTION(__atomic\name, .text.atomic) + { + movei r24, 1 + j 4f /* branch to second cache line */ + } +1: { + .ifc \bitwidth,16 + lh r22, r0 + .else + lw r22, r0 + addi r28, r0, 4 + .endif + } + .ifc \bitwidth,64 + lw r23, r28 + .endif + \body /* set r24, and r25 if 64-bit */ + { + seq r26, r22, r24 + seq r27, r23, r25 + } + .ifc \bitwidth,64 + bbnst r27, 2f + .endif + bbs r26, 3f /* skip write-back if it's the same value */ +2: { + .ifc \bitwidth,16 + sh r0, r24 + .else + sw r0, r24 + .endif + } + .ifc \bitwidth,64 + sw r28, r25 + .endif + mf +3: { + move r0, r22 + .ifc \bitwidth,64 + move r1, r23 + .else + move r1, zero + .endif + sw ATOMIC_LOCK_REG_NAME, zero + } + mtspr INTERRUPT_CRITICAL_SECTION, zero + jrp lr +4: { + move ATOMIC_LOCK_REG_NAME, r1 + mtspr INTERRUPT_CRITICAL_SECTION, r24 + } +#ifndef CONFIG_SMP + j 1b /* no atomic locks */ +#else + { + tns r21, ATOMIC_LOCK_REG_NAME + moveli r23, 2048 /* maximum backoff time in cycles */ + } + { + bzt r21, 1b /* branch if lock acquired */ + moveli r25, 32 /* starting backoff time in cycles */ + } +5: mtspr INTERRUPT_CRITICAL_SECTION, zero + mfspr r26, CYCLE_LOW /* get start point for this backoff */ +6: mfspr r22, CYCLE_LOW /* test to see if we've backed off enough */ + sub r22, r22, r26 + slt r22, r22, r25 + bbst r22, 6b + { + mtspr INTERRUPT_CRITICAL_SECTION, r24 + shli r25, r25, 1 /* double the backoff; retry the tns */ + } + { + tns r21, ATOMIC_LOCK_REG_NAME + slt r26, r23, r25 /* is the proposed backoff too big? */ + } + { + bzt r21, 1b /* branch if lock acquired */ + mvnz r25, r26, r23 + } + j 5b +#endif + STD_ENDPROC(__atomic\name) + .ifc \bitwidth,32 + .pushsection __ex_table,"a" + .word 1b, __atomic\name + .word 2b, __atomic\name + .word __atomic\name, __atomic_bad_address + .popsection + .endif + .endm + +atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" +atomic_op _xchg, 32, "move r24, r2" +atomic_op _xchg_add, 32, "add r24, r22, r2" +atomic_op _xchg_add_unless, 32, \ + "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }" +atomic_op _or, 32, "or r24, r22, r2" +atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2" +atomic_op _xor, 32, "xor r24, r22, r2" + +atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \ + { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" +atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }" +atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \ + slt_u r26, r24, r22; add r25, r25, r26" +atomic_op 64_xchg_add_unless, 64, \ + "{ sne r26, r22, r2; sne r27, r23, r3 }; \ + { bbns r26, 3f; add r24, r22, r4 }; \ + { bbns r27, 3f; add r25, r23, r5 }; \ + slt_u r26, r24, r22; add r25, r25, r26" + + jrp lr /* happy backtracer */ + +ENTRY(__end_atomic_asm_code) diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c new file mode 100644 index 00000000..db4fb89e --- /dev/null +++ b/arch/tile/lib/cacheflush.c @@ -0,0 +1,167 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <asm/page.h> +#include <asm/cacheflush.h> +#include <arch/icache.h> +#include <arch/spr_def.h> + + +void __flush_icache_range(unsigned long start, unsigned long end) +{ + invalidate_icache((const void *)start, end - start, PAGE_SIZE); +} + + +/* Force a load instruction to issue. */ +static inline void force_load(char *p) +{ + *(volatile char *)p; +} + +/* + * Flush and invalidate a VA range that is homed remotely on a single + * core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting + * until the memory controller holds the flushed values. + */ +void finv_buffer_remote(void *buffer, size_t size, int hfh) +{ + char *p, *base; + size_t step_size, load_count; + + /* + * On TILEPro the striping granularity is a fixed 8KB; on + * TILE-Gx it is configurable, and we rely on the fact that + * the hypervisor always configures maximum striping, so that + * bits 9 and 10 of the PA are part of the stripe function, so + * every 512 bytes we hit a striping boundary. + * + */ +#ifdef __tilegx__ + const unsigned long STRIPE_WIDTH = 512; +#else + const unsigned long STRIPE_WIDTH = 8192; +#endif + +#ifdef __tilegx__ + /* + * On TILE-Gx, we must disable the dstream prefetcher before doing + * a cache flush; otherwise, we could end up with data in the cache + * that we don't want there. Note that normally we'd do an mf + * after the SPR write to disabling the prefetcher, but we do one + * below, before any further loads, so there's no need to do it + * here. + */ + uint_reg_t old_dstream_pf = __insn_mfspr(SPR_DSTREAM_PF); + __insn_mtspr(SPR_DSTREAM_PF, 0); +#endif + + /* + * Flush and invalidate the buffer out of the local L1/L2 + * and request the home cache to flush and invalidate as well. + */ + __finv_buffer(buffer, size); + + /* + * Wait for the home cache to acknowledge that it has processed + * all the flush-and-invalidate requests. This does not mean + * that the flushed data has reached the memory controller yet, + * but it does mean the home cache is processing the flushes. + */ + __insn_mf(); + + /* + * Issue a load to the last cache line, which can't complete + * until all the previously-issued flushes to the same memory + * controller have also completed. If we weren't striping + * memory, that one load would be sufficient, but since we may + * be, we also need to back up to the last load issued to + * another memory controller, which would be the point where + * we crossed a "striping" boundary (the granularity of striping + * across memory controllers). Keep backing up and doing this + * until we are before the beginning of the buffer, or have + * hit all the controllers. + * + * If we are flushing a hash-for-home buffer, it's even worse. + * Each line may be homed on a different tile, and each tile + * may have up to four lines that are on different + * controllers. So as we walk backwards, we have to touch + * enough cache lines to satisfy these constraints. In + * practice this ends up being close enough to "load from + * every cache line on a full memory stripe on each + * controller" that we simply do that, to simplify the logic. + * + * On TILE-Gx the hash-for-home function is much more complex, + * with the upshot being we can't readily guarantee we have + * hit both entries in the 128-entry AMT that were hit by any + * load in the entire range, so we just re-load them all. + * With larger buffers, we may want to consider using a hypervisor + * trap to issue loads directly to each hash-for-home tile for + * each controller (doing it from Linux would trash the TLB). + */ + if (hfh) { + step_size = L2_CACHE_BYTES; +#ifdef __tilegx__ + load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES; +#else + load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * + (1 << CHIP_LOG_NUM_MSHIMS()); +#endif + } else { + step_size = STRIPE_WIDTH; + load_count = (1 << CHIP_LOG_NUM_MSHIMS()); + } + + /* Load the last byte of the buffer. */ + p = (char *)buffer + size - 1; + force_load(p); + + /* Bump down to the end of the previous stripe or cache line. */ + p -= step_size; + p = (char *)((unsigned long)p | (step_size - 1)); + + /* Figure out how far back we need to go. */ + base = p - (step_size * (load_count - 2)); + if ((unsigned long)base < (unsigned long)buffer) + base = buffer; + + /* + * Fire all the loads we need. The MAF only has eight entries + * so we can have at most eight outstanding loads, so we + * unroll by that amount. + */ +#pragma unroll 8 + for (; p >= base; p -= step_size) + force_load(p); + + /* + * Repeat, but with inv's instead of loads, to get rid of the + * data we just loaded into our own cache and the old home L3. + * No need to unroll since inv's don't target a register. + */ + p = (char *)buffer + size - 1; + __insn_inv(p); + p -= step_size; + p = (char *)((unsigned long)p | (step_size - 1)); + for (; p >= base; p -= step_size) + __insn_inv(p); + + /* Wait for the load+inv's (and thus finvs) to have completed. */ + __insn_mf(); + +#ifdef __tilegx__ + /* Reenable the prefetcher. */ + __insn_mtspr(SPR_DSTREAM_PF, old_dstream_pf); +#endif +} diff --git a/arch/tile/lib/checksum.c b/arch/tile/lib/checksum.c new file mode 100644 index 00000000..e4bab5bd --- /dev/null +++ b/arch/tile/lib/checksum.c @@ -0,0 +1,102 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * Support code for the main lib/checksum.c. + */ + +#include <net/checksum.h> +#include <linux/module.h> + +static inline unsigned int longto16(unsigned long x) +{ + unsigned long ret; +#ifdef __tilegx__ + ret = __insn_v2sadu(x, 0); + ret = __insn_v2sadu(ret, 0); +#else + ret = __insn_sadh_u(x, 0); + ret = __insn_sadh_u(ret, 0); +#endif + return ret; +} + +__wsum do_csum(const unsigned char *buff, int len) +{ + int odd, count; + unsigned long result = 0; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long) buff; + if (odd) { + result = (*buff << 8); + len--; + buff++; + } + count = len >> 1; /* nr of 16-bit words.. */ + if (count) { + if (2 & (unsigned long) buff) { + result += *(const unsigned short *)buff; + count--; + len -= 2; + buff += 2; + } + count >>= 1; /* nr of 32-bit words.. */ + if (count) { +#ifdef __tilegx__ + if (4 & (unsigned long) buff) { + unsigned int w = *(const unsigned int *)buff; + result = __insn_v2sadau(result, w, 0); + count--; + len -= 4; + buff += 4; + } + count >>= 1; /* nr of 64-bit words.. */ +#endif + + /* + * This algorithm could wrap around for very + * large buffers, but those should be impossible. + */ + BUG_ON(count >= 65530); + + while (count) { + unsigned long w = *(const unsigned long *)buff; + count--; + buff += sizeof(w); +#ifdef __tilegx__ + result = __insn_v2sadau(result, w, 0); +#else + result = __insn_sadah_u(result, w, 0); +#endif + } +#ifdef __tilegx__ + if (len & 4) { + unsigned int w = *(const unsigned int *)buff; + result = __insn_v2sadau(result, w, 0); + buff += 4; + } +#endif + } + if (len & 2) { + result += *(const unsigned short *) buff; + buff += 2; + } + } + if (len & 1) + result += *buff; + result = longto16(result); + if (odd) + result = swab16(result); +out: + return result; +} diff --git a/arch/tile/lib/cpumask.c b/arch/tile/lib/cpumask.c new file mode 100644 index 00000000..fdc40361 --- /dev/null +++ b/arch/tile/lib/cpumask.c @@ -0,0 +1,52 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/cpumask.h> +#include <linux/ctype.h> +#include <linux/errno.h> +#include <linux/smp.h> + +/* + * Allow cropping out bits beyond the end of the array. + * Move to "lib" directory if more clients want to use this routine. + */ +int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits) +{ + unsigned a, b; + + bitmap_zero(maskp, nmaskbits); + do { + if (!isdigit(*bp)) + return -EINVAL; + a = simple_strtoul(bp, (char **)&bp, 10); + b = a; + if (*bp == '-') { + bp++; + if (!isdigit(*bp)) + return -EINVAL; + b = simple_strtoul(bp, (char **)&bp, 10); + } + if (!(a <= b)) + return -EINVAL; + if (b >= nmaskbits) + b = nmaskbits-1; + while (a <= b) { + set_bit(a, maskp); + a++; + } + if (*bp == ',') + bp++; + } while (*bp != '\0' && *bp != '\n'); + return 0; +} diff --git a/arch/tile/lib/delay.c b/arch/tile/lib/delay.c new file mode 100644 index 00000000..cdacdd11 --- /dev/null +++ b/arch/tile/lib/delay.c @@ -0,0 +1,45 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/thread_info.h> +#include <asm/timex.h> + +void __udelay(unsigned long usecs) +{ + if (usecs > ULONG_MAX / 1000) { + WARN_ON_ONCE(usecs > ULONG_MAX / 1000); + usecs = ULONG_MAX / 1000; + } + __ndelay(usecs * 1000); +} +EXPORT_SYMBOL(__udelay); + +void __ndelay(unsigned long nsecs) +{ + cycles_t target = get_cycles(); + target += ns2cycles(nsecs); + while (get_cycles() < target) + cpu_relax(); +} +EXPORT_SYMBOL(__ndelay); + +void __delay(unsigned long cycles) +{ + cycles_t target = get_cycles() + cycles; + while (get_cycles() < target) + cpu_relax(); +} +EXPORT_SYMBOL(__delay); diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c new file mode 100644 index 00000000..2a81d32d --- /dev/null +++ b/arch/tile/lib/exports.c @@ -0,0 +1,93 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Exports from assembler code and from libtile-cc. + */ + +#include <linux/module.h> + +/* arch/tile/lib/usercopy.S */ +#include <linux/uaccess.h> +EXPORT_SYMBOL(__get_user_1); +EXPORT_SYMBOL(__get_user_2); +EXPORT_SYMBOL(__get_user_4); +EXPORT_SYMBOL(__get_user_8); +EXPORT_SYMBOL(__put_user_1); +EXPORT_SYMBOL(__put_user_2); +EXPORT_SYMBOL(__put_user_4); +EXPORT_SYMBOL(__put_user_8); +EXPORT_SYMBOL(strnlen_user_asm); +EXPORT_SYMBOL(strncpy_from_user_asm); +EXPORT_SYMBOL(clear_user_asm); +EXPORT_SYMBOL(flush_user_asm); +EXPORT_SYMBOL(inv_user_asm); +EXPORT_SYMBOL(finv_user_asm); + +/* arch/tile/kernel/entry.S */ +#include <linux/kernel.h> +#include <asm/processor.h> +EXPORT_SYMBOL(current_text_addr); +EXPORT_SYMBOL(dump_stack); + +/* arch/tile/kernel/head.S */ +EXPORT_SYMBOL(empty_zero_page); + +/* arch/tile/lib/, various memcpy files */ +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(__copy_to_user_inatomic); +EXPORT_SYMBOL(__copy_from_user_inatomic); +EXPORT_SYMBOL(__copy_from_user_zeroing); +#ifdef __tilegx__ +EXPORT_SYMBOL(__copy_in_user_inatomic); +#endif + +/* hypervisor glue */ +#include <hv/hypervisor.h> +EXPORT_SYMBOL(hv_dev_open); +EXPORT_SYMBOL(hv_dev_pread); +EXPORT_SYMBOL(hv_dev_pwrite); +EXPORT_SYMBOL(hv_dev_preada); +EXPORT_SYMBOL(hv_dev_pwritea); +EXPORT_SYMBOL(hv_dev_poll); +EXPORT_SYMBOL(hv_dev_poll_cancel); +EXPORT_SYMBOL(hv_dev_close); +EXPORT_SYMBOL(hv_sysconf); +EXPORT_SYMBOL(hv_confstr); + +/* libgcc.a */ +uint32_t __udivsi3(uint32_t dividend, uint32_t divisor); +EXPORT_SYMBOL(__udivsi3); +int32_t __divsi3(int32_t dividend, int32_t divisor); +EXPORT_SYMBOL(__divsi3); +uint64_t __udivdi3(uint64_t dividend, uint64_t divisor); +EXPORT_SYMBOL(__udivdi3); +int64_t __divdi3(int64_t dividend, int64_t divisor); +EXPORT_SYMBOL(__divdi3); +uint32_t __umodsi3(uint32_t dividend, uint32_t divisor); +EXPORT_SYMBOL(__umodsi3); +int32_t __modsi3(int32_t dividend, int32_t divisor); +EXPORT_SYMBOL(__modsi3); +uint64_t __umoddi3(uint64_t dividend, uint64_t divisor); +EXPORT_SYMBOL(__umoddi3); +int64_t __moddi3(int64_t dividend, int64_t divisor); +EXPORT_SYMBOL(__moddi3); +#ifndef __tilegx__ +int64_t __muldi3(int64_t, int64_t); +EXPORT_SYMBOL(__muldi3); +uint64_t __lshrdi3(uint64_t, unsigned int); +EXPORT_SYMBOL(__lshrdi3); +uint64_t __ashrdi3(uint64_t, unsigned int); +EXPORT_SYMBOL(__ashrdi3); +uint64_t __ashldi3(uint64_t, unsigned int); +EXPORT_SYMBOL(__ashldi3); +#endif diff --git a/arch/tile/lib/memchr_32.c b/arch/tile/lib/memchr_32.c new file mode 100644 index 00000000..cc3d9bad --- /dev/null +++ b/arch/tile/lib/memchr_32.c @@ -0,0 +1,71 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> + +void *memchr(const void *s, int c, size_t n) +{ + const uint32_t *last_word_ptr; + const uint32_t *p; + const char *last_byte_ptr; + uintptr_t s_int; + uint32_t goal, before_mask, v, bits; + char *ret; + + if (__builtin_expect(n == 0, 0)) { + /* Don't dereference any memory if the array is empty. */ + return NULL; + } + + /* Get an aligned pointer. */ + s_int = (uintptr_t) s; + p = (const uint32_t *)(s_int & -4); + + /* Create four copies of the byte for which we are looking. */ + goal = 0x01010101 * (uint8_t) c; + + /* Read the first word, but munge it so that bytes before the array + * will not match goal. + * + * Note that this shift count expression works because we know + * shift counts are taken mod 32. + */ + before_mask = (1 << (s_int << 3)) - 1; + v = (*p | before_mask) ^ (goal & before_mask); + + /* Compute the address of the last byte. */ + last_byte_ptr = (const char *)s + n - 1; + + /* Compute the address of the word containing the last byte. */ + last_word_ptr = (const uint32_t *)((uintptr_t) last_byte_ptr & -4); + + while ((bits = __insn_seqb(v, goal)) == 0) { + if (__builtin_expect(p == last_word_ptr, 0)) { + /* We already read the last word in the array, + * so give up. + */ + return NULL; + } + v = *++p; + } + + /* We found a match, but it might be in a byte past the end + * of the array. + */ + ret = ((char *)p) + (__insn_ctz(bits) >> 3); + return (ret <= last_byte_ptr) ? ret : NULL; +} +EXPORT_SYMBOL(memchr); diff --git a/arch/tile/lib/memchr_64.c b/arch/tile/lib/memchr_64.c new file mode 100644 index 00000000..84fdc8d8 --- /dev/null +++ b/arch/tile/lib/memchr_64.c @@ -0,0 +1,71 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> + +void *memchr(const void *s, int c, size_t n) +{ + const uint64_t *last_word_ptr; + const uint64_t *p; + const char *last_byte_ptr; + uintptr_t s_int; + uint64_t goal, before_mask, v, bits; + char *ret; + + if (__builtin_expect(n == 0, 0)) { + /* Don't dereference any memory if the array is empty. */ + return NULL; + } + + /* Get an aligned pointer. */ + s_int = (uintptr_t) s; + p = (const uint64_t *)(s_int & -8); + + /* Create eight copies of the byte for which we are looking. */ + goal = 0x0101010101010101ULL * (uint8_t) c; + + /* Read the first word, but munge it so that bytes before the array + * will not match goal. + * + * Note that this shift count expression works because we know + * shift counts are taken mod 64. + */ + before_mask = (1ULL << (s_int << 3)) - 1; + v = (*p | before_mask) ^ (goal & before_mask); + + /* Compute the address of the last byte. */ + last_byte_ptr = (const char *)s + n - 1; + + /* Compute the address of the word containing the last byte. */ + last_word_ptr = (const uint64_t *)((uintptr_t) last_byte_ptr & -8); + + while ((bits = __insn_v1cmpeq(v, goal)) == 0) { + if (__builtin_expect(p == last_word_ptr, 0)) { + /* We already read the last word in the array, + * so give up. + */ + return NULL; + } + v = *++p; + } + + /* We found a match, but it might be in a byte past the end + * of the array. + */ + ret = ((char *)p) + (__insn_ctz(bits) >> 3); + return (ret <= last_byte_ptr) ? ret : NULL; +} +EXPORT_SYMBOL(memchr); diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S new file mode 100644 index 00000000..2a419a61 --- /dev/null +++ b/arch/tile/lib/memcpy_32.S @@ -0,0 +1,618 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <arch/chip.h> + + +/* + * This file shares the implementation of the userspace memcpy and + * the kernel's memcpy, copy_to_user and copy_from_user. + */ + +#include <linux/linkage.h> + +/* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */ +#if !CHIP_HAS_COHERENT_LOCAL_CACHE() +#define memcpy __memcpy_asm +#define __copy_to_user_inatomic __copy_to_user_inatomic_asm +#define __copy_from_user_inatomic __copy_from_user_inatomic_asm +#define __copy_from_user_zeroing __copy_from_user_zeroing_asm +#endif + +#define IS_MEMCPY 0 +#define IS_COPY_FROM_USER 1 +#define IS_COPY_FROM_USER_ZEROING 2 +#define IS_COPY_TO_USER -1 + + .section .text.memcpy_common, "ax" + .align 64 + +/* Use this to preface each bundle that can cause an exception so + * the kernel can clean up properly. The special cleanup code should + * not use these, since it knows what it is doing. + */ +#define EX \ + .pushsection __ex_table, "a"; \ + .word 9f, memcpy_common_fixup; \ + .popsection; \ + 9 + + +/* __copy_from_user_inatomic takes the kernel target address in r0, + * the user source in r1, and the bytes to copy in r2. + * It returns the number of uncopiable bytes (hopefully zero) in r0. + */ +ENTRY(__copy_from_user_inatomic) +.type __copy_from_user_inatomic, @function + FEEDBACK_ENTER_EXPLICIT(__copy_from_user_inatomic, \ + .text.memcpy_common, \ + .Lend_memcpy_common - __copy_from_user_inatomic) + { movei r29, IS_COPY_FROM_USER; j memcpy_common } + .size __copy_from_user_inatomic, . - __copy_from_user_inatomic + +/* __copy_from_user_zeroing is like __copy_from_user_inatomic, but + * any uncopiable bytes are zeroed in the target. + */ +ENTRY(__copy_from_user_zeroing) +.type __copy_from_user_zeroing, @function + FEEDBACK_REENTER(__copy_from_user_inatomic) + { movei r29, IS_COPY_FROM_USER_ZEROING; j memcpy_common } + .size __copy_from_user_zeroing, . - __copy_from_user_zeroing + +/* __copy_to_user_inatomic takes the user target address in r0, + * the kernel source in r1, and the bytes to copy in r2. + * It returns the number of uncopiable bytes (hopefully zero) in r0. + */ +ENTRY(__copy_to_user_inatomic) +.type __copy_to_user_inatomic, @function + FEEDBACK_REENTER(__copy_from_user_inatomic) + { movei r29, IS_COPY_TO_USER; j memcpy_common } + .size __copy_to_user_inatomic, . - __copy_to_user_inatomic + +ENTRY(memcpy) +.type memcpy, @function + FEEDBACK_REENTER(__copy_from_user_inatomic) + { movei r29, IS_MEMCPY } + .size memcpy, . - memcpy + /* Fall through */ + + .type memcpy_common, @function +memcpy_common: + /* On entry, r29 holds one of the IS_* macro values from above. */ + + + /* r0 is the dest, r1 is the source, r2 is the size. */ + + /* Save aside original dest so we can return it at the end. */ + { sw sp, lr; move r23, r0; or r4, r0, r1 } + + /* Check for an empty size. */ + { bz r2, .Ldone; andi r4, r4, 3 } + + /* Save aside original values in case of a fault. */ + { move r24, r1; move r25, r2 } + move r27, lr + + /* Check for an unaligned source or dest. */ + { bnz r4, .Lcopy_unaligned_maybe_many; addli r4, r2, -256 } + +.Lcheck_aligned_copy_size: + /* If we are copying < 256 bytes, branch to simple case. */ + { blzt r4, .Lcopy_8_check; slti_u r8, r2, 8 } + + /* Copying >= 256 bytes, so jump to complex prefetching loop. */ + { andi r6, r1, 63; j .Lcopy_many } + +/* + * + * Aligned 4 byte at a time copy loop + * + */ + +.Lcopy_8_loop: + /* Copy two words at a time to hide load latency. */ +EX: { lw r3, r1; addi r1, r1, 4; slti_u r8, r2, 16 } +EX: { lw r4, r1; addi r1, r1, 4 } +EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 } +EX: { sw r0, r4; addi r0, r0, 4; addi r2, r2, -4 } +.Lcopy_8_check: + { bzt r8, .Lcopy_8_loop; slti_u r4, r2, 4 } + + /* Copy odd leftover word, if any. */ + { bnzt r4, .Lcheck_odd_stragglers } +EX: { lw r3, r1; addi r1, r1, 4 } +EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 } + +.Lcheck_odd_stragglers: + { bnz r2, .Lcopy_unaligned_few } + +.Ldone: + /* For memcpy return original dest address, else zero. */ + { mz r0, r29, r23; jrp lr } + + +/* + * + * Prefetching multiple cache line copy handler (for large transfers). + * + */ + + /* Copy words until r1 is cache-line-aligned. */ +.Lalign_loop: +EX: { lw r3, r1; addi r1, r1, 4 } + { andi r6, r1, 63 } +EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 } +.Lcopy_many: + { bnzt r6, .Lalign_loop; addi r9, r0, 63 } + + { addi r3, r1, 60; andi r9, r9, -64 } + +#if CHIP_HAS_WH64() + /* No need to prefetch dst, we'll just do the wh64 + * right before we copy a line. + */ +#endif + +EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 } + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bnzt zero, .; move r27, lr } +EX: { lw r6, r3; addi r3, r3, 64 } + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bnzt zero, . } +EX: { lw r7, r3; addi r3, r3, 64 } +#if !CHIP_HAS_WH64() + /* Prefetch the dest */ + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bnzt zero, . } + /* Use a real load to cause a TLB miss if necessary. We aren't using + * r28, so this should be fine. + */ +EX: { lw r28, r9; addi r9, r9, 64 } + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bnzt zero, . } + { prefetch r9; addi r9, r9, 64 } + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bnzt zero, . } + { prefetch r9; addi r9, r9, 64 } +#endif + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bz zero, .Lbig_loop2 } + + /* On entry to this loop: + * - r0 points to the start of dst line 0 + * - r1 points to start of src line 0 + * - r2 >= (256 - 60), only the first time the loop trips. + * - r3 contains r1 + 128 + 60 [pointer to end of source line 2] + * This is our prefetch address. When we get near the end + * rather than prefetching off the end this is changed to point + * to some "safe" recently loaded address. + * - r5 contains *(r1 + 60) [i.e. last word of source line 0] + * - r6 contains *(r1 + 64 + 60) [i.e. last word of source line 1] + * - r9 contains ((r0 + 63) & -64) + * [start of next dst cache line.] + */ + +.Lbig_loop: + { jal .Lcopy_line2; add r15, r1, r2 } + +.Lbig_loop2: + /* Copy line 0, first stalling until r5 is ready. */ +EX: { move r12, r5; lw r16, r1 } + { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } + /* Prefetch several lines ahead. */ +EX: { lw r5, r3; addi r3, r3, 64 } + { jal .Lcopy_line } + + /* Copy line 1, first stalling until r6 is ready. */ +EX: { move r12, r6; lw r16, r1 } + { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } + /* Prefetch several lines ahead. */ +EX: { lw r6, r3; addi r3, r3, 64 } + { jal .Lcopy_line } + + /* Copy line 2, first stalling until r7 is ready. */ +EX: { move r12, r7; lw r16, r1 } + { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } + /* Prefetch several lines ahead. */ +EX: { lw r7, r3; addi r3, r3, 64 } + /* Use up a caches-busy cycle by jumping back to the top of the + * loop. Might as well get it out of the way now. + */ + { j .Lbig_loop } + + + /* On entry: + * - r0 points to the destination line. + * - r1 points to the source line. + * - r3 is the next prefetch address. + * - r9 holds the last address used for wh64. + * - r12 = WORD_15 + * - r16 = WORD_0. + * - r17 == r1 + 16. + * - r27 holds saved lr to restore. + * + * On exit: + * - r0 is incremented by 64. + * - r1 is incremented by 64, unless that would point to a word + * beyond the end of the source array, in which case it is redirected + * to point to an arbitrary word already in the cache. + * - r2 is decremented by 64. + * - r3 is unchanged, unless it points to a word beyond the + * end of the source array, in which case it is redirected + * to point to an arbitrary word already in the cache. + * Redirecting is OK since if we are that close to the end + * of the array we will not come back to this subroutine + * and use the contents of the prefetched address. + * - r4 is nonzero iff r2 >= 64. + * - r9 is incremented by 64, unless it points beyond the + * end of the last full destination cache line, in which + * case it is redirected to a "safe address" that can be + * clobbered (sp - 64) + * - lr contains the value in r27. + */ + +/* r26 unused */ + +.Lcopy_line: + /* TODO: when r3 goes past the end, we would like to redirect it + * to prefetch the last partial cache line (if any) just once, for the + * benefit of the final cleanup loop. But we don't want to + * prefetch that line more than once, or subsequent prefetches + * will go into the RTF. But then .Lbig_loop should unconditionally + * branch to top of loop to execute final prefetch, and its + * nop should become a conditional branch. + */ + + /* We need two non-memory cycles here to cover the resources + * used by the loads initiated by the caller. + */ + { add r15, r1, r2 } +.Lcopy_line2: + { slt_u r13, r3, r15; addi r17, r1, 16 } + + /* NOTE: this will stall for one cycle as L1 is busy. */ + + /* Fill second L1D line. */ +EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ + +#if CHIP_HAS_WH64() + /* Prepare destination line for writing. */ +EX: { wh64 r9; addi r9, r9, 64 } +#else + /* Prefetch dest line */ + { prefetch r9; addi r9, r9, 64 } +#endif + /* Load seven words that are L1D hits to cover wh64 L2 usage. */ + + /* Load the three remaining words from the last L1D line, which + * we know has already filled the L1D. + */ +EX: { lw r4, r1; addi r1, r1, 4; addi r20, r1, 16 } /* r4 = WORD_12 */ +EX: { lw r8, r1; addi r1, r1, 4; slt_u r13, r20, r15 }/* r8 = WORD_13 */ +EX: { lw r11, r1; addi r1, r1, -52; mvz r20, r13, r1 } /* r11 = WORD_14 */ + + /* Load the three remaining words from the first L1D line, first + * stalling until it has filled by "looking at" r16. + */ +EX: { lw r13, r1; addi r1, r1, 4; move zero, r16 } /* r13 = WORD_1 */ +EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_2 */ +EX: { lw r15, r1; addi r1, r1, 8; addi r10, r0, 60 } /* r15 = WORD_3 */ + + /* Load second word from the second L1D line, first + * stalling until it has filled by "looking at" r17. + */ +EX: { lw r19, r1; addi r1, r1, 4; move zero, r17 } /* r19 = WORD_5 */ + + /* Store last word to the destination line, potentially dirtying it + * for the first time, which keeps the L2 busy for two cycles. + */ +EX: { sw r10, r12 } /* store(WORD_15) */ + + /* Use two L1D hits to cover the sw L2 access above. */ +EX: { lw r10, r1; addi r1, r1, 4 } /* r10 = WORD_6 */ +EX: { lw r12, r1; addi r1, r1, 4 } /* r12 = WORD_7 */ + + /* Fill third L1D line. */ +EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */ + + /* Store first L1D line. */ +EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ +EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ +EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ +#if CHIP_HAS_WH64() +EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ +#else + /* Back up the r9 to a cache line we are already storing to + * if it gets past the end of the dest vector. Strictly speaking, + * we don't need to back up to the start of a cache line, but it's free + * and tidy, so why not? + */ +EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */ +#endif + /* Store second L1D line. */ +EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */ +EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */ +EX: { sw r0, r10; addi r0, r0, 4 } /* store(WORD_6) */ +EX: { sw r0, r12; addi r0, r0, 4 } /* store(WORD_7) */ + +EX: { lw r13, r1; addi r1, r1, 4; move zero, r18 } /* r13 = WORD_9 */ +EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_10 */ +EX: { lw r15, r1; move r1, r20 } /* r15 = WORD_11 */ + + /* Store third L1D line. */ +EX: { sw r0, r18; addi r0, r0, 4 } /* store(WORD_8) */ +EX: { sw r0, r13; addi r0, r0, 4 } /* store(WORD_9) */ +EX: { sw r0, r14; addi r0, r0, 4 } /* store(WORD_10) */ +EX: { sw r0, r15; addi r0, r0, 4 } /* store(WORD_11) */ + + /* Store rest of fourth L1D line. */ +EX: { sw r0, r4; addi r0, r0, 4 } /* store(WORD_12) */ + { +EX: sw r0, r8 /* store(WORD_13) */ + addi r0, r0, 4 + /* Will r2 be > 64 after we subtract 64 below? */ + shri r4, r2, 7 + } + { +EX: sw r0, r11 /* store(WORD_14) */ + addi r0, r0, 8 + /* Record 64 bytes successfully copied. */ + addi r2, r2, -64 + } + + { jrp lr; move lr, r27 } + + /* Convey to the backtrace library that the stack frame is size + * zero, and the real return address is on the stack rather than + * in 'lr'. + */ + { info 8 } + + .align 64 +.Lcopy_unaligned_maybe_many: + /* Skip the setup overhead if we aren't copying many bytes. */ + { slti_u r8, r2, 20; sub r4, zero, r0 } + { bnzt r8, .Lcopy_unaligned_few; andi r4, r4, 3 } + { bz r4, .Ldest_is_word_aligned; add r18, r1, r2 } + +/* + * + * unaligned 4 byte at a time copy handler. + * + */ + + /* Copy single bytes until r0 == 0 mod 4, so we can store words. */ +.Lalign_dest_loop: +EX: { lb_u r3, r1; addi r1, r1, 1; addi r4, r4, -1 } +EX: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 } + { bnzt r4, .Lalign_dest_loop; andi r3, r1, 3 } + + /* If source and dest are now *both* aligned, do an aligned copy. */ + { bz r3, .Lcheck_aligned_copy_size; addli r4, r2, -256 } + +.Ldest_is_word_aligned: + +#if CHIP_HAS_DWORD_ALIGN() +EX: { andi r8, r0, 63; lwadd_na r6, r1, 4} + { slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned } + + /* This copies unaligned words until either there are fewer + * than 4 bytes left to copy, or until the destination pointer + * is cache-aligned, whichever comes first. + * + * On entry: + * - r0 is the next store address. + * - r1 points 4 bytes past the load address corresponding to r0. + * - r2 >= 4 + * - r6 is the next aligned word loaded. + */ +.Lcopy_unaligned_src_words: +EX: { lwadd_na r7, r1, 4; slti_u r8, r2, 4 + 4 } + /* stall */ + { dword_align r6, r7, r1; slti_u r9, r2, 64 + 4 } +EX: { swadd r0, r6, 4; addi r2, r2, -4 } + { bnz r8, .Lcleanup_unaligned_words; andi r8, r0, 63 } + { bnzt r8, .Lcopy_unaligned_src_words; move r6, r7 } + + /* On entry: + * - r0 is the next store address. + * - r1 points 4 bytes past the load address corresponding to r0. + * - r2 >= 4 (# of bytes left to store). + * - r6 is the next aligned src word value. + * - r9 = (r2 < 64U). + * - r18 points one byte past the end of source memory. + */ +.Ldest_is_L2_line_aligned: + + { + /* Not a full cache line remains. */ + bnz r9, .Lcleanup_unaligned_words + move r7, r6 + } + + /* r2 >= 64 */ + + /* Kick off two prefetches, but don't go past the end. */ + { addi r3, r1, 63 - 4; addi r8, r1, 64 + 63 - 4 } + { prefetch r3; move r3, r8; slt_u r8, r8, r18 } + { mvz r3, r8, r1; addi r8, r3, 64 } + { prefetch r3; move r3, r8; slt_u r8, r8, r18 } + { mvz r3, r8, r1; movei r17, 0 } + +.Lcopy_unaligned_line: + /* Prefetch another line. */ + { prefetch r3; addi r15, r1, 60; addi r3, r3, 64 } + /* Fire off a load of the last word we are about to copy. */ +EX: { lw_na r15, r15; slt_u r8, r3, r18 } + +EX: { mvz r3, r8, r1; wh64 r0 } + + /* This loop runs twice. + * + * On entry: + * - r17 is even before the first iteration, and odd before + * the second. It is incremented inside the loop. Encountering + * an even value at the end of the loop makes it stop. + */ +.Lcopy_half_an_unaligned_line: +EX: { + /* Stall until the last byte is ready. In the steady state this + * guarantees all words to load below will be in the L2 cache, which + * avoids shunting the loads to the RTF. + */ + move zero, r15 + lwadd_na r7, r1, 16 + } +EX: { lwadd_na r11, r1, 12 } +EX: { lwadd_na r14, r1, -24 } +EX: { lwadd_na r8, r1, 4 } +EX: { lwadd_na r9, r1, 4 } +EX: { + lwadd_na r10, r1, 8 + /* r16 = (r2 < 64), after we subtract 32 from r2 below. */ + slti_u r16, r2, 64 + 32 + } +EX: { lwadd_na r12, r1, 4; addi r17, r17, 1 } +EX: { lwadd_na r13, r1, 8; dword_align r6, r7, r1 } +EX: { swadd r0, r6, 4; dword_align r7, r8, r1 } +EX: { swadd r0, r7, 4; dword_align r8, r9, r1 } +EX: { swadd r0, r8, 4; dword_align r9, r10, r1 } +EX: { swadd r0, r9, 4; dword_align r10, r11, r1 } +EX: { swadd r0, r10, 4; dword_align r11, r12, r1 } +EX: { swadd r0, r11, 4; dword_align r12, r13, r1 } +EX: { swadd r0, r12, 4; dword_align r13, r14, r1 } +EX: { swadd r0, r13, 4; addi r2, r2, -32 } + { move r6, r14; bbst r17, .Lcopy_half_an_unaligned_line } + + { bzt r16, .Lcopy_unaligned_line; move r7, r6 } + + /* On entry: + * - r0 is the next store address. + * - r1 points 4 bytes past the load address corresponding to r0. + * - r2 >= 0 (# of bytes left to store). + * - r7 is the next aligned src word value. + */ +.Lcleanup_unaligned_words: + /* Handle any trailing bytes. */ + { bz r2, .Lcopy_unaligned_done; slti_u r8, r2, 4 } + { bzt r8, .Lcopy_unaligned_src_words; move r6, r7 } + + /* Move r1 back to the point where it corresponds to r0. */ + { addi r1, r1, -4 } + +#else /* !CHIP_HAS_DWORD_ALIGN() */ + + /* Compute right/left shift counts and load initial source words. */ + { andi r5, r1, -4; andi r3, r1, 3 } +EX: { lw r6, r5; addi r5, r5, 4; shli r3, r3, 3 } +EX: { lw r7, r5; addi r5, r5, 4; sub r4, zero, r3 } + + /* Load and store one word at a time, using shifts and ORs + * to correct for the misaligned src. + */ +.Lcopy_unaligned_src_loop: + { shr r6, r6, r3; shl r8, r7, r4 } +EX: { lw r7, r5; or r8, r8, r6; move r6, r7 } +EX: { sw r0, r8; addi r0, r0, 4; addi r2, r2, -4 } + { addi r5, r5, 4; slti_u r8, r2, 8 } + { bzt r8, .Lcopy_unaligned_src_loop; addi r1, r1, 4 } + + { bz r2, .Lcopy_unaligned_done } +#endif /* !CHIP_HAS_DWORD_ALIGN() */ + + /* Fall through */ + +/* + * + * 1 byte at a time copy handler. + * + */ + +.Lcopy_unaligned_few: +EX: { lb_u r3, r1; addi r1, r1, 1 } +EX: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 } + { bnzt r2, .Lcopy_unaligned_few } + +.Lcopy_unaligned_done: + + /* For memcpy return original dest address, else zero. */ + { mz r0, r29, r23; jrp lr } + +.Lend_memcpy_common: + .size memcpy_common, .Lend_memcpy_common - memcpy_common + + .section .fixup,"ax" +memcpy_common_fixup: + .type memcpy_common_fixup, @function + + /* Skip any bytes we already successfully copied. + * r2 (num remaining) is correct, but r0 (dst) and r1 (src) + * may not be quite right because of unrolling and prefetching. + * So we need to recompute their values as the address just + * after the last byte we are sure was successfully loaded and + * then stored. + */ + + /* Determine how many bytes we successfully copied. */ + { sub r3, r25, r2 } + + /* Add this to the original r0 and r1 to get their new values. */ + { add r0, r23, r3; add r1, r24, r3 } + + { bzt r29, memcpy_fixup_loop } + { blzt r29, copy_to_user_fixup_loop } + +copy_from_user_fixup_loop: + /* Try copying the rest one byte at a time, expecting a load fault. */ +.Lcfu: { lb_u r3, r1; addi r1, r1, 1 } + { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 } + { bnzt r2, copy_from_user_fixup_loop } + +.Lcopy_from_user_fixup_zero_remainder: + { bbs r29, 2f } /* low bit set means IS_COPY_FROM_USER */ + /* byte-at-a-time loop faulted, so zero the rest. */ + { move r3, r2; bz r2, 2f /* should be impossible, but handle it. */ } +1: { sb r0, zero; addi r0, r0, 1; addi r3, r3, -1 } + { bnzt r3, 1b } +2: move lr, r27 + { move r0, r2; jrp lr } + +copy_to_user_fixup_loop: + /* Try copying the rest one byte at a time, expecting a store fault. */ + { lb_u r3, r1; addi r1, r1, 1 } +.Lctu: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 } + { bnzt r2, copy_to_user_fixup_loop } +.Lcopy_to_user_fixup_done: + move lr, r27 + { move r0, r2; jrp lr } + +memcpy_fixup_loop: + /* Try copying the rest one byte at a time. We expect a disastrous + * fault to happen since we are in fixup code, but let it happen. + */ + { lb_u r3, r1; addi r1, r1, 1 } + { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 } + { bnzt r2, memcpy_fixup_loop } + /* This should be unreachable, we should have faulted again. + * But be paranoid and handle it in case some interrupt changed + * the TLB or something. + */ + move lr, r27 + { move r0, r23; jrp lr } + + .size memcpy_common_fixup, . - memcpy_common_fixup + + .section __ex_table,"a" + .word .Lcfu, .Lcopy_from_user_fixup_zero_remainder + .word .Lctu, .Lcopy_to_user_fixup_done diff --git a/arch/tile/lib/memcpy_64.c b/arch/tile/lib/memcpy_64.c new file mode 100644 index 00000000..3fab9a6a --- /dev/null +++ b/arch/tile/lib/memcpy_64.c @@ -0,0 +1,220 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> +#define __memcpy memcpy +/* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */ + +/* Must be 8 bytes in size. */ +#define word_t uint64_t + +#if CHIP_L2_LINE_SIZE() != 64 && CHIP_L2_LINE_SIZE() != 128 +#error "Assumes 64 or 128 byte line size" +#endif + +/* How many cache lines ahead should we prefetch? */ +#define PREFETCH_LINES_AHEAD 3 + +/* + * Provide "base versions" of load and store for the normal code path. + * The kernel provides other versions for userspace copies. + */ +#define ST(p, v) (*(p) = (v)) +#define LD(p) (*(p)) + +#ifndef USERCOPY_FUNC +#define ST1 ST +#define ST2 ST +#define ST4 ST +#define ST8 ST +#define LD1 LD +#define LD2 LD +#define LD4 LD +#define LD8 LD +#define RETVAL dstv +void *memcpy(void *__restrict dstv, const void *__restrict srcv, size_t n) +#else +/* + * Special kernel version will provide implementation of the LDn/STn + * macros to return a count of uncopied bytes due to mm fault. + */ +#define RETVAL 0 +int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n) +#endif +{ + char *__restrict dst1 = (char *)dstv; + const char *__restrict src1 = (const char *)srcv; + const char *__restrict src1_end; + const char *__restrict prefetch; + word_t *__restrict dst8; /* 8-byte pointer to destination memory. */ + word_t final; /* Final bytes to write to trailing word, if any */ + long i; + + if (n < 16) { + for (; n; n--) + ST1(dst1++, LD1(src1++)); + return RETVAL; + } + + /* + * Locate the end of source memory we will copy. Don't + * prefetch past this. + */ + src1_end = src1 + n - 1; + + /* Prefetch ahead a few cache lines, but not past the end. */ + prefetch = src1; + for (i = 0; i < PREFETCH_LINES_AHEAD; i++) { + __insn_prefetch(prefetch); + prefetch += CHIP_L2_LINE_SIZE(); + prefetch = (prefetch > src1_end) ? prefetch : src1; + } + + /* Copy bytes until dst is word-aligned. */ + for (; (uintptr_t)dst1 & (sizeof(word_t) - 1); n--) + ST1(dst1++, LD1(src1++)); + + /* 8-byte pointer to destination memory. */ + dst8 = (word_t *)dst1; + + if (__builtin_expect((uintptr_t)src1 & (sizeof(word_t) - 1), 0)) { + /* + * Misaligned copy. Copy 8 bytes at a time, but don't + * bother with other fanciness. + * + * TODO: Consider prefetching and using wh64 as well. + */ + + /* Create an aligned src8. */ + const word_t *__restrict src8 = + (const word_t *)((uintptr_t)src1 & -sizeof(word_t)); + word_t b; + + word_t a = LD8(src8++); + for (; n >= sizeof(word_t); n -= sizeof(word_t)) { + b = LD8(src8++); + a = __insn_dblalign(a, b, src1); + ST8(dst8++, a); + a = b; + } + + if (n == 0) + return RETVAL; + + b = ((const char *)src8 <= src1_end) ? *src8 : 0; + + /* + * Final source bytes to write to trailing partial + * word, if any. + */ + final = __insn_dblalign(a, b, src1); + } else { + /* Aligned copy. */ + + const word_t* __restrict src8 = (const word_t *)src1; + + /* src8 and dst8 are both word-aligned. */ + if (n >= CHIP_L2_LINE_SIZE()) { + /* Copy until 'dst' is cache-line-aligned. */ + for (; (uintptr_t)dst8 & (CHIP_L2_LINE_SIZE() - 1); + n -= sizeof(word_t)) + ST8(dst8++, LD8(src8++)); + + for (; n >= CHIP_L2_LINE_SIZE(); ) { + __insn_wh64(dst8); + + /* + * Prefetch and advance to next line + * to prefetch, but don't go past the end + */ + __insn_prefetch(prefetch); + prefetch += CHIP_L2_LINE_SIZE(); + prefetch = (prefetch > src1_end) ? prefetch : + (const char *)src8; + + /* + * Copy an entire cache line. Manually + * unrolled to avoid idiosyncracies of + * compiler unrolling. + */ +#define COPY_WORD(offset) ({ ST8(dst8+offset, LD8(src8+offset)); n -= 8; }) + COPY_WORD(0); + COPY_WORD(1); + COPY_WORD(2); + COPY_WORD(3); + COPY_WORD(4); + COPY_WORD(5); + COPY_WORD(6); + COPY_WORD(7); +#if CHIP_L2_LINE_SIZE() == 128 + COPY_WORD(8); + COPY_WORD(9); + COPY_WORD(10); + COPY_WORD(11); + COPY_WORD(12); + COPY_WORD(13); + COPY_WORD(14); + COPY_WORD(15); +#elif CHIP_L2_LINE_SIZE() != 64 +# error Fix code that assumes particular L2 cache line sizes +#endif + + dst8 += CHIP_L2_LINE_SIZE() / sizeof(word_t); + src8 += CHIP_L2_LINE_SIZE() / sizeof(word_t); + } + } + + for (; n >= sizeof(word_t); n -= sizeof(word_t)) + ST8(dst8++, LD8(src8++)); + + if (__builtin_expect(n == 0, 1)) + return RETVAL; + + final = LD8(src8); + } + + /* n != 0 if we get here. Write out any trailing bytes. */ + dst1 = (char *)dst8; + if (n & 4) { + ST4((uint32_t *)dst1, final); + dst1 += 4; + final >>= 32; + n &= 3; + } + if (n & 2) { + ST2((uint16_t *)dst1, final); + dst1 += 2; + final >>= 16; + n &= 1; + } + if (n) + ST1((uint8_t *)dst1, final); + + return RETVAL; +} + + +#ifdef USERCOPY_FUNC +#undef ST1 +#undef ST2 +#undef ST4 +#undef ST8 +#undef LD1 +#undef LD2 +#undef LD4 +#undef LD8 +#undef USERCOPY_FUNC +#endif diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c new file mode 100644 index 00000000..b2fe15e0 --- /dev/null +++ b/arch/tile/lib/memcpy_tile64.c @@ -0,0 +1,276 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/string.h> +#include <linux/smp.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <asm/fixmap.h> +#include <asm/kmap_types.h> +#include <asm/tlbflush.h> +#include <hv/hypervisor.h> +#include <arch/chip.h> + + +#if !CHIP_HAS_COHERENT_LOCAL_CACHE() + +/* Defined in memcpy.S */ +extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n); +extern unsigned long __copy_to_user_inatomic_asm( + void __user *to, const void *from, unsigned long n); +extern unsigned long __copy_from_user_inatomic_asm( + void *to, const void __user *from, unsigned long n); +extern unsigned long __copy_from_user_zeroing_asm( + void *to, const void __user *from, unsigned long n); + +typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long); + +/* Size above which to consider TLB games for performance */ +#define LARGE_COPY_CUTOFF 2048 + +/* Communicate to the simulator what we are trying to do. */ +#define sim_allow_multiple_caching(b) \ + __insn_mtspr(SPR_SIM_CONTROL, \ + SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS)) + +/* + * Copy memory by briefly enabling incoherent cacheline-at-a-time mode. + * + * We set up our own source and destination PTEs that we fully control. + * This is the only way to guarantee that we don't race with another + * thread that is modifying the PTE; we can't afford to try the + * copy_{to,from}_user() technique of catching the interrupt, since + * we must run with interrupts disabled to avoid the risk of some + * other code seeing the incoherent data in our cache. (Recall that + * our cache is indexed by PA, so even if the other code doesn't use + * our kmap_atomic virtual addresses, they'll still hit in cache using + * the normal VAs that aren't supposed to hit in cache.) + */ +static void memcpy_multicache(void *dest, const void *source, + pte_t dst_pte, pte_t src_pte, int len) +{ + int idx; + unsigned long flags, newsrc, newdst; + pmd_t *pmdp; + pte_t *ptep; + int type0, type1; + int cpu = get_cpu(); + + /* + * Disable interrupts so that we don't recurse into memcpy() + * in an interrupt handler, nor accidentally reference + * the PA of the source from an interrupt routine. Also + * notify the simulator that we're playing games so we don't + * generate spurious coherency warnings. + */ + local_irq_save(flags); + sim_allow_multiple_caching(1); + + /* Set up the new dest mapping */ + type0 = kmap_atomic_idx_push(); + idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0; + newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1)); + pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst); + ptep = pte_offset_kernel(pmdp, newdst); + if (pte_val(*ptep) != pte_val(dst_pte)) { + set_pte(ptep, dst_pte); + local_flush_tlb_page(NULL, newdst, PAGE_SIZE); + } + + /* Set up the new source mapping */ + type1 = kmap_atomic_idx_push(); + idx += (type0 - type1); + src_pte = hv_pte_set_nc(src_pte); + src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */ + newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); + pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); + ptep = pte_offset_kernel(pmdp, newsrc); + __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ + local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); + + /* Actually move the data. */ + __memcpy_asm((void *)newdst, (const void *)newsrc, len); + + /* + * Remap the source as locally-cached and not OLOC'ed so that + * we can inval without also invaling the remote cpu's cache. + * This also avoids known errata with inv'ing cacheable oloc data. + */ + src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); + src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ + __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ + local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); + + /* + * Do the actual invalidation, covering the full L2 cache line + * at the end since __memcpy_asm() is somewhat aggressive. + */ + __inv_buffer((void *)newsrc, len); + + /* + * We're done: notify the simulator that all is back to normal, + * and re-enable interrupts and pre-emption. + */ + kmap_atomic_idx_pop(); + kmap_atomic_idx_pop(); + sim_allow_multiple_caching(0); + local_irq_restore(flags); + put_cpu(); +} + +/* + * Identify large copies from remotely-cached memory, and copy them + * via memcpy_multicache() if they look good, otherwise fall back + * to the particular kind of copying passed as the memcpy_t function. + */ +static unsigned long fast_copy(void *dest, const void *source, int len, + memcpy_t func) +{ + /* + * Check if it's big enough to bother with. We may end up doing a + * small copy via TLB manipulation if we're near a page boundary, + * but presumably we'll make it up when we hit the second page. + */ + while (len >= LARGE_COPY_CUTOFF) { + int copy_size, bytes_left_on_page; + pte_t *src_ptep, *dst_ptep; + pte_t src_pte, dst_pte; + struct page *src_page, *dst_page; + + /* Is the source page oloc'ed to a remote cpu? */ +retry_source: + src_ptep = virt_to_pte(current->mm, (unsigned long)source); + if (src_ptep == NULL) + break; + src_pte = *src_ptep; + if (!hv_pte_get_present(src_pte) || + !hv_pte_get_readable(src_pte) || + hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3) + break; + if (get_remote_cache_cpu(src_pte) == smp_processor_id()) + break; + src_page = pfn_to_page(hv_pte_get_pfn(src_pte)); + get_page(src_page); + if (pte_val(src_pte) != pte_val(*src_ptep)) { + put_page(src_page); + goto retry_source; + } + if (pte_huge(src_pte)) { + /* Adjust the PTE to correspond to a small page */ + int pfn = hv_pte_get_pfn(src_pte); + pfn += (((unsigned long)source & (HPAGE_SIZE-1)) + >> PAGE_SHIFT); + src_pte = pfn_pte(pfn, src_pte); + src_pte = pte_mksmall(src_pte); + } + + /* Is the destination page writable? */ +retry_dest: + dst_ptep = virt_to_pte(current->mm, (unsigned long)dest); + if (dst_ptep == NULL) { + put_page(src_page); + break; + } + dst_pte = *dst_ptep; + if (!hv_pte_get_present(dst_pte) || + !hv_pte_get_writable(dst_pte)) { + put_page(src_page); + break; + } + dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte)); + if (dst_page == src_page) { + /* + * Source and dest are on the same page; this + * potentially exposes us to incoherence if any + * part of src and dest overlap on a cache line. + * Just give up rather than trying to be precise. + */ + put_page(src_page); + break; + } + get_page(dst_page); + if (pte_val(dst_pte) != pte_val(*dst_ptep)) { + put_page(dst_page); + goto retry_dest; + } + if (pte_huge(dst_pte)) { + /* Adjust the PTE to correspond to a small page */ + int pfn = hv_pte_get_pfn(dst_pte); + pfn += (((unsigned long)dest & (HPAGE_SIZE-1)) + >> PAGE_SHIFT); + dst_pte = pfn_pte(pfn, dst_pte); + dst_pte = pte_mksmall(dst_pte); + } + + /* All looks good: create a cachable PTE and copy from it */ + copy_size = len; + bytes_left_on_page = + PAGE_SIZE - (((int)source) & (PAGE_SIZE-1)); + if (copy_size > bytes_left_on_page) + copy_size = bytes_left_on_page; + bytes_left_on_page = + PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1)); + if (copy_size > bytes_left_on_page) + copy_size = bytes_left_on_page; + memcpy_multicache(dest, source, dst_pte, src_pte, copy_size); + + /* Release the pages */ + put_page(dst_page); + put_page(src_page); + + /* Continue on the next page */ + dest += copy_size; + source += copy_size; + len -= copy_size; + } + + return func(dest, source, len); +} + +void *memcpy(void *to, const void *from, __kernel_size_t n) +{ + if (n < LARGE_COPY_CUTOFF) + return (void *)__memcpy_asm(to, from, n); + else + return (void *)fast_copy(to, from, n, __memcpy_asm); +} + +unsigned long __copy_to_user_inatomic(void __user *to, const void *from, + unsigned long n) +{ + if (n < LARGE_COPY_CUTOFF) + return __copy_to_user_inatomic_asm(to, from, n); + else + return fast_copy(to, from, n, __copy_to_user_inatomic_asm); +} + +unsigned long __copy_from_user_inatomic(void *to, const void __user *from, + unsigned long n) +{ + if (n < LARGE_COPY_CUTOFF) + return __copy_from_user_inatomic_asm(to, from, n); + else + return fast_copy(to, from, n, __copy_from_user_inatomic_asm); +} + +unsigned long __copy_from_user_zeroing(void *to, const void __user *from, + unsigned long n) +{ + if (n < LARGE_COPY_CUTOFF) + return __copy_from_user_zeroing_asm(to, from, n); + else + return fast_copy(to, from, n, __copy_from_user_zeroing_asm); +} + +#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */ diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c new file mode 100644 index 00000000..37440caa --- /dev/null +++ b/arch/tile/lib/memcpy_user_64.c @@ -0,0 +1,92 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Do memcpy(), but trap and return "n" when a load or store faults. + * + * Note: this idiom only works when memcpy() compiles to a leaf function. + * Here leaf function not only means it does not have calls, but also + * requires no stack operations (sp, stack frame pointer) and no + * use of callee-saved registers, else "jrp lr" will be incorrect since + * unwinding stack frame is bypassed. Since memcpy() is not complex so + * these conditions are satisfied here, but we need to be careful when + * modifying this file. This is not a clean solution but is the best + * one so far. + * + * Also note that we are capturing "n" from the containing scope here. + */ + +#define _ST(p, inst, v) \ + ({ \ + asm("1: " #inst " %0, %1;" \ + ".pushsection .coldtext.memcpy,\"ax\";" \ + "2: { move r0, %2; jrp lr };" \ + ".section __ex_table,\"a\";" \ + ".quad 1b, 2b;" \ + ".popsection" \ + : "=m" (*(p)) : "r" (v), "r" (n)); \ + }) + +#define _LD(p, inst) \ + ({ \ + unsigned long __v; \ + asm("1: " #inst " %0, %1;" \ + ".pushsection .coldtext.memcpy,\"ax\";" \ + "2: { move r0, %2; jrp lr };" \ + ".section __ex_table,\"a\";" \ + ".quad 1b, 2b;" \ + ".popsection" \ + : "=r" (__v) : "m" (*(p)), "r" (n)); \ + __v; \ + }) + +#define USERCOPY_FUNC __copy_to_user_inatomic +#define ST1(p, v) _ST((p), st1, (v)) +#define ST2(p, v) _ST((p), st2, (v)) +#define ST4(p, v) _ST((p), st4, (v)) +#define ST8(p, v) _ST((p), st, (v)) +#define LD1 LD +#define LD2 LD +#define LD4 LD +#define LD8 LD +#include "memcpy_64.c" + +#define USERCOPY_FUNC __copy_from_user_inatomic +#define ST1 ST +#define ST2 ST +#define ST4 ST +#define ST8 ST +#define LD1(p) _LD((p), ld1u) +#define LD2(p) _LD((p), ld2u) +#define LD4(p) _LD((p), ld4u) +#define LD8(p) _LD((p), ld) +#include "memcpy_64.c" + +#define USERCOPY_FUNC __copy_in_user_inatomic +#define ST1(p, v) _ST((p), st1, (v)) +#define ST2(p, v) _ST((p), st2, (v)) +#define ST4(p, v) _ST((p), st4, (v)) +#define ST8(p, v) _ST((p), st, (v)) +#define LD1(p) _LD((p), ld1u) +#define LD2(p) _LD((p), ld2u) +#define LD4(p) _LD((p), ld4u) +#define LD8(p) _LD((p), ld) +#include "memcpy_64.c" + +unsigned long __copy_from_user_zeroing(void *to, const void __user *from, + unsigned long n) +{ + unsigned long rc = __copy_from_user_inatomic(to, from, n); + if (unlikely(rc)) + memset(to + n - rc, 0, rc); + return rc; +} diff --git a/arch/tile/lib/memmove.c b/arch/tile/lib/memmove.c new file mode 100644 index 00000000..fd615ae6 --- /dev/null +++ b/arch/tile/lib/memmove.c @@ -0,0 +1,63 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> + +void *memmove(void *dest, const void *src, size_t n) +{ + if ((const char *)src >= (char *)dest + n + || (char *)dest >= (const char *)src + n) { + /* We found no overlap, so let memcpy do all the heavy + * lifting (prefetching, etc.) + */ + return memcpy(dest, src, n); + } + + if (n != 0) { + const uint8_t *in; + uint8_t x; + uint8_t *out; + int stride; + + if (src < dest) { + /* copy backwards */ + in = (const uint8_t *)src + n - 1; + out = (uint8_t *)dest + n - 1; + stride = -1; + } else { + /* copy forwards */ + in = (const uint8_t *)src; + out = (uint8_t *)dest; + stride = 1; + } + + /* Manually software-pipeline this loop. */ + x = *in; + in += stride; + + while (--n != 0) { + *out = x; + out += stride; + x = *in; + in += stride; + } + + *out = x; + } + + return dest; +} +EXPORT_SYMBOL(memmove); diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c new file mode 100644 index 00000000..57dbb3a5 --- /dev/null +++ b/arch/tile/lib/memset_32.c @@ -0,0 +1,251 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <arch/chip.h> + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> + +#undef memset + +void *memset(void *s, int c, size_t n) +{ + uint32_t *out32; + int n32; + uint32_t v16, v32; + uint8_t *out8 = s; +#if !CHIP_HAS_WH64() + int ahead32; +#else + int to_align32; +#endif + + /* Experimentation shows that a trivial tight loop is a win up until + * around a size of 20, where writing a word at a time starts to win. + */ +#define BYTE_CUTOFF 20 + +#if BYTE_CUTOFF < 3 + /* This must be at least at least this big, or some code later + * on doesn't work. + */ +#error "BYTE_CUTOFF is too small" +#endif + + if (n < BYTE_CUTOFF) { + /* Strangely, this turns out to be the tightest way to + * write this loop. + */ + if (n != 0) { + do { + /* Strangely, combining these into one line + * performs worse. + */ + *out8 = c; + out8++; + } while (--n != 0); + } + + return s; + } + +#if !CHIP_HAS_WH64() + /* Use a spare issue slot to start prefetching the first cache + * line early. This instruction is free as the store can be buried + * in otherwise idle issue slots doing ALU ops. + */ + __insn_prefetch(out8); + + /* We prefetch the end so that a short memset that spans two cache + * lines gets some prefetching benefit. Again we believe this is free + * to issue. + */ + __insn_prefetch(&out8[n - 1]); +#endif /* !CHIP_HAS_WH64() */ + + + /* Align 'out8'. We know n >= 3 so this won't write past the end. */ + while (((uintptr_t) out8 & 3) != 0) { + *out8++ = c; + --n; + } + + /* Align 'n'. */ + while (n & 3) + out8[--n] = c; + + out32 = (uint32_t *) out8; + n32 = n >> 2; + + /* Tile input byte out to 32 bits. */ + v16 = __insn_intlb(c, c); + v32 = __insn_intlh(v16, v16); + + /* This must be at least 8 or the following loop doesn't work. */ +#define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4) + +#if !CHIP_HAS_WH64() + + ahead32 = CACHE_LINE_SIZE_IN_WORDS; + + /* We already prefetched the first and last cache lines, so + * we only need to do more prefetching if we are storing + * to more than two cache lines. + */ + if (n32 > CACHE_LINE_SIZE_IN_WORDS * 2) { + int i; + + /* Prefetch the next several cache lines. + * This is the setup code for the software-pipelined + * loop below. + */ +#define MAX_PREFETCH 5 + ahead32 = n32 & -CACHE_LINE_SIZE_IN_WORDS; + if (ahead32 > MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS) + ahead32 = MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS; + + for (i = CACHE_LINE_SIZE_IN_WORDS; + i < ahead32; i += CACHE_LINE_SIZE_IN_WORDS) + __insn_prefetch(&out32[i]); + } + + if (n32 > ahead32) { + while (1) { + int j; + + /* Prefetch by reading one word several cache lines + * ahead. Since loads are non-blocking this will + * cause the full cache line to be read while we are + * finishing earlier cache lines. Using a store + * here causes microarchitectural performance + * problems where a victimizing store miss goes to + * the head of the retry FIFO and locks the pipe for + * a few cycles. So a few subsequent stores in this + * loop go into the retry FIFO, and then later + * stores see other stores to the same cache line + * are already in the retry FIFO and themselves go + * into the retry FIFO, filling it up and grinding + * to a halt waiting for the original miss to be + * satisfied. + */ + __insn_prefetch(&out32[ahead32]); + +#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0 +#error "Unhandled CACHE_LINE_SIZE_IN_WORDS" +#endif + + n32 -= CACHE_LINE_SIZE_IN_WORDS; + + /* Save icache space by only partially unrolling + * this loop. + */ + for (j = CACHE_LINE_SIZE_IN_WORDS / 4; j > 0; j--) { + *out32++ = v32; + *out32++ = v32; + *out32++ = v32; + *out32++ = v32; + } + + /* To save compiled code size, reuse this loop even + * when we run out of prefetching to do by dropping + * ahead32 down. + */ + if (n32 <= ahead32) { + /* Not even a full cache line left, + * so stop now. + */ + if (n32 < CACHE_LINE_SIZE_IN_WORDS) + break; + + /* Choose a small enough value that we don't + * prefetch past the end. There's no sense + * in touching cache lines we don't have to. + */ + ahead32 = CACHE_LINE_SIZE_IN_WORDS - 1; + } + } + } + +#else /* CHIP_HAS_WH64() */ + + /* Determine how many words we need to emit before the 'out32' + * pointer becomes aligned modulo the cache line size. + */ + to_align32 = + (-((uintptr_t)out32 >> 2)) & (CACHE_LINE_SIZE_IN_WORDS - 1); + + /* Only bother aligning and using wh64 if there is at least + * one full cache line to process. This check also prevents + * overrunning the end of the buffer with alignment words. + */ + if (to_align32 <= n32 - CACHE_LINE_SIZE_IN_WORDS) { + int lines_left; + + /* Align out32 mod the cache line size so we can use wh64. */ + n32 -= to_align32; + for (; to_align32 != 0; to_align32--) { + *out32 = v32; + out32++; + } + + /* Use unsigned divide to turn this into a right shift. */ + lines_left = (unsigned)n32 / CACHE_LINE_SIZE_IN_WORDS; + + do { + /* Only wh64 a few lines at a time, so we don't + * exceed the maximum number of victim lines. + */ + int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS()) + ? lines_left + : CHIP_MAX_OUTSTANDING_VICTIMS()); + uint32_t *wh = out32; + int i = x; + int j; + + lines_left -= x; + + do { + __insn_wh64(wh); + wh += CACHE_LINE_SIZE_IN_WORDS; + } while (--i); + + for (j = x * (CACHE_LINE_SIZE_IN_WORDS / 4); + j != 0; j--) { + *out32++ = v32; + *out32++ = v32; + *out32++ = v32; + *out32++ = v32; + } + } while (lines_left != 0); + + /* We processed all full lines above, so only this many + * words remain to be processed. + */ + n32 &= CACHE_LINE_SIZE_IN_WORDS - 1; + } + +#endif /* CHIP_HAS_WH64() */ + + /* Now handle any leftover values. */ + if (n32 != 0) { + do { + *out32 = v32; + out32++; + } while (--n32 != 0); + } + + return s; +} +EXPORT_SYMBOL(memset); diff --git a/arch/tile/lib/memset_64.c b/arch/tile/lib/memset_64.c new file mode 100644 index 00000000..38730857 --- /dev/null +++ b/arch/tile/lib/memset_64.c @@ -0,0 +1,145 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <arch/chip.h> + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> + +#undef memset + +void *memset(void *s, int c, size_t n) +{ + uint64_t *out64; + int n64, to_align64; + uint64_t v64; + uint8_t *out8 = s; + + /* Experimentation shows that a trivial tight loop is a win up until + * around a size of 20, where writing a word at a time starts to win. + */ +#define BYTE_CUTOFF 20 + +#if BYTE_CUTOFF < 7 + /* This must be at least at least this big, or some code later + * on doesn't work. + */ +#error "BYTE_CUTOFF is too small" +#endif + + if (n < BYTE_CUTOFF) { + /* Strangely, this turns out to be the tightest way to + * write this loop. + */ + if (n != 0) { + do { + /* Strangely, combining these into one line + * performs worse. + */ + *out8 = c; + out8++; + } while (--n != 0); + } + + return s; + } + + /* Align 'out8'. We know n >= 7 so this won't write past the end. */ + while (((uintptr_t) out8 & 7) != 0) { + *out8++ = c; + --n; + } + + /* Align 'n'. */ + while (n & 7) + out8[--n] = c; + + out64 = (uint64_t *) out8; + n64 = n >> 3; + + /* Tile input byte out to 64 bits. */ + /* KLUDGE */ + v64 = 0x0101010101010101ULL * (uint8_t)c; + + /* This must be at least 8 or the following loop doesn't work. */ +#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8) + + /* Determine how many words we need to emit before the 'out32' + * pointer becomes aligned modulo the cache line size. + */ + to_align64 = (-((uintptr_t)out64 >> 3)) & + (CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1); + + /* Only bother aligning and using wh64 if there is at least + * one full cache line to process. This check also prevents + * overrunning the end of the buffer with alignment words. + */ + if (to_align64 <= n64 - CACHE_LINE_SIZE_IN_DOUBLEWORDS) { + int lines_left; + + /* Align out64 mod the cache line size so we can use wh64. */ + n64 -= to_align64; + for (; to_align64 != 0; to_align64--) { + *out64 = v64; + out64++; + } + + /* Use unsigned divide to turn this into a right shift. */ + lines_left = (unsigned)n64 / CACHE_LINE_SIZE_IN_DOUBLEWORDS; + + do { + /* Only wh64 a few lines at a time, so we don't + * exceed the maximum number of victim lines. + */ + int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS()) + ? lines_left + : CHIP_MAX_OUTSTANDING_VICTIMS()); + uint64_t *wh = out64; + int i = x; + int j; + + lines_left -= x; + + do { + __insn_wh64(wh); + wh += CACHE_LINE_SIZE_IN_DOUBLEWORDS; + } while (--i); + + for (j = x * (CACHE_LINE_SIZE_IN_DOUBLEWORDS / 4); + j != 0; j--) { + *out64++ = v64; + *out64++ = v64; + *out64++ = v64; + *out64++ = v64; + } + } while (lines_left != 0); + + /* We processed all full lines above, so only this many + * words remain to be processed. + */ + n64 &= CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1; + } + + /* Now handle any leftover values. */ + if (n64 != 0) { + do { + *out64 = v64; + out64++; + } while (--n64 != 0); + } + + return s; +} +EXPORT_SYMBOL(memset); diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c new file mode 100644 index 00000000..b16ac49a --- /dev/null +++ b/arch/tile/lib/spinlock_32.c @@ -0,0 +1,259 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/spinlock.h> +#include <linux/module.h> +#include <asm/processor.h> +#include <arch/spr_def.h> + +#include "spinlock_common.h" + +void arch_spin_lock(arch_spinlock_t *lock) +{ + int my_ticket; + int iterations = 0; + int delta; + + while ((my_ticket = __insn_tns((void *)&lock->next_ticket)) & 1) + delay_backoff(iterations++); + + /* Increment the next ticket number, implicitly releasing tns lock. */ + lock->next_ticket = my_ticket + TICKET_QUANTUM; + + /* Wait until it's our turn. */ + while ((delta = my_ticket - lock->current_ticket) != 0) + relax((128 / CYCLES_PER_RELAX_LOOP) * delta); +} +EXPORT_SYMBOL(arch_spin_lock); + +int arch_spin_trylock(arch_spinlock_t *lock) +{ + /* + * Grab a ticket; no need to retry if it's busy, we'll just + * treat that the same as "locked", since someone else + * will lock it momentarily anyway. + */ + int my_ticket = __insn_tns((void *)&lock->next_ticket); + + if (my_ticket == lock->current_ticket) { + /* Not currently locked, so lock it by keeping this ticket. */ + lock->next_ticket = my_ticket + TICKET_QUANTUM; + /* Success! */ + return 1; + } + + if (!(my_ticket & 1)) { + /* Release next_ticket. */ + lock->next_ticket = my_ticket; + } + + return 0; +} +EXPORT_SYMBOL(arch_spin_trylock); + +void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u32 iterations = 0; + while (arch_spin_is_locked(lock)) + delay_backoff(iterations++); +} +EXPORT_SYMBOL(arch_spin_unlock_wait); + +/* + * The low byte is always reserved to be the marker for a "tns" operation + * since the low bit is set to "1" by a tns. The next seven bits are + * zeroes. The next byte holds the "next" writer value, i.e. the ticket + * available for the next task that wants to write. The third byte holds + * the current writer value, i.e. the writer who holds the current ticket. + * If current == next == 0, there are no interested writers. + */ +#define WR_NEXT_SHIFT _WR_NEXT_SHIFT +#define WR_CURR_SHIFT _WR_CURR_SHIFT +#define WR_WIDTH _WR_WIDTH +#define WR_MASK ((1 << WR_WIDTH) - 1) + +/* + * The last eight bits hold the active reader count. This has to be + * zero before a writer can start to write. + */ +#define RD_COUNT_SHIFT _RD_COUNT_SHIFT +#define RD_COUNT_WIDTH _RD_COUNT_WIDTH +#define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1) + + +/* + * We can get the read lock if everything but the reader bits (which + * are in the high part of the word) is zero, i.e. no active or + * waiting writers, no tns. + * + * We guard the tns/store-back with an interrupt critical section to + * preserve the semantic that the same read lock can be acquired in an + * interrupt context. + */ +inline int arch_read_trylock(arch_rwlock_t *rwlock) +{ + u32 val; + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); + val = __insn_tns((int *)&rwlock->lock); + if (likely((val << _RD_COUNT_WIDTH) == 0)) { + val += 1 << RD_COUNT_SHIFT; + rwlock->lock = val; + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); + BUG_ON(val == 0); /* we don't expect wraparound */ + return 1; + } + if ((val & 1) == 0) + rwlock->lock = val; + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); + return 0; +} +EXPORT_SYMBOL(arch_read_trylock); + +/* + * Spin doing arch_read_trylock() until we acquire the lock. + * ISSUE: This approach can permanently starve readers. A reader who sees + * a writer could instead take a ticket lock (just like a writer would), + * and atomically enter read mode (with 1 reader) when it gets the ticket. + * This way both readers and writers would always make forward progress + * in a finite time. + */ +void arch_read_lock(arch_rwlock_t *rwlock) +{ + u32 iterations = 0; + while (unlikely(!arch_read_trylock(rwlock))) + delay_backoff(iterations++); +} +EXPORT_SYMBOL(arch_read_lock); + +void arch_read_unlock(arch_rwlock_t *rwlock) +{ + u32 val, iterations = 0; + + mb(); /* guarantee anything modified under the lock is visible */ + for (;;) { + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); + val = __insn_tns((int *)&rwlock->lock); + if (likely((val & 1) == 0)) { + rwlock->lock = val - (1 << _RD_COUNT_SHIFT); + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); + break; + } + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); + delay_backoff(iterations++); + } +} +EXPORT_SYMBOL(arch_read_unlock); + +/* + * We don't need an interrupt critical section here (unlike for + * arch_read_lock) since we should never use a bare write lock where + * it could be interrupted by code that could try to re-acquire it. + */ +void arch_write_lock(arch_rwlock_t *rwlock) +{ + /* + * The trailing underscore on this variable (and curr_ below) + * reminds us that the high bits are garbage; we mask them out + * when we compare them. + */ + u32 my_ticket_; + u32 iterations = 0; + u32 val = __insn_tns((int *)&rwlock->lock); + + if (likely(val == 0)) { + rwlock->lock = 1 << _WR_NEXT_SHIFT; + return; + } + + /* + * Wait until there are no readers, then bump up the next + * field and capture the ticket value. + */ + for (;;) { + if (!(val & 1)) { + if ((val >> RD_COUNT_SHIFT) == 0) + break; + rwlock->lock = val; + } + delay_backoff(iterations++); + val = __insn_tns((int *)&rwlock->lock); + } + + /* Take out the next ticket and extract my ticket value. */ + rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); + my_ticket_ = val >> WR_NEXT_SHIFT; + + /* Wait until the "current" field matches our ticket. */ + for (;;) { + u32 curr_ = val >> WR_CURR_SHIFT; + u32 delta = ((my_ticket_ - curr_) & WR_MASK); + if (likely(delta == 0)) + break; + + /* Delay based on how many lock-holders are still out there. */ + relax((256 / CYCLES_PER_RELAX_LOOP) * delta); + + /* + * Get a non-tns value to check; we don't need to tns + * it ourselves. Since we're not tns'ing, we retry + * more rapidly to get a valid value. + */ + while ((val = rwlock->lock) & 1) + relax(4); + } +} +EXPORT_SYMBOL(arch_write_lock); + +int arch_write_trylock(arch_rwlock_t *rwlock) +{ + u32 val = __insn_tns((int *)&rwlock->lock); + + /* + * If a tns is in progress, or there's a waiting or active locker, + * or active readers, we can't take the lock, so give up. + */ + if (unlikely(val != 0)) { + if (!(val & 1)) + rwlock->lock = val; + return 0; + } + + /* Set the "next" field to mark it locked. */ + rwlock->lock = 1 << _WR_NEXT_SHIFT; + return 1; +} +EXPORT_SYMBOL(arch_write_trylock); + +void arch_write_unlock(arch_rwlock_t *rwlock) +{ + u32 val, eq, mask; + + mb(); /* guarantee anything modified under the lock is visible */ + val = __insn_tns((int *)&rwlock->lock); + if (likely(val == (1 << _WR_NEXT_SHIFT))) { + rwlock->lock = 0; + return; + } + while (unlikely(val & 1)) { + /* Limited backoff since we are the highest-priority task. */ + relax(4); + val = __insn_tns((int *)&rwlock->lock); + } + mask = 1 << WR_CURR_SHIFT; + val = __insn_addb(val, mask); + eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT)); + val = __insn_mz(eq & mask, val); + rwlock->lock = val; +} +EXPORT_SYMBOL(arch_write_unlock); diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c new file mode 100644 index 00000000..d6fb9581 --- /dev/null +++ b/arch/tile/lib/spinlock_64.c @@ -0,0 +1,104 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/spinlock.h> +#include <linux/module.h> +#include <asm/processor.h> + +#include "spinlock_common.h" + +/* + * Read the spinlock value without allocating in our cache and without + * causing an invalidation to another cpu with a copy of the cacheline. + * This is important when we are spinning waiting for the lock. + */ +static inline u32 arch_spin_read_noalloc(void *lock) +{ + return atomic_cmpxchg((atomic_t *)lock, -1, -1); +} + +/* + * Wait until the high bits (current) match my ticket. + * If we notice the overflow bit set on entry, we clear it. + */ +void arch_spin_lock_slow(arch_spinlock_t *lock, u32 my_ticket) +{ + if (unlikely(my_ticket & __ARCH_SPIN_NEXT_OVERFLOW)) { + __insn_fetchand4(&lock->lock, ~__ARCH_SPIN_NEXT_OVERFLOW); + my_ticket &= ~__ARCH_SPIN_NEXT_OVERFLOW; + } + + for (;;) { + u32 val = arch_spin_read_noalloc(lock); + u32 delta = my_ticket - arch_spin_current(val); + if (delta == 0) + return; + relax((128 / CYCLES_PER_RELAX_LOOP) * delta); + } +} +EXPORT_SYMBOL(arch_spin_lock_slow); + +/* + * Check the lock to see if it is plausible, and try to get it with cmpxchg(). + */ +int arch_spin_trylock(arch_spinlock_t *lock) +{ + u32 val = arch_spin_read_noalloc(lock); + if (unlikely(arch_spin_current(val) != arch_spin_next(val))) + return 0; + return cmpxchg(&lock->lock, val, (val + 1) & ~__ARCH_SPIN_NEXT_OVERFLOW) + == val; +} +EXPORT_SYMBOL(arch_spin_trylock); + +void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u32 iterations = 0; + while (arch_spin_is_locked(lock)) + delay_backoff(iterations++); +} +EXPORT_SYMBOL(arch_spin_unlock_wait); + +/* + * If the read lock fails due to a writer, we retry periodically + * until the value is positive and we write our incremented reader count. + */ +void __read_lock_failed(arch_rwlock_t *rw) +{ + u32 val; + int iterations = 0; + do { + delay_backoff(iterations++); + val = __insn_fetchaddgez4(&rw->lock, 1); + } while (unlikely(arch_write_val_locked(val))); +} +EXPORT_SYMBOL(__read_lock_failed); + +/* + * If we failed because there were readers, clear the "writer" bit + * so we don't block additional readers. Otherwise, there was another + * writer anyway, so our "fetchor" made no difference. Then wait, + * issuing periodic fetchor instructions, till we get the lock. + */ +void __write_lock_failed(arch_rwlock_t *rw, u32 val) +{ + int iterations = 0; + do { + if (!arch_write_val_locked(val)) + val = __insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT); + delay_backoff(iterations++); + val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT); + } while (val != 0); +} +EXPORT_SYMBOL(__write_lock_failed); diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h new file mode 100644 index 00000000..6ac37509 --- /dev/null +++ b/arch/tile/lib/spinlock_common.h @@ -0,0 +1,64 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * This file is included into spinlock_32.c or _64.c. + */ + +/* + * The mfspr in __spinlock_relax() is 5 or 6 cycles plus 2 for loop + * overhead. + */ +#ifdef __tilegx__ +#define CYCLES_PER_RELAX_LOOP 7 +#else +#define CYCLES_PER_RELAX_LOOP 8 +#endif + +/* + * Idle the core for CYCLES_PER_RELAX_LOOP * iterations cycles. + */ +static inline void +relax(int iterations) +{ + for (/*above*/; iterations > 0; iterations--) + __insn_mfspr(SPR_PASS); + barrier(); +} + +/* Perform bounded exponential backoff.*/ +static void delay_backoff(int iterations) +{ + u32 exponent, loops; + + /* + * 2^exponent is how many times we go around the loop, + * which takes 8 cycles. We want to start with a 16- to 31-cycle + * loop, so we need to go around minimum 2 = 2^1 times, so we + * bias the original value up by 1. + */ + exponent = iterations + 1; + + /* + * Don't allow exponent to exceed 7, so we have 128 loops, + * or 1,024 (to 2,047) cycles, as our maximum. + */ + if (exponent > 8) + exponent = 8; + + loops = 1 << exponent; + + /* Add a randomness factor so two cpus never get in lock step. */ + loops += __insn_crc32_32(stack_pointer, get_cycles_low()) & + (loops - 1); + + relax(loops); +} diff --git a/arch/tile/lib/strchr_32.c b/arch/tile/lib/strchr_32.c new file mode 100644 index 00000000..c94e6f7a --- /dev/null +++ b/arch/tile/lib/strchr_32.c @@ -0,0 +1,66 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> + +#undef strchr + +char *strchr(const char *s, int c) +{ + int z, g; + + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint32_t *p = (const uint32_t *)(s_int & -4); + + /* Create four copies of the byte for which we are looking. */ + const uint32_t goal = 0x01010101 * (uint8_t) c; + + /* Read the first aligned word, but force bytes before the string to + * match neither zero nor goal (we make sure the high bit of each + * byte is 1, and the low 7 bits are all the opposite of the goal + * byte). + * + * Note that this shift count expression works because we know shift + * counts are taken mod 32. + */ + const uint32_t before_mask = (1 << (s_int << 3)) - 1; + uint32_t v = (*p | before_mask) ^ (goal & __insn_shrib(before_mask, 1)); + + uint32_t zero_matches, goal_matches; + while (1) { + /* Look for a terminating '\0'. */ + zero_matches = __insn_seqb(v, 0); + + /* Look for the goal byte. */ + goal_matches = __insn_seqb(v, goal); + + if (__builtin_expect(zero_matches | goal_matches, 0)) + break; + + v = *++p; + } + + z = __insn_ctz(zero_matches); + g = __insn_ctz(goal_matches); + + /* If we found c before '\0' we got a match. Note that if c == '\0' + * then g == z, and we correctly return the address of the '\0' + * rather than NULL. + */ + return (g <= z) ? ((char *)p) + (g >> 3) : NULL; +} +EXPORT_SYMBOL(strchr); diff --git a/arch/tile/lib/strchr_64.c b/arch/tile/lib/strchr_64.c new file mode 100644 index 00000000..617a9273 --- /dev/null +++ b/arch/tile/lib/strchr_64.c @@ -0,0 +1,67 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> + +#undef strchr + +char *strchr(const char *s, int c) +{ + int z, g; + + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint64_t *p = (const uint64_t *)(s_int & -8); + + /* Create eight copies of the byte for which we are looking. */ + const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c; + + /* Read the first aligned word, but force bytes before the string to + * match neither zero nor goal (we make sure the high bit of each + * byte is 1, and the low 7 bits are all the opposite of the goal + * byte). + * + * Note that this shift count expression works because we know shift + * counts are taken mod 64. + */ + const uint64_t before_mask = (1ULL << (s_int << 3)) - 1; + uint64_t v = (*p | before_mask) ^ + (goal & __insn_v1shrsi(before_mask, 1)); + + uint64_t zero_matches, goal_matches; + while (1) { + /* Look for a terminating '\0'. */ + zero_matches = __insn_v1cmpeqi(v, 0); + + /* Look for the goal byte. */ + goal_matches = __insn_v1cmpeq(v, goal); + + if (__builtin_expect((zero_matches | goal_matches) != 0, 0)) + break; + + v = *++p; + } + + z = __insn_ctz(zero_matches); + g = __insn_ctz(goal_matches); + + /* If we found c before '\0' we got a match. Note that if c == '\0' + * then g == z, and we correctly return the address of the '\0' + * rather than NULL. + */ + return (g <= z) ? ((char *)p) + (g >> 3) : NULL; +} +EXPORT_SYMBOL(strchr); diff --git a/arch/tile/lib/strlen_32.c b/arch/tile/lib/strlen_32.c new file mode 100644 index 00000000..4974292a --- /dev/null +++ b/arch/tile/lib/strlen_32.c @@ -0,0 +1,38 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> + +#undef strlen + +size_t strlen(const char *s) +{ + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint32_t *p = (const uint32_t *)(s_int & -4); + + /* Read the first word, but force bytes before the string to be nonzero. + * This expression works because we know shift counts are taken mod 32. + */ + uint32_t v = *p | ((1 << (s_int << 3)) - 1); + + uint32_t bits; + while ((bits = __insn_seqb(v, 0)) == 0) + v = *++p; + + return ((const char *)p) + (__insn_ctz(bits) >> 3) - s; +} +EXPORT_SYMBOL(strlen); diff --git a/arch/tile/lib/strlen_64.c b/arch/tile/lib/strlen_64.c new file mode 100644 index 00000000..1c92d462 --- /dev/null +++ b/arch/tile/lib/strlen_64.c @@ -0,0 +1,38 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> + +#undef strlen + +size_t strlen(const char *s) +{ + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint64_t *p = (const uint64_t *)(s_int & -8); + + /* Read the first word, but force bytes before the string to be nonzero. + * This expression works because we know shift counts are taken mod 64. + */ + uint64_t v = *p | ((1ULL << (s_int << 3)) - 1); + + uint64_t bits; + while ((bits = __insn_v1cmpeqi(v, 0)) == 0) + v = *++p; + + return ((const char *)p) + (__insn_ctz(bits) >> 3) - s; +} +EXPORT_SYMBOL(strlen); diff --git a/arch/tile/lib/uaccess.c b/arch/tile/lib/uaccess.c new file mode 100644 index 00000000..f8d398c9 --- /dev/null +++ b/arch/tile/lib/uaccess.c @@ -0,0 +1,32 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/uaccess.h> +#include <linux/module.h> + +int __range_ok(unsigned long addr, unsigned long size) +{ + unsigned long limit = current_thread_info()->addr_limit.seg; + return !((addr < limit && size <= limit - addr) || + is_arch_mappable_range(addr, size)); +} +EXPORT_SYMBOL(__range_ok); + +#ifdef CONFIG_DEBUG_COPY_FROM_USER +void copy_from_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_from_user_overflow); +#endif diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S new file mode 100644 index 00000000..979f76d8 --- /dev/null +++ b/arch/tile/lib/usercopy_32.S @@ -0,0 +1,223 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/linkage.h> +#include <asm/errno.h> +#include <asm/cache.h> +#include <arch/chip.h> + +/* Access user memory, but use MMU to avoid propagating kernel exceptions. */ + + .pushsection .fixup,"ax" + +get_user_fault: + { move r0, zero; move r1, zero } + { movei r2, -EFAULT; jrp lr } + ENDPROC(get_user_fault) + +put_user_fault: + { movei r0, -EFAULT; jrp lr } + ENDPROC(put_user_fault) + + .popsection + +/* + * __get_user_N functions take a pointer in r0, and return 0 in r2 + * on success, with the value in r0; or else -EFAULT in r2. + */ +#define __get_user_N(bytes, LOAD) \ + STD_ENTRY(__get_user_##bytes); \ +1: { LOAD r0, r0; move r1, zero; move r2, zero }; \ + jrp lr; \ + STD_ENDPROC(__get_user_##bytes); \ + .pushsection __ex_table,"a"; \ + .word 1b, get_user_fault; \ + .popsection + +__get_user_N(1, lb_u) +__get_user_N(2, lh_u) +__get_user_N(4, lw) + +/* + * __get_user_8 takes a pointer in r0, and returns 0 in r2 + * on success, with the value in r0/r1; or else -EFAULT in r2. + */ + STD_ENTRY(__get_user_8); +1: { lw r0, r0; addi r1, r0, 4 }; +2: { lw r1, r1; move r2, zero }; + jrp lr; + STD_ENDPROC(__get_user_8); + .pushsection __ex_table,"a"; + .word 1b, get_user_fault; + .word 2b, get_user_fault; + .popsection + +/* + * __put_user_N functions take a value in r0 and a pointer in r1, + * and return 0 in r0 on success or -EFAULT on failure. + */ +#define __put_user_N(bytes, STORE) \ + STD_ENTRY(__put_user_##bytes); \ +1: { STORE r1, r0; move r0, zero }; \ + jrp lr; \ + STD_ENDPROC(__put_user_##bytes); \ + .pushsection __ex_table,"a"; \ + .word 1b, put_user_fault; \ + .popsection + +__put_user_N(1, sb) +__put_user_N(2, sh) +__put_user_N(4, sw) + +/* + * __put_user_8 takes a value in r0/r1 and a pointer in r2, + * and returns 0 in r0 on success or -EFAULT on failure. + */ +STD_ENTRY(__put_user_8) +1: { sw r2, r0; addi r2, r2, 4 } +2: { sw r2, r1; move r0, zero } + jrp lr + STD_ENDPROC(__put_user_8) + .pushsection __ex_table,"a" + .word 1b, put_user_fault + .word 2b, put_user_fault + .popsection + + +/* + * strnlen_user_asm takes the pointer in r0, and the length bound in r1. + * It returns the length, including the terminating NUL, or zero on exception. + * If length is greater than the bound, returns one plus the bound. + */ +STD_ENTRY(strnlen_user_asm) + { bz r1, 2f; addi r3, r0, -1 } /* bias down to include NUL */ +1: { lb_u r4, r0; addi r1, r1, -1 } + bz r4, 2f + { bnzt r1, 1b; addi r0, r0, 1 } +2: { sub r0, r0, r3; jrp lr } + STD_ENDPROC(strnlen_user_asm) + .pushsection .fixup,"ax" +strnlen_user_fault: + { move r0, zero; jrp lr } + ENDPROC(strnlen_user_fault) + .section __ex_table,"a" + .word 1b, strnlen_user_fault + .popsection + +/* + * strncpy_from_user_asm takes the kernel target pointer in r0, + * the userspace source pointer in r1, and the length bound (including + * the trailing NUL) in r2. On success, it returns the string length + * (not including the trailing NUL), or -EFAULT on failure. + */ +STD_ENTRY(strncpy_from_user_asm) + { bz r2, 2f; move r3, r0 } +1: { lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 } + { sb r0, r4; addi r0, r0, 1 } + bz r2, 2f + bnzt r4, 1b + addi r0, r0, -1 /* don't count the trailing NUL */ +2: { sub r0, r0, r3; jrp lr } + STD_ENDPROC(strncpy_from_user_asm) + .pushsection .fixup,"ax" +strncpy_from_user_fault: + { movei r0, -EFAULT; jrp lr } + ENDPROC(strncpy_from_user_fault) + .section __ex_table,"a" + .word 1b, strncpy_from_user_fault + .popsection + +/* + * clear_user_asm takes the user target address in r0 and the + * number of bytes to zero in r1. + * It returns the number of uncopiable bytes (hopefully zero) in r0. + * Note that we don't use a separate .fixup section here since we fall + * through into the "fixup" code as the last straight-line bundle anyway. + */ +STD_ENTRY(clear_user_asm) + { bz r1, 2f; or r2, r0, r1 } + andi r2, r2, 3 + bzt r2, .Lclear_aligned_user_asm +1: { sb r0, zero; addi r0, r0, 1; addi r1, r1, -1 } + bnzt r1, 1b +2: { move r0, r1; jrp lr } + .pushsection __ex_table,"a" + .word 1b, 2b + .popsection + +.Lclear_aligned_user_asm: +1: { sw r0, zero; addi r0, r0, 4; addi r1, r1, -4 } + bnzt r1, 1b +2: { move r0, r1; jrp lr } + STD_ENDPROC(clear_user_asm) + .pushsection __ex_table,"a" + .word 1b, 2b + .popsection + +/* + * flush_user_asm takes the user target address in r0 and the + * number of bytes to flush in r1. + * It returns the number of unflushable bytes (hopefully zero) in r0. + */ +STD_ENTRY(flush_user_asm) + bz r1, 2f + { movei r2, L2_CACHE_BYTES; add r1, r0, r1 } + { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 } + { and r0, r0, r2; and r1, r1, r2 } + { sub r1, r1, r0 } +1: { flush r0; addi r1, r1, -CHIP_FLUSH_STRIDE() } + { addi r0, r0, CHIP_FLUSH_STRIDE(); bnzt r1, 1b } +2: { move r0, r1; jrp lr } + STD_ENDPROC(flush_user_asm) + .pushsection __ex_table,"a" + .word 1b, 2b + .popsection + +/* + * inv_user_asm takes the user target address in r0 and the + * number of bytes to invalidate in r1. + * It returns the number of not inv'able bytes (hopefully zero) in r0. + */ +STD_ENTRY(inv_user_asm) + bz r1, 2f + { movei r2, L2_CACHE_BYTES; add r1, r0, r1 } + { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 } + { and r0, r0, r2; and r1, r1, r2 } + { sub r1, r1, r0 } +1: { inv r0; addi r1, r1, -CHIP_INV_STRIDE() } + { addi r0, r0, CHIP_INV_STRIDE(); bnzt r1, 1b } +2: { move r0, r1; jrp lr } + STD_ENDPROC(inv_user_asm) + .pushsection __ex_table,"a" + .word 1b, 2b + .popsection + +/* + * finv_user_asm takes the user target address in r0 and the + * number of bytes to flush-invalidate in r1. + * It returns the number of not finv'able bytes (hopefully zero) in r0. + */ +STD_ENTRY(finv_user_asm) + bz r1, 2f + { movei r2, L2_CACHE_BYTES; add r1, r0, r1 } + { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 } + { and r0, r0, r2; and r1, r1, r2 } + { sub r1, r1, r0 } +1: { finv r0; addi r1, r1, -CHIP_FINV_STRIDE() } + { addi r0, r0, CHIP_FINV_STRIDE(); bnzt r1, 1b } +2: { move r0, r1; jrp lr } + STD_ENDPROC(finv_user_asm) + .pushsection __ex_table,"a" + .word 1b, 2b + .popsection diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S new file mode 100644 index 00000000..2ff44f87 --- /dev/null +++ b/arch/tile/lib/usercopy_64.S @@ -0,0 +1,196 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/linkage.h> +#include <asm/errno.h> +#include <asm/cache.h> +#include <arch/chip.h> + +/* Access user memory, but use MMU to avoid propagating kernel exceptions. */ + + .pushsection .fixup,"ax" + +get_user_fault: + { movei r1, -EFAULT; move r0, zero } + jrp lr + ENDPROC(get_user_fault) + +put_user_fault: + { movei r0, -EFAULT; jrp lr } + ENDPROC(put_user_fault) + + .popsection + +/* + * __get_user_N functions take a pointer in r0, and return 0 in r1 + * on success, with the value in r0; or else -EFAULT in r1. + */ +#define __get_user_N(bytes, LOAD) \ + STD_ENTRY(__get_user_##bytes); \ +1: { LOAD r0, r0; move r1, zero }; \ + jrp lr; \ + STD_ENDPROC(__get_user_##bytes); \ + .pushsection __ex_table,"a"; \ + .quad 1b, get_user_fault; \ + .popsection + +__get_user_N(1, ld1u) +__get_user_N(2, ld2u) +__get_user_N(4, ld4u) +__get_user_N(8, ld) + +/* + * __put_user_N functions take a value in r0 and a pointer in r1, + * and return 0 in r0 on success or -EFAULT on failure. + */ +#define __put_user_N(bytes, STORE) \ + STD_ENTRY(__put_user_##bytes); \ +1: { STORE r1, r0; move r0, zero }; \ + jrp lr; \ + STD_ENDPROC(__put_user_##bytes); \ + .pushsection __ex_table,"a"; \ + .quad 1b, put_user_fault; \ + .popsection + +__put_user_N(1, st1) +__put_user_N(2, st2) +__put_user_N(4, st4) +__put_user_N(8, st) + +/* + * strnlen_user_asm takes the pointer in r0, and the length bound in r1. + * It returns the length, including the terminating NUL, or zero on exception. + * If length is greater than the bound, returns one plus the bound. + */ +STD_ENTRY(strnlen_user_asm) + { beqz r1, 2f; addi r3, r0, -1 } /* bias down to include NUL */ +1: { ld1u r4, r0; addi r1, r1, -1 } + beqz r4, 2f + { bnezt r1, 1b; addi r0, r0, 1 } +2: { sub r0, r0, r3; jrp lr } + STD_ENDPROC(strnlen_user_asm) + .pushsection .fixup,"ax" +strnlen_user_fault: + { move r0, zero; jrp lr } + ENDPROC(strnlen_user_fault) + .section __ex_table,"a" + .quad 1b, strnlen_user_fault + .popsection + +/* + * strncpy_from_user_asm takes the kernel target pointer in r0, + * the userspace source pointer in r1, and the length bound (including + * the trailing NUL) in r2. On success, it returns the string length + * (not including the trailing NUL), or -EFAULT on failure. + */ +STD_ENTRY(strncpy_from_user_asm) + { beqz r2, 2f; move r3, r0 } +1: { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 } + { st1 r0, r4; addi r0, r0, 1 } + beqz r2, 2f + bnezt r4, 1b + addi r0, r0, -1 /* don't count the trailing NUL */ +2: { sub r0, r0, r3; jrp lr } + STD_ENDPROC(strncpy_from_user_asm) + .pushsection .fixup,"ax" +strncpy_from_user_fault: + { movei r0, -EFAULT; jrp lr } + ENDPROC(strncpy_from_user_fault) + .section __ex_table,"a" + .quad 1b, strncpy_from_user_fault + .popsection + +/* + * clear_user_asm takes the user target address in r0 and the + * number of bytes to zero in r1. + * It returns the number of uncopiable bytes (hopefully zero) in r0. + * Note that we don't use a separate .fixup section here since we fall + * through into the "fixup" code as the last straight-line bundle anyway. + */ +STD_ENTRY(clear_user_asm) + { beqz r1, 2f; or r2, r0, r1 } + andi r2, r2, 7 + beqzt r2, .Lclear_aligned_user_asm +1: { st1 r0, zero; addi r0, r0, 1; addi r1, r1, -1 } + bnezt r1, 1b +2: { move r0, r1; jrp lr } + .pushsection __ex_table,"a" + .quad 1b, 2b + .popsection + +.Lclear_aligned_user_asm: +1: { st r0, zero; addi r0, r0, 8; addi r1, r1, -8 } + bnezt r1, 1b +2: { move r0, r1; jrp lr } + STD_ENDPROC(clear_user_asm) + .pushsection __ex_table,"a" + .quad 1b, 2b + .popsection + +/* + * flush_user_asm takes the user target address in r0 and the + * number of bytes to flush in r1. + * It returns the number of unflushable bytes (hopefully zero) in r0. + */ +STD_ENTRY(flush_user_asm) + beqz r1, 2f + { movei r2, L2_CACHE_BYTES; add r1, r0, r1 } + { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 } + { and r0, r0, r2; and r1, r1, r2 } + { sub r1, r1, r0 } +1: { flush r0; addi r1, r1, -CHIP_FLUSH_STRIDE() } + { addi r0, r0, CHIP_FLUSH_STRIDE(); bnezt r1, 1b } +2: { move r0, r1; jrp lr } + STD_ENDPROC(flush_user_asm) + .pushsection __ex_table,"a" + .quad 1b, 2b + .popsection + +/* + * inv_user_asm takes the user target address in r0 and the + * number of bytes to invalidate in r1. + * It returns the number of not inv'able bytes (hopefully zero) in r0. + */ +STD_ENTRY(inv_user_asm) + beqz r1, 2f + { movei r2, L2_CACHE_BYTES; add r1, r0, r1 } + { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 } + { and r0, r0, r2; and r1, r1, r2 } + { sub r1, r1, r0 } +1: { inv r0; addi r1, r1, -CHIP_INV_STRIDE() } + { addi r0, r0, CHIP_INV_STRIDE(); bnezt r1, 1b } +2: { move r0, r1; jrp lr } + STD_ENDPROC(inv_user_asm) + .pushsection __ex_table,"a" + .quad 1b, 2b + .popsection + +/* + * finv_user_asm takes the user target address in r0 and the + * number of bytes to flush-invalidate in r1. + * It returns the number of not finv'able bytes (hopefully zero) in r0. + */ +STD_ENTRY(finv_user_asm) + beqz r1, 2f + { movei r2, L2_CACHE_BYTES; add r1, r0, r1 } + { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 } + { and r0, r0, r2; and r1, r1, r2 } + { sub r1, r1, r0 } +1: { finv r0; addi r1, r1, -CHIP_FINV_STRIDE() } + { addi r0, r0, CHIP_FINV_STRIDE(); bnezt r1, 1b } +2: { move r0, r1; jrp lr } + STD_ENDPROC(finv_user_asm) + .pushsection __ex_table,"a" + .quad 1b, 2b + .popsection diff --git a/arch/tile/mm/Makefile b/arch/tile/mm/Makefile new file mode 100644 index 00000000..e252aedd --- /dev/null +++ b/arch/tile/mm/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for the linux tile-specific parts of the memory manager. +# + +obj-y := init.o pgtable.o fault.o extable.o elf.o \ + mmap.o homecache.o migrate_$(BITS).o + +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c new file mode 100644 index 00000000..758b6038 --- /dev/null +++ b/arch/tile/mm/elf.c @@ -0,0 +1,159 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/binfmts.h> +#include <linux/compat.h> +#include <linux/mman.h> +#include <linux/elf.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/sections.h> +#include <arch/sim_def.h> + +/* Notify a running simulator, if any, that an exec just occurred. */ +static void sim_notify_exec(const char *binary_name) +{ + unsigned char c; + do { + c = *binary_name++; + __insn_mtspr(SPR_SIM_CONTROL, + (SIM_CONTROL_OS_EXEC + | (c << _SIM_CONTROL_OPERATOR_BITS))); + + } while (c); +} + +static int notify_exec(void) +{ + int retval = 0; /* failure */ + struct vm_area_struct *vma = current->mm->mmap; + while (vma) { + if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) + break; + vma = vma->vm_next; + } + if (vma) { + char *buf = (char *) __get_free_page(GFP_KERNEL); + if (buf) { + char *path = d_path(&vma->vm_file->f_path, + buf, PAGE_SIZE); + if (!IS_ERR(path)) { + sim_notify_exec(path); + retval = 1; + } + free_page((unsigned long)buf); + } + } + return retval; +} + +/* Notify a running simulator, if any, that we loaded an interpreter. */ +static void sim_notify_interp(unsigned long load_addr) +{ + size_t i; + for (i = 0; i < sizeof(load_addr); i++) { + unsigned char c = load_addr >> (i * 8); + __insn_mtspr(SPR_SIM_CONTROL, + (SIM_CONTROL_OS_INTERP + | (c << _SIM_CONTROL_OPERATOR_BITS))); + } +} + + +/* Kernel address of page used to map read-only kernel data into userspace. */ +static void *vdso_page; + +/* One-entry array used for install_special_mapping. */ +static struct page *vdso_pages[1]; + +static int __init vdso_setup(void) +{ + vdso_page = (void *)get_zeroed_page(GFP_ATOMIC); + memcpy(vdso_page, __rt_sigreturn, __rt_sigreturn_end - __rt_sigreturn); + vdso_pages[0] = virt_to_page(vdso_page); + return 0; +} +device_initcall(vdso_setup); + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_private_data == vdso_pages) + return "[vdso]"; +#ifndef __tilegx__ + if (vma->vm_start == MEM_USER_INTRPT) + return "[intrpt]"; +#endif + return NULL; +} + +int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack) +{ + struct mm_struct *mm = current->mm; + unsigned long vdso_base; + int retval = 0; + + /* + * Notify the simulator that an exec just occurred. + * If we can't find the filename of the mapping, just use + * whatever was passed as the linux_binprm filename. + */ + if (!notify_exec()) + sim_notify_exec(bprm->filename); + + down_write(&mm->mmap_sem); + + /* + * MAYWRITE to allow gdb to COW and set breakpoints + */ + vdso_base = VDSO_BASE; + retval = install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_pages); + +#ifndef __tilegx__ + /* + * Set up a user-interrupt mapping here; the user can't + * create one themselves since it is above TASK_SIZE. + * We make it unwritable by default, so the model for adding + * interrupt vectors always involves an mprotect. + */ + if (!retval) { + unsigned long addr = MEM_USER_INTRPT; + addr = mmap_region(NULL, addr, INTRPT_SIZE, + MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0); + if (addr > (unsigned long) -PAGE_SIZE) + retval = (int) addr; + } +#endif + + up_write(&mm->mmap_sem); + + return retval; +} + + +void elf_plat_init(struct pt_regs *regs, unsigned long load_addr) +{ + /* Zero all registers. */ + memset(regs, 0, sizeof(*regs)); + + /* Report the interpreter's load address. */ + sim_notify_interp(load_addr); +} diff --git a/arch/tile/mm/extable.c b/arch/tile/mm/extable.c new file mode 100644 index 00000000..4fb0acb9 --- /dev/null +++ b/arch/tile/mm/extable.c @@ -0,0 +1,30 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(regs->pc); + if (fixup) { + regs->pc = fixup->fixup; + return 1; + } + + return 0; +} diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c new file mode 100644 index 00000000..22e58f51 --- /dev/null +++ b/arch/tile/mm/fault.c @@ -0,0 +1,883 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * From i386 code copyright (C) 1995 Linus Torvalds + */ + +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/tty.h> +#include <linux/vt_kern.h> /* For unblank_screen() */ +#include <linux/highmem.h> +#include <linux/module.h> +#include <linux/kprobes.h> +#include <linux/hugetlb.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> + +#include <asm/pgalloc.h> +#include <asm/sections.h> +#include <asm/traps.h> +#include <asm/syscalls.h> + +#include <arch/interrupts.h> + +static noinline void force_sig_info_fault(const char *type, int si_signo, + int si_code, unsigned long address, + int fault_num, + struct task_struct *tsk, + struct pt_regs *regs) +{ + siginfo_t info; + + if (unlikely(tsk->pid < 2)) { + panic("Signal %d (code %d) at %#lx sent to %s!", + si_signo, si_code & 0xffff, address, + is_idle_task(tsk) ? "the idle task" : "init"); + } + + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = (void __user *)address; + info.si_trapno = fault_num; + trace_unhandled_signal(type, regs, address, si_signo); + force_sig_info(si_signo, &info, tsk); +} + +#ifndef __tilegx__ +/* + * Synthesize the fault a PL0 process would get by doing a word-load of + * an unaligned address or a high kernel address. + */ +SYSCALL_DEFINE2(cmpxchg_badaddr, unsigned long, address, + struct pt_regs *, regs) +{ + if (address >= PAGE_OFFSET) + force_sig_info_fault("atomic segfault", SIGSEGV, SEGV_MAPERR, + address, INT_DTLB_MISS, current, regs); + else + force_sig_info_fault("atomic alignment fault", SIGBUS, + BUS_ADRALN, address, + INT_UNALIGN_DATA, current, regs); + + /* + * Adjust pc to point at the actual instruction, which is unusual + * for syscalls normally, but is appropriate when we are claiming + * that a syscall swint1 caused a page fault or bus error. + */ + regs->pc -= 8; + + /* + * Mark this as a caller-save interrupt, like a normal page fault, + * so that when we go through the signal handler path we will + * properly restore r0, r1, and r2 for the signal handler arguments. + */ + regs->flags |= PT_FLAGS_CALLER_SAVES; + + return 0; +} +#endif + +static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) +{ + unsigned index = pgd_index(address); + pgd_t *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + pgd += index; + pgd_k = init_mm.pgd + index; + + if (!pgd_present(*pgd_k)) + return NULL; + + pud = pud_offset(pgd, address); + pud_k = pud_offset(pgd_k, address); + if (!pud_present(*pud_k)) + return NULL; + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); + if (!pmd_present(*pmd_k)) + return NULL; + if (!pmd_present(*pmd)) { + set_pmd(pmd, *pmd_k); + arch_flush_lazy_mmu_mode(); + } else + BUG_ON(pmd_ptfn(*pmd) != pmd_ptfn(*pmd_k)); + return pmd_k; +} + +/* + * Handle a fault on the vmalloc area. + */ +static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) +{ + pmd_t *pmd_k; + pte_t *pte_k; + + /* Make sure we are in vmalloc area */ + if (!(address >= VMALLOC_START && address < VMALLOC_END)) + return -1; + + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + */ + pmd_k = vmalloc_sync_one(pgd, address); + if (!pmd_k) + return -1; + if (pmd_huge(*pmd_k)) + return 0; /* support TILE huge_vmap() API */ + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + return -1; + return 0; +} + +/* Wait until this PTE has completed migration. */ +static void wait_for_migration(pte_t *pte) +{ + if (pte_migrating(*pte)) { + /* + * Wait until the migrater fixes up this pte. + * We scale the loop count by the clock rate so we'll wait for + * a few seconds here. + */ + int retries = 0; + int bound = get_clock_rate(); + while (pte_migrating(*pte)) { + barrier(); + if (++retries > bound) + panic("Hit migrating PTE (%#llx) and" + " page PFN %#lx still migrating", + pte->val, pte_pfn(*pte)); + } + } +} + +/* + * It's not generally safe to use "current" to get the page table pointer, + * since we might be running an oprofile interrupt in the middle of a + * task switch. + */ +static pgd_t *get_current_pgd(void) +{ + HV_Context ctx = hv_inquire_context(); + unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT; + struct page *pgd_page = pfn_to_page(pgd_pfn); + BUG_ON(PageHighMem(pgd_page)); /* oops, HIGHPTE? */ + return (pgd_t *) __va(ctx.page_table); +} + +/* + * We can receive a page fault from a migrating PTE at any time. + * Handle it by just waiting until the fault resolves. + * + * It's also possible to get a migrating kernel PTE that resolves + * itself during the downcall from hypervisor to Linux. We just check + * here to see if the PTE seems valid, and if so we retry it. + * + * NOTE! We MUST NOT take any locks for this case. We may be in an + * interrupt or a critical region, and must do as little as possible. + * Similarly, we can't use atomic ops here, since we may be handling a + * fault caused by an atomic op access. + * + * If we find a migrating PTE while we're in an NMI context, and we're + * at a PC that has a registered exception handler, we don't wait, + * since this thread may (e.g.) have been interrupted while migrating + * its own stack, which would then cause us to self-deadlock. + */ +static int handle_migrating_pte(pgd_t *pgd, int fault_num, + unsigned long address, unsigned long pc, + int is_kernel_mode, int write) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + pte_t pteval; + + if (pgd_addr_invalid(address)) + return 0; + + pgd += pgd_index(address); + pud = pud_offset(pgd, address); + if (!pud || !pud_present(*pud)) + return 0; + pmd = pmd_offset(pud, address); + if (!pmd || !pmd_present(*pmd)) + return 0; + pte = pmd_huge_page(*pmd) ? ((pte_t *)pmd) : + pte_offset_kernel(pmd, address); + pteval = *pte; + if (pte_migrating(pteval)) { + if (in_nmi() && search_exception_tables(pc)) + return 0; + wait_for_migration(pte); + return 1; + } + + if (!is_kernel_mode || !pte_present(pteval)) + return 0; + if (fault_num == INT_ITLB_MISS) { + if (pte_exec(pteval)) + return 1; + } else if (write) { + if (pte_write(pteval)) + return 1; + } else { + if (pte_read(pteval)) + return 1; + } + + return 0; +} + +/* + * This routine is responsible for faulting in user pages. + * It passes the work off to one of the appropriate routines. + * It returns true if the fault was successfully handled. + */ +static int handle_page_fault(struct pt_regs *regs, + int fault_num, + int is_page_fault, + unsigned long address, + int write) +{ + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct *vma; + unsigned long stack_offset; + int fault; + int si_code; + int is_kernel_mode; + pgd_t *pgd; + + /* on TILE, protection faults are always writes */ + if (!is_page_fault) + write = 1; + + is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL); + + tsk = validate_current(); + + /* + * Check to see if we might be overwriting the stack, and bail + * out if so. The page fault code is a relatively likely + * place to get trapped in an infinite regress, and once we + * overwrite the whole stack, it becomes very hard to recover. + */ + stack_offset = stack_pointer & (THREAD_SIZE-1); + if (stack_offset < THREAD_SIZE / 8) { + pr_alert("Potential stack overrun: sp %#lx\n", + stack_pointer); + show_regs(regs); + pr_alert("Killing current process %d/%s\n", + tsk->pid, tsk->comm); + do_group_exit(SIGKILL); + } + + /* + * Early on, we need to check for migrating PTE entries; + * see homecache.c. If we find a migrating PTE, we wait until + * the backing page claims to be done migrating, then we proceed. + * For kernel PTEs, we rewrite the PTE and return and retry. + * Otherwise, we treat the fault like a normal "no PTE" fault, + * rather than trying to patch up the existing PTE. + */ + pgd = get_current_pgd(); + if (handle_migrating_pte(pgd, fault_num, address, regs->pc, + is_kernel_mode, write)) + return 1; + + si_code = SEGV_MAPERR; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + * + * This verifies that the fault happens in kernel space + * and that the fault was not a protection fault. + */ + if (unlikely(address >= TASK_SIZE && + !is_arch_mappable_range(address, 0))) { + if (is_kernel_mode && is_page_fault && + vmalloc_fault(pgd, address) >= 0) + return 1; + /* + * Don't take the mm semaphore here. If we fixup a prefetch + * fault we could otherwise deadlock. + */ + mm = NULL; /* happy compiler */ + vma = NULL; + goto bad_area_nosemaphore; + } + + /* + * If we're trying to touch user-space addresses, we must + * be either at PL0, or else with interrupts enabled in the + * kernel, so either way we can re-enable interrupts here + * unless we are doing atomic access to user space with + * interrupts disabled. + */ + if (!(regs->flags & PT_FLAGS_DISABLE_IRQ)) + local_irq_enable(); + + mm = tsk->mm; + + /* + * If we're in an interrupt, have no user context or are running in an + * atomic region then we must not take the fault. + */ + if (in_atomic() || !mm) { + vma = NULL; /* happy compiler */ + goto bad_area_nosemaphore; + } + + /* + * When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunately, in the case of an + * erroneous fault occurring in a code path which already holds mmap_sem + * we will deadlock attempting to validate the fault against the + * address space. Luckily the kernel only validly references user + * space from well defined areas of code, which are listed in the + * exceptions table. + * + * As the vast majority of faults will be valid we will only perform + * the source reference check when there is a possibility of a deadlock. + * Attempt to lock the address space, if we cannot we then validate the + * source. If this is invalid we can skip the address space check, + * thus avoiding the deadlock. + */ + if (!down_read_trylock(&mm->mmap_sem)) { + if (is_kernel_mode && + !search_exception_tables(regs->pc)) { + vma = NULL; /* happy compiler */ + goto bad_area_nosemaphore; + } + down_read(&mm->mmap_sem); + } + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (regs->sp < PAGE_OFFSET) { + /* + * accessing the stack below sp is always a bug. + */ + if (address < regs->sp) + goto bad_area; + } + if (expand_stack(vma, address)) + goto bad_area; + +/* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + si_code = SEGV_ACCERR; + if (fault_num == INT_ITLB_MISS) { + if (!(vma->vm_flags & VM_EXEC)) + goto bad_area; + } else if (write) { +#ifdef TEST_VERIFY_AREA + if (!is_page_fault && regs->cs == KERNEL_CS) + pr_err("WP fault at "REGFMT"\n", regs->eip); +#endif + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } else { + if (!is_page_fault || !(vma->vm_flags & VM_READ)) + goto bad_area; + } + + survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + fault = handle_mm_fault(mm, vma, address, write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + +#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() + /* + * If this was an asynchronous fault, + * restart the appropriate engine. + */ + switch (fault_num) { +#if CHIP_HAS_TILE_DMA() + case INT_DMATLB_MISS: + case INT_DMATLB_MISS_DWNCL: + case INT_DMATLB_ACCESS: + case INT_DMATLB_ACCESS_DWNCL: + __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); + break; +#endif +#if CHIP_HAS_SN_PROC() + case INT_SNITLB_MISS: + case INT_SNITLB_MISS_DWNCL: + __insn_mtspr(SPR_SNCTL, + __insn_mfspr(SPR_SNCTL) & + ~SPR_SNCTL__FRZPROC_MASK); + break; +#endif + } +#endif + + up_read(&mm->mmap_sem); + return 1; + +/* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + up_read(&mm->mmap_sem); + +bad_area_nosemaphore: + /* User mode accesses just cause a SIGSEGV */ + if (!is_kernel_mode) { + /* + * It's possible to have interrupts off here. + */ + local_irq_enable(); + + force_sig_info_fault("segfault", SIGSEGV, si_code, address, + fault_num, tsk, regs); + return 0; + } + +no_context: + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) + return 0; + +/* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + + bust_spinlocks(1); + + /* FIXME: no lookup_address() yet */ +#ifdef SUPPORT_LOOKUP_ADDRESS + if (fault_num == INT_ITLB_MISS) { + pte_t *pte = lookup_address(address); + + if (pte && pte_present(*pte) && !pte_exec_kernel(*pte)) + pr_crit("kernel tried to execute" + " non-executable page - exploit attempt?" + " (uid: %d)\n", current->uid); + } +#endif + if (address < PAGE_SIZE) + pr_alert("Unable to handle kernel NULL pointer dereference\n"); + else + pr_alert("Unable to handle kernel paging request\n"); + pr_alert(" at virtual address "REGFMT", pc "REGFMT"\n", + address, regs->pc); + + show_regs(regs); + + if (unlikely(tsk->pid < 2)) { + panic("Kernel page fault running %s!", + is_idle_task(tsk) ? "the idle task" : "init"); + } + + /* + * More FIXME: we should probably copy the i386 here and + * implement a generic die() routine. Not today. + */ +#ifdef SUPPORT_DIE + die("Oops", regs); +#endif + bust_spinlocks(1); + + do_group_exit(SIGKILL); + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + up_read(&mm->mmap_sem); + if (is_global_init(tsk)) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + pr_alert("VM: killing process %s\n", tsk->comm); + if (!is_kernel_mode) + do_group_exit(SIGKILL); + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* Kernel mode? Handle exceptions or die */ + if (is_kernel_mode) + goto no_context; + + force_sig_info_fault("bus error", SIGBUS, BUS_ADRERR, address, + fault_num, tsk, regs); + return 0; +} + +#ifndef __tilegx__ + +/* We must release ICS before panicking or we won't get anywhere. */ +#define ics_panic(fmt, ...) do { \ + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); \ + panic(fmt, __VA_ARGS__); \ +} while (0) + +/* + * When we take an ITLB or DTLB fault or access violation in the + * supervisor while the critical section bit is set, the hypervisor is + * reluctant to write new values into the EX_CONTEXT_K_x registers, + * since that might indicate we have not yet squirreled the SPR + * contents away and can thus safely take a recursive interrupt. + * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_K_2. + * + * Note that this routine is called before homecache_tlb_defer_enter(), + * which means that we can properly unlock any atomics that might + * be used there (good), but also means we must be very sensitive + * to not touch any data structures that might be located in memory + * that could migrate, as we could be entering the kernel on a dataplane + * cpu that has been deferring kernel TLB updates. This means, for + * example, that we can't migrate init_mm or its pgd. + */ +struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, + unsigned long address, + unsigned long info) +{ + unsigned long pc = info & ~1; + int write = info & 1; + pgd_t *pgd = get_current_pgd(); + + /* Retval is 1 at first since we will handle the fault fully. */ + struct intvec_state state = { + do_page_fault, fault_num, address, write, 1 + }; + + /* Validate that we are plausibly in the right routine. */ + if ((pc & 0x7) != 0 || pc < PAGE_OFFSET || + (fault_num != INT_DTLB_MISS && + fault_num != INT_DTLB_ACCESS)) { + unsigned long old_pc = regs->pc; + regs->pc = pc; + ics_panic("Bad ICS page fault args:" + " old PC %#lx, fault %d/%d at %#lx\n", + old_pc, fault_num, write, address); + } + + /* We might be faulting on a vmalloc page, so check that first. */ + if (fault_num != INT_DTLB_ACCESS && vmalloc_fault(pgd, address) >= 0) + return state; + + /* + * If we faulted with ICS set in sys_cmpxchg, we are providing + * a user syscall service that should generate a signal on + * fault. We didn't set up a kernel stack on initial entry to + * sys_cmpxchg, but instead had one set up by the fault, which + * (because sys_cmpxchg never releases ICS) came to us via the + * SYSTEM_SAVE_K_2 mechanism, and thus EX_CONTEXT_K_[01] are + * still referencing the original user code. We release the + * atomic lock and rewrite pt_regs so that it appears that we + * came from user-space directly, and after we finish the + * fault we'll go back to user space and re-issue the swint. + * This way the backtrace information is correct if we need to + * emit a stack dump at any point while handling this. + * + * Must match register use in sys_cmpxchg(). + */ + if (pc >= (unsigned long) sys_cmpxchg && + pc < (unsigned long) __sys_cmpxchg_end) { +#ifdef CONFIG_SMP + /* Don't unlock before we could have locked. */ + if (pc >= (unsigned long)__sys_cmpxchg_grab_lock) { + int *lock_ptr = (int *)(regs->regs[ATOMIC_LOCK_REG]); + __atomic_fault_unlock(lock_ptr); + } +#endif + regs->sp = regs->regs[27]; + } + + /* + * We can also fault in the atomic assembly, in which + * case we use the exception table to do the first-level fixup. + * We may re-fixup again in the real fault handler if it + * turns out the faulting address is just bad, and not, + * for example, migrating. + */ + else if (pc >= (unsigned long) __start_atomic_asm_code && + pc < (unsigned long) __end_atomic_asm_code) { + const struct exception_table_entry *fixup; +#ifdef CONFIG_SMP + /* Unlock the atomic lock. */ + int *lock_ptr = (int *)(regs->regs[ATOMIC_LOCK_REG]); + __atomic_fault_unlock(lock_ptr); +#endif + fixup = search_exception_tables(pc); + if (!fixup) + ics_panic("ICS atomic fault not in table:" + " PC %#lx, fault %d", pc, fault_num); + regs->pc = fixup->fixup; + regs->ex1 = PL_ICS_EX1(KERNEL_PL, 0); + } + + /* + * Now that we have released the atomic lock (if necessary), + * it's safe to spin if the PTE that caused the fault was migrating. + */ + if (fault_num == INT_DTLB_ACCESS) + write = 1; + if (handle_migrating_pte(pgd, fault_num, address, pc, 1, write)) + return state; + + /* Return zero so that we continue on with normal fault handling. */ + state.retval = 0; + return state; +} + +#endif /* !__tilegx__ */ + +/* + * This routine handles page faults. It determines the address, and the + * problem, and then passes it handle_page_fault() for normal DTLB and + * ITLB issues, and for DMA or SN processor faults when we are in user + * space. For the latter, if we're in kernel mode, we just save the + * interrupt away appropriately and return immediately. We can't do + * page faults for user code while in kernel mode. + */ +void do_page_fault(struct pt_regs *regs, int fault_num, + unsigned long address, unsigned long write) +{ + int is_page_fault; + + /* This case should have been handled by do_page_fault_ics(). */ + BUG_ON(write & ~1); + +#if CHIP_HAS_TILE_DMA() + /* + * If it's a DMA fault, suspend the transfer while we're + * handling the miss; we'll restart after it's handled. If we + * don't suspend, it's possible that this process could swap + * out and back in, and restart the engine since the DMA is + * still 'running'. + */ + if (fault_num == INT_DMATLB_MISS || + fault_num == INT_DMATLB_ACCESS || + fault_num == INT_DMATLB_MISS_DWNCL || + fault_num == INT_DMATLB_ACCESS_DWNCL) { + __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK); + while (__insn_mfspr(SPR_DMA_USER_STATUS) & + SPR_DMA_STATUS__BUSY_MASK) + ; + } +#endif + + /* Validate fault num and decide if this is a first-time page fault. */ + switch (fault_num) { + case INT_ITLB_MISS: + case INT_DTLB_MISS: +#if CHIP_HAS_TILE_DMA() + case INT_DMATLB_MISS: + case INT_DMATLB_MISS_DWNCL: +#endif +#if CHIP_HAS_SN_PROC() + case INT_SNITLB_MISS: + case INT_SNITLB_MISS_DWNCL: +#endif + is_page_fault = 1; + break; + + case INT_DTLB_ACCESS: +#if CHIP_HAS_TILE_DMA() + case INT_DMATLB_ACCESS: + case INT_DMATLB_ACCESS_DWNCL: +#endif + is_page_fault = 0; + break; + + default: + panic("Bad fault number %d in do_page_fault", fault_num); + } + +#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() + if (EX1_PL(regs->ex1) != USER_PL) { + struct async_tlb *async; + switch (fault_num) { +#if CHIP_HAS_TILE_DMA() + case INT_DMATLB_MISS: + case INT_DMATLB_ACCESS: + case INT_DMATLB_MISS_DWNCL: + case INT_DMATLB_ACCESS_DWNCL: + async = ¤t->thread.dma_async_tlb; + break; +#endif +#if CHIP_HAS_SN_PROC() + case INT_SNITLB_MISS: + case INT_SNITLB_MISS_DWNCL: + async = ¤t->thread.sn_async_tlb; + break; +#endif + default: + async = NULL; + } + if (async) { + + /* + * No vmalloc check required, so we can allow + * interrupts immediately at this point. + */ + local_irq_enable(); + + set_thread_flag(TIF_ASYNC_TLB); + if (async->fault_num != 0) { + panic("Second async fault %d;" + " old fault was %d (%#lx/%ld)", + fault_num, async->fault_num, + address, write); + } + BUG_ON(fault_num == 0); + async->fault_num = fault_num; + async->is_fault = is_page_fault; + async->is_write = write; + async->address = address; + return; + } + } +#endif + + handle_page_fault(regs, fault_num, is_page_fault, address, write); +} + + +#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() +/* + * Check an async_tlb structure to see if a deferred fault is waiting, + * and if so pass it to the page-fault code. + */ +static void handle_async_page_fault(struct pt_regs *regs, + struct async_tlb *async) +{ + if (async->fault_num) { + /* + * Clear async->fault_num before calling the page-fault + * handler so that if we re-interrupt before returning + * from the function we have somewhere to put the + * information from the new interrupt. + */ + int fault_num = async->fault_num; + async->fault_num = 0; + handle_page_fault(regs, fault_num, async->is_fault, + async->address, async->is_write); + } +} + +/* + * This routine effectively re-issues asynchronous page faults + * when we are returning to user space. + */ +void do_async_page_fault(struct pt_regs *regs) +{ + /* + * Clear thread flag early. If we re-interrupt while processing + * code here, we will reset it and recall this routine before + * returning to user space. + */ + clear_thread_flag(TIF_ASYNC_TLB); + +#if CHIP_HAS_TILE_DMA() + handle_async_page_fault(regs, ¤t->thread.dma_async_tlb); +#endif +#if CHIP_HAS_SN_PROC() + handle_async_page_fault(regs, ¤t->thread.sn_async_tlb); +#endif +} +#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */ + + +void vmalloc_sync_all(void) +{ +#ifdef __tilegx__ + /* Currently all L1 kernel pmd's are static and shared. */ + BUG_ON(pgd_index(VMALLOC_END) != pgd_index(VMALLOC_START)); +#else + /* + * Note that races in the updates of insync and start aren't + * problematic: insync can only get set bits added, and updates to + * start are only improving performance (without affecting correctness + * if undone). + */ + static DECLARE_BITMAP(insync, PTRS_PER_PGD); + static unsigned long start = PAGE_OFFSET; + unsigned long address; + + BUILD_BUG_ON(PAGE_OFFSET & ~PGDIR_MASK); + for (address = start; address >= PAGE_OFFSET; address += PGDIR_SIZE) { + if (!test_bit(pgd_index(address), insync)) { + unsigned long flags; + struct list_head *pos; + + spin_lock_irqsave(&pgd_lock, flags); + list_for_each(pos, &pgd_list) + if (!vmalloc_sync_one(list_to_pgd(pos), + address)) { + /* Must be at first entry in list. */ + BUG_ON(pos != pgd_list.next); + break; + } + spin_unlock_irqrestore(&pgd_lock, flags); + if (pos != pgd_list.next) + set_bit(pgd_index(address), insync); + } + if (address == start && test_bit(pgd_index(address), insync)) + start = address + PGDIR_SIZE; + } +#endif +} diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c new file mode 100644 index 00000000..ef8e5a62 --- /dev/null +++ b/arch/tile/mm/highmem.c @@ -0,0 +1,290 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/highmem.h> +#include <linux/module.h> +#include <linux/pagemap.h> +#include <asm/homecache.h> + +#define kmap_get_pte(vaddr) \ + pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)),\ + (vaddr)), (vaddr)) + + +void *kmap(struct page *page) +{ + void *kva; + unsigned long flags; + pte_t *ptep; + + might_sleep(); + if (!PageHighMem(page)) + return page_address(page); + kva = kmap_high(page); + + /* + * Rewrite the PTE under the lock. This ensures that the page + * is not currently migrating. + */ + ptep = kmap_get_pte((unsigned long)kva); + flags = homecache_kpte_lock(); + set_pte_at(&init_mm, kva, ptep, mk_pte(page, page_to_kpgprot(page))); + homecache_kpte_unlock(flags); + + return kva; +} +EXPORT_SYMBOL(kmap); + +void kunmap(struct page *page) +{ + if (in_interrupt()) + BUG(); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} +EXPORT_SYMBOL(kunmap); + +/* + * Describe a single atomic mapping of a page on a given cpu at a + * given address, and allow it to be linked into a list. + */ +struct atomic_mapped_page { + struct list_head list; + struct page *page; + int cpu; + unsigned long va; +}; + +static spinlock_t amp_lock = __SPIN_LOCK_UNLOCKED(&_lock); +static struct list_head amp_list = LIST_HEAD_INIT(amp_list); + +/* + * Combining this structure with a per-cpu declaration lets us give + * each cpu an atomic_mapped_page structure per type. + */ +struct kmap_amps { + struct atomic_mapped_page per_type[KM_TYPE_NR]; +}; +static DEFINE_PER_CPU(struct kmap_amps, amps); + +/* + * Add a page and va, on this cpu, to the list of kmap_atomic pages, + * and write the new pte to memory. Writing the new PTE under the + * lock guarantees that it is either on the list before migration starts + * (if we won the race), or set_pte() sets the migrating bit in the PTE + * (if we lost the race). And doing it under the lock guarantees + * that when kmap_atomic_fix_one_pte() comes along, it finds a valid + * PTE in memory, iff the mapping is still on the amp_list. + * + * Finally, doing it under the lock lets us safely examine the page + * to see if it is immutable or not, for the generic kmap_atomic() case. + * If we examine it earlier we are exposed to a race where it looks + * writable earlier, but becomes immutable before we write the PTE. + */ +static void kmap_atomic_register(struct page *page, enum km_type type, + unsigned long va, pte_t *ptep, pte_t pteval) +{ + unsigned long flags; + struct atomic_mapped_page *amp; + + flags = homecache_kpte_lock(); + spin_lock(&_lock); + + /* With interrupts disabled, now fill in the per-cpu info. */ + amp = &__get_cpu_var(amps).per_type[type]; + amp->page = page; + amp->cpu = smp_processor_id(); + amp->va = va; + + /* For generic kmap_atomic(), choose the PTE writability now. */ + if (!pte_read(pteval)) + pteval = mk_pte(page, page_to_kpgprot(page)); + + list_add(&->list, &_list); + set_pte(ptep, pteval); + arch_flush_lazy_mmu_mode(); + + spin_unlock(&_lock); + homecache_kpte_unlock(flags); +} + +/* + * Remove a page and va, on this cpu, from the list of kmap_atomic pages. + * Linear-time search, but we count on the lists being short. + * We don't need to adjust the PTE under the lock (as opposed to the + * kmap_atomic_register() case), since we're just unconditionally + * zeroing the PTE after it's off the list. + */ +static void kmap_atomic_unregister(struct page *page, unsigned long va) +{ + unsigned long flags; + struct atomic_mapped_page *amp; + int cpu = smp_processor_id(); + spin_lock_irqsave(&_lock, flags); + list_for_each_entry(amp, &_list, list) { + if (amp->page == page && amp->cpu == cpu && amp->va == va) + break; + } + BUG_ON(&->list == &_list); + list_del(&->list); + spin_unlock_irqrestore(&_lock, flags); +} + +/* Helper routine for kmap_atomic_fix_kpte(), below. */ +static void kmap_atomic_fix_one_kpte(struct atomic_mapped_page *amp, + int finished) +{ + pte_t *ptep = kmap_get_pte(amp->va); + if (!finished) { + set_pte(ptep, pte_mkmigrate(*ptep)); + flush_remote(0, 0, NULL, amp->va, PAGE_SIZE, PAGE_SIZE, + cpumask_of(amp->cpu), NULL, 0); + } else { + /* + * Rewrite a default kernel PTE for this page. + * We rely on the fact that set_pte() writes the + * present+migrating bits last. + */ + pte_t pte = mk_pte(amp->page, page_to_kpgprot(amp->page)); + set_pte(ptep, pte); + } +} + +/* + * This routine is a helper function for homecache_fix_kpte(); see + * its comments for more information on the "finished" argument here. + * + * Note that we hold the lock while doing the remote flushes, which + * will stall any unrelated cpus trying to do kmap_atomic operations. + * We could just update the PTEs under the lock, and save away copies + * of the structs (or just the va+cpu), then flush them after we + * release the lock, but it seems easier just to do it all under the lock. + */ +void kmap_atomic_fix_kpte(struct page *page, int finished) +{ + struct atomic_mapped_page *amp; + unsigned long flags; + spin_lock_irqsave(&_lock, flags); + list_for_each_entry(amp, &_list, list) { + if (amp->page == page) + kmap_atomic_fix_one_kpte(amp, finished); + } + spin_unlock_irqrestore(&_lock, flags); +} + +/* + * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap + * because the kmap code must perform a global TLB invalidation when + * the kmap pool wraps. + * + * Note that they may be slower than on x86 (etc.) because unlike on + * those platforms, we do have to take a global lock to map and unmap + * pages on Tile (see above). + * + * When holding an atomic kmap is is not legal to sleep, so atomic + * kmaps are appropriate for short, tight code paths only. + */ +void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + unsigned long vaddr; + int idx, type; + pte_t *pte; + + /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + pagefault_disable(); + + /* Avoid icache flushes by disallowing atomic executable mappings. */ + BUG_ON(pte_exec(prot)); + + if (!PageHighMem(page)) + return page_address(page); + + type = kmap_atomic_idx_push(); + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + pte = kmap_get_pte(vaddr); + BUG_ON(!pte_none(*pte)); + + /* Register that this page is mapped atomically on this cpu. */ + kmap_atomic_register(page, type, vaddr, pte, mk_pte(page, prot)); + + return (void *)vaddr; +} +EXPORT_SYMBOL(kmap_atomic_prot); + +void *kmap_atomic(struct page *page) +{ + /* PAGE_NONE is a magic value that tells us to check immutability. */ + return kmap_atomic_prot(page, PAGE_NONE); +} +EXPORT_SYMBOL(kmap_atomic); + +void __kunmap_atomic(void *kvaddr) +{ + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + + if (vaddr >= __fix_to_virt(FIX_KMAP_END) && + vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { + pte_t *pte = kmap_get_pte(vaddr); + pte_t pteval = *pte; + int idx, type; + + type = kmap_atomic_idx(); + idx = type + KM_TYPE_NR*smp_processor_id(); + + /* + * Force other mappings to Oops if they try to access this pte + * without first remapping it. Keeping stale mappings around + * is a bad idea. + */ + BUG_ON(!pte_present(pteval) && !pte_migrating(pteval)); + kmap_atomic_unregister(pte_page(pteval), vaddr); + kpte_clear_flush(pte, vaddr); + kmap_atomic_idx_pop(); + } else { + /* Must be a lowmem page */ + BUG_ON(vaddr < PAGE_OFFSET); + BUG_ON(vaddr >= (unsigned long)high_memory); + } + + arch_flush_lazy_mmu_mode(); + pagefault_enable(); +} +EXPORT_SYMBOL(__kunmap_atomic); + +/* + * This API is supposed to allow us to map memory without a "struct page". + * Currently we don't support this, though this may change in the future. + */ +void *kmap_atomic_pfn(unsigned long pfn) +{ + return kmap_atomic(pfn_to_page(pfn)); +} +void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) +{ + return kmap_atomic_prot(pfn_to_page(pfn), prot); +} + +struct page *kmap_atomic_to_page(void *ptr) +{ + pte_t *pte; + unsigned long vaddr = (unsigned long)ptr; + + if (vaddr < FIXADDR_START) + return virt_to_page(ptr); + + pte = kmap_get_pte(vaddr); + return pte_page(*pte); +} diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c new file mode 100644 index 00000000..499f7377 --- /dev/null +++ b/arch/tile/mm/homecache.c @@ -0,0 +1,461 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * This code maintains the "home" for each page in the system. + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/bootmem.h> +#include <linux/rmap.h> +#include <linux/pagemap.h> +#include <linux/mutex.h> +#include <linux/interrupt.h> +#include <linux/sysctl.h> +#include <linux/pagevec.h> +#include <linux/ptrace.h> +#include <linux/timex.h> +#include <linux/cache.h> +#include <linux/smp.h> +#include <linux/module.h> + +#include <asm/page.h> +#include <asm/sections.h> +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> +#include <asm/homecache.h> + +#include <arch/sim.h> + +#include "migrate.h" + + +#if CHIP_HAS_COHERENT_LOCAL_CACHE() + +/* + * The noallocl2 option suppresses all use of the L2 cache to cache + * locally from a remote home. There's no point in using it if we + * don't have coherent local caching, though. + */ +static int __write_once noallocl2; +static int __init set_noallocl2(char *str) +{ + noallocl2 = 1; + return 0; +} +early_param("noallocl2", set_noallocl2); + +#else + +#define noallocl2 0 + +#endif + +/* Provide no-op versions of these routines to keep flush_remote() cleaner. */ +#define mark_caches_evicted_start() 0 +#define mark_caches_evicted_finish(mask, timestamp) do {} while (0) + + +/* + * Update the irq_stat for cpus that we are going to interrupt + * with TLB or cache flushes. Also handle removing dataplane cpus + * from the TLB flush set, and setting dataplane_tlb_state instead. + */ +static void hv_flush_update(const struct cpumask *cache_cpumask, + struct cpumask *tlb_cpumask, + unsigned long tlb_va, unsigned long tlb_length, + HV_Remote_ASID *asids, int asidcount) +{ + struct cpumask mask; + int i, cpu; + + cpumask_clear(&mask); + if (cache_cpumask) + cpumask_or(&mask, &mask, cache_cpumask); + if (tlb_cpumask && tlb_length) { + cpumask_or(&mask, &mask, tlb_cpumask); + } + + for (i = 0; i < asidcount; ++i) + cpumask_set_cpu(asids[i].y * smp_width + asids[i].x, &mask); + + /* + * Don't bother to update atomically; losing a count + * here is not that critical. + */ + for_each_cpu(cpu, &mask) + ++per_cpu(irq_stat, cpu).irq_hv_flush_count; +} + +/* + * This wrapper function around hv_flush_remote() does several things: + * + * - Provides a return value error-checking panic path, since + * there's never any good reason for hv_flush_remote() to fail. + * - Accepts a 32-bit PFN rather than a 64-bit PA, which generally + * is the type that Linux wants to pass around anyway. + * - Centralizes the mark_caches_evicted() handling. + * - Canonicalizes that lengths of zero make cpumasks NULL. + * - Handles deferring TLB flushes for dataplane tiles. + * - Tracks remote interrupts in the per-cpu irq_cpustat_t. + * + * Note that we have to wait until the cache flush completes before + * updating the per-cpu last_cache_flush word, since otherwise another + * concurrent flush can race, conclude the flush has already + * completed, and start to use the page while it's still dirty + * remotely (running concurrently with the actual evict, presumably). + */ +void flush_remote(unsigned long cache_pfn, unsigned long cache_control, + const struct cpumask *cache_cpumask_orig, + HV_VirtAddr tlb_va, unsigned long tlb_length, + unsigned long tlb_pgsize, + const struct cpumask *tlb_cpumask_orig, + HV_Remote_ASID *asids, int asidcount) +{ + int rc; + int timestamp = 0; /* happy compiler */ + struct cpumask cache_cpumask_copy, tlb_cpumask_copy; + struct cpumask *cache_cpumask, *tlb_cpumask; + HV_PhysAddr cache_pa; + char cache_buf[NR_CPUS*5], tlb_buf[NR_CPUS*5]; + + mb(); /* provided just to simplify "magic hypervisor" mode */ + + /* + * Canonicalize and copy the cpumasks. + */ + if (cache_cpumask_orig && cache_control) { + cpumask_copy(&cache_cpumask_copy, cache_cpumask_orig); + cache_cpumask = &cache_cpumask_copy; + } else { + cpumask_clear(&cache_cpumask_copy); + cache_cpumask = NULL; + } + if (cache_cpumask == NULL) + cache_control = 0; + if (tlb_cpumask_orig && tlb_length) { + cpumask_copy(&tlb_cpumask_copy, tlb_cpumask_orig); + tlb_cpumask = &tlb_cpumask_copy; + } else { + cpumask_clear(&tlb_cpumask_copy); + tlb_cpumask = NULL; + } + + hv_flush_update(cache_cpumask, tlb_cpumask, tlb_va, tlb_length, + asids, asidcount); + cache_pa = (HV_PhysAddr)cache_pfn << PAGE_SHIFT; + if (cache_control & HV_FLUSH_EVICT_L2) + timestamp = mark_caches_evicted_start(); + rc = hv_flush_remote(cache_pa, cache_control, + cpumask_bits(cache_cpumask), + tlb_va, tlb_length, tlb_pgsize, + cpumask_bits(tlb_cpumask), + asids, asidcount); + if (cache_control & HV_FLUSH_EVICT_L2) + mark_caches_evicted_finish(cache_cpumask, timestamp); + if (rc == 0) + return; + cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy); + cpumask_scnprintf(tlb_buf, sizeof(tlb_buf), &tlb_cpumask_copy); + + pr_err("hv_flush_remote(%#llx, %#lx, %p [%s]," + " %#lx, %#lx, %#lx, %p [%s], %p, %d) = %d\n", + cache_pa, cache_control, cache_cpumask, cache_buf, + (unsigned long)tlb_va, tlb_length, tlb_pgsize, + tlb_cpumask, tlb_buf, + asids, asidcount, rc); + panic("Unsafe to continue."); +} + +void flush_remote_page(struct page *page, int order) +{ + int i, pages = (1 << order); + for (i = 0; i < pages; ++i, ++page) { + void *p = kmap_atomic(page); + int hfh = 0; + int home = page_home(page); +#if CHIP_HAS_CBOX_HOME_MAP() + if (home == PAGE_HOME_HASH) + hfh = 1; + else +#endif + BUG_ON(home < 0 || home >= NR_CPUS); + finv_buffer_remote(p, PAGE_SIZE, hfh); + kunmap_atomic(p); + } +} + +void homecache_evict(const struct cpumask *mask) +{ + flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); +} + +/* + * Return a mask of the cpus whose caches currently own these pages. + * The return value is whether the pages are all coherently cached + * (i.e. none are immutable, incoherent, or uncached). + */ +static int homecache_mask(struct page *page, int pages, + struct cpumask *home_mask) +{ + int i; + int cached_coherently = 1; + cpumask_clear(home_mask); + for (i = 0; i < pages; ++i) { + int home = page_home(&page[i]); + if (home == PAGE_HOME_IMMUTABLE || + home == PAGE_HOME_INCOHERENT) { + cpumask_copy(home_mask, cpu_possible_mask); + return 0; + } +#if CHIP_HAS_CBOX_HOME_MAP() + if (home == PAGE_HOME_HASH) { + cpumask_or(home_mask, home_mask, &hash_for_home_map); + continue; + } +#endif + if (home == PAGE_HOME_UNCACHED) { + cached_coherently = 0; + continue; + } + BUG_ON(home < 0 || home >= NR_CPUS); + cpumask_set_cpu(home, home_mask); + } + return cached_coherently; +} + +/* + * Return the passed length, or zero if it's long enough that we + * believe we should evict the whole L2 cache. + */ +static unsigned long cache_flush_length(unsigned long length) +{ + return (length >= CHIP_L2_CACHE_SIZE()) ? HV_FLUSH_EVICT_L2 : length; +} + +/* Flush a page out of whatever cache(s) it is in. */ +void homecache_flush_cache(struct page *page, int order) +{ + int pages = 1 << order; + int length = cache_flush_length(pages * PAGE_SIZE); + unsigned long pfn = page_to_pfn(page); + struct cpumask home_mask; + + homecache_mask(page, pages, &home_mask); + flush_remote(pfn, length, &home_mask, 0, 0, 0, NULL, NULL, 0); + sim_validate_lines_evicted(PFN_PHYS(pfn), pages * PAGE_SIZE); +} + + +/* Report the home corresponding to a given PTE. */ +static int pte_to_home(pte_t pte) +{ + if (hv_pte_get_nc(pte)) + return PAGE_HOME_IMMUTABLE; + switch (hv_pte_get_mode(pte)) { + case HV_PTE_MODE_CACHE_TILE_L3: + return get_remote_cache_cpu(pte); + case HV_PTE_MODE_CACHE_NO_L3: + return PAGE_HOME_INCOHERENT; + case HV_PTE_MODE_UNCACHED: + return PAGE_HOME_UNCACHED; +#if CHIP_HAS_CBOX_HOME_MAP() + case HV_PTE_MODE_CACHE_HASH_L3: + return PAGE_HOME_HASH; +#endif + } + panic("Bad PTE %#llx\n", pte.val); +} + +/* Update the home of a PTE if necessary (can also be used for a pgprot_t). */ +pte_t pte_set_home(pte_t pte, int home) +{ + /* Check for non-linear file mapping "PTEs" and pass them through. */ + if (pte_file(pte)) + return pte; + +#if CHIP_HAS_MMIO() + /* Check for MMIO mappings and pass them through. */ + if (hv_pte_get_mode(pte) == HV_PTE_MODE_MMIO) + return pte; +#endif + + + /* + * Only immutable pages get NC mappings. If we have a + * non-coherent PTE, but the underlying page is not + * immutable, it's likely the result of a forced + * caching setting running up against ptrace setting + * the page to be writable underneath. In this case, + * just keep the PTE coherent. + */ + if (hv_pte_get_nc(pte) && home != PAGE_HOME_IMMUTABLE) { + pte = hv_pte_clear_nc(pte); + pr_err("non-immutable page incoherently referenced: %#llx\n", + pte.val); + } + + switch (home) { + + case PAGE_HOME_UNCACHED: + pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); + break; + + case PAGE_HOME_INCOHERENT: + pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); + break; + + case PAGE_HOME_IMMUTABLE: + /* + * We could home this page anywhere, since it's immutable, + * but by default just home it to follow "hash_default". + */ + BUG_ON(hv_pte_get_writable(pte)); + if (pte_get_forcecache(pte)) { + /* Upgrade "force any cpu" to "No L3" for immutable. */ + if (hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_TILE_L3 + && pte_get_anyhome(pte)) { + pte = hv_pte_set_mode(pte, + HV_PTE_MODE_CACHE_NO_L3); + } + } else +#if CHIP_HAS_CBOX_HOME_MAP() + if (hash_default) + pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); + else +#endif + pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); + pte = hv_pte_set_nc(pte); + break; + +#if CHIP_HAS_CBOX_HOME_MAP() + case PAGE_HOME_HASH: + pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); + break; +#endif + + default: + BUG_ON(home < 0 || home >= NR_CPUS || + !cpu_is_valid_lotar(home)); + pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3); + pte = set_remote_cache_cpu(pte, home); + break; + } + +#if CHIP_HAS_NC_AND_NOALLOC_BITS() + if (noallocl2) + pte = hv_pte_set_no_alloc_l2(pte); + + /* Simplify "no local and no l3" to "uncached" */ + if (hv_pte_get_no_alloc_l2(pte) && hv_pte_get_no_alloc_l1(pte) && + hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) { + pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); + } +#endif + + /* Checking this case here gives a better panic than from the hv. */ + BUG_ON(hv_pte_get_mode(pte) == 0); + + return pte; +} +EXPORT_SYMBOL(pte_set_home); + +/* + * The routines in this section are the "static" versions of the normal + * dynamic homecaching routines; they just set the home cache + * of a kernel page once, and require a full-chip cache/TLB flush, + * so they're not suitable for anything but infrequent use. + */ + +#if CHIP_HAS_CBOX_HOME_MAP() +static inline int initial_page_home(void) { return PAGE_HOME_HASH; } +#else +static inline int initial_page_home(void) { return 0; } +#endif + +int page_home(struct page *page) +{ + if (PageHighMem(page)) { + return initial_page_home(); + } else { + unsigned long kva = (unsigned long)page_address(page); + return pte_to_home(*virt_to_pte(NULL, kva)); + } +} +EXPORT_SYMBOL(page_home); + +void homecache_change_page_home(struct page *page, int order, int home) +{ + int i, pages = (1 << order); + unsigned long kva; + + BUG_ON(PageHighMem(page)); + BUG_ON(page_count(page) > 1); + BUG_ON(page_mapcount(page) != 0); + kva = (unsigned long) page_address(page); + flush_remote(0, HV_FLUSH_EVICT_L2, &cpu_cacheable_map, + kva, pages * PAGE_SIZE, PAGE_SIZE, cpu_online_mask, + NULL, 0); + + for (i = 0; i < pages; ++i, kva += PAGE_SIZE) { + pte_t *ptep = virt_to_pte(NULL, kva); + pte_t pteval = *ptep; + BUG_ON(!pte_present(pteval) || pte_huge(pteval)); + __set_pte(ptep, pte_set_home(pteval, home)); + } +} + +struct page *homecache_alloc_pages(gfp_t gfp_mask, + unsigned int order, int home) +{ + struct page *page; + BUG_ON(gfp_mask & __GFP_HIGHMEM); /* must be lowmem */ + page = alloc_pages(gfp_mask, order); + if (page) + homecache_change_page_home(page, order, home); + return page; +} +EXPORT_SYMBOL(homecache_alloc_pages); + +struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask, + unsigned int order, int home) +{ + struct page *page; + BUG_ON(gfp_mask & __GFP_HIGHMEM); /* must be lowmem */ + page = alloc_pages_node(nid, gfp_mask, order); + if (page) + homecache_change_page_home(page, order, home); + return page; +} + +void homecache_free_pages(unsigned long addr, unsigned int order) +{ + struct page *page; + + if (addr == 0) + return; + + VM_BUG_ON(!virt_addr_valid((void *)addr)); + page = virt_to_page((void *)addr); + if (put_page_testzero(page)) { + homecache_change_page_home(page, order, initial_page_home()); + if (order == 0) { + free_hot_cold_page(page, 0); + } else { + init_page_count(page); + __free_pages(page, order); + } + } +} diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c new file mode 100644 index 00000000..42cfcba4 --- /dev/null +++ b/arch/tile/mm/hugetlbpage.c @@ -0,0 +1,342 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE Huge TLB Page Support for Kernel. + * Taken from i386 hugetlb implementation: + * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <linux/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + /* We do not yet support multiple huge page sizes. */ + BUG_ON(sz != PMD_SIZE); + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) + pte = (pte_t *) pmd_alloc(mm, pud, addr); + BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); + + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } + return (pte_t *) pmd; +} + +#ifdef HUGETLB_TEST +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + unsigned long start = address; + int length = 1; + int nr; + struct page *page; + struct vm_area_struct *vma; + + vma = find_vma(mm, addr); + if (!vma || !is_vm_hugetlb_page(vma)) + return ERR_PTR(-EINVAL); + + pte = huge_pte_offset(mm, address); + + /* hugetlb should be locked, and hence, prefaulted */ + WARN_ON(!pte || pte_none(*pte)); + + page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; + + WARN_ON(!PageHead(page)); + + return page; +} + +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + return NULL; +} + +#else + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE); +} + +int pud_huge(pud_t pud) +{ + return !!(pud_val(pud) & _PAGE_HUGE_PAGE); +} + +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~PMD_MASK) >> PAGE_SHIFT); + return page; +} + +struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pud); + if (page) + page += ((address & ~PUD_MASK) >> PAGE_SHIFT); + return page; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +#endif + +#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr; + + if (len > mm->cached_hole_size) { + start_addr = mm->free_area_cache; + } else { + start_addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + } + +full_search: + addr = ALIGN(start_addr, huge_page_size(h)); + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (TASK_SIZE - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. + */ + if (start_addr != TASK_UNMAPPED_BASE) { + start_addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (!vma || addr + len <= vma->vm_start) { + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = ALIGN(vma->vm_end, huge_page_size(h)); + } +} + +static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, + unsigned long addr0, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma, *prev_vma; + unsigned long base = mm->mmap_base, addr = addr0; + unsigned long largest_hole = mm->cached_hole_size; + int first_time = 1; + + /* don't allow allocations above current base */ + if (mm->free_area_cache > base) + mm->free_area_cache = base; + + if (len <= largest_hole) { + largest_hole = 0; + mm->free_area_cache = base; + } +try_again: + /* make sure it can fit in the remaining address space */ + if (mm->free_area_cache < len) + goto fail; + + /* either no address requested or can't fit in requested address hole */ + addr = (mm->free_area_cache - len) & huge_page_mask(h); + do { + /* + * Lookup failure means no vma is above this address, + * i.e. return with success: + */ + vma = find_vma_prev(mm, addr, &prev_vma); + if (!vma) { + return addr; + break; + } + + /* + * new region fits between prev_vma->vm_end and + * vma->vm_start, use it: + */ + if (addr + len <= vma->vm_start && + (!prev_vma || (addr >= prev_vma->vm_end))) { + /* remember the address as a hint for next time */ + mm->cached_hole_size = largest_hole; + mm->free_area_cache = addr; + return addr; + } else { + /* pull free_area_cache down to the first hole */ + if (mm->free_area_cache == vma->vm_end) { + mm->free_area_cache = vma->vm_start; + mm->cached_hole_size = largest_hole; + } + } + + /* remember the largest hole we saw so far */ + if (addr + largest_hole < vma->vm_start) + largest_hole = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = (vma->vm_start - len) & huge_page_mask(h); + + } while (len <= vma->vm_start); + +fail: + /* + * if hint left us with no space for the requested + * mapping then try again: + */ + if (first_time) { + mm->free_area_cache = base; + largest_hole = 0; + first_time = 0; + goto try_again; + } + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->free_area_cache = TASK_UNMAPPED_BASE; + mm->cached_hole_size = ~0UL; + addr = hugetlb_get_unmapped_area_bottomup(file, addr0, + len, pgoff, flags); + + /* + * Restore the topdown base: + */ + mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; +} + +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (len > TASK_SIZE) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + return addr; + } + + if (addr) { + addr = ALIGN(addr, huge_page_size(h)); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (current->mm->get_unmapped_area == arch_get_unmapped_area) + return hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + return hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (ps == PUD_SIZE) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + pr_err("hugepagesz: Unsupported page size %lu M\n", + ps >> 20); + return 0; + } + return 1; +} +__setup("hugepagesz=", setup_hugepagesz); + +#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c new file mode 100644 index 00000000..6a9d20dd --- /dev/null +++ b/arch/tile/mm/init.c @@ -0,0 +1,1085 @@ +/* + * Copyright (C) 1995 Linus Torvalds + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/module.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/swap.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/poison.h> +#include <linux/bootmem.h> +#include <linux/slab.h> +#include <linux/proc_fs.h> +#include <linux/efi.h> +#include <linux/memory_hotplug.h> +#include <linux/uaccess.h> +#include <asm/mmu_context.h> +#include <asm/processor.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/dma.h> +#include <asm/fixmap.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/homecache.h> +#include <hv/hypervisor.h> +#include <arch/chip.h> + +#include "migrate.h" + +#define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0)) + +#ifndef __tilegx__ +unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE; +EXPORT_SYMBOL(VMALLOC_RESERVE); +#endif + +/* Create an L2 page table */ +static pte_t * __init alloc_pte(void) +{ + return __alloc_bootmem(L2_KERNEL_PGTABLE_SIZE, HV_PAGE_TABLE_ALIGN, 0); +} + +/* + * L2 page tables per controller. We allocate these all at once from + * the bootmem allocator and store them here. This saves on kernel L2 + * page table memory, compared to allocating a full 64K page per L2 + * page table, and also means that in cases where we use huge pages, + * we are guaranteed to later be able to shatter those huge pages and + * switch to using these page tables instead, without requiring + * further allocation. Each l2_ptes[] entry points to the first page + * table for the first hugepage-size piece of memory on the + * controller; other page tables are just indexed directly, i.e. the + * L2 page tables are contiguous in memory for each controller. + */ +static pte_t *l2_ptes[MAX_NUMNODES]; +static int num_l2_ptes[MAX_NUMNODES]; + +static void init_prealloc_ptes(int node, int pages) +{ + BUG_ON(pages & (HV_L2_ENTRIES-1)); + if (pages) { + num_l2_ptes[node] = pages; + l2_ptes[node] = __alloc_bootmem(pages * sizeof(pte_t), + HV_PAGE_TABLE_ALIGN, 0); + } +} + +pte_t *get_prealloc_pte(unsigned long pfn) +{ + int node = pfn_to_nid(pfn); + pfn &= ~(-1UL << (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT)); + BUG_ON(node >= MAX_NUMNODES); + BUG_ON(pfn >= num_l2_ptes[node]); + return &l2_ptes[node][pfn]; +} + +/* + * What caching do we expect pages from the heap to have when + * they are allocated during bootup? (Once we've installed the + * "real" swapper_pg_dir.) + */ +static int initial_heap_home(void) +{ +#if CHIP_HAS_CBOX_HOME_MAP() + if (hash_default) + return PAGE_HOME_HASH; +#endif + return smp_processor_id(); +} + +/* + * Place a pointer to an L2 page table in a middle page + * directory entry. + */ +static void __init assign_pte(pmd_t *pmd, pte_t *page_table) +{ + phys_addr_t pa = __pa(page_table); + unsigned long l2_ptfn = pa >> HV_LOG2_PAGE_TABLE_ALIGN; + pte_t pteval = hv_pte_set_ptfn(__pgprot(_PAGE_TABLE), l2_ptfn); + BUG_ON((pa & (HV_PAGE_TABLE_ALIGN-1)) != 0); + pteval = pte_set_home(pteval, initial_heap_home()); + *(pte_t *)pmd = pteval; + if (page_table != (pte_t *)pmd_page_vaddr(*pmd)) + BUG(); +} + +#ifdef __tilegx__ + +#if HV_L1_SIZE != HV_L2_SIZE +# error Rework assumption that L1 and L2 page tables are same size. +#endif + +/* Since pmd_t arrays and pte_t arrays are the same size, just use casts. */ +static inline pmd_t *alloc_pmd(void) +{ + return (pmd_t *)alloc_pte(); +} + +static inline void assign_pmd(pud_t *pud, pmd_t *pmd) +{ + assign_pte((pmd_t *)pud, (pte_t *)pmd); +} + +#endif /* __tilegx__ */ + +/* Replace the given pmd with a full PTE table. */ +void __init shatter_pmd(pmd_t *pmd) +{ + pte_t *pte = get_prealloc_pte(pte_pfn(*(pte_t *)pmd)); + assign_pte(pmd, pte); +} + +#ifdef CONFIG_HIGHMEM +/* + * This function initializes a certain range of kernel virtual memory + * with new bootmem page tables, everywhere page tables are missing in + * the given range. + */ + +/* + * NOTE: The pagetables are allocated contiguous on the physical space + * so we can cache the place of the first one and move around without + * checking the pgd every time. + */ +static void __init page_table_range_init(unsigned long start, + unsigned long end, pgd_t *pgd_base) +{ + pgd_t *pgd; + int pgd_idx; + unsigned long vaddr; + + vaddr = start; + pgd_idx = pgd_index(vaddr); + pgd = pgd_base + pgd_idx; + + for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { + pmd_t *pmd = pmd_offset(pud_offset(pgd, vaddr), vaddr); + if (pmd_none(*pmd)) + assign_pte(pmd, alloc_pte()); + vaddr += PMD_SIZE; + } +} +#endif /* CONFIG_HIGHMEM */ + + +#if CHIP_HAS_CBOX_HOME_MAP() + +static int __initdata ktext_hash = 1; /* .text pages */ +static int __initdata kdata_hash = 1; /* .data and .bss pages */ +int __write_once hash_default = 1; /* kernel allocator pages */ +EXPORT_SYMBOL(hash_default); +int __write_once kstack_hash = 1; /* if no homecaching, use h4h */ +#endif /* CHIP_HAS_CBOX_HOME_MAP */ + +/* + * CPUs to use to for striping the pages of kernel data. If hash-for-home + * is available, this is only relevant if kcache_hash sets up the + * .data and .bss to be page-homed, and we don't want the default mode + * of using the full set of kernel cpus for the striping. + */ +static __initdata struct cpumask kdata_mask; +static __initdata int kdata_arg_seen; + +int __write_once kdata_huge; /* if no homecaching, small pages */ + + +/* Combine a generic pgprot_t with cache home to get a cache-aware pgprot. */ +static pgprot_t __init construct_pgprot(pgprot_t prot, int home) +{ + prot = pte_set_home(prot, home); +#if CHIP_HAS_CBOX_HOME_MAP() + if (home == PAGE_HOME_IMMUTABLE) { + if (ktext_hash) + prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3); + else + prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3); + } +#endif + return prot; +} + +/* + * For a given kernel data VA, how should it be cached? + * We return the complete pgprot_t with caching bits set. + */ +static pgprot_t __init init_pgprot(ulong address) +{ + int cpu; + unsigned long page; + enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET }; + +#if CHIP_HAS_CBOX_HOME_MAP() + /* For kdata=huge, everything is just hash-for-home. */ + if (kdata_huge) + return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); +#endif + + /* We map the aliased pages of permanent text inaccessible. */ + if (address < (ulong) _sinittext - CODE_DELTA) + return PAGE_NONE; + + /* + * We map read-only data non-coherent for performance. We could + * use neighborhood caching on TILE64, but it's not clear it's a win. + */ + if ((address >= (ulong) __start_rodata && + address < (ulong) __end_rodata) || + address == (ulong) empty_zero_page) { + return construct_pgprot(PAGE_KERNEL_RO, PAGE_HOME_IMMUTABLE); + } + +#ifndef __tilegx__ +#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() + /* Force the atomic_locks[] array page to be hash-for-home. */ + if (address == (ulong) atomic_locks) + return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); +#endif +#endif + + /* + * Everything else that isn't data or bss is heap, so mark it + * with the initial heap home (hash-for-home, or this cpu). This + * includes any addresses after the loaded image and any address before + * _einitdata, since we already captured the case of text before + * _sinittext, and __pa(einittext) is approximately __pa(sinitdata). + * + * All the LOWMEM pages that we mark this way will get their + * struct page homecache properly marked later, in set_page_homes(). + * The HIGHMEM pages we leave with a default zero for their + * homes, but with a zero free_time we don't have to actually + * do a flush action the first time we use them, either. + */ + if (address >= (ulong) _end || address < (ulong) _einitdata) + return construct_pgprot(PAGE_KERNEL, initial_heap_home()); + +#if CHIP_HAS_CBOX_HOME_MAP() + /* Use hash-for-home if requested for data/bss. */ + if (kdata_hash) + return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); +#endif + + /* + * Make the w1data homed like heap to start with, to avoid + * making it part of the page-striped data area when we're just + * going to convert it to read-only soon anyway. + */ + if (address >= (ulong)__w1data_begin && address < (ulong)__w1data_end) + return construct_pgprot(PAGE_KERNEL, initial_heap_home()); + + /* + * Otherwise we just hand out consecutive cpus. To avoid + * requiring this function to hold state, we just walk forward from + * _sdata by PAGE_SIZE, skipping the readonly and init data, to reach + * the requested address, while walking cpu home around kdata_mask. + * This is typically no more than a dozen or so iterations. + */ + page = (((ulong)__w1data_end) + PAGE_SIZE - 1) & PAGE_MASK; + BUG_ON(address < page || address >= (ulong)_end); + cpu = cpumask_first(&kdata_mask); + for (; page < address; page += PAGE_SIZE) { + if (page >= (ulong)&init_thread_union && + page < (ulong)&init_thread_union + THREAD_SIZE) + continue; + if (page == (ulong)empty_zero_page) + continue; +#ifndef __tilegx__ +#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() + if (page == (ulong)atomic_locks) + continue; +#endif +#endif + cpu = cpumask_next(cpu, &kdata_mask); + if (cpu == NR_CPUS) + cpu = cpumask_first(&kdata_mask); + } + return construct_pgprot(PAGE_KERNEL, cpu); +} + +/* + * This function sets up how we cache the kernel text. If we have + * hash-for-home support, normally that is used instead (see the + * kcache_hash boot flag for more information). But if we end up + * using a page-based caching technique, this option sets up the + * details of that. In addition, the "ktext=nocache" option may + * always be used to disable local caching of text pages, if desired. + */ + +static int __initdata ktext_arg_seen; +static int __initdata ktext_small; +static int __initdata ktext_local; +static int __initdata ktext_all; +static int __initdata ktext_nondataplane; +static int __initdata ktext_nocache; +static struct cpumask __initdata ktext_mask; + +static int __init setup_ktext(char *str) +{ + if (str == NULL) + return -EINVAL; + + /* If you have a leading "nocache", turn off ktext caching */ + if (strncmp(str, "nocache", 7) == 0) { + ktext_nocache = 1; + pr_info("ktext: disabling local caching of kernel text\n"); + str += 7; + if (*str == ',') + ++str; + if (*str == '\0') + return 0; + } + + ktext_arg_seen = 1; + + /* Default setting on Tile64: use a huge page */ + if (strcmp(str, "huge") == 0) + pr_info("ktext: using one huge locally cached page\n"); + + /* Pay TLB cost but get no cache benefit: cache small pages locally */ + else if (strcmp(str, "local") == 0) { + ktext_small = 1; + ktext_local = 1; + pr_info("ktext: using small pages with local caching\n"); + } + + /* Neighborhood cache ktext pages on all cpus. */ + else if (strcmp(str, "all") == 0) { + ktext_small = 1; + ktext_all = 1; + pr_info("ktext: using maximal caching neighborhood\n"); + } + + + /* Neighborhood ktext pages on specified mask */ + else if (cpulist_parse(str, &ktext_mask) == 0) { + char buf[NR_CPUS * 5]; + cpulist_scnprintf(buf, sizeof(buf), &ktext_mask); + if (cpumask_weight(&ktext_mask) > 1) { + ktext_small = 1; + pr_info("ktext: using caching neighborhood %s " + "with small pages\n", buf); + } else { + pr_info("ktext: caching on cpu %s with one huge page\n", + buf); + } + } + + else if (*str) + return -EINVAL; + + return 0; +} + +early_param("ktext", setup_ktext); + + +static inline pgprot_t ktext_set_nocache(pgprot_t prot) +{ + if (!ktext_nocache) + prot = hv_pte_set_nc(prot); +#if CHIP_HAS_NC_AND_NOALLOC_BITS() + else + prot = hv_pte_set_no_alloc_l2(prot); +#endif + return prot; +} + +#ifndef __tilegx__ +static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) +{ + return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va); +} +#else +static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) +{ + pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va); + if (pud_none(*pud)) + assign_pmd(pud, alloc_pmd()); + return pmd_offset(pud, va); +} +#endif + +/* Temporary page table we use for staging. */ +static pgd_t pgtables[PTRS_PER_PGD] + __attribute__((aligned(HV_PAGE_TABLE_ALIGN))); + +/* + * This maps the physical memory to kernel virtual address space, a total + * of max_low_pfn pages, by creating page tables starting from address + * PAGE_OFFSET. + * + * This routine transitions us from using a set of compiled-in large + * pages to using some more precise caching, including removing access + * to code pages mapped at PAGE_OFFSET (executed only at MEM_SV_START) + * marking read-only data as locally cacheable, striping the remaining + * .data and .bss across all the available tiles, and removing access + * to pages above the top of RAM (thus ensuring a page fault from a bad + * virtual address rather than a hypervisor shoot down for accessing + * memory outside the assigned limits). + */ +static void __init kernel_physical_mapping_init(pgd_t *pgd_base) +{ + unsigned long address, pfn; + pmd_t *pmd; + pte_t *pte; + int pte_ofs; + const struct cpumask *my_cpu_mask = cpumask_of(smp_processor_id()); + struct cpumask kstripe_mask; + int rc, i; + +#if CHIP_HAS_CBOX_HOME_MAP() + if (ktext_arg_seen && ktext_hash) { + pr_warning("warning: \"ktext\" boot argument ignored" + " if \"kcache_hash\" sets up text hash-for-home\n"); + ktext_small = 0; + } + + if (kdata_arg_seen && kdata_hash) { + pr_warning("warning: \"kdata\" boot argument ignored" + " if \"kcache_hash\" sets up data hash-for-home\n"); + } + + if (kdata_huge && !hash_default) { + pr_warning("warning: disabling \"kdata=huge\"; requires" + " kcache_hash=all or =allbutstack\n"); + kdata_huge = 0; + } +#endif + + /* + * Set up a mask for cpus to use for kernel striping. + * This is normally all cpus, but minus dataplane cpus if any. + * If the dataplane covers the whole chip, we stripe over + * the whole chip too. + */ + cpumask_copy(&kstripe_mask, cpu_possible_mask); + if (!kdata_arg_seen) + kdata_mask = kstripe_mask; + + /* Allocate and fill in L2 page tables */ + for (i = 0; i < MAX_NUMNODES; ++i) { +#ifdef CONFIG_HIGHMEM + unsigned long end_pfn = node_lowmem_end_pfn[i]; +#else + unsigned long end_pfn = node_end_pfn[i]; +#endif + unsigned long end_huge_pfn = 0; + + /* Pre-shatter the last huge page to allow per-cpu pages. */ + if (kdata_huge) + end_huge_pfn = end_pfn - (HPAGE_SIZE >> PAGE_SHIFT); + + pfn = node_start_pfn[i]; + + /* Allocate enough memory to hold L2 page tables for node. */ + init_prealloc_ptes(i, end_pfn - pfn); + + address = (unsigned long) pfn_to_kaddr(pfn); + while (pfn < end_pfn) { + BUG_ON(address & (HPAGE_SIZE-1)); + pmd = get_pmd(pgtables, address); + pte = get_prealloc_pte(pfn); + if (pfn < end_huge_pfn) { + pgprot_t prot = init_pgprot(address); + *(pte_t *)pmd = pte_mkhuge(pfn_pte(pfn, prot)); + for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE; + pfn++, pte_ofs++, address += PAGE_SIZE) + pte[pte_ofs] = pfn_pte(pfn, prot); + } else { + if (kdata_huge) + printk(KERN_DEBUG "pre-shattered huge" + " page at %#lx\n", address); + for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE; + pfn++, pte_ofs++, address += PAGE_SIZE) { + pgprot_t prot = init_pgprot(address); + pte[pte_ofs] = pfn_pte(pfn, prot); + } + assign_pte(pmd, pte); + } + } + } + + /* + * Set or check ktext_map now that we have cpu_possible_mask + * and kstripe_mask to work with. + */ + if (ktext_all) + cpumask_copy(&ktext_mask, cpu_possible_mask); + else if (ktext_nondataplane) + ktext_mask = kstripe_mask; + else if (!cpumask_empty(&ktext_mask)) { + /* Sanity-check any mask that was requested */ + struct cpumask bad; + cpumask_andnot(&bad, &ktext_mask, cpu_possible_mask); + cpumask_and(&ktext_mask, &ktext_mask, cpu_possible_mask); + if (!cpumask_empty(&bad)) { + char buf[NR_CPUS * 5]; + cpulist_scnprintf(buf, sizeof(buf), &bad); + pr_info("ktext: not using unavailable cpus %s\n", buf); + } + if (cpumask_empty(&ktext_mask)) { + pr_warning("ktext: no valid cpus; caching on %d.\n", + smp_processor_id()); + cpumask_copy(&ktext_mask, + cpumask_of(smp_processor_id())); + } + } + + address = MEM_SV_INTRPT; + pmd = get_pmd(pgtables, address); + pfn = 0; /* code starts at PA 0 */ + if (ktext_small) { + /* Allocate an L2 PTE for the kernel text */ + int cpu = 0; + pgprot_t prot = construct_pgprot(PAGE_KERNEL_EXEC, + PAGE_HOME_IMMUTABLE); + + if (ktext_local) { + if (ktext_nocache) + prot = hv_pte_set_mode(prot, + HV_PTE_MODE_UNCACHED); + else + prot = hv_pte_set_mode(prot, + HV_PTE_MODE_CACHE_NO_L3); + } else { + prot = hv_pte_set_mode(prot, + HV_PTE_MODE_CACHE_TILE_L3); + cpu = cpumask_first(&ktext_mask); + + prot = ktext_set_nocache(prot); + } + + BUG_ON(address != (unsigned long)_stext); + pte = NULL; + for (; address < (unsigned long)_einittext; + pfn++, address += PAGE_SIZE) { + pte_ofs = pte_index(address); + if (pte_ofs == 0) { + if (pte) + assign_pte(pmd++, pte); + pte = alloc_pte(); + } + if (!ktext_local) { + prot = set_remote_cache_cpu(prot, cpu); + cpu = cpumask_next(cpu, &ktext_mask); + if (cpu == NR_CPUS) + cpu = cpumask_first(&ktext_mask); + } + pte[pte_ofs] = pfn_pte(pfn, prot); + } + if (pte) + assign_pte(pmd, pte); + } else { + pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); + pteval = pte_mkhuge(pteval); +#if CHIP_HAS_CBOX_HOME_MAP() + if (ktext_hash) { + pteval = hv_pte_set_mode(pteval, + HV_PTE_MODE_CACHE_HASH_L3); + pteval = ktext_set_nocache(pteval); + } else +#endif /* CHIP_HAS_CBOX_HOME_MAP() */ + if (cpumask_weight(&ktext_mask) == 1) { + pteval = set_remote_cache_cpu(pteval, + cpumask_first(&ktext_mask)); + pteval = hv_pte_set_mode(pteval, + HV_PTE_MODE_CACHE_TILE_L3); + pteval = ktext_set_nocache(pteval); + } else if (ktext_nocache) + pteval = hv_pte_set_mode(pteval, + HV_PTE_MODE_UNCACHED); + else + pteval = hv_pte_set_mode(pteval, + HV_PTE_MODE_CACHE_NO_L3); + for (; address < (unsigned long)_einittext; + pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE) + *(pte_t *)(pmd++) = pfn_pte(pfn, pteval); + } + + /* Set swapper_pgprot here so it is flushed to memory right away. */ + swapper_pgprot = init_pgprot((unsigned long)swapper_pg_dir); + + /* + * Since we may be changing the caching of the stack and page + * table itself, we invoke an assembly helper to do the + * following steps: + * + * - flush the cache so we start with an empty slate + * - install pgtables[] as the real page table + * - flush the TLB so the new page table takes effect + */ + rc = flush_and_install_context(__pa(pgtables), + init_pgprot((unsigned long)pgtables), + __get_cpu_var(current_asid), + cpumask_bits(my_cpu_mask)); + BUG_ON(rc != 0); + + /* Copy the page table back to the normal swapper_pg_dir. */ + memcpy(pgd_base, pgtables, sizeof(pgtables)); + __install_page_table(pgd_base, __get_cpu_var(current_asid), + swapper_pgprot); + + /* + * We just read swapper_pgprot and thus brought it into the cache, + * with its new home & caching mode. When we start the other CPUs, + * they're going to reference swapper_pgprot via their initial fake + * VA-is-PA mappings, which cache everything locally. At that + * time, if it's in our cache with a conflicting home, the + * simulator's coherence checker will complain. So, flush it out + * of our cache; we're not going to ever use it again anyway. + */ + __insn_finv(&swapper_pgprot); +} + +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. + * + * On Tile, the only valid things for which we can just hand out unchecked + * PTEs are the kernel code and data. Anything else might change its + * homing with time, and we wouldn't know to adjust the /dev/mem PTEs. + * Note that init_thread_union is released to heap soon after boot, + * so we include it in the init data. + * + * For TILE-Gx, we might want to consider allowing access to PA + * regions corresponding to PCI space, etc. + */ +int devmem_is_allowed(unsigned long pagenr) +{ + return pagenr < kaddr_to_pfn(_end) && + !(pagenr >= kaddr_to_pfn(&init_thread_union) || + pagenr < kaddr_to_pfn(_einitdata)) && + !(pagenr >= kaddr_to_pfn(_sinittext) || + pagenr <= kaddr_to_pfn(_einittext-1)); +} + +#ifdef CONFIG_HIGHMEM +static void __init permanent_kmaps_init(pgd_t *pgd_base) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long vaddr; + + vaddr = PKMAP_BASE; + page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + + pgd = swapper_pg_dir + pgd_index(vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + pkmap_page_table = pte; +} +#endif /* CONFIG_HIGHMEM */ + + +static void __init init_free_pfn_range(unsigned long start, unsigned long end) +{ + unsigned long pfn; + struct page *page = pfn_to_page(start); + + for (pfn = start; pfn < end; ) { + /* Optimize by freeing pages in large batches */ + int order = __ffs(pfn); + int count, i; + struct page *p; + + if (order >= MAX_ORDER) + order = MAX_ORDER-1; + count = 1 << order; + while (pfn + count > end) { + count >>= 1; + --order; + } + for (p = page, i = 0; i < count; ++i, ++p) { + __ClearPageReserved(p); + /* + * Hacky direct set to avoid unnecessary + * lock take/release for EVERY page here. + */ + p->_count.counter = 0; + p->_mapcount.counter = -1; + } + init_page_count(page); + __free_pages(page, order); + totalram_pages += count; + + page += count; + pfn += count; + } +} + +static void __init set_non_bootmem_pages_init(void) +{ + struct zone *z; + for_each_zone(z) { + unsigned long start, end; + int nid = z->zone_pgdat->node_id; + int idx = zone_idx(z); + + start = z->zone_start_pfn; + if (start == 0) + continue; /* bootmem */ + end = start + z->spanned_pages; + if (idx == ZONE_NORMAL) { + BUG_ON(start != node_start_pfn[nid]); + start = node_free_pfn[nid]; + } +#ifdef CONFIG_HIGHMEM + if (idx == ZONE_HIGHMEM) + totalhigh_pages += z->spanned_pages; +#endif + if (kdata_huge) { + unsigned long percpu_pfn = node_percpu_pfn[nid]; + if (start < percpu_pfn && end > percpu_pfn) + end = percpu_pfn; + } +#ifdef CONFIG_PCI + if (start <= pci_reserve_start_pfn && + end > pci_reserve_start_pfn) { + if (end > pci_reserve_end_pfn) + init_free_pfn_range(pci_reserve_end_pfn, end); + end = pci_reserve_start_pfn; + } +#endif + init_free_pfn_range(start, end); + } +} + +/* + * paging_init() sets up the page tables - note that all of lowmem is + * already mapped by head.S. + */ +void __init paging_init(void) +{ +#ifdef CONFIG_HIGHMEM + unsigned long vaddr, end; +#endif +#ifdef __tilegx__ + pud_t *pud; +#endif + pgd_t *pgd_base = swapper_pg_dir; + + kernel_physical_mapping_init(pgd_base); + +#ifdef CONFIG_HIGHMEM + /* + * Fixed mappings, only the page table structure has to be + * created - mappings will be set by set_fixmap(): + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; + page_table_range_init(vaddr, end, pgd_base); + permanent_kmaps_init(pgd_base); +#endif + +#ifdef __tilegx__ + /* + * Since GX allocates just one pmd_t array worth of vmalloc space, + * we go ahead and allocate it statically here, then share it + * globally. As a result we don't have to worry about any task + * changing init_mm once we get up and running, and there's no + * need for e.g. vmalloc_sync_all(). + */ + BUILD_BUG_ON(pgd_index(VMALLOC_START) != pgd_index(VMALLOC_END)); + pud = pud_offset(pgd_base + pgd_index(VMALLOC_START), VMALLOC_START); + assign_pmd(pud, alloc_pmd()); +#endif +} + + +/* + * Walk the kernel page tables and derive the page_home() from + * the PTEs, so that set_pte() can properly validate the caching + * of all PTEs it sees. + */ +void __init set_page_homes(void) +{ +} + +static void __init set_max_mapnr_init(void) +{ +#ifdef CONFIG_FLATMEM + max_mapnr = max_low_pfn; +#endif +} + +void __init mem_init(void) +{ + int codesize, datasize, initsize; + int i; +#ifndef __tilegx__ + void *last; +#endif + +#ifdef CONFIG_FLATMEM + BUG_ON(!mem_map); +#endif + +#ifdef CONFIG_HIGHMEM + /* check that fixmap and pkmap do not overlap */ + if (PKMAP_ADDR(LAST_PKMAP-1) >= FIXADDR_START) { + pr_err("fixmap and kmap areas overlap" + " - this will crash\n"); + pr_err("pkstart: %lxh pkend: %lxh fixstart %lxh\n", + PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP-1), + FIXADDR_START); + BUG(); + } +#endif + + set_max_mapnr_init(); + + /* this will put all bootmem onto the freelists */ + totalram_pages += free_all_bootmem(); + + /* count all remaining LOWMEM and give all HIGHMEM to page allocator */ + set_non_bootmem_pages_init(); + + codesize = (unsigned long)&_etext - (unsigned long)&_text; + datasize = (unsigned long)&_end - (unsigned long)&_sdata; + initsize = (unsigned long)&_einittext - (unsigned long)&_sinittext; + initsize += (unsigned long)&_einitdata - (unsigned long)&_sinitdata; + + pr_info("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + num_physpages << (PAGE_SHIFT-10), + codesize >> 10, + datasize >> 10, + initsize >> 10, + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) + ); + + /* + * In debug mode, dump some interesting memory mappings. + */ +#ifdef CONFIG_HIGHMEM + printk(KERN_DEBUG " KMAP %#lx - %#lx\n", + FIXADDR_START, FIXADDR_TOP + PAGE_SIZE - 1); + printk(KERN_DEBUG " PKMAP %#lx - %#lx\n", + PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1); +#endif +#ifdef CONFIG_HUGEVMAP + printk(KERN_DEBUG " HUGEMAP %#lx - %#lx\n", + HUGE_VMAP_BASE, HUGE_VMAP_END - 1); +#endif + printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n", + _VMALLOC_START, _VMALLOC_END - 1); +#ifdef __tilegx__ + for (i = MAX_NUMNODES-1; i >= 0; --i) { + struct pglist_data *node = &node_data[i]; + if (node->node_present_pages) { + unsigned long start = (unsigned long) + pfn_to_kaddr(node->node_start_pfn); + unsigned long end = start + + (node->node_present_pages << PAGE_SHIFT); + printk(KERN_DEBUG " MEM%d %#lx - %#lx\n", + i, start, end - 1); + } + } +#else + last = high_memory; + for (i = MAX_NUMNODES-1; i >= 0; --i) { + if ((unsigned long)vbase_map[i] != -1UL) { + printk(KERN_DEBUG " LOWMEM%d %#lx - %#lx\n", + i, (unsigned long) (vbase_map[i]), + (unsigned long) (last-1)); + last = vbase_map[i]; + } + } +#endif + +#ifndef __tilegx__ + /* + * Convert from using one lock for all atomic operations to + * one per cpu. + */ + __init_atomic_per_cpu(); +#endif +} + +/* + * this is for the non-NUMA, single node SMP system case. + * Specifically, in the case of x86, we will always add + * memory to the highmem for now. + */ +#ifndef CONFIG_NEED_MULTIPLE_NODES +int arch_add_memory(u64 start, u64 size) +{ + struct pglist_data *pgdata = &contig_page_data; + struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + return __add_pages(zone, start_pfn, nr_pages); +} + +int remove_memory(u64 start, u64 size) +{ + return -EINVAL; +} +#endif + +struct kmem_cache *pgd_cache; + +void __init pgtable_cache_init(void) +{ + pgd_cache = kmem_cache_create("pgd", SIZEOF_PGD, SIZEOF_PGD, 0, NULL); + if (!pgd_cache) + panic("pgtable_cache_init(): Cannot create pgd cache"); +} + +#if !CHIP_HAS_COHERENT_LOCAL_CACHE() +/* + * The __w1data area holds data that is only written during initialization, + * and is read-only and thus freely cacheable thereafter. Fix the page + * table entries that cover that region accordingly. + */ +static void mark_w1data_ro(void) +{ + /* Loop over page table entries */ + unsigned long addr = (unsigned long)__w1data_begin; + BUG_ON((addr & (PAGE_SIZE-1)) != 0); + for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) { + unsigned long pfn = kaddr_to_pfn((void *)addr); + pte_t *ptep = virt_to_pte(NULL, addr); + BUG_ON(pte_huge(*ptep)); /* not relevant for kdata_huge */ + set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO)); + } +} +#endif + +#ifdef CONFIG_DEBUG_PAGEALLOC +static long __write_once initfree; +#else +static long __write_once initfree = 1; +#endif + +/* Select whether to free (1) or mark unusable (0) the __init pages. */ +static int __init set_initfree(char *str) +{ + long val; + if (strict_strtol(str, 0, &val) == 0) { + initfree = val; + pr_info("initfree: %s free init pages\n", + initfree ? "will" : "won't"); + } + return 1; +} +__setup("initfree=", set_initfree); + +static void free_init_pages(char *what, unsigned long begin, unsigned long end) +{ + unsigned long addr = (unsigned long) begin; + + if (kdata_huge && !initfree) { + pr_warning("Warning: ignoring initfree=0:" + " incompatible with kdata=huge\n"); + initfree = 1; + } + end = (end + PAGE_SIZE - 1) & PAGE_MASK; + local_flush_tlb_pages(NULL, begin, PAGE_SIZE, end - begin); + for (addr = begin; addr < end; addr += PAGE_SIZE) { + /* + * Note we just reset the home here directly in the + * page table. We know this is safe because our caller + * just flushed the caches on all the other cpus, + * and they won't be touching any of these pages. + */ + int pfn = kaddr_to_pfn((void *)addr); + struct page *page = pfn_to_page(pfn); + pte_t *ptep = virt_to_pte(NULL, addr); + if (!initfree) { + /* + * If debugging page accesses then do not free + * this memory but mark them not present - any + * buggy init-section access will create a + * kernel page fault: + */ + pte_clear(&init_mm, addr, ptep); + continue; + } + __ClearPageReserved(page); + init_page_count(page); + if (pte_huge(*ptep)) + BUG_ON(!kdata_huge); + else + set_pte_at(&init_mm, addr, ptep, + pfn_pte(pfn, PAGE_KERNEL)); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); + free_page(addr); + totalram_pages++; + } + pr_info("Freeing %s: %ldk freed\n", what, (end - begin) >> 10); +} + +void free_initmem(void) +{ + const unsigned long text_delta = MEM_SV_INTRPT - PAGE_OFFSET; + + /* + * Evict the dirty initdata on the boot cpu, evict the w1data + * wherever it's homed, and evict all the init code everywhere. + * We are guaranteed that no one will touch the init pages any + * more, and although other cpus may be touching the w1data, + * we only actually change the caching on tile64, which won't + * be keeping local copies in the other tiles' caches anyway. + */ + homecache_evict(&cpu_cacheable_map); + + /* Free the data pages that we won't use again after init. */ + free_init_pages("unused kernel data", + (unsigned long)_sinitdata, + (unsigned long)_einitdata); + + /* + * Free the pages mapped from 0xc0000000 that correspond to code + * pages from MEM_SV_INTRPT that we won't use again after init. + */ + free_init_pages("unused kernel text", + (unsigned long)_sinittext - text_delta, + (unsigned long)_einittext - text_delta); + +#if !CHIP_HAS_COHERENT_LOCAL_CACHE() + /* + * Upgrade the .w1data section to globally cached. + * We don't do this on tilepro, since the cache architecture + * pretty much makes it irrelevant, and in any case we end + * up having racing issues with other tiles that may touch + * the data after we flush the cache but before we update + * the PTEs and flush the TLBs, causing sharer shootdowns + * later. Even though this is to clean data, it seems like + * an unnecessary complication. + */ + mark_w1data_ro(); +#endif + + /* Do a global TLB flush so everyone sees the changes. */ + flush_tlb_all(); +} diff --git a/arch/tile/mm/migrate.h b/arch/tile/mm/migrate.h new file mode 100644 index 00000000..cd45a083 --- /dev/null +++ b/arch/tile/mm/migrate.h @@ -0,0 +1,50 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Structure definitions for migration, exposed here for use by + * arch/tile/kernel/asm-offsets.c. + */ + +#ifndef MM_MIGRATE_H +#define MM_MIGRATE_H + +#include <linux/cpumask.h> +#include <hv/hypervisor.h> + +/* + * This function is used as a helper when setting up the initial + * page table (swapper_pg_dir). + */ +extern int flush_and_install_context(HV_PhysAddr page_table, HV_PTE access, + HV_ASID asid, + const unsigned long *cpumask); + +/* + * This function supports migration as a "helper" as follows: + * + * - Set the stack PTE itself to "migrating". + * - Do a global TLB flush for (va,length) and the specified ASIDs. + * - Do a cache-evict on all necessary cpus. + * - Write the new stack PTE. + * + * Note that any non-NULL pointers must not point to the page that + * is handled by the stack_pte itself. + */ +extern int homecache_migrate_stack_and_flush(pte_t stack_pte, unsigned long va, + size_t length, pte_t *stack_ptep, + const struct cpumask *cache_cpumask, + const struct cpumask *tlb_cpumask, + HV_Remote_ASID *asids, + int asidcount); + +#endif /* MM_MIGRATE_H */ diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S new file mode 100644 index 00000000..ac01a7cd --- /dev/null +++ b/arch/tile/mm/migrate_32.S @@ -0,0 +1,212 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * This routine is a helper for migrating the home of a set of pages to + * a new cpu. See the documentation in homecache.c for more information. + */ + +#include <linux/linkage.h> +#include <linux/threads.h> +#include <asm/page.h> +#include <asm/thread_info.h> +#include <asm/types.h> +#include <asm/asm-offsets.h> +#include <hv/hypervisor.h> + + .text + +/* + * First, some definitions that apply to all the code in the file. + */ + +/* Locals (caller-save) */ +#define r_tmp r10 +#define r_save_sp r11 + +/* What we save where in the stack frame; must include all callee-saves. */ +#define FRAME_SP 4 +#define FRAME_R30 8 +#define FRAME_R31 12 +#define FRAME_R32 16 +#define FRAME_R33 20 +#define FRAME_R34 24 +#define FRAME_R35 28 +#define FRAME_SIZE 32 + + + + +/* + * On entry: + * + * r0 low word of the new context PA to install (moved to r_context_lo) + * r1 high word of the new context PA to install (moved to r_context_hi) + * r2 low word of PTE to use for context access (moved to r_access_lo) + * r3 high word of PTE to use for context access (moved to r_access_lo) + * r4 ASID to use for new context (moved to r_asid) + * r5 pointer to cpumask with just this cpu set in it (r_my_cpumask) + */ + +/* Arguments (caller-save) */ +#define r_context_lo_in r0 +#define r_context_hi_in r1 +#define r_access_lo_in r2 +#define r_access_hi_in r3 +#define r_asid_in r4 +#define r_my_cpumask r5 + +/* Locals (callee-save); must not be more than FRAME_xxx above. */ +#define r_save_ics r30 +#define r_context_lo r31 +#define r_context_hi r32 +#define r_access_lo r33 +#define r_access_hi r34 +#define r_asid r35 + +STD_ENTRY(flush_and_install_context) + /* + * Create a stack frame; we can't touch it once we flush the + * cache until we install the new page table and flush the TLB. + */ + { + move r_save_sp, sp + sw sp, lr + addi sp, sp, -FRAME_SIZE + } + addi r_tmp, sp, FRAME_SP + { + sw r_tmp, r_save_sp + addi r_tmp, sp, FRAME_R30 + } + { + sw r_tmp, r30 + addi r_tmp, sp, FRAME_R31 + } + { + sw r_tmp, r31 + addi r_tmp, sp, FRAME_R32 + } + { + sw r_tmp, r32 + addi r_tmp, sp, FRAME_R33 + } + { + sw r_tmp, r33 + addi r_tmp, sp, FRAME_R34 + } + { + sw r_tmp, r34 + addi r_tmp, sp, FRAME_R35 + } + sw r_tmp, r35 + + /* Move some arguments to callee-save registers. */ + { + move r_context_lo, r_context_lo_in + move r_context_hi, r_context_hi_in + } + { + move r_access_lo, r_access_lo_in + move r_access_hi, r_access_hi_in + } + move r_asid, r_asid_in + + /* Disable interrupts, since we can't use our stack. */ + { + mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION + movei r_tmp, 1 + } + mtspr INTERRUPT_CRITICAL_SECTION, r_tmp + + /* First, flush our L2 cache. */ + { + move r0, zero /* cache_pa */ + move r1, zero + } + { + auli r2, zero, ha16(HV_FLUSH_EVICT_L2) /* cache_control */ + move r3, r_my_cpumask /* cache_cpumask */ + } + { + move r4, zero /* tlb_va */ + move r5, zero /* tlb_length */ + } + { + move r6, zero /* tlb_pgsize */ + move r7, zero /* tlb_cpumask */ + } + { + move r8, zero /* asids */ + move r9, zero /* asidcount */ + } + jal hv_flush_remote + bnz r0, .Ldone + + /* Now install the new page table. */ + { + move r0, r_context_lo + move r1, r_context_hi + } + { + move r2, r_access_lo + move r3, r_access_hi + } + { + move r4, r_asid + movei r5, HV_CTX_DIRECTIO + } + jal hv_install_context + bnz r0, .Ldone + + /* Finally, flush the TLB. */ + { + movei r0, 0 /* preserve_global */ + jal hv_flush_all + } + +.Ldone: + /* Reset interrupts back how they were before. */ + mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics + + /* Restore the callee-saved registers and return. */ + addli lr, sp, FRAME_SIZE + { + lw lr, lr + addli r_tmp, sp, FRAME_R30 + } + { + lw r30, r_tmp + addli r_tmp, sp, FRAME_R31 + } + { + lw r31, r_tmp + addli r_tmp, sp, FRAME_R32 + } + { + lw r32, r_tmp + addli r_tmp, sp, FRAME_R33 + } + { + lw r33, r_tmp + addli r_tmp, sp, FRAME_R34 + } + { + lw r34, r_tmp + addli r_tmp, sp, FRAME_R35 + } + { + lw r35, r_tmp + addi sp, sp, FRAME_SIZE + } + jrp lr + STD_ENDPROC(flush_and_install_context) diff --git a/arch/tile/mm/migrate_64.S b/arch/tile/mm/migrate_64.S new file mode 100644 index 00000000..e76fea68 --- /dev/null +++ b/arch/tile/mm/migrate_64.S @@ -0,0 +1,187 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * This routine is a helper for migrating the home of a set of pages to + * a new cpu. See the documentation in homecache.c for more information. + */ + +#include <linux/linkage.h> +#include <linux/threads.h> +#include <asm/page.h> +#include <asm/thread_info.h> +#include <asm/types.h> +#include <asm/asm-offsets.h> +#include <hv/hypervisor.h> + + .text + +/* + * First, some definitions that apply to all the code in the file. + */ + +/* Locals (caller-save) */ +#define r_tmp r10 +#define r_save_sp r11 + +/* What we save where in the stack frame; must include all callee-saves. */ +#define FRAME_SP 8 +#define FRAME_R30 16 +#define FRAME_R31 24 +#define FRAME_R32 32 +#define FRAME_R33 40 +#define FRAME_SIZE 48 + + + + +/* + * On entry: + * + * r0 the new context PA to install (moved to r_context) + * r1 PTE to use for context access (moved to r_access) + * r2 ASID to use for new context (moved to r_asid) + * r3 pointer to cpumask with just this cpu set in it (r_my_cpumask) + */ + +/* Arguments (caller-save) */ +#define r_context_in r0 +#define r_access_in r1 +#define r_asid_in r2 +#define r_my_cpumask r3 + +/* Locals (callee-save); must not be more than FRAME_xxx above. */ +#define r_save_ics r30 +#define r_context r31 +#define r_access r32 +#define r_asid r33 + +/* + * Caller-save locals and frame constants are the same as + * for homecache_migrate_stack_and_flush. + */ + +STD_ENTRY(flush_and_install_context) + /* + * Create a stack frame; we can't touch it once we flush the + * cache until we install the new page table and flush the TLB. + */ + { + move r_save_sp, sp + st sp, lr + addi sp, sp, -FRAME_SIZE + } + addi r_tmp, sp, FRAME_SP + { + st r_tmp, r_save_sp + addi r_tmp, sp, FRAME_R30 + } + { + st r_tmp, r30 + addi r_tmp, sp, FRAME_R31 + } + { + st r_tmp, r31 + addi r_tmp, sp, FRAME_R32 + } + { + st r_tmp, r32 + addi r_tmp, sp, FRAME_R33 + } + st r_tmp, r33 + + /* Move some arguments to callee-save registers. */ + { + move r_context, r_context_in + move r_access, r_access_in + } + move r_asid, r_asid_in + + /* Disable interrupts, since we can't use our stack. */ + { + mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION + movei r_tmp, 1 + } + mtspr INTERRUPT_CRITICAL_SECTION, r_tmp + + /* First, flush our L2 cache. */ + { + move r0, zero /* cache_pa */ + moveli r1, hw2_last(HV_FLUSH_EVICT_L2) /* cache_control */ + } + { + shl16insli r1, r1, hw1(HV_FLUSH_EVICT_L2) + move r2, r_my_cpumask /* cache_cpumask */ + } + { + shl16insli r1, r1, hw0(HV_FLUSH_EVICT_L2) + move r3, zero /* tlb_va */ + } + { + move r4, zero /* tlb_length */ + move r5, zero /* tlb_pgsize */ + } + { + move r6, zero /* tlb_cpumask */ + move r7, zero /* asids */ + } + { + move r8, zero /* asidcount */ + jal hv_flush_remote + } + bnez r0, 1f + + /* Now install the new page table. */ + { + move r0, r_context + move r1, r_access + } + { + move r2, r_asid + movei r3, HV_CTX_DIRECTIO + } + jal hv_install_context + bnez r0, 1f + + /* Finally, flush the TLB. */ + { + movei r0, 0 /* preserve_global */ + jal hv_flush_all + } + +1: /* Reset interrupts back how they were before. */ + mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics + + /* Restore the callee-saved registers and return. */ + addli lr, sp, FRAME_SIZE + { + ld lr, lr + addli r_tmp, sp, FRAME_R30 + } + { + ld r30, r_tmp + addli r_tmp, sp, FRAME_R31 + } + { + ld r31, r_tmp + addli r_tmp, sp, FRAME_R32 + } + { + ld r32, r_tmp + addli r_tmp, sp, FRAME_R33 + } + { + ld r33, r_tmp + addi sp, sp, FRAME_SIZE + } + jrp lr + STD_ENDPROC(flush_and_install_context) diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c new file mode 100644 index 00000000..f96f4cec --- /dev/null +++ b/arch/tile/mm/mmap.c @@ -0,0 +1,75 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Taken from the i386 architecture and simplified. + */ + +#include <linux/mm.h> +#include <linux/random.h> +#include <linux/limits.h> +#include <linux/sched.h> +#include <linux/mman.h> +#include <linux/compat.h> + +/* + * Top of mmap area (just below the process stack). + * + * Leave an at least ~128 MB hole. + */ +#define MIN_GAP (128*1024*1024) +#define MAX_GAP (TASK_SIZE/6*5) + +static inline unsigned long mmap_base(struct mm_struct *mm) +{ + unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long random_factor = 0; + + if (current->flags & PF_RANDOMIZE) + random_factor = get_random_int() % (1024*1024); + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(TASK_SIZE - gap - random_factor); +} + +/* + * This function, called very early during the creation of a new + * process VM image, sets up which VM layout function to use: + */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ +#if !defined(__tilegx__) + int is_32bit = 1; +#elif defined(CONFIG_COMPAT) + int is_32bit = is_compat_task(); +#else + int is_32bit = 0; +#endif + + /* + * Use standard layout if the expected stack growth is unlimited + * or we are running native 64 bits. + */ + if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) { + mm->mmap_base = TASK_UNMAPPED_BASE; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(mm); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c new file mode 100644 index 00000000..2410aa89 --- /dev/null +++ b/arch/tile/mm/pgtable.c @@ -0,0 +1,639 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/highmem.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/spinlock.h> +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/io.h> +#include <linux/vmalloc.h> +#include <linux/smp.h> + +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/fixmap.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/homecache.h> + +#define K(x) ((x) << (PAGE_SHIFT-10)) + +/* + * The normal show_free_areas() is too verbose on Tile, with dozens + * of processors and often four NUMA zones each with high and lowmem. + */ +void show_mem(unsigned int filter) +{ + struct zone *zone; + + pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu" + " free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu" + " pagecache:%lu swap:%lu\n", + (global_page_state(NR_ACTIVE_ANON) + + global_page_state(NR_ACTIVE_FILE)), + (global_page_state(NR_INACTIVE_ANON) + + global_page_state(NR_INACTIVE_FILE)), + global_page_state(NR_FILE_DIRTY), + global_page_state(NR_WRITEBACK), + global_page_state(NR_UNSTABLE_NFS), + global_page_state(NR_FREE_PAGES), + (global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE)), + global_page_state(NR_FILE_MAPPED), + global_page_state(NR_PAGETABLE), + global_page_state(NR_BOUNCE), + global_page_state(NR_FILE_PAGES), + nr_swap_pages); + + for_each_zone(zone) { + unsigned long flags, order, total = 0, largest_order = -1; + + if (!populated_zone(zone)) + continue; + + spin_lock_irqsave(&zone->lock, flags); + for (order = 0; order < MAX_ORDER; order++) { + int nr = zone->free_area[order].nr_free; + total += nr << order; + if (nr) + largest_order = order; + } + spin_unlock_irqrestore(&zone->lock, flags); + pr_err("Node %d %7s: %lukB (largest %luKb)\n", + zone_to_nid(zone), zone->name, + K(total), largest_order ? K(1UL) << largest_order : 0); + } +} + +/* + * Associate a virtual page frame with a given physical page frame + * and protection flags for that frame. + */ +static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = swapper_pg_dir + pgd_index(vaddr); + if (pgd_none(*pgd)) { + BUG(); + return; + } + pud = pud_offset(pgd, vaddr); + if (pud_none(*pud)) { + BUG(); + return; + } + pmd = pmd_offset(pud, vaddr); + if (pmd_none(*pmd)) { + BUG(); + return; + } + pte = pte_offset_kernel(pmd, vaddr); + /* <pfn,flags> stored as-is, to permit clearing entries */ + set_pte(pte, pfn_pte(pfn, flags)); + + /* + * It's enough to flush this one mapping. + * This appears conservative since it is only called + * from __set_fixmap. + */ + local_flush_tlb_page(NULL, vaddr, PAGE_SIZE); +} + +void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) +{ + unsigned long address = __fix_to_virt(idx); + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + set_pte_pfn(address, phys >> PAGE_SHIFT, flags); +} + +#if defined(CONFIG_HIGHPTE) +pte_t *_pte_offset_map(pmd_t *dir, unsigned long address) +{ + pte_t *pte = kmap_atomic(pmd_page(*dir)) + + (pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK; + return &pte[pte_index(address)]; +} +#endif + +/** + * shatter_huge_page() - ensure a given address is mapped by a small page. + * + * This function converts a huge PTE mapping kernel LOWMEM into a bunch + * of small PTEs with the same caching. No cache flush required, but we + * must do a global TLB flush. + * + * Any caller that wishes to modify a kernel mapping that might + * have been made with a huge page should call this function, + * since doing so properly avoids race conditions with installing the + * newly-shattered page and then flushing all the TLB entries. + * + * @addr: Address at which to shatter any existing huge page. + */ +void shatter_huge_page(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long flags = 0; /* happy compiler */ +#ifdef __PAGETABLE_PMD_FOLDED + struct list_head *pos; +#endif + + /* Get a pointer to the pmd entry that we need to change. */ + addr &= HPAGE_MASK; + BUG_ON(pgd_addr_invalid(addr)); + BUG_ON(addr < PAGE_OFFSET); /* only for kernel LOWMEM */ + pgd = swapper_pg_dir + pgd_index(addr); + pud = pud_offset(pgd, addr); + BUG_ON(!pud_present(*pud)); + pmd = pmd_offset(pud, addr); + BUG_ON(!pmd_present(*pmd)); + if (!pmd_huge_page(*pmd)) + return; + + spin_lock_irqsave(&init_mm.page_table_lock, flags); + if (!pmd_huge_page(*pmd)) { + /* Lost the race to convert the huge page. */ + spin_unlock_irqrestore(&init_mm.page_table_lock, flags); + return; + } + + /* Shatter the huge page into the preallocated L2 page table. */ + pmd_populate_kernel(&init_mm, pmd, + get_prealloc_pte(pte_pfn(*(pte_t *)pmd))); + +#ifdef __PAGETABLE_PMD_FOLDED + /* Walk every pgd on the system and update the pmd there. */ + spin_lock(&pgd_lock); + list_for_each(pos, &pgd_list) { + pmd_t *copy_pmd; + pgd = list_to_pgd(pos) + pgd_index(addr); + pud = pud_offset(pgd, addr); + copy_pmd = pmd_offset(pud, addr); + __set_pmd(copy_pmd, *pmd); + } + spin_unlock(&pgd_lock); +#endif + + /* Tell every cpu to notice the change. */ + flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE, + cpu_possible_mask, NULL, 0); + + /* Hold the lock until the TLB flush is finished to avoid races. */ + spin_unlock_irqrestore(&init_mm.page_table_lock, flags); +} + +/* + * List of all pgd's needed so it can invalidate entries in both cached + * and uncached pgd's. This is essentially codepath-based locking + * against pageattr.c; it is the unique case in which a valid change + * of kernel pagetables can't be lazily synchronized by vmalloc faults. + * vmalloc faults work because attached pagetables are never freed. + * + * The lock is always taken with interrupts disabled, unlike on x86 + * and other platforms, because we need to take the lock in + * shatter_huge_page(), which may be called from an interrupt context. + * We are not at risk from the tlbflush IPI deadlock that was seen on + * x86, since we use the flush_remote() API to have the hypervisor do + * the TLB flushes regardless of irq disabling. + */ +DEFINE_SPINLOCK(pgd_lock); +LIST_HEAD(pgd_list); + +static inline void pgd_list_add(pgd_t *pgd) +{ + list_add(pgd_to_list(pgd), &pgd_list); +} + +static inline void pgd_list_del(pgd_t *pgd) +{ + list_del(pgd_to_list(pgd)); +} + +#define KERNEL_PGD_INDEX_START pgd_index(PAGE_OFFSET) +#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_INDEX_START) + +static void pgd_ctor(pgd_t *pgd) +{ + unsigned long flags; + + memset(pgd, 0, KERNEL_PGD_INDEX_START*sizeof(pgd_t)); + spin_lock_irqsave(&pgd_lock, flags); + +#ifndef __tilegx__ + /* + * Check that the user interrupt vector has no L2. + * It never should for the swapper, and new page tables + * should always start with an empty user interrupt vector. + */ + BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0); +#endif + + memcpy(pgd + KERNEL_PGD_INDEX_START, + swapper_pg_dir + KERNEL_PGD_INDEX_START, + KERNEL_PGD_PTRS * sizeof(pgd_t)); + + pgd_list_add(pgd); + spin_unlock_irqrestore(&pgd_lock, flags); +} + +static void pgd_dtor(pgd_t *pgd) +{ + unsigned long flags; /* can be called from interrupt context */ + + spin_lock_irqsave(&pgd_lock, flags); + pgd_list_del(pgd); + spin_unlock_irqrestore(&pgd_lock, flags); +} + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL); + if (pgd) + pgd_ctor(pgd); + return pgd; +} + +void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + pgd_dtor(pgd); + kmem_cache_free(pgd_cache, pgd); +} + + +#define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER) + +struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO; + struct page *p; +#if L2_USER_PGTABLE_ORDER > 0 + int i; +#endif + +#ifdef CONFIG_HIGHPTE + flags |= __GFP_HIGHMEM; +#endif + + p = alloc_pages(flags, L2_USER_PGTABLE_ORDER); + if (p == NULL) + return NULL; + +#if L2_USER_PGTABLE_ORDER > 0 + /* + * Make every page have a page_count() of one, not just the first. + * We don't use __GFP_COMP since it doesn't look like it works + * correctly with tlb_remove_page(). + */ + for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { + init_page_count(p+i); + inc_zone_page_state(p+i, NR_PAGETABLE); + } +#endif + + pgtable_page_ctor(p); + return p; +} + +/* + * Free page immediately (used in __pte_alloc if we raced with another + * process). We have to correct whatever pte_alloc_one() did before + * returning the pages to the allocator. + */ +void pte_free(struct mm_struct *mm, struct page *p) +{ + int i; + + pgtable_page_dtor(p); + __free_page(p); + + for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { + __free_page(p+i); + dec_zone_page_state(p+i, NR_PAGETABLE); + } +} + +void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, + unsigned long address) +{ + int i; + + pgtable_page_dtor(pte); + tlb_remove_page(tlb, pte); + + for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { + tlb_remove_page(tlb, pte + i); + dec_zone_page_state(pte + i, NR_PAGETABLE); + } +} + +#ifndef __tilegx__ + +/* + * FIXME: needs to be atomic vs hypervisor writes. For now we make the + * window of vulnerability a bit smaller by doing an unlocked 8-bit update. + */ +int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +#if HV_PTE_INDEX_ACCESSED < 8 || HV_PTE_INDEX_ACCESSED >= 16 +# error Code assumes HV_PTE "accessed" bit in second byte +#endif + u8 *tmp = (u8 *)ptep; + u8 second_byte = tmp[1]; + if (!(second_byte & (1 << (HV_PTE_INDEX_ACCESSED - 8)))) + return 0; + tmp[1] = second_byte & ~(1 << (HV_PTE_INDEX_ACCESSED - 8)); + return 1; +} + +/* + * This implementation is atomic vs hypervisor writes, since the hypervisor + * always writes the low word (where "accessed" and "dirty" are) and this + * routine only writes the high word. + */ +void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ +#if HV_PTE_INDEX_WRITABLE < 32 +# error Code assumes HV_PTE "writable" bit in high word +#endif + u32 *tmp = (u32 *)ptep; + tmp[1] = tmp[1] & ~(1 << (HV_PTE_INDEX_WRITABLE - 32)); +} + +#endif + +pte_t *virt_to_pte(struct mm_struct* mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + if (pgd_addr_invalid(addr)) + return NULL; + + pgd = mm ? pgd_offset(mm, addr) : swapper_pg_dir + pgd_index(addr); + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) + return NULL; + pmd = pmd_offset(pud, addr); + if (pmd_huge_page(*pmd)) + return (pte_t *)pmd; + if (!pmd_present(*pmd)) + return NULL; + return pte_offset_kernel(pmd, addr); +} + +pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu) +{ + unsigned int width = smp_width; + int x = cpu % width; + int y = cpu / width; + BUG_ON(y >= smp_height); + BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3); + BUG_ON(cpu < 0 || cpu >= NR_CPUS); + BUG_ON(!cpu_is_valid_lotar(cpu)); + return hv_pte_set_lotar(prot, HV_XY_TO_LOTAR(x, y)); +} + +int get_remote_cache_cpu(pgprot_t prot) +{ + HV_LOTAR lotar = hv_pte_get_lotar(prot); + int x = HV_LOTAR_X(lotar); + int y = HV_LOTAR_Y(lotar); + BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3); + return x + y * smp_width; +} + +/* + * Convert a kernel VA to a PA and homing information. + */ +int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte) +{ + struct page *page = virt_to_page(va); + pte_t null_pte = { 0 }; + + *cpa = __pa(va); + + /* Note that this is not writing a page table, just returning a pte. */ + *pte = pte_set_home(null_pte, page_home(page)); + + return 0; /* return non-zero if not hfh? */ +} +EXPORT_SYMBOL(va_to_cpa_and_pte); + +void __set_pte(pte_t *ptep, pte_t pte) +{ +#ifdef __tilegx__ + *ptep = pte; +#else +# if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 +# error Must write the present and migrating bits last +# endif + if (pte_present(pte)) { + ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); + barrier(); + ((u32 *)ptep)[0] = (u32)(pte_val(pte)); + } else { + ((u32 *)ptep)[0] = (u32)(pte_val(pte)); + barrier(); + ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); + } +#endif /* __tilegx__ */ +} + +void set_pte(pte_t *ptep, pte_t pte) +{ + if (pte_present(pte) && + (!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) { + /* The PTE actually references physical memory. */ + unsigned long pfn = pte_pfn(pte); + if (pfn_valid(pfn)) { + /* Update the home of the PTE from the struct page. */ + pte = pte_set_home(pte, page_home(pfn_to_page(pfn))); + } else if (hv_pte_get_mode(pte) == 0) { + /* remap_pfn_range(), etc, must supply PTE mode. */ + panic("set_pte(): out-of-range PFN and mode 0\n"); + } + } + + __set_pte(ptep, pte); +} + +/* Can this mm load a PTE with cached_priority set? */ +static inline int mm_is_priority_cached(struct mm_struct *mm) +{ + return mm->context.priority_cached; +} + +/* + * Add a priority mapping to an mm_context and + * notify the hypervisor if this is the first one. + */ +void start_mm_caching(struct mm_struct *mm) +{ + if (!mm_is_priority_cached(mm)) { + mm->context.priority_cached = -1U; + hv_set_caching(-1U); + } +} + +/* + * Validate and return the priority_cached flag. We know if it's zero + * that we don't need to scan, since we immediately set it non-zero + * when we first consider a MAP_CACHE_PRIORITY mapping. + * + * We only _try_ to acquire the mmap_sem semaphore; if we can't acquire it, + * since we're in an interrupt context (servicing switch_mm) we don't + * worry about it and don't unset the "priority_cached" field. + * Presumably we'll come back later and have more luck and clear + * the value then; for now we'll just keep the cache marked for priority. + */ +static unsigned int update_priority_cached(struct mm_struct *mm) +{ + if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) { + struct vm_area_struct *vm; + for (vm = mm->mmap; vm; vm = vm->vm_next) { + if (hv_pte_get_cached_priority(vm->vm_page_prot)) + break; + } + if (vm == NULL) + mm->context.priority_cached = 0; + up_write(&mm->mmap_sem); + } + return mm->context.priority_cached; +} + +/* Set caching correctly for an mm that we are switching to. */ +void check_mm_caching(struct mm_struct *prev, struct mm_struct *next) +{ + if (!mm_is_priority_cached(next)) { + /* + * If the new mm doesn't use priority caching, just see if we + * need the hv_set_caching(), or can assume it's already zero. + */ + if (mm_is_priority_cached(prev)) + hv_set_caching(0); + } else { + hv_set_caching(update_priority_cached(next)); + } +} + +#if CHIP_HAS_MMIO() + +/* Map an arbitrary MMIO address, homed according to pgprot, into VA space. */ +void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, + pgprot_t home) +{ + void *addr; + struct vm_struct *area; + unsigned long offset, last_addr; + pgprot_t pgprot; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* Create a read/write, MMIO VA mapping homed at the requested shim. */ + pgprot = PAGE_KERNEL; + pgprot = hv_pte_set_mode(pgprot, HV_PTE_MODE_MMIO); + pgprot = hv_pte_set_lotar(pgprot, hv_pte_get_lotar(home)); + + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr+1) - phys_addr; + + /* + * Ok, go for it.. + */ + area = get_vm_area(size, VM_IOREMAP /* | other flags? */); + if (!area) + return NULL; + area->phys_addr = phys_addr; + addr = area->addr; + if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, + phys_addr, pgprot)) { + remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); + return NULL; + } + return (__force void __iomem *) (offset + (char *)addr); +} +EXPORT_SYMBOL(ioremap_prot); + +/* Map a PCI MMIO bus address into VA space. */ +void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) +{ + panic("ioremap for PCI MMIO is not supported"); +} +EXPORT_SYMBOL(ioremap); + +/* Unmap an MMIO VA mapping. */ +void iounmap(volatile void __iomem *addr_in) +{ + volatile void __iomem *addr = (volatile void __iomem *) + (PAGE_MASK & (unsigned long __force)addr_in); +#if 1 + vunmap((void * __force)addr); +#else + /* x86 uses this complicated flow instead of vunmap(). Is + * there any particular reason we should do the same? */ + struct vm_struct *p, *o; + + /* Use the vm area unlocked, assuming the caller + ensures there isn't another iounmap for the same address + in parallel. Reuse of the virtual address is prevented by + leaving it in the global lists until we're done with it. + cpa takes care of the direct mappings. */ + read_lock(&vmlist_lock); + for (p = vmlist; p; p = p->next) { + if (p->addr == addr) + break; + } + read_unlock(&vmlist_lock); + + if (!p) { + pr_err("iounmap: bad address %p\n", addr); + dump_stack(); + return; + } + + /* Finally remove it */ + o = remove_vm_area((void *)addr); + BUG_ON(p != o || o == NULL); + kfree(p); +#endif +} +EXPORT_SYMBOL(iounmap); + +#endif /* CHIP_HAS_MMIO() */ |