summaryrefslogtreecommitdiff
path: root/arch/tile
diff options
context:
space:
mode:
Diffstat (limited to 'arch/tile')
-rw-r--r--arch/tile/Kbuild3
-rw-r--r--arch/tile/Kconfig383
-rw-r--r--arch/tile/Kconfig.debug34
-rw-r--r--arch/tile/Makefile68
-rw-r--r--arch/tile/configs/tilegx_defconfig587
-rw-r--r--arch/tile/configs/tilepro_defconfig579
-rw-r--r--arch/tile/include/arch/Kbuild17
-rw-r--r--arch/tile/include/arch/abi.h141
-rw-r--r--arch/tile/include/arch/chip.h23
-rw-r--r--arch/tile/include/arch/chip_tile64.h258
-rw-r--r--arch/tile/include/arch/chip_tilegx.h258
-rw-r--r--arch/tile/include/arch/chip_tilepro.h258
-rw-r--r--arch/tile/include/arch/icache.h93
-rw-r--r--arch/tile/include/arch/interrupts.h19
-rw-r--r--arch/tile/include/arch/interrupts_32.h307
-rw-r--r--arch/tile/include/arch/interrupts_64.h276
-rw-r--r--arch/tile/include/arch/opcode.h21
-rw-r--r--arch/tile/include/arch/opcode_tilegx.h1405
-rw-r--r--arch/tile/include/arch/opcode_tilepro.h1471
-rw-r--r--arch/tile/include/arch/sim.h643
-rw-r--r--arch/tile/include/arch/sim_def.h505
-rw-r--r--arch/tile/include/arch/spr_def.h113
-rw-r--r--arch/tile/include/arch/spr_def_32.h201
-rw-r--r--arch/tile/include/arch/spr_def_64.h173
-rw-r--r--arch/tile/include/asm/Kbuild44
-rw-r--r--arch/tile/include/asm/asm-offsets.h1
-rw-r--r--arch/tile/include/asm/atomic.h134
-rw-r--r--arch/tile/include/asm/atomic_32.h324
-rw-r--r--arch/tile/include/asm/atomic_64.h157
-rw-r--r--arch/tile/include/asm/auxvec.h20
-rw-r--r--arch/tile/include/asm/backtrace.h162
-rw-r--r--arch/tile/include/asm/barrier.h148
-rw-r--r--arch/tile/include/asm/bitops.h128
-rw-r--r--arch/tile/include/asm/bitops_32.h130
-rw-r--r--arch/tile/include/asm/bitops_64.h101
-rw-r--r--arch/tile/include/asm/bitsperlong.h26
-rw-r--r--arch/tile/include/asm/byteorder.h1
-rw-r--r--arch/tile/include/asm/cache.h51
-rw-r--r--arch/tile/include/asm/cacheflush.h164
-rw-r--r--arch/tile/include/asm/checksum.h24
-rw-r--r--arch/tile/include/asm/cmpxchg.h73
-rw-r--r--arch/tile/include/asm/compat.h256
-rw-r--r--arch/tile/include/asm/current.h31
-rw-r--r--arch/tile/include/asm/delay.h34
-rw-r--r--arch/tile/include/asm/dma-mapping.h94
-rw-r--r--arch/tile/include/asm/dma.h25
-rw-r--r--arch/tile/include/asm/edac.h29
-rw-r--r--arch/tile/include/asm/elf.h167
-rw-r--r--arch/tile/include/asm/exec.h20
-rw-r--r--arch/tile/include/asm/fixmap.h118
-rw-r--r--arch/tile/include/asm/ftrace.h20
-rw-r--r--arch/tile/include/asm/futex.h142
-rw-r--r--arch/tile/include/asm/hardirq.h47
-rw-r--r--arch/tile/include/asm/hardwall.h65
-rw-r--r--arch/tile/include/asm/highmem.h72
-rw-r--r--arch/tile/include/asm/homecache.h125
-rw-r--r--arch/tile/include/asm/hugetlb.h109
-rw-r--r--arch/tile/include/asm/hv_driver.h60
-rw-r--r--arch/tile/include/asm/hw_irq.h18
-rw-r--r--arch/tile/include/asm/ide.h25
-rw-r--r--arch/tile/include/asm/io.h305
-rw-r--r--arch/tile/include/asm/irq.h79
-rw-r--r--arch/tile/include/asm/irqflags.h282
-rw-r--r--arch/tile/include/asm/kexec.h53
-rw-r--r--arch/tile/include/asm/kmap_types.h57
-rw-r--r--arch/tile/include/asm/linkage.h51
-rw-r--r--arch/tile/include/asm/memprof.h33
-rw-r--r--arch/tile/include/asm/mman.h41
-rw-r--r--arch/tile/include/asm/mmu.h31
-rw-r--r--arch/tile/include/asm/mmu_context.h131
-rw-r--r--arch/tile/include/asm/mmzone.h70
-rw-r--r--arch/tile/include/asm/page.h336
-rw-r--r--arch/tile/include/asm/pci.h97
-rw-r--r--arch/tile/include/asm/percpu.h24
-rw-r--r--arch/tile/include/asm/pgalloc.h122
-rw-r--r--arch/tile/include/asm/pgtable.h465
-rw-r--r--arch/tile/include/asm/pgtable_32.h135
-rw-r--r--arch/tile/include/asm/pgtable_64.h175
-rw-r--r--arch/tile/include/asm/processor.h357
-rw-r--r--arch/tile/include/asm/ptrace.h164
-rw-r--r--arch/tile/include/asm/sections.h44
-rw-r--r--arch/tile/include/asm/setup.h58
-rw-r--r--arch/tile/include/asm/sigcontext.h37
-rw-r--r--arch/tile/include/asm/sigframe.h33
-rw-r--r--arch/tile/include/asm/siginfo.h34
-rw-r--r--arch/tile/include/asm/signal.h39
-rw-r--r--arch/tile/include/asm/smp.h140
-rw-r--r--arch/tile/include/asm/spinlock.h24
-rw-r--r--arch/tile/include/asm/spinlock_32.h129
-rw-r--r--arch/tile/include/asm/spinlock_64.h161
-rw-r--r--arch/tile/include/asm/spinlock_types.h60
-rw-r--r--arch/tile/include/asm/stack.h74
-rw-r--r--arch/tile/include/asm/stat.h4
-rw-r--r--arch/tile/include/asm/string.h32
-rw-r--r--arch/tile/include/asm/swab.h23
-rw-r--r--arch/tile/include/asm/switch_to.h76
-rw-r--r--arch/tile/include/asm/syscall.h79
-rw-r--r--arch/tile/include/asm/syscalls.h76
-rw-r--r--arch/tile/include/asm/thread_info.h175
-rw-r--r--arch/tile/include/asm/tile-desc.h19
-rw-r--r--arch/tile/include/asm/tile-desc_32.h553
-rw-r--r--arch/tile/include/asm/tile-desc_64.h483
-rw-r--r--arch/tile/include/asm/timex.h52
-rw-r--r--arch/tile/include/asm/tlb.h25
-rw-r--r--arch/tile/include/asm/tlbflush.h128
-rw-r--r--arch/tile/include/asm/topology.h124
-rw-r--r--arch/tile/include/asm/traps.h74
-rw-r--r--arch/tile/include/asm/uaccess.h580
-rw-r--r--arch/tile/include/asm/unaligned.h39
-rw-r--r--arch/tile/include/asm/unistd.h47
-rw-r--r--arch/tile/include/asm/user.h21
-rw-r--r--arch/tile/include/asm/vga.h39
-rw-r--r--arch/tile/include/hv/drv_mshim_intf.h50
-rw-r--r--arch/tile/include/hv/drv_pcie_rc_intf.h38
-rw-r--r--arch/tile/include/hv/drv_srom_intf.h41
-rw-r--r--arch/tile/include/hv/drv_xgbe_impl.h300
-rw-r--r--arch/tile/include/hv/drv_xgbe_intf.h615
-rw-r--r--arch/tile/include/hv/hypervisor.h2427
-rw-r--r--arch/tile/include/hv/netio_errors.h122
-rw-r--r--arch/tile/include/hv/netio_intf.h2975
-rw-r--r--arch/tile/include/hv/syscall_public.h42
-rw-r--r--arch/tile/kernel/Makefile18
-rw-r--r--arch/tile/kernel/asm-offsets.c76
-rw-r--r--arch/tile/kernel/backtrace.c678
-rw-r--r--arch/tile/kernel/compat.c118
-rw-r--r--arch/tile/kernel/compat_signal.c431
-rw-r--r--arch/tile/kernel/early_printk.c110
-rw-r--r--arch/tile/kernel/entry.S111
-rw-r--r--arch/tile/kernel/futex_64.S55
-rw-r--r--arch/tile/kernel/hardwall.c837
-rw-r--r--arch/tile/kernel/head_32.S184
-rw-r--r--arch/tile/kernel/head_64.S269
-rw-r--r--arch/tile/kernel/hvglue.lds58
-rw-r--r--arch/tile/kernel/init_task.c59
-rw-r--r--arch/tile/kernel/intvec_32.S1944
-rw-r--r--arch/tile/kernel/intvec_64.S1289
-rw-r--r--arch/tile/kernel/irq.c295
-rw-r--r--arch/tile/kernel/machine_kexec.c282
-rw-r--r--arch/tile/kernel/messaging.c116
-rw-r--r--arch/tile/kernel/module.c234
-rw-r--r--arch/tile/kernel/pci-dma.c252
-rw-r--r--arch/tile/kernel/pci.c632
-rw-r--r--arch/tile/kernel/proc.c162
-rw-r--r--arch/tile/kernel/process.c749
-rw-r--r--arch/tile/kernel/ptrace.c205
-rw-r--r--arch/tile/kernel/reboot.c51
-rw-r--r--arch/tile/kernel/regs_32.S145
-rw-r--r--arch/tile/kernel/regs_64.S145
-rw-r--r--arch/tile/kernel/relocate_kernel.S280
-rw-r--r--arch/tile/kernel/setup.c1532
-rw-r--r--arch/tile/kernel/signal.c476
-rw-r--r--arch/tile/kernel/single_step.c768
-rw-r--r--arch/tile/kernel/smp.c244
-rw-r--r--arch/tile/kernel/smpboot.c279
-rw-r--r--arch/tile/kernel/stack.c491
-rw-r--r--arch/tile/kernel/sys.c119
-rw-r--r--arch/tile/kernel/sysfs.c185
-rw-r--r--arch/tile/kernel/tile-desc_32.c2605
-rw-r--r--arch/tile/kernel/tile-desc_64.c2218
-rw-r--r--arch/tile/kernel/time.c235
-rw-r--r--arch/tile/kernel/tlb.c97
-rw-r--r--arch/tile/kernel/traps.c329
-rw-r--r--arch/tile/kernel/vmlinux.lds.S95
-rw-r--r--arch/tile/kvm/Kconfig37
-rw-r--r--arch/tile/lib/Makefile18
-rw-r--r--arch/tile/lib/atomic_32.c329
-rw-r--r--arch/tile/lib/atomic_asm_32.S196
-rw-r--r--arch/tile/lib/cacheflush.c167
-rw-r--r--arch/tile/lib/checksum.c102
-rw-r--r--arch/tile/lib/cpumask.c52
-rw-r--r--arch/tile/lib/delay.c45
-rw-r--r--arch/tile/lib/exports.c93
-rw-r--r--arch/tile/lib/memchr_32.c71
-rw-r--r--arch/tile/lib/memchr_64.c71
-rw-r--r--arch/tile/lib/memcpy_32.S618
-rw-r--r--arch/tile/lib/memcpy_64.c220
-rw-r--r--arch/tile/lib/memcpy_tile64.c276
-rw-r--r--arch/tile/lib/memcpy_user_64.c92
-rw-r--r--arch/tile/lib/memmove.c63
-rw-r--r--arch/tile/lib/memset_32.c251
-rw-r--r--arch/tile/lib/memset_64.c145
-rw-r--r--arch/tile/lib/spinlock_32.c259
-rw-r--r--arch/tile/lib/spinlock_64.c104
-rw-r--r--arch/tile/lib/spinlock_common.h64
-rw-r--r--arch/tile/lib/strchr_32.c66
-rw-r--r--arch/tile/lib/strchr_64.c67
-rw-r--r--arch/tile/lib/strlen_32.c38
-rw-r--r--arch/tile/lib/strlen_64.c38
-rw-r--r--arch/tile/lib/uaccess.c32
-rw-r--r--arch/tile/lib/usercopy_32.S223
-rw-r--r--arch/tile/lib/usercopy_64.S196
-rw-r--r--arch/tile/mm/Makefile9
-rw-r--r--arch/tile/mm/elf.c159
-rw-r--r--arch/tile/mm/extable.c30
-rw-r--r--arch/tile/mm/fault.c883
-rw-r--r--arch/tile/mm/highmem.c290
-rw-r--r--arch/tile/mm/homecache.c461
-rw-r--r--arch/tile/mm/hugetlbpage.c342
-rw-r--r--arch/tile/mm/init.c1085
-rw-r--r--arch/tile/mm/migrate.h50
-rw-r--r--arch/tile/mm/migrate_32.S212
-rw-r--r--arch/tile/mm/migrate_64.S187
-rw-r--r--arch/tile/mm/mmap.c75
-rw-r--r--arch/tile/mm/pgtable.c639
204 files changed, 51894 insertions, 0 deletions
diff --git a/arch/tile/Kbuild b/arch/tile/Kbuild
new file mode 100644
index 00000000..a9b92271
--- /dev/null
+++ b/arch/tile/Kbuild
@@ -0,0 +1,3 @@
+
+obj-y += kernel/
+obj-y += mm/
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
new file mode 100644
index 00000000..74239dd7
--- /dev/null
+++ b/arch/tile/Kconfig
@@ -0,0 +1,383 @@
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+
+config TILE
+ def_bool y
+ select HAVE_KVM if !TILEGX
+ select GENERIC_FIND_FIRST_BIT
+ select USE_GENERIC_SMP_HELPERS
+ select CC_OPTIMIZE_FOR_SIZE
+ select HAVE_GENERIC_HARDIRQS
+ select GENERIC_IRQ_PROBE
+ select GENERIC_PENDING_IRQ if SMP
+ select GENERIC_IRQ_SHOW
+ select HAVE_SYSCALL_WRAPPERS if TILEGX
+ select SYS_HYPERVISOR
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG
+
+# FIXME: investigate whether we need/want these options.
+# select HAVE_IOREMAP_PROT
+# select HAVE_OPTPROBES
+# select HAVE_REGS_AND_STACK_ACCESS_API
+# select HAVE_HW_BREAKPOINT
+# select PERF_EVENTS
+# select HAVE_USER_RETURN_NOTIFIER
+# config NO_BOOTMEM
+# config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+# config HUGETLB_PAGE_SIZE_VARIABLE
+
+config MMU
+ def_bool y
+
+config GENERIC_CSUM
+ def_bool y
+
+config SEMAPHORE_SLEEPERS
+ def_bool y
+
+config HAVE_ARCH_ALLOC_REMAP
+ def_bool y
+
+config HAVE_SETUP_PER_CPU_AREA
+ def_bool y
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+ def_bool y
+
+config SYS_SUPPORTS_HUGETLBFS
+ def_bool y
+
+config GENERIC_CLOCKEVENTS
+ def_bool y
+
+# FIXME: tilegx can implement a more efficient rwsem.
+config RWSEM_GENERIC_SPINLOCK
+ def_bool y
+
+# We have a very flat architecture from a migration point of view,
+# so save boot time by presetting this (particularly useful on tile-sim).
+config DEFAULT_MIGRATION_COST
+ int
+ default "10000000"
+
+# We only support gcc 4.4 and above, so this should work.
+config ARCH_SUPPORTS_OPTIMIZED_INLINING
+ def_bool y
+
+config ARCH_PHYS_ADDR_T_64BIT
+ def_bool y
+
+config ARCH_DMA_ADDR_T_64BIT
+ def_bool y
+
+config NEED_DMA_MAP_STATE
+ def_bool y
+
+config LOCKDEP_SUPPORT
+ def_bool y
+
+config STACKTRACE_SUPPORT
+ def_bool y
+ select STACKTRACE
+
+# We use discontigmem for now; at some point we may want to switch
+# to sparsemem (Tilera bug 7996).
+config ARCH_DISCONTIGMEM_ENABLE
+ def_bool y
+
+config ARCH_DISCONTIGMEM_DEFAULT
+ def_bool y
+
+config TRACE_IRQFLAGS_SUPPORT
+ def_bool y
+
+config STRICT_DEVMEM
+ def_bool y
+
+# SMP is required for Tilera Linux.
+config SMP
+ def_bool y
+
+# Allow checking for compile-time determined overflow errors in
+# copy_from_user(). There are still unprovable places in the
+# generic code as of 2.6.34, so this option is not really compatible
+# with -Werror, which is more useful in general.
+config DEBUG_COPY_FROM_USER
+ def_bool n
+
+config HVC_TILE
+ select HVC_DRIVER
+ def_bool y
+
+# Please note: TILE-Gx support is not yet finalized; this is
+# the preliminary support. TILE-Gx drivers are only provided
+# with the alpha or beta test versions for Tilera customers.
+config TILEGX
+ depends on EXPERIMENTAL
+ bool "Building with TILE-Gx (64-bit) compiler and toolchain"
+
+config 64BIT
+ depends on TILEGX
+ def_bool y
+
+config ARCH_DEFCONFIG
+ string
+ default "arch/tile/configs/tilepro_defconfig" if !TILEGX
+ default "arch/tile/configs/tilegx_defconfig" if TILEGX
+
+source "init/Kconfig"
+
+menu "Tilera-specific configuration"
+
+config NR_CPUS
+ int "Maximum number of tiles (2-255)"
+ range 2 255
+ depends on SMP
+ default "64"
+ ---help---
+ Building with 64 is the recommended value, but a slightly
+ smaller kernel memory footprint results from using a smaller
+ value on chips with fewer tiles.
+
+source "kernel/time/Kconfig"
+
+source "kernel/Kconfig.hz"
+
+config KEXEC
+ bool "kexec system call"
+ ---help---
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+ but it is independent of the system firmware. It is used
+ to implement the "mboot" Tilera booter.
+
+ The name comes from the similarity to the exec system call.
+
+config COMPAT
+ bool "Support 32-bit TILE-Gx binaries in addition to 64-bit"
+ depends on TILEGX
+ select COMPAT_BINFMT_ELF
+ default y
+ ---help---
+ If enabled, the kernel will support running TILE-Gx binaries
+ that were built with the -m32 option.
+
+config SYSVIPC_COMPAT
+ def_bool y
+ depends on COMPAT && SYSVIPC
+
+# We do not currently support disabling HIGHMEM on tile64 and tilepro.
+config HIGHMEM
+ bool # "Support for more than 512 MB of RAM"
+ default !TILEGX
+ ---help---
+ Linux can use the full amount of RAM in the system by
+ default. However, the address space of TILE processors is
+ only 4 Gigabytes large. That means that, if you have a large
+ amount of physical memory, not all of it can be "permanently
+ mapped" by the kernel. The physical memory that's not
+ permanently mapped is called "high memory".
+
+ If you are compiling a kernel which will never run on a
+ machine with more than 512 MB total physical RAM, answer
+ "false" here. This will result in the kernel mapping all of
+ physical memory into the top 1 GB of virtual memory space.
+
+ If unsure, say "true".
+
+# We do not currently support disabling NUMA.
+config NUMA
+ bool # "NUMA Memory Allocation and Scheduler Support"
+ depends on SMP && DISCONTIGMEM
+ default y
+ ---help---
+ NUMA memory allocation is required for TILE processors
+ unless booting with memory striping enabled in the
+ hypervisor, or with only a single memory controller.
+ It is recommended that this option always be enabled.
+
+config NODES_SHIFT
+ int "Log base 2 of the max number of memory controllers"
+ default 2
+ depends on NEED_MULTIPLE_NODES
+ ---help---
+ By default, 2, i.e. 2^2 == 4 DDR2 controllers.
+ In a system with more controllers, this value should be raised.
+
+choice
+ depends on !TILEGX
+ prompt "Memory split" if EXPERT
+ default VMSPLIT_3G
+ ---help---
+ Select the desired split between kernel and user memory.
+
+ If the address range available to the kernel is less than the
+ physical memory installed, the remaining memory will be available
+ as "high memory". Accessing high memory is a little more costly
+ than low memory, as it needs to be mapped into the kernel first.
+ Note that increasing the kernel address space limits the range
+ available to user programs, making the address space there
+ tighter. Selecting anything other than the default 3G/1G split
+ will also likely make your kernel incompatible with binary-only
+ kernel modules.
+
+ If you are not absolutely sure what you are doing, leave this
+ option alone!
+
+ config VMSPLIT_3_75G
+ bool "3.75G/0.25G user/kernel split (no kernel networking)"
+ config VMSPLIT_3_5G
+ bool "3.5G/0.5G user/kernel split"
+ config VMSPLIT_3G
+ bool "3G/1G user/kernel split"
+ config VMSPLIT_2_75G
+ bool "2.75G/1.25G user/kernel split (for full 1G low memory)"
+ config VMSPLIT_2_5G
+ bool "2.5G/1.5G user/kernel split"
+ config VMSPLIT_2_25G
+ bool "2.25G/1.75G user/kernel split"
+ config VMSPLIT_2G
+ bool "2G/2G user/kernel split"
+ config VMSPLIT_1G
+ bool "1G/3G user/kernel split"
+endchoice
+
+config PAGE_OFFSET
+ hex
+ depends on !64BIT
+ default 0xF0000000 if VMSPLIT_3_75G
+ default 0xE0000000 if VMSPLIT_3_5G
+ default 0xB0000000 if VMSPLIT_2_75G
+ default 0xA0000000 if VMSPLIT_2_5G
+ default 0x90000000 if VMSPLIT_2_25G
+ default 0x80000000 if VMSPLIT_2G
+ default 0x40000000 if VMSPLIT_1G
+ default 0xC0000000
+
+source "mm/Kconfig"
+
+config CMDLINE_BOOL
+ bool "Built-in kernel command line"
+ default n
+ ---help---
+ Allow for specifying boot arguments to the kernel at
+ build time. On some systems (e.g. embedded ones), it is
+ necessary or convenient to provide some or all of the
+ kernel boot arguments with the kernel itself (that is,
+ to not rely on the boot loader to provide them.)
+
+ To compile command line arguments into the kernel,
+ set this option to 'Y', then fill in
+ the boot arguments in CONFIG_CMDLINE.
+
+ Systems with fully functional boot loaders (e.g. mboot, or
+ if booting over PCI) should leave this option set to 'N'.
+
+config CMDLINE
+ string "Built-in kernel command string"
+ depends on CMDLINE_BOOL
+ default ""
+ ---help---
+ Enter arguments here that should be compiled into the kernel
+ image and used at boot time. If the boot loader provides a
+ command line at boot time, it is appended to this string to
+ form the full kernel command line, when the system boots.
+
+ However, you can use the CONFIG_CMDLINE_OVERRIDE option to
+ change this behavior.
+
+ In most cases, the command line (whether built-in or provided
+ by the boot loader) should specify the device for the root
+ file system.
+
+config CMDLINE_OVERRIDE
+ bool "Built-in command line overrides boot loader arguments"
+ default n
+ depends on CMDLINE_BOOL
+ ---help---
+ Set this option to 'Y' to have the kernel ignore the boot loader
+ command line, and use ONLY the built-in command line.
+
+ This is used to work around broken boot loaders. This should
+ be set to 'N' under normal conditions.
+
+config VMALLOC_RESERVE
+ hex
+ default 0x1000000
+
+config HARDWALL
+ bool "Hardwall support to allow access to user dynamic network"
+ default y
+
+config KERNEL_PL
+ int "Processor protection level for kernel"
+ range 1 2
+ default "1"
+ ---help---
+ This setting determines the processor protection level the
+ kernel will be built to run at. Generally you should use
+ the default value here.
+
+endmenu # Tilera-specific configuration
+
+menu "Bus options"
+
+config PCI
+ bool "PCI support"
+ default y
+ select PCI_DOMAINS
+ select GENERIC_PCI_IOMAP
+ ---help---
+ Enable PCI root complex support, so PCIe endpoint devices can
+ be attached to the Tile chip. Many, but not all, PCI devices
+ are supported under Tilera's root complex driver.
+
+config PCI_DOMAINS
+ bool
+
+config NO_IOMEM
+ def_bool !PCI
+
+config NO_IOPORT
+ def_bool !PCI
+
+source "drivers/pci/Kconfig"
+
+config HOTPLUG
+ bool "Support for hot-pluggable devices"
+ ---help---
+ Say Y here if you want to plug devices into your computer while
+ the system is running, and be able to use them quickly. In many
+ cases, the devices can likewise be unplugged at any time too.
+ One well-known example of this is USB.
+
+source "drivers/pci/hotplug/Kconfig"
+
+endmenu
+
+menu "Executable file formats"
+
+# only elf supported
+config KCORE_ELF
+ def_bool y
+ depends on PROC_FS
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/tile/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
+
+source "arch/tile/kvm/Kconfig"
diff --git a/arch/tile/Kconfig.debug b/arch/tile/Kconfig.debug
new file mode 100644
index 00000000..ddbfc332
--- /dev/null
+++ b/arch/tile/Kconfig.debug
@@ -0,0 +1,34 @@
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+config EARLY_PRINTK
+ bool "Early printk" if EXPERT && DEBUG_KERNEL
+ default y
+ help
+ Write kernel log output directly via the hypervisor console.
+
+ This is useful for kernel debugging when your machine crashes very
+ early before the console code is initialized. For normal operation
+ it is not recommended because it looks ugly and doesn't cooperate
+ with klogd/syslogd. You should normally say N here,
+ unless you want to debug such a crash.
+
+config DEBUG_STACKOVERFLOW
+ bool "Check for stack overflows"
+ depends on DEBUG_KERNEL
+ help
+ This option will cause messages to be printed if free stack space
+ drops below a certain limit.
+
+config DEBUG_EXTRA_FLAGS
+ string "Additional compiler arguments when building with '-g'"
+ depends on DEBUG_INFO
+ default ""
+ help
+ Debug info can be large, and flags like
+ `-femit-struct-debug-baseonly' can reduce the kernel file
+ size and build time noticeably. Such flags are often
+ helpful if the main use of debug info is line number info.
+
+endmenu
diff --git a/arch/tile/Makefile b/arch/tile/Makefile
new file mode 100644
index 00000000..9520bc5a
--- /dev/null
+++ b/arch/tile/Makefile
@@ -0,0 +1,68 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture
+
+# If building with TILERA_ROOT set (i.e. using the Tilera Multicore
+# Development Environment) we can set CROSS_COMPILE based on that.
+# If we're not cross-compiling, make sure we're on the right architecture.
+# Only bother to test for a few common targets, to avoid useless errors.
+ifeq ($(CROSS_COMPILE),)
+ ifdef TILERA_ROOT
+ CROSS_COMPILE := $(TILERA_ROOT)/bin/tile-
+ else
+ goals := $(if $(MAKECMDGOALS), $(MAKECMDGOALS), all)
+ ifneq ($(strip $(filter vmlinux modules all,$(goals))),)
+ HOST_ARCH := $(shell uname -m)
+ ifneq ($(HOST_ARCH),$(ARCH))
+$(error Set TILERA_ROOT or CROSS_COMPILE when building $(ARCH) on $(HOST_ARCH))
+ endif
+ endif
+ endif
+endif
+
+ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"")
+KBUILD_CFLAGS += $(CONFIG_DEBUG_EXTRA_FLAGS)
+endif
+
+LIBGCC_PATH := \
+ $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
+
+# Provide the path to use for "make defconfig".
+KBUILD_DEFCONFIG := $(ARCH)_defconfig
+
+# Used as a file extension when useful, e.g. head_$(BITS).o
+# Not needed for (e.g.) "$(CC) -m32" since the compiler automatically
+# uses the right default anyway.
+export BITS
+ifeq ($(CONFIG_TILEGX),y)
+BITS := 64
+else
+BITS := 32
+endif
+
+head-y := arch/tile/kernel/head_$(BITS).o
+
+libs-y += arch/tile/lib/
+libs-y += $(LIBGCC_PATH)
+
+# See arch/tile/Kbuild for content of core part of the kernel
+core-y += arch/tile/
+
+ifdef TILERA_ROOT
+INSTALL_PATH ?= $(TILERA_ROOT)/tile/boot
+endif
+
+install:
+ install -D -m 755 vmlinux $(INSTALL_PATH)/vmlinux-$(KERNELRELEASE)
+ install -D -m 644 .config $(INSTALL_PATH)/config-$(KERNELRELEASE)
+ install -D -m 644 System.map $(INSTALL_PATH)/System.map-$(KERNELRELEASE)
+
+define archhelp
+ echo ' install - install kernel into $(INSTALL_PATH)'
+endef
diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig
new file mode 100644
index 00000000..b8d99aca
--- /dev/null
+++ b/arch/tile/configs/tilegx_defconfig
@@ -0,0 +1,587 @@
+CONFIG_TILEGX=y
+CONFIG_EXPERIMENTAL=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_FHANDLE=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_AUDIT=y
+CONFIG_LOG_BUF_SHIFT=19
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_MEM_RES_CTLR=y
+CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_NR_CPUS=100
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HZ_100=y
+CONFIG_PCI_DEBUG=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_KEY=m
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6=y
+CONFIG_IPV6_PRIVACY=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NETFILTER_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_IPV6=y
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_PROTO_SCTP=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_IP6_NF_QUEUE=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_IP6=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_BRIDGE_EBT_ULOG=m
+CONFIG_BRIDGE_EBT_NFLOG=m
+CONFIG_RDS=m
+CONFIG_RDS_TCP=m
+CONFIG_BRIDGE=m
+CONFIG_NET_DSA=y
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_PHONET=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_CLS_IND=y
+CONFIG_DCB=y
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_SX8=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_ATA_OVER_ETH=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI_TGT=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SAS_ATA=y
+CONFIG_SCSI_MVSAS=y
+# CONFIG_SCSI_MVSAS_DEBUG is not set
+CONFIG_SCSI_MVSAS_TASKLET=y
+CONFIG_ATA=y
+CONFIG_SATA_SIL24=y
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MULTICORE_RAID456=y
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_DEBUG=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_FUSION=y
+CONFIG_FUSION_SAS=y
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETPOLL_TRAP=y
+CONFIG_TUN=y
+CONFIG_VETH=m
+CONFIG_NET_DSA_MV88E6060=y
+CONFIG_NET_DSA_MV88E6131=y
+CONFIG_NET_DSA_MV88E6123_61_65=y
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_ALTEON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_EXAR is not set
+# CONFIG_NET_VENDOR_HP is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MELLANOX is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_PACKET_ENGINE is not set
+# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_TILE_NET is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_WLAN is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_TIMERIOMEM=m
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+# CONFIG_VGA_ARB is not set
+# CONFIG_HID_SUPPORT is not set
+CONFIG_USB=y
+# CONFIG_USB_DEVICE_CLASS is not set
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_LIBUSUAL=y
+CONFIG_EDAC=y
+CONFIG_EDAC_MM_EDAC=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_TILE=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_QFMT_V2=y
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_FSCACHE_STATS=y
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_FSCACHE=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_CIFS_FSCACHE=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM_DEBUG=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_FS=y
+CONFIG_HEADERS_CHECK=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_REDUCED=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_CREDENTIALS=y
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITYFS=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_NETWORK_XFRM=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_PCRYPT=m
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32C=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=m
+CONFIG_CRYPTO_LZO=m
diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig
new file mode 100644
index 00000000..2b1fd318
--- /dev/null
+++ b/arch/tile/configs/tilepro_defconfig
@@ -0,0 +1,579 @@
+CONFIG_EXPERIMENTAL=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_FHANDLE=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_AUDIT=y
+CONFIG_LOG_BUF_SHIFT=19
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_MEM_RES_CTLR=y
+CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HZ_100=y
+CONFIG_PCI_DEBUG=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_KEY=m
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6=y
+CONFIG_IPV6_PRIVACY=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NETFILTER_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_IPV6=y
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_PROTO_SCTP=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_IP6_NF_QUEUE=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_IP6=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_BRIDGE_EBT_ULOG=m
+CONFIG_BRIDGE_EBT_NFLOG=m
+CONFIG_RDS=m
+CONFIG_RDS_TCP=m
+CONFIG_BRIDGE=m
+CONFIG_NET_DSA=y
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_PHONET=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_CLS_IND=y
+CONFIG_DCB=y
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_SX8=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_ATA_OVER_ETH=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI_TGT=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_ATA=y
+CONFIG_SATA_SIL24=y
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MULTICORE_RAID456=y
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_DEBUG=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_FUSION=y
+CONFIG_FUSION_SAS=y
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETPOLL_TRAP=y
+CONFIG_TUN=y
+CONFIG_VETH=m
+CONFIG_NET_DSA_MV88E6060=y
+CONFIG_NET_DSA_MV88E6131=y
+CONFIG_NET_DSA_MV88E6123_61_65=y
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_ALTEON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_EXAR is not set
+# CONFIG_NET_VENDOR_HP is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MELLANOX is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_PACKET_ENGINE is not set
+# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_WLAN is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_TIMERIOMEM=m
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+# CONFIG_VGA_ARB is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_EDAC=y
+CONFIG_EDAC_MM_EDAC=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_TILE=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_QFMT_V2=y
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_FSCACHE_STATS=y
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_FSCACHE=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_CIFS_FSCACHE=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM_DEBUG=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+CONFIG_FRAME_WARN=2048
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_FS=y
+CONFIG_HEADERS_CHECK=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_REDUCED=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_CREDENTIALS=y
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITYFS=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_NETWORK_XFRM=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_PCRYPT=m
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32C=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRC_CCITT=m
+CONFIG_CRC7=m
diff --git a/arch/tile/include/arch/Kbuild b/arch/tile/include/arch/Kbuild
new file mode 100644
index 00000000..9c0ea24c
--- /dev/null
+++ b/arch/tile/include/arch/Kbuild
@@ -0,0 +1,17 @@
+header-y += abi.h
+header-y += chip.h
+header-y += chip_tile64.h
+header-y += chip_tilegx.h
+header-y += chip_tilepro.h
+header-y += icache.h
+header-y += interrupts.h
+header-y += interrupts_32.h
+header-y += interrupts_64.h
+header-y += opcode.h
+header-y += opcode_tilegx.h
+header-y += opcode_tilepro.h
+header-y += sim.h
+header-y += sim_def.h
+header-y += spr_def.h
+header-y += spr_def_32.h
+header-y += spr_def_64.h
diff --git a/arch/tile/include/arch/abi.h b/arch/tile/include/arch/abi.h
new file mode 100644
index 00000000..c55a3d43
--- /dev/null
+++ b/arch/tile/include/arch/abi.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file
+ * ABI-related register definitions.  When __need_int_reg_t is defined,
+ * only the __-prefixed basics are provided and the guard stays unset.
+ */
+
+#ifndef __ARCH_ABI_H__
+
+#if !defined __need_int_reg_t && !defined __DOXYGEN__
+# define __ARCH_ABI_H__
+# include <arch/chip.h>
+#endif
+
+/* Provide the basic machine types, unless an earlier inclusion did. */
+#ifndef __INT_REG_BITS
+
+/** Number of bits in a register. */
+#if defined __tilegx__
+# define __INT_REG_BITS 64
+#elif defined __tilepro__
+# define __INT_REG_BITS 32
+#elif !defined __need_int_reg_t
+# include <arch/chip.h>
+# define __INT_REG_BITS CHIP_WORD_SIZE()
+#else
+# error Unrecognized architecture with __need_int_reg_t
+#endif
+
+#if __INT_REG_BITS == 64
+
+#ifndef __ASSEMBLER__
+/** Unsigned type that can hold a register. */
+typedef unsigned long long __uint_reg_t;
+
+/** Signed type that can hold a register. */
+typedef long long __int_reg_t;
+#endif /* !__ASSEMBLER__ */
+
+/** printf() length modifier matching the register types above. */
+#define __INT_REG_FMT "ll"
+
+#else /* __INT_REG_BITS != 64 */
+
+#ifndef __ASSEMBLER__
+/** Unsigned type that can hold a register. */
+typedef unsigned long __uint_reg_t;
+
+/** Signed type that can hold a register. */
+typedef long __int_reg_t;
+#endif /* !__ASSEMBLER__ */
+
+/** printf() length modifier matching the register types above. */
+#define __INT_REG_FMT "l"
+
+#endif /* __INT_REG_BITS == 64 */
+#endif /* __INT_REG_BITS */
+
+
+#ifndef __need_int_reg_t
+
+
+#ifndef __ASSEMBLER__
+/** Unsigned type that can hold a register. */
+typedef __uint_reg_t uint_reg_t;
+
+/** Signed type that can hold a register. */
+typedef __int_reg_t int_reg_t;
+#endif /* !__ASSEMBLER__ */
+
+/** printf() length modifier matching uint_reg_t and int_reg_t. */
+#define INT_REG_FMT __INT_REG_FMT
+
+/** Number of bits in a register. */
+#define INT_REG_BITS __INT_REG_BITS
+
+
+/* Registers 0 - 55 are "normal", but some perform special roles. */
+
+#define TREG_FP 52 /**< Frame pointer. */
+#define TREG_TP 53 /**< Thread pointer. */
+#define TREG_SP 54 /**< Stack pointer. */
+#define TREG_LR 55 /**< Link to calling function PC. */
+
+/** Index of last normal general-purpose register. */
+#define TREG_LAST_GPR 55
+
+/* Registers 56 - 62 are "special" network registers. */
+
+#define TREG_SN 56 /**< Static network access. */
+#define TREG_IDN0 57 /**< IDN demux 0 access. */
+#define TREG_IDN1 58 /**< IDN demux 1 access. */
+#define TREG_UDN0 59 /**< UDN demux 0 access. */
+#define TREG_UDN1 60 /**< UDN demux 1 access. */
+#define TREG_UDN2 61 /**< UDN demux 2 access. */
+#define TREG_UDN3 62 /**< UDN demux 3 access. */
+
+/* Register 63 is the "special" zero register. */
+
+#define TREG_ZERO 63 /**< "Zero" register; always reads as "0". */
+
+
+/** By convention, this register is used to hold the syscall number. */
+#define TREG_SYSCALL_NR 10
+
+/** Name of register that holds the syscall number, for use in assembly. */
+#define TREG_SYSCALL_NR_NAME r10
+
+
+/**
+ * The ABI requires callers to allocate a caller state save area of this
+ * many bytes (two register-sized words) at the bottom of each stack frame.
+ */
+#define C_ABI_SAVE_AREA_SIZE (2 * (INT_REG_BITS / 8))
+
+/**
+ * The operand to an 'info' opcode directing the backtracer to not
+ * try to find the calling frame.
+ */
+#define INFO_OP_CANNOT_BACKTRACE 2
+
+
+#endif /* !__need_int_reg_t */
+
+/* Make sure we later can get all the definitions and declarations. */
+#undef __need_int_reg_t
+
+#endif /* !__ARCH_ABI_H__ */
diff --git a/arch/tile/include/arch/chip.h b/arch/tile/include/arch/chip.h
new file mode 100644
index 00000000..926d3db0
--- /dev/null
+++ b/arch/tile/include/arch/chip.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#if __tile_chip__ == 0
+#include <arch/chip_tile64.h>
+#elif __tile_chip__ == 1
+#include <arch/chip_tilepro.h>
+#elif defined(__tilegx__)
+#include <arch/chip_tilegx.h>
+#else
+#error Unexpected Tilera chip type
+#endif
diff --git a/arch/tile/include/arch/chip_tile64.h b/arch/tile/include/arch/chip_tile64.h
new file mode 100644
index 00000000..261aaba0
--- /dev/null
+++ b/arch/tile/include/arch/chip_tile64.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file
+ * Global header file.
+ * This header file specifies defines for TILE64.
+ */
+
+#ifndef __ARCH_CHIP_H__
+#define __ARCH_CHIP_H__
+
+/** Specify chip version.
+ * When possible, prefer the CHIP_xxx symbols below for future-proofing.
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip__ symbol.
+ */
+#define TILE_CHIP 0
+
+/** Specify chip revision.
+ * This provides for the case of a respin of a particular chip type;
+ * the normal value for this symbol is "0".
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip_rev__ symbol.
+ */
+#define TILE_CHIP_REV 0
+
+/** The name of this architecture. */
+#define CHIP_ARCH_NAME "tile64"
+
+/** The ELF e_machine type for binaries for this chip. */
+#define CHIP_ELF_TYPE() EM_TILE64
+
+/** The alternate ELF e_machine type for binaries for this chip. */
+#define CHIP_COMPAT_ELF_TYPE() 0x2506
+
+/** What is the native word size of the machine? */
+#define CHIP_WORD_SIZE() 32
+
+/** How many bits of a virtual address are used. Extra bits must be
+ * the sign extension of the low bits.
+ */
+#define CHIP_VA_WIDTH() 32
+
+/** How many bits are in a physical address? */
+#define CHIP_PA_WIDTH() 36
+
+/** Size of the L2 cache, in bytes. */
+#define CHIP_L2_CACHE_SIZE() 65536
+
+/** Log size of an L2 cache line in bytes. */
+#define CHIP_L2_LOG_LINE_SIZE() 6
+
+/** Size of an L2 cache line, in bytes. */
+#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
+
+/** Associativity of the L2 cache. */
+#define CHIP_L2_ASSOC() 2
+
+/** Size of the L1 data cache, in bytes. */
+#define CHIP_L1D_CACHE_SIZE() 8192
+
+/** Log size of an L1 data cache line in bytes. */
+#define CHIP_L1D_LOG_LINE_SIZE() 4
+
+/** Size of an L1 data cache line, in bytes. */
+#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
+
+/** Associativity of the L1 data cache. */
+#define CHIP_L1D_ASSOC() 2
+
+/** Size of the L1 instruction cache, in bytes. */
+#define CHIP_L1I_CACHE_SIZE() 8192
+
+/** Log size of an L1 instruction cache line in bytes. */
+#define CHIP_L1I_LOG_LINE_SIZE() 6
+
+/** Size of an L1 instruction cache line, in bytes. */
+#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
+
+/** Associativity of the L1 instruction cache. */
+#define CHIP_L1I_ASSOC() 1
+
+/** Stride with which flush instructions must be issued. */
+#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which inv instructions must be issued. */
+#define CHIP_INV_STRIDE() CHIP_L1D_LINE_SIZE()
+
+/** Stride with which finv instructions must be issued. */
+#define CHIP_FINV_STRIDE() CHIP_L1D_LINE_SIZE()
+
+/** Can the local cache coherently cache data that is homed elsewhere? */
+#define CHIP_HAS_COHERENT_LOCAL_CACHE() 0
+
+/** How many simultaneous outstanding victims can the L2 cache have? */
+#define CHIP_MAX_OUTSTANDING_VICTIMS() 2
+
+/** Does the TLB support the NC and NOALLOC bits? */
+#define CHIP_HAS_NC_AND_NOALLOC_BITS() 0
+
+/** Does the chip support hash-for-home caching? */
+#define CHIP_HAS_CBOX_HOME_MAP() 0
+
+/* Number of entries in the chip's home map tables (not defined here). */
+/* #define CHIP_CBOX_HOME_MAP_SIZE() -- does not apply to chip 0 */
+
+/** Do uncacheable requests miss in the cache regardless of whether
+ * there is matching data? */
+#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 0
+
+/** Does the mf instruction wait for victims? */
+#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 1
+
+/** Does the chip have an "inv" instruction that doesn't also flush? */
+#define CHIP_HAS_INV() 0
+
+/** Does the chip have a "wh64" instruction? */
+#define CHIP_HAS_WH64() 0
+
+/** Does this chip have a 'dword_align' instruction? */
+#define CHIP_HAS_DWORD_ALIGN() 0
+
+/** Number of performance counters. */
+#define CHIP_PERFORMANCE_COUNTERS() 2
+
+/** Does this chip have auxiliary performance counters? */
+#define CHIP_HAS_AUX_PERF_COUNTERS() 0
+
+/** Is the CBOX_MSR1 SPR supported? */
+#define CHIP_HAS_CBOX_MSR1() 0
+
+/** Is the TILE_RTF_HWM SPR supported? */
+#define CHIP_HAS_TILE_RTF_HWM() 0
+
+/** Is the TILE_WRITE_PENDING SPR supported? */
+#define CHIP_HAS_TILE_WRITE_PENDING() 0
+
+/** Is the PROC_STATUS SPR supported? */
+#define CHIP_HAS_PROC_STATUS_SPR() 0
+
+/** Is the DSTREAM_PF SPR supported? */
+#define CHIP_HAS_DSTREAM_PF() 0
+
+/** Log base 2 of the number of memory shims (mshims) we have. */
+#define CHIP_LOG_NUM_MSHIMS() 2
+
+/** Are the bases of the interrupt vector areas fixed? */
+#define CHIP_HAS_FIXED_INTVEC_BASE() 1
+
+/** Are the interrupt masks split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_INTR_MASK() 1
+
+/** Is the cycle count split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_CYCLE() 1
+
+/** Does the chip have a static network? */
+#define CHIP_HAS_SN() 1
+
+/** Does the chip have a static network processor? */
+#define CHIP_HAS_SN_PROC() 1
+
+/** Size of the L1 static network processor instruction cache, in bytes. */
+#define CHIP_L1SNI_CACHE_SIZE() 2048
+
+/** Does the chip have DMA support in each tile? */
+#define CHIP_HAS_TILE_DMA() 1
+
+/** Does the chip have the second revision of the directly accessible
+ * dynamic networks? This encapsulates a number of characteristics,
+ * including the absence of the catch-all, the absence of inline message
+ * tags, the absence of support for network context-switching, and so on.
+ */
+#define CHIP_HAS_REV1_XDN() 0
+
+/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
+#define CHIP_HAS_CMPEXCH() 0
+
+/** Does the chip have memory-mapped I/O support? */
+#define CHIP_HAS_MMIO() 0
+
+/** Does the chip have post-completion interrupts? */
+#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0
+
+/** Does the chip have native single step support? */
+#define CHIP_HAS_SINGLE_STEP() 0
+
+#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */
+
+/** How many entries are present in the instruction TLB? */
+#define CHIP_ITLB_ENTRIES() 8
+
+/** How many entries are present in the data TLB? */
+#define CHIP_DTLB_ENTRIES() 16
+
+/** How many MAF entries does the XAUI shim have? */
+#define CHIP_XAUI_MAF_ENTRIES() 16
+
+/** Does the memory shim have a source-id table? */
+#define CHIP_HAS_MSHIM_SRCID_TABLE() 1
+
+/** Does the L1 instruction cache clear on reset? */
+#define CHIP_HAS_L1I_CLEAR_ON_RESET() 0
+
+/** Does the chip come out of reset with valid coordinates on all tiles?
+ * Note that if defined, this also implies that the upper left is 1,1.
+ */
+#define CHIP_HAS_VALID_TILE_COORD_RESET() 0
+
+/** Does the chip have unified packet formats? */
+#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 0
+
+/** Does the chip support write reordering? */
+#define CHIP_HAS_WRITE_REORDERING() 0
+
+/** Does the chip support Y-X routing as well as X-Y? */
+#define CHIP_HAS_Y_X_ROUTING() 0
+
+/** Is INTCTRL_3 managed with the correct MPL? */
+#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 0
+
+/** Is it possible to configure the chip to be big-endian? */
+#define CHIP_HAS_BIG_ENDIAN_CONFIG() 0
+
+/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
+#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0
+
+/** Is the DIAG_TRACE_WAY SPR supported? */
+#define CHIP_HAS_DIAG_TRACE_WAY() 0
+
+/** Is the MEM_STRIPE_CONFIG SPR supported? */
+#define CHIP_HAS_MEM_STRIPE_CONFIG() 0
+
+/** Are the TLB_PERF SPRs supported? */
+#define CHIP_HAS_TLB_PERF() 0
+
+/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
+#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0
+
+/** Does the chip support rev1 DMA packets? */
+#define CHIP_HAS_REV1_DMA_PACKETS() 0
+
+/** Does the chip have an IPI shim? */
+#define CHIP_HAS_IPI() 0
+
+#endif /* !__OPEN_SOURCE__ */
+#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/arch/chip_tilegx.h b/arch/tile/include/arch/chip_tilegx.h
new file mode 100644
index 00000000..ea8e4f2c
--- /dev/null
+++ b/arch/tile/include/arch/chip_tilegx.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/*
+ * @file
+ * Global header file.
+ * This header file specifies defines for TILE-Gx.
+ */
+
+#ifndef __ARCH_CHIP_H__
+#define __ARCH_CHIP_H__
+
+/** Specify chip version.
+ * When possible, prefer the CHIP_xxx symbols below for future-proofing.
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip__ symbol.
+ */
+#define TILE_CHIP 10
+
+/** Specify chip revision.
+ * This provides for the case of a respin of a particular chip type;
+ * the normal value for this symbol is "0".
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip_rev__ symbol.
+ */
+#define TILE_CHIP_REV 0
+
+/** The name of this architecture. */
+#define CHIP_ARCH_NAME "tilegx"
+
+/** The ELF e_machine type for binaries for this chip. */
+#define CHIP_ELF_TYPE() EM_TILEGX
+
+/** The alternate ELF e_machine type for binaries for this chip. */
+#define CHIP_COMPAT_ELF_TYPE() 0x2597
+
+/** What is the native word size of the machine? */
+#define CHIP_WORD_SIZE() 64
+
+/** How many bits of a virtual address are used. Extra bits must be
+ * the sign extension of the low bits.
+ */
+#define CHIP_VA_WIDTH() 42
+
+/** How many bits are in a physical address? */
+#define CHIP_PA_WIDTH() 40
+
+/** Size of the L2 cache, in bytes. */
+#define CHIP_L2_CACHE_SIZE() 262144
+
+/** Log base 2 of the size of an L2 cache line, in bytes. */
+#define CHIP_L2_LOG_LINE_SIZE() 6
+
+/** Size of an L2 cache line, in bytes. */
+#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
+
+/** Associativity of the L2 cache. */
+#define CHIP_L2_ASSOC() 8
+
+/** Size of the L1 data cache, in bytes. */
+#define CHIP_L1D_CACHE_SIZE() 32768
+
+/** Log base 2 of the size of an L1 data cache line, in bytes. */
+#define CHIP_L1D_LOG_LINE_SIZE() 6
+
+/** Size of an L1 data cache line, in bytes. */
+#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
+
+/** Associativity of the L1 data cache. */
+#define CHIP_L1D_ASSOC() 2
+
+/** Size of the L1 instruction cache, in bytes. */
+#define CHIP_L1I_CACHE_SIZE() 32768
+
+/** Log base 2 of the size of an L1 instruction cache line, in bytes. */
+#define CHIP_L1I_LOG_LINE_SIZE() 6
+
+/** Size of an L1 instruction cache line, in bytes. */
+#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
+
+/** Associativity of the L1 instruction cache. */
+#define CHIP_L1I_ASSOC() 2
+
+/** Stride with which flush instructions must be issued. */
+#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which inv instructions must be issued. */
+#define CHIP_INV_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which finv instructions must be issued. */
+#define CHIP_FINV_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Can the local cache coherently cache data that is homed elsewhere? */
+#define CHIP_HAS_COHERENT_LOCAL_CACHE() 1
+
+/** How many simultaneous outstanding victims can the L2 cache have? */
+#define CHIP_MAX_OUTSTANDING_VICTIMS() 128
+
+/** Does the TLB support the NC and NOALLOC bits? */
+#define CHIP_HAS_NC_AND_NOALLOC_BITS() 1
+
+/** Does the chip support hash-for-home caching? */
+#define CHIP_HAS_CBOX_HOME_MAP() 1
+
+/** Number of entries in the chip's home map tables. */
+#define CHIP_CBOX_HOME_MAP_SIZE() 128
+
+/** Do uncacheable requests miss in the cache regardless of whether
+ * there is matching data? */
+#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 1
+
+/** Does the mf instruction wait for victims? */
+#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 0
+
+/** Does the chip have an "inv" instruction that doesn't also flush? */
+#define CHIP_HAS_INV() 1
+
+/** Does the chip have a "wh64" instruction? */
+#define CHIP_HAS_WH64() 1
+
+/** Does this chip have a 'dword_align' instruction? */
+#define CHIP_HAS_DWORD_ALIGN() 0
+
+/** Number of performance counters. */
+#define CHIP_PERFORMANCE_COUNTERS() 4
+
+/** Does this chip have auxiliary performance counters? */
+#define CHIP_HAS_AUX_PERF_COUNTERS() 1
+
+/** Is the CBOX_MSR1 SPR supported? */
+#define CHIP_HAS_CBOX_MSR1() 0
+
+/** Is the TILE_RTF_HWM SPR supported? */
+#define CHIP_HAS_TILE_RTF_HWM() 1
+
+/** Is the TILE_WRITE_PENDING SPR supported? */
+#define CHIP_HAS_TILE_WRITE_PENDING() 0
+
+/** Is the PROC_STATUS SPR supported? */
+#define CHIP_HAS_PROC_STATUS_SPR() 1
+
+/** Is the DSTREAM_PF SPR supported? */
+#define CHIP_HAS_DSTREAM_PF() 1
+
+/** Log base 2 of the number of memory shims (mshims) we have. */
+#define CHIP_LOG_NUM_MSHIMS() 2
+
+/** Are the bases of the interrupt vector areas fixed? */
+#define CHIP_HAS_FIXED_INTVEC_BASE() 0
+
+/** Are the interrupt masks split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_INTR_MASK() 0
+
+/** Is the cycle count split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_CYCLE() 0
+
+/** Does the chip have a static network? */
+#define CHIP_HAS_SN() 0
+
+/** Does the chip have a static network processor? */
+#define CHIP_HAS_SN_PROC() 0
+
+/** Size of the L1 static network processor instruction cache, in bytes. */
+/* #define CHIP_L1SNI_CACHE_SIZE() -- does not apply to chip 10 */
+
+/** Does the chip have DMA support in each tile? */
+#define CHIP_HAS_TILE_DMA() 0
+
+/** Does the chip have the second revision of the directly accessible
+ * dynamic networks? This encapsulates a number of characteristics,
+ * including the absence of the catch-all, the absence of inline message
+ * tags, the absence of support for network context-switching, and so on.
+ */
+#define CHIP_HAS_REV1_XDN() 1
+
+/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
+#define CHIP_HAS_CMPEXCH() 1
+
+/** Does the chip have memory-mapped I/O support? */
+#define CHIP_HAS_MMIO() 1
+
+/** Does the chip have post-completion interrupts? */
+#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 1
+
+/** Does the chip have native single step support? */
+#define CHIP_HAS_SINGLE_STEP() 1
+
+#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */
+
+/** How many entries are present in the instruction TLB? */
+#define CHIP_ITLB_ENTRIES() 16
+
+/** How many entries are present in the data TLB? */
+#define CHIP_DTLB_ENTRIES() 32
+
+/** How many MAF entries does the XAUI shim have? */
+#define CHIP_XAUI_MAF_ENTRIES() 32
+
+/** Does the memory shim have a source-id table? */
+#define CHIP_HAS_MSHIM_SRCID_TABLE() 0
+
+/** Does the L1 instruction cache clear on reset? */
+#define CHIP_HAS_L1I_CLEAR_ON_RESET() 1
+
+/** Does the chip come out of reset with valid coordinates on all tiles?
+ * Note that if defined, this also implies that the upper left is 1,1.
+ */
+#define CHIP_HAS_VALID_TILE_COORD_RESET() 1
+
+/** Does the chip have unified packet formats? */
+#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 1
+
+/** Does the chip support write reordering? */
+#define CHIP_HAS_WRITE_REORDERING() 1
+
+/** Does the chip support Y-X routing as well as X-Y? */
+#define CHIP_HAS_Y_X_ROUTING() 1
+
+/** Is INTCTRL_3 managed with the correct MPL? */
+#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 1
+
+/** Is it possible to configure the chip to be big-endian? */
+#define CHIP_HAS_BIG_ENDIAN_CONFIG() 1
+
+/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
+#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0
+
+/** Is the DIAG_TRACE_WAY SPR supported? */
+#define CHIP_HAS_DIAG_TRACE_WAY() 0
+
+/** Is the MEM_STRIPE_CONFIG SPR supported? */
+#define CHIP_HAS_MEM_STRIPE_CONFIG() 1
+
+/** Are the TLB_PERF SPRs supported? */
+#define CHIP_HAS_TLB_PERF() 1
+
+/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
+#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0
+
+/** Does the chip support rev1 DMA packets? */
+#define CHIP_HAS_REV1_DMA_PACKETS() 1
+
+/** Does the chip have an IPI shim? */
+#define CHIP_HAS_IPI() 1
+
+#endif /* !__OPEN_SOURCE__ */
+#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/arch/chip_tilepro.h b/arch/tile/include/arch/chip_tilepro.h
new file mode 100644
index 00000000..70017699
--- /dev/null
+++ b/arch/tile/include/arch/chip_tilepro.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/*
+ * @file
+ * Global header file.
+ * This header file specifies defines for TILEPro.
+ */
+
+#ifndef __ARCH_CHIP_H__
+#define __ARCH_CHIP_H__
+
+/** Specify chip version.
+ * When possible, prefer the CHIP_xxx symbols below for future-proofing.
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip__ symbol.
+ */
+#define TILE_CHIP 1
+
+/** Specify chip revision.
+ * This provides for the case of a respin of a particular chip type;
+ * the normal value for this symbol is "0".
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip_rev__ symbol.
+ */
+#define TILE_CHIP_REV 0
+
+/** The name of this architecture. */
+#define CHIP_ARCH_NAME "tilepro"
+
+/** The ELF e_machine type for binaries for this chip. */
+#define CHIP_ELF_TYPE() EM_TILEPRO
+
+/** The alternate ELF e_machine type for binaries for this chip. */
+#define CHIP_COMPAT_ELF_TYPE() 0x2507
+
+/** What is the native word size of the machine? */
+#define CHIP_WORD_SIZE() 32
+
+/** How many bits of a virtual address are used. Extra bits must be
+ * the sign extension of the low bits.
+ */
+#define CHIP_VA_WIDTH() 32
+
+/** How many bits are in a physical address? */
+#define CHIP_PA_WIDTH() 36
+
+/** Size of the L2 cache, in bytes. */
+#define CHIP_L2_CACHE_SIZE() 65536
+
+/** Log base 2 of the size of an L2 cache line, in bytes. */
+#define CHIP_L2_LOG_LINE_SIZE() 6
+
+/** Size of an L2 cache line, in bytes. */
+#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
+
+/** Associativity of the L2 cache. */
+#define CHIP_L2_ASSOC() 4
+
+/** Size of the L1 data cache, in bytes. */
+#define CHIP_L1D_CACHE_SIZE() 8192
+
+/** Log base 2 of the size of an L1 data cache line, in bytes. */
+#define CHIP_L1D_LOG_LINE_SIZE() 4
+
+/** Size of an L1 data cache line, in bytes. */
+#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
+
+/** Associativity of the L1 data cache. */
+#define CHIP_L1D_ASSOC() 2
+
+/** Size of the L1 instruction cache, in bytes. */
+#define CHIP_L1I_CACHE_SIZE() 16384
+
+/** Log base 2 of the size of an L1 instruction cache line, in bytes. */
+#define CHIP_L1I_LOG_LINE_SIZE() 6
+
+/** Size of an L1 instruction cache line, in bytes. */
+#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
+
+/** Associativity of the L1 instruction cache. */
+#define CHIP_L1I_ASSOC() 1
+
+/** Stride with which flush instructions must be issued. */
+#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which inv instructions must be issued. */
+#define CHIP_INV_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which finv instructions must be issued. */
+#define CHIP_FINV_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Can the local cache coherently cache data that is homed elsewhere? */
+#define CHIP_HAS_COHERENT_LOCAL_CACHE() 1
+
+/** How many simultaneous outstanding victims can the L2 cache have? */
+#define CHIP_MAX_OUTSTANDING_VICTIMS() 4
+
+/** Does the TLB support the NC and NOALLOC bits? */
+#define CHIP_HAS_NC_AND_NOALLOC_BITS() 1
+
+/** Does the chip support hash-for-home caching? */
+#define CHIP_HAS_CBOX_HOME_MAP() 1
+
+/** Number of entries in the chip's home map tables. */
+#define CHIP_CBOX_HOME_MAP_SIZE() 64
+
+/** Do uncacheable requests miss in the cache regardless of whether
+ * there is matching data? */
+#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 1
+
+/** Does the mf instruction wait for victims? */
+#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 0
+
+/** Does the chip have an "inv" instruction that doesn't also flush? */
+#define CHIP_HAS_INV() 1
+
+/** Does the chip have a "wh64" instruction? */
+#define CHIP_HAS_WH64() 1
+
+/** Does this chip have a 'dword_align' instruction? */
+#define CHIP_HAS_DWORD_ALIGN() 1
+
+/** Number of performance counters. */
+#define CHIP_PERFORMANCE_COUNTERS() 4
+
+/** Does this chip have auxiliary performance counters? */
+#define CHIP_HAS_AUX_PERF_COUNTERS() 1
+
+/** Is the CBOX_MSR1 SPR supported? */
+#define CHIP_HAS_CBOX_MSR1() 1
+
+/** Is the TILE_RTF_HWM SPR supported? */
+#define CHIP_HAS_TILE_RTF_HWM() 1
+
+/** Is the TILE_WRITE_PENDING SPR supported? */
+#define CHIP_HAS_TILE_WRITE_PENDING() 1
+
+/** Is the PROC_STATUS SPR supported? */
+#define CHIP_HAS_PROC_STATUS_SPR() 1
+
+/** Is the DSTREAM_PF SPR supported? */
+#define CHIP_HAS_DSTREAM_PF() 0
+
+/** Log base 2 of the number of memory shims (mshims) we have. */
+#define CHIP_LOG_NUM_MSHIMS() 2
+
+/** Are the bases of the interrupt vector areas fixed? */
+#define CHIP_HAS_FIXED_INTVEC_BASE() 1
+
+/** Are the interrupt masks split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_INTR_MASK() 1
+
+/** Is the cycle count split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_CYCLE() 1
+
+/** Does the chip have a static network? */
+#define CHIP_HAS_SN() 1
+
+/** Does the chip have a static network processor? */
+#define CHIP_HAS_SN_PROC() 0
+
+/** Size of the L1 static network processor instruction cache, in bytes. */
+/* #define CHIP_L1SNI_CACHE_SIZE() -- does not apply to chip 1 */
+
+/** Does the chip have DMA support in each tile? */
+#define CHIP_HAS_TILE_DMA() 1
+
+/** Does the chip have the second revision of the directly accessible
+ * dynamic networks? This encapsulates a number of characteristics,
+ * including the absence of the catch-all, the absence of inline message
+ * tags, the absence of support for network context-switching, and so on.
+ */
+#define CHIP_HAS_REV1_XDN() 0
+
+/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
+#define CHIP_HAS_CMPEXCH() 0
+
+/** Does the chip have memory-mapped I/O support? */
+#define CHIP_HAS_MMIO() 0
+
+/** Does the chip have post-completion interrupts? */
+#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0
+
+/** Does the chip have native single step support? */
+#define CHIP_HAS_SINGLE_STEP() 0
+
+#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */
+
+/** How many entries are present in the instruction TLB? */
+#define CHIP_ITLB_ENTRIES() 16
+
+/** How many entries are present in the data TLB? */
+#define CHIP_DTLB_ENTRIES() 16
+
+/** How many MAF entries does the XAUI shim have? */
+#define CHIP_XAUI_MAF_ENTRIES() 32
+
+/** Does the memory shim have a source-id table? */
+#define CHIP_HAS_MSHIM_SRCID_TABLE() 0
+
+/** Does the L1 instruction cache clear on reset? */
+#define CHIP_HAS_L1I_CLEAR_ON_RESET() 1
+
+/** Does the chip come out of reset with valid coordinates on all tiles?
+ * Note that if defined, this also implies that the upper left is 1,1.
+ */
+#define CHIP_HAS_VALID_TILE_COORD_RESET() 1
+
+/** Does the chip have unified packet formats? */
+#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 1
+
+/** Does the chip support write reordering? */
+#define CHIP_HAS_WRITE_REORDERING() 1
+
+/** Does the chip support Y-X routing as well as X-Y? */
+#define CHIP_HAS_Y_X_ROUTING() 1
+
+/** Is INTCTRL_3 managed with the correct MPL? */
+#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 1
+
+/** Is it possible to configure the chip to be big-endian? */
+#define CHIP_HAS_BIG_ENDIAN_CONFIG() 1
+
+/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
+#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 1
+
+/** Is the DIAG_TRACE_WAY SPR supported? */
+#define CHIP_HAS_DIAG_TRACE_WAY() 1
+
+/** Is the MEM_STRIPE_CONFIG SPR supported? */
+#define CHIP_HAS_MEM_STRIPE_CONFIG() 1
+
+/** Are the TLB_PERF SPRs supported? */
+#define CHIP_HAS_TLB_PERF() 1
+
+/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
+#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 1
+
+/** Does the chip support rev1 DMA packets? */
+#define CHIP_HAS_REV1_DMA_PACKETS() 1
+
+/** Does the chip have an IPI shim? */
+#define CHIP_HAS_IPI() 0
+
+#endif /* !__OPEN_SOURCE__ */
+#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/arch/icache.h b/arch/tile/include/arch/icache.h
new file mode 100644
index 00000000..762eafa8
--- /dev/null
+++ b/arch/tile/include/arch/icache.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+/**
+ * @file
+ *
+ * Support for invalidating bytes in the instruction cache.
+ */
+
+#ifndef __ARCH_ICACHE_H__
+#define __ARCH_ICACHE_H__
+
+#include <arch/chip.h>
+
+
+/**
+ * Invalidate the instruction cache for the given range of memory.
+ *
+ * @param addr The start of memory to be invalidated.
+ * @param size The number of bytes to be invalidated; 0 returns immediately.
+ * @param page_size The system's page size, e.g. getpagesize() in userspace.
+ * This value must be a power of two no larger than the page containing
+ * the code to be invalidated. If the value is smaller than the actual page
+ * size, this function will still work, but may run slower than necessary.
+ */
+static __inline void
+invalidate_icache(const void* addr, unsigned long size,
+ unsigned long page_size)
+{
+ const unsigned long cache_way_size =
+ CHIP_L1I_CACHE_SIZE() / CHIP_L1I_ASSOC();
+ unsigned long max_useful_size;
+ const char* start, *end;
+ long num_passes;
+
+ if (__builtin_expect(size == 0, 0))
+ return;
+
+#ifdef __tilegx__
+ /* Cap the bytes visited at min(page size, one cache way); more is redundant. */
+ max_useful_size = (page_size < cache_way_size) ? page_size : cache_way_size;
+
+ /* No PA aliasing is possible, so one pass always suffices. */
+ num_passes = 1;
+#else
+ /* Cap the bytes visited at one cache way; more would be redundant. */
+ max_useful_size = cache_way_size;
+
+ /*
+ * Passes needed = cache way size / page size (0 is treated as 1 below).
+ * The shift performs that division because the page size is a power of two.
+ */
+ num_passes = cache_way_size >> __builtin_ctzl(page_size);
+#endif
+
+ if (__builtin_expect(size > max_useful_size, 0))
+ size = max_useful_size;
+
+ /* start: addr rounded down to an L1I line; end: the last byte, inclusive. */
+ start = (const char *)((unsigned long)addr & -CHIP_L1I_LINE_SIZE());
+ end = (const char*)addr + size - 1;
+
+ __insn_mf();
+
+ do
+ {
+ const char* p;
+
+ for (p = start; p <= end; p += CHIP_L1I_LINE_SIZE())
+ __insn_icoh(p);
+
+ start += page_size; /* next pass: same range, one page_size further on */
+ end += page_size;
+ }
+ while (--num_passes > 0); /* body has already run once, so 0 acts like 1 */
+
+ __insn_drain();
+}
+
+
+#endif /* __ARCH_ICACHE_H__ */
diff --git a/arch/tile/include/arch/interrupts.h b/arch/tile/include/arch/interrupts.h
new file mode 100644
index 00000000..20f8f07d
--- /dev/null
+++ b/arch/tile/include/arch/interrupts.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifdef __tilegx__ /* select the interrupt definitions matching the target chip */
+#include <arch/interrupts_64.h>
+#else /* !__tilegx__ */
+#include <arch/interrupts_32.h>
+#endif /* __tilegx__ */
diff --git a/arch/tile/include/arch/interrupts_32.h b/arch/tile/include/arch/interrupts_32.h
new file mode 100644
index 00000000..96b57105
--- /dev/null
+++ b/arch/tile/include/arch/interrupts_32.h
@@ -0,0 +1,307 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __ARCH_INTERRUPTS_H__
+#define __ARCH_INTERRUPTS_H__
+
+/** Mask for an interrupt. */
+/* Note: callers must split masks into words themselves: LO for 0-31, HI for 32-63. */
+#define INT_MASK_LO(intno) (1 << (intno)) /* NOTE(review): in C, intno 31 shifts into int's sign bit */
+#define INT_MASK_HI(intno) (1 << ((intno) - 32)) /* NOTE(review): in C, intno 63 shifts into int's sign bit */
+
+#ifndef __ASSEMBLER__ /* single 64-bit mask, only defined for C */
+#define INT_MASK(intno) (1ULL << (intno))
+#endif /* !__ASSEMBLER__ */
+
+
+/** Where a given interrupt executes: 256 bytes per interrupt (i << 8), 16 MB per pl (pl << 24). */
+#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8))
+
+/** Where to store a vector for a given interrupt (INTERRUPT_VECTOR with pl 0). */
+#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0)
+
+/** The base address of user-level interrupts (interrupt 0, pl 0). */
+#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0)
+
+
+/** Additional synthetic interrupt; its number lies outside the range counted by NUM_INTERRUPTS. */
+#define INT_BREAKPOINT (63)
+
+#define INT_ITLB_MISS 0
+#define INT_MEM_ERROR 1
+#define INT_ILL 2
+#define INT_GPV 3
+#define INT_SN_ACCESS 4
+#define INT_IDN_ACCESS 5
+#define INT_UDN_ACCESS 6
+#define INT_IDN_REFILL 7
+#define INT_UDN_REFILL 8
+#define INT_IDN_COMPLETE 9
+#define INT_UDN_COMPLETE 10
+#define INT_SWINT_3 11
+#define INT_SWINT_2 12
+#define INT_SWINT_1 13
+#define INT_SWINT_0 14
+#define INT_UNALIGN_DATA 15
+#define INT_DTLB_MISS 16
+#define INT_DTLB_ACCESS 17
+#define INT_DMATLB_MISS 18
+#define INT_DMATLB_ACCESS 19
+#define INT_SNITLB_MISS 20
+#define INT_SN_NOTIFY 21
+#define INT_SN_FIREWALL 22
+#define INT_IDN_FIREWALL 23
+#define INT_UDN_FIREWALL 24
+#define INT_TILE_TIMER 25
+#define INT_IDN_TIMER 26
+#define INT_UDN_TIMER 27
+#define INT_DMA_NOTIFY 28
+#define INT_IDN_CA 29
+#define INT_UDN_CA 30
+#define INT_IDN_AVAIL 31
+#define INT_UDN_AVAIL 32
+#define INT_PERF_COUNT 33
+#define INT_INTCTRL_3 34
+#define INT_INTCTRL_2 35
+#define INT_INTCTRL_1 36
+#define INT_INTCTRL_0 37
+#define INT_BOOT_ACCESS 38
+#define INT_WORLD_ACCESS 39
+#define INT_I_ASID 40
+#define INT_D_ASID 41
+#define INT_DMA_ASID 42
+#define INT_SNI_ASID 43
+#define INT_DMA_CPL 44
+#define INT_SN_CPL 45
+#define INT_DOUBLE_FAULT 46
+#define INT_SN_STATIC_ACCESS 47
+#define INT_AUX_PERF_COUNT 48
+
+#define NUM_INTERRUPTS 49 /* counts interrupts 0..48 above; INT_BREAKPOINT is not included */
+
+#ifndef __ASSEMBLER__ /* the sets below are built with INT_MASK(), which is only defined for C */
+#define QUEUED_INTERRUPTS ( /* with NONQUEUED_INTERRUPTS, covers interrupts 0..48 exactly once */ \
+ INT_MASK(INT_MEM_ERROR) | \
+ INT_MASK(INT_DMATLB_MISS) | \
+ INT_MASK(INT_DMATLB_ACCESS) | \
+ INT_MASK(INT_SNITLB_MISS) | \
+ INT_MASK(INT_SN_NOTIFY) | \
+ INT_MASK(INT_SN_FIREWALL) | \
+ INT_MASK(INT_IDN_FIREWALL) | \
+ INT_MASK(INT_UDN_FIREWALL) | \
+ INT_MASK(INT_TILE_TIMER) | \
+ INT_MASK(INT_IDN_TIMER) | \
+ INT_MASK(INT_UDN_TIMER) | \
+ INT_MASK(INT_DMA_NOTIFY) | \
+ INT_MASK(INT_IDN_CA) | \
+ INT_MASK(INT_UDN_CA) | \
+ INT_MASK(INT_IDN_AVAIL) | \
+ INT_MASK(INT_UDN_AVAIL) | \
+ INT_MASK(INT_PERF_COUNT) | \
+ INT_MASK(INT_INTCTRL_3) | \
+ INT_MASK(INT_INTCTRL_2) | \
+ INT_MASK(INT_INTCTRL_1) | \
+ INT_MASK(INT_INTCTRL_0) | \
+ INT_MASK(INT_BOOT_ACCESS) | \
+ INT_MASK(INT_WORLD_ACCESS) | \
+ INT_MASK(INT_I_ASID) | \
+ INT_MASK(INT_D_ASID) | \
+ INT_MASK(INT_DMA_ASID) | \
+ INT_MASK(INT_SNI_ASID) | \
+ INT_MASK(INT_DMA_CPL) | \
+ INT_MASK(INT_SN_CPL) | \
+ INT_MASK(INT_DOUBLE_FAULT) | \
+ INT_MASK(INT_AUX_PERF_COUNT) | \
+ 0)
+#define NONQUEUED_INTERRUPTS ( /* every interrupt 0..48 that is not in QUEUED_INTERRUPTS */ \
+ INT_MASK(INT_ITLB_MISS) | \
+ INT_MASK(INT_ILL) | \
+ INT_MASK(INT_GPV) | \
+ INT_MASK(INT_SN_ACCESS) | \
+ INT_MASK(INT_IDN_ACCESS) | \
+ INT_MASK(INT_UDN_ACCESS) | \
+ INT_MASK(INT_IDN_REFILL) | \
+ INT_MASK(INT_UDN_REFILL) | \
+ INT_MASK(INT_IDN_COMPLETE) | \
+ INT_MASK(INT_UDN_COMPLETE) | \
+ INT_MASK(INT_SWINT_3) | \
+ INT_MASK(INT_SWINT_2) | \
+ INT_MASK(INT_SWINT_1) | \
+ INT_MASK(INT_SWINT_0) | \
+ INT_MASK(INT_UNALIGN_DATA) | \
+ INT_MASK(INT_DTLB_MISS) | \
+ INT_MASK(INT_DTLB_ACCESS) | \
+ INT_MASK(INT_SN_STATIC_ACCESS) | \
+ 0)
+#define CRITICAL_MASKED_INTERRUPTS ( /* with CRITICAL_UNMASKED_INTERRUPTS, covers 0..48 exactly once */ \
+ INT_MASK(INT_MEM_ERROR) | \
+ INT_MASK(INT_DMATLB_MISS) | \
+ INT_MASK(INT_DMATLB_ACCESS) | \
+ INT_MASK(INT_SNITLB_MISS) | \
+ INT_MASK(INT_SN_NOTIFY) | \
+ INT_MASK(INT_SN_FIREWALL) | \
+ INT_MASK(INT_IDN_FIREWALL) | \
+ INT_MASK(INT_UDN_FIREWALL) | \
+ INT_MASK(INT_TILE_TIMER) | \
+ INT_MASK(INT_IDN_TIMER) | \
+ INT_MASK(INT_UDN_TIMER) | \
+ INT_MASK(INT_DMA_NOTIFY) | \
+ INT_MASK(INT_IDN_CA) | \
+ INT_MASK(INT_UDN_CA) | \
+ INT_MASK(INT_IDN_AVAIL) | \
+ INT_MASK(INT_UDN_AVAIL) | \
+ INT_MASK(INT_PERF_COUNT) | \
+ INT_MASK(INT_INTCTRL_3) | \
+ INT_MASK(INT_INTCTRL_2) | \
+ INT_MASK(INT_INTCTRL_1) | \
+ INT_MASK(INT_INTCTRL_0) | \
+ INT_MASK(INT_AUX_PERF_COUNT) | \
+ 0)
+#define CRITICAL_UNMASKED_INTERRUPTS ( /* every interrupt 0..48 not in CRITICAL_MASKED_INTERRUPTS */ \
+ INT_MASK(INT_ITLB_MISS) | \
+ INT_MASK(INT_ILL) | \
+ INT_MASK(INT_GPV) | \
+ INT_MASK(INT_SN_ACCESS) | \
+ INT_MASK(INT_IDN_ACCESS) | \
+ INT_MASK(INT_UDN_ACCESS) | \
+ INT_MASK(INT_IDN_REFILL) | \
+ INT_MASK(INT_UDN_REFILL) | \
+ INT_MASK(INT_IDN_COMPLETE) | \
+ INT_MASK(INT_UDN_COMPLETE) | \
+ INT_MASK(INT_SWINT_3) | \
+ INT_MASK(INT_SWINT_2) | \
+ INT_MASK(INT_SWINT_1) | \
+ INT_MASK(INT_SWINT_0) | \
+ INT_MASK(INT_UNALIGN_DATA) | \
+ INT_MASK(INT_DTLB_MISS) | \
+ INT_MASK(INT_DTLB_ACCESS) | \
+ INT_MASK(INT_BOOT_ACCESS) | \
+ INT_MASK(INT_WORLD_ACCESS) | \
+ INT_MASK(INT_I_ASID) | \
+ INT_MASK(INT_D_ASID) | \
+ INT_MASK(INT_DMA_ASID) | \
+ INT_MASK(INT_SNI_ASID) | \
+ INT_MASK(INT_DMA_CPL) | \
+ INT_MASK(INT_SN_CPL) | \
+ INT_MASK(INT_DOUBLE_FAULT) | \
+ INT_MASK(INT_SN_STATIC_ACCESS) | \
+ 0)
+#define MASKABLE_INTERRUPTS ( /* with UNMASKABLE_INTERRUPTS, covers interrupts 0..48 exactly once */ \
+ INT_MASK(INT_MEM_ERROR) | \
+ INT_MASK(INT_IDN_REFILL) | \
+ INT_MASK(INT_UDN_REFILL) | \
+ INT_MASK(INT_IDN_COMPLETE) | \
+ INT_MASK(INT_UDN_COMPLETE) | \
+ INT_MASK(INT_DMATLB_MISS) | \
+ INT_MASK(INT_DMATLB_ACCESS) | \
+ INT_MASK(INT_SNITLB_MISS) | \
+ INT_MASK(INT_SN_NOTIFY) | \
+ INT_MASK(INT_SN_FIREWALL) | \
+ INT_MASK(INT_IDN_FIREWALL) | \
+ INT_MASK(INT_UDN_FIREWALL) | \
+ INT_MASK(INT_TILE_TIMER) | \
+ INT_MASK(INT_IDN_TIMER) | \
+ INT_MASK(INT_UDN_TIMER) | \
+ INT_MASK(INT_DMA_NOTIFY) | \
+ INT_MASK(INT_IDN_CA) | \
+ INT_MASK(INT_UDN_CA) | \
+ INT_MASK(INT_IDN_AVAIL) | \
+ INT_MASK(INT_UDN_AVAIL) | \
+ INT_MASK(INT_PERF_COUNT) | \
+ INT_MASK(INT_INTCTRL_3) | \
+ INT_MASK(INT_INTCTRL_2) | \
+ INT_MASK(INT_INTCTRL_1) | \
+ INT_MASK(INT_INTCTRL_0) | \
+ INT_MASK(INT_AUX_PERF_COUNT) | \
+ 0)
+#define UNMASKABLE_INTERRUPTS ( /* every interrupt 0..48 that is not in MASKABLE_INTERRUPTS */ \
+ INT_MASK(INT_ITLB_MISS) | \
+ INT_MASK(INT_ILL) | \
+ INT_MASK(INT_GPV) | \
+ INT_MASK(INT_SN_ACCESS) | \
+ INT_MASK(INT_IDN_ACCESS) | \
+ INT_MASK(INT_UDN_ACCESS) | \
+ INT_MASK(INT_SWINT_3) | \
+ INT_MASK(INT_SWINT_2) | \
+ INT_MASK(INT_SWINT_1) | \
+ INT_MASK(INT_SWINT_0) | \
+ INT_MASK(INT_UNALIGN_DATA) | \
+ INT_MASK(INT_DTLB_MISS) | \
+ INT_MASK(INT_DTLB_ACCESS) | \
+ INT_MASK(INT_BOOT_ACCESS) | \
+ INT_MASK(INT_WORLD_ACCESS) | \
+ INT_MASK(INT_I_ASID) | \
+ INT_MASK(INT_D_ASID) | \
+ INT_MASK(INT_DMA_ASID) | \
+ INT_MASK(INT_SNI_ASID) | \
+ INT_MASK(INT_DMA_CPL) | \
+ INT_MASK(INT_SN_CPL) | \
+ INT_MASK(INT_DOUBLE_FAULT) | \
+ INT_MASK(INT_SN_STATIC_ACCESS) | \
+ 0)
+#define SYNC_INTERRUPTS ( /* lists the same interrupts as NONQUEUED_INTERRUPTS */ \
+ INT_MASK(INT_ITLB_MISS) | \
+ INT_MASK(INT_ILL) | \
+ INT_MASK(INT_GPV) | \
+ INT_MASK(INT_SN_ACCESS) | \
+ INT_MASK(INT_IDN_ACCESS) | \
+ INT_MASK(INT_UDN_ACCESS) | \
+ INT_MASK(INT_IDN_REFILL) | \
+ INT_MASK(INT_UDN_REFILL) | \
+ INT_MASK(INT_IDN_COMPLETE) | \
+ INT_MASK(INT_UDN_COMPLETE) | \
+ INT_MASK(INT_SWINT_3) | \
+ INT_MASK(INT_SWINT_2) | \
+ INT_MASK(INT_SWINT_1) | \
+ INT_MASK(INT_SWINT_0) | \
+ INT_MASK(INT_UNALIGN_DATA) | \
+ INT_MASK(INT_DTLB_MISS) | \
+ INT_MASK(INT_DTLB_ACCESS) | \
+ INT_MASK(INT_SN_STATIC_ACCESS) | \
+ 0)
+#define NON_SYNC_INTERRUPTS ( /* lists the same interrupts as QUEUED_INTERRUPTS */ \
+ INT_MASK(INT_MEM_ERROR) | \
+ INT_MASK(INT_DMATLB_MISS) | \
+ INT_MASK(INT_DMATLB_ACCESS) | \
+ INT_MASK(INT_SNITLB_MISS) | \
+ INT_MASK(INT_SN_NOTIFY) | \
+ INT_MASK(INT_SN_FIREWALL) | \
+ INT_MASK(INT_IDN_FIREWALL) | \
+ INT_MASK(INT_UDN_FIREWALL) | \
+ INT_MASK(INT_TILE_TIMER) | \
+ INT_MASK(INT_IDN_TIMER) | \
+ INT_MASK(INT_UDN_TIMER) | \
+ INT_MASK(INT_DMA_NOTIFY) | \
+ INT_MASK(INT_IDN_CA) | \
+ INT_MASK(INT_UDN_CA) | \
+ INT_MASK(INT_IDN_AVAIL) | \
+ INT_MASK(INT_UDN_AVAIL) | \
+ INT_MASK(INT_PERF_COUNT) | \
+ INT_MASK(INT_INTCTRL_3) | \
+ INT_MASK(INT_INTCTRL_2) | \
+ INT_MASK(INT_INTCTRL_1) | \
+ INT_MASK(INT_INTCTRL_0) | \
+ INT_MASK(INT_BOOT_ACCESS) | \
+ INT_MASK(INT_WORLD_ACCESS) | \
+ INT_MASK(INT_I_ASID) | \
+ INT_MASK(INT_D_ASID) | \
+ INT_MASK(INT_DMA_ASID) | \
+ INT_MASK(INT_SNI_ASID) | \
+ INT_MASK(INT_DMA_CPL) | \
+ INT_MASK(INT_SN_CPL) | \
+ INT_MASK(INT_DOUBLE_FAULT) | \
+ INT_MASK(INT_AUX_PERF_COUNT) | \
+ 0)
+#endif /* !__ASSEMBLER__ */
+#endif /* !__ARCH_INTERRUPTS_H__ */
diff --git a/arch/tile/include/arch/interrupts_64.h b/arch/tile/include/arch/interrupts_64.h
new file mode 100644
index 00000000..5bb58b2e
--- /dev/null
+++ b/arch/tile/include/arch/interrupts_64.h
@@ -0,0 +1,276 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __ARCH_INTERRUPTS_H__
+#define __ARCH_INTERRUPTS_H__
+
+/**
+ * Mask for an interrupt: a single bit at position intno.
+ * C code gets an unsigned long long constant; assembler sources get a plain
+ * "1 << intno" and must break masks into high and low words manually.
+ */
+#ifndef __ASSEMBLER__
+#define INT_MASK(intno) (1ULL << (intno))
+#else
+#define INT_MASK(intno) (1 << (intno))
+#endif
+
+
+/**
+ * Where a given interrupt executes: base 0xFC000000, plus the protection
+ * level in bits 24 and up, plus the interrupt number in bits 8 and up.
+ */
+#define INTERRUPT_VECTOR(i, pl) \
+  (0xFC000000 + ((pl) << 24) + ((i) << 8))
+
+/** Where to store a vector for a given interrupt (level 0). */
+#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0)
+
+/** The base address of user-level interrupts (interrupt 0, level 0). */
+#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0)
+
+
+/** Additional synthetic interrupt. */
+#define INT_BREAKPOINT          (63)
+
+/* Interrupt numbers; each one is used as a bit index by INT_MASK(). */
+#define INT_MEM_ERROR            0
+#define INT_SINGLE_STEP_3        1
+#define INT_SINGLE_STEP_2        2
+#define INT_SINGLE_STEP_1        3
+#define INT_SINGLE_STEP_0        4
+#define INT_IDN_COMPLETE         5
+#define INT_UDN_COMPLETE         6
+#define INT_ITLB_MISS            7
+#define INT_ILL                  8
+#define INT_GPV                  9
+#define INT_IDN_ACCESS          10
+#define INT_UDN_ACCESS          11
+#define INT_SWINT_3             12
+#define INT_SWINT_2             13
+#define INT_SWINT_1             14
+#define INT_SWINT_0             15
+#define INT_ILL_TRANS           16
+#define INT_UNALIGN_DATA        17
+#define INT_DTLB_MISS           18
+#define INT_DTLB_ACCESS         19
+#define INT_IDN_FIREWALL        20
+#define INT_UDN_FIREWALL        21
+#define INT_TILE_TIMER          22
+#define INT_AUX_TILE_TIMER      23
+#define INT_IDN_TIMER           24
+#define INT_UDN_TIMER           25
+#define INT_IDN_AVAIL           26
+#define INT_UDN_AVAIL           27
+#define INT_IPI_3               28
+#define INT_IPI_2               29
+#define INT_IPI_1               30
+#define INT_IPI_0               31
+#define INT_PERF_COUNT          32
+#define INT_AUX_PERF_COUNT      33
+#define INT_INTCTRL_3           34
+#define INT_INTCTRL_2           35
+#define INT_INTCTRL_1           36
+#define INT_INTCTRL_0           37
+#define INT_BOOT_ACCESS         38
+#define INT_WORLD_ACCESS        39
+#define INT_I_ASID              40
+#define INT_D_ASID              41
+#define INT_DOUBLE_FAULT        42
+
+/* One more than the highest number above; INT_BREAKPOINT is not counted. */
+#define NUM_INTERRUPTS          43
+
+#ifndef __ASSEMBLER__
+/* Bitwise OR of the INT_MASK() bits for the interrupts grouped as QUEUED. */
+#define QUEUED_INTERRUPTS \
+  (INT_MASK(INT_MEM_ERROR) | \
+   INT_MASK(INT_IDN_COMPLETE) | INT_MASK(INT_UDN_COMPLETE) | \
+   INT_MASK(INT_IDN_FIREWALL) | INT_MASK(INT_UDN_FIREWALL) | \
+   INT_MASK(INT_TILE_TIMER) | INT_MASK(INT_AUX_TILE_TIMER) | \
+   INT_MASK(INT_IDN_TIMER) | INT_MASK(INT_UDN_TIMER) | \
+   INT_MASK(INT_IDN_AVAIL) | INT_MASK(INT_UDN_AVAIL) | \
+   INT_MASK(INT_IPI_3) | INT_MASK(INT_IPI_2) | \
+   INT_MASK(INT_IPI_1) | INT_MASK(INT_IPI_0) | \
+   INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT) | \
+   INT_MASK(INT_INTCTRL_3) | INT_MASK(INT_INTCTRL_2) | \
+   INT_MASK(INT_INTCTRL_1) | INT_MASK(INT_INTCTRL_0) | \
+   INT_MASK(INT_BOOT_ACCESS) | INT_MASK(INT_WORLD_ACCESS) | \
+   INT_MASK(INT_I_ASID) | INT_MASK(INT_D_ASID) | \
+   INT_MASK(INT_DOUBLE_FAULT))
+/* Bitwise OR of the INT_MASK() bits for the interrupts grouped as NONQUEUED. */
+#define NONQUEUED_INTERRUPTS \
+  (INT_MASK(INT_SINGLE_STEP_3) | INT_MASK(INT_SINGLE_STEP_2) | \
+   INT_MASK(INT_SINGLE_STEP_1) | INT_MASK(INT_SINGLE_STEP_0) | \
+   INT_MASK(INT_ITLB_MISS) | INT_MASK(INT_ILL) | \
+   INT_MASK(INT_GPV) | \
+   INT_MASK(INT_IDN_ACCESS) | INT_MASK(INT_UDN_ACCESS) | \
+   INT_MASK(INT_SWINT_3) | INT_MASK(INT_SWINT_2) | \
+   INT_MASK(INT_SWINT_1) | INT_MASK(INT_SWINT_0) | \
+   INT_MASK(INT_ILL_TRANS) | INT_MASK(INT_UNALIGN_DATA) | \
+   INT_MASK(INT_DTLB_MISS) | INT_MASK(INT_DTLB_ACCESS))
+/*
+ * Bitwise OR of the INT_MASK() bits for the interrupts grouped as
+ * CRITICAL_MASKED.
+ */
+#define CRITICAL_MASKED_INTERRUPTS \
+  (INT_MASK(INT_MEM_ERROR) | \
+   INT_MASK(INT_SINGLE_STEP_3) | INT_MASK(INT_SINGLE_STEP_2) | \
+   INT_MASK(INT_SINGLE_STEP_1) | INT_MASK(INT_SINGLE_STEP_0) | \
+   INT_MASK(INT_IDN_COMPLETE) | INT_MASK(INT_UDN_COMPLETE) | \
+   INT_MASK(INT_IDN_FIREWALL) | INT_MASK(INT_UDN_FIREWALL) | \
+   INT_MASK(INT_TILE_TIMER) | INT_MASK(INT_AUX_TILE_TIMER) | \
+   INT_MASK(INT_IDN_TIMER) | INT_MASK(INT_UDN_TIMER) | \
+   INT_MASK(INT_IDN_AVAIL) | INT_MASK(INT_UDN_AVAIL) | \
+   INT_MASK(INT_IPI_3) | INT_MASK(INT_IPI_2) | \
+   INT_MASK(INT_IPI_1) | INT_MASK(INT_IPI_0) | \
+   INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT) | \
+   INT_MASK(INT_INTCTRL_3) | INT_MASK(INT_INTCTRL_2) | \
+   INT_MASK(INT_INTCTRL_1) | INT_MASK(INT_INTCTRL_0))
+/*
+ * Bitwise OR of the INT_MASK() bits for the interrupts grouped as
+ * CRITICAL_UNMASKED.
+ */
+#define CRITICAL_UNMASKED_INTERRUPTS \
+  (INT_MASK(INT_ITLB_MISS) | INT_MASK(INT_ILL) | \
+   INT_MASK(INT_GPV) | \
+   INT_MASK(INT_IDN_ACCESS) | INT_MASK(INT_UDN_ACCESS) | \
+   INT_MASK(INT_SWINT_3) | INT_MASK(INT_SWINT_2) | \
+   INT_MASK(INT_SWINT_1) | INT_MASK(INT_SWINT_0) | \
+   INT_MASK(INT_ILL_TRANS) | INT_MASK(INT_UNALIGN_DATA) | \
+   INT_MASK(INT_DTLB_MISS) | INT_MASK(INT_DTLB_ACCESS) | \
+   INT_MASK(INT_BOOT_ACCESS) | INT_MASK(INT_WORLD_ACCESS) | \
+   INT_MASK(INT_I_ASID) | INT_MASK(INT_D_ASID) | \
+   INT_MASK(INT_DOUBLE_FAULT))
+/*
+ * Bitwise OR of the INT_MASK() bits for the interrupts grouped as MASKABLE.
+ * The member list matches CRITICAL_MASKED_INTERRUPTS.
+ */
+#define MASKABLE_INTERRUPTS \
+  (INT_MASK(INT_MEM_ERROR) | \
+   INT_MASK(INT_SINGLE_STEP_3) | INT_MASK(INT_SINGLE_STEP_2) | \
+   INT_MASK(INT_SINGLE_STEP_1) | INT_MASK(INT_SINGLE_STEP_0) | \
+   INT_MASK(INT_IDN_COMPLETE) | INT_MASK(INT_UDN_COMPLETE) | \
+   INT_MASK(INT_IDN_FIREWALL) | INT_MASK(INT_UDN_FIREWALL) | \
+   INT_MASK(INT_TILE_TIMER) | INT_MASK(INT_AUX_TILE_TIMER) | \
+   INT_MASK(INT_IDN_TIMER) | INT_MASK(INT_UDN_TIMER) | \
+   INT_MASK(INT_IDN_AVAIL) | INT_MASK(INT_UDN_AVAIL) | \
+   INT_MASK(INT_IPI_3) | INT_MASK(INT_IPI_2) | \
+   INT_MASK(INT_IPI_1) | INT_MASK(INT_IPI_0) | \
+   INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT) | \
+   INT_MASK(INT_INTCTRL_3) | INT_MASK(INT_INTCTRL_2) | \
+   INT_MASK(INT_INTCTRL_1) | INT_MASK(INT_INTCTRL_0))
+/*
+ * Bitwise OR of the INT_MASK() bits for the interrupts grouped as UNMASKABLE.
+ * The member list matches CRITICAL_UNMASKED_INTERRUPTS.
+ */
+#define UNMASKABLE_INTERRUPTS \
+  (INT_MASK(INT_ITLB_MISS) | INT_MASK(INT_ILL) | \
+   INT_MASK(INT_GPV) | \
+   INT_MASK(INT_IDN_ACCESS) | INT_MASK(INT_UDN_ACCESS) | \
+   INT_MASK(INT_SWINT_3) | INT_MASK(INT_SWINT_2) | \
+   INT_MASK(INT_SWINT_1) | INT_MASK(INT_SWINT_0) | \
+   INT_MASK(INT_ILL_TRANS) | INT_MASK(INT_UNALIGN_DATA) | \
+   INT_MASK(INT_DTLB_MISS) | INT_MASK(INT_DTLB_ACCESS) | \
+   INT_MASK(INT_BOOT_ACCESS) | INT_MASK(INT_WORLD_ACCESS) | \
+   INT_MASK(INT_I_ASID) | INT_MASK(INT_D_ASID) | \
+   INT_MASK(INT_DOUBLE_FAULT))
+/* Bitwise OR of the INT_MASK() bits for the interrupts grouped as SYNC. */
+#define SYNC_INTERRUPTS \
+  (INT_MASK(INT_SINGLE_STEP_3) | INT_MASK(INT_SINGLE_STEP_2) | \
+   INT_MASK(INT_SINGLE_STEP_1) | INT_MASK(INT_SINGLE_STEP_0) | \
+   INT_MASK(INT_IDN_COMPLETE) | INT_MASK(INT_UDN_COMPLETE) | \
+   INT_MASK(INT_ITLB_MISS) | INT_MASK(INT_ILL) | \
+   INT_MASK(INT_GPV) | \
+   INT_MASK(INT_IDN_ACCESS) | INT_MASK(INT_UDN_ACCESS) | \
+   INT_MASK(INT_SWINT_3) | INT_MASK(INT_SWINT_2) | \
+   INT_MASK(INT_SWINT_1) | INT_MASK(INT_SWINT_0) | \
+   INT_MASK(INT_ILL_TRANS) | INT_MASK(INT_UNALIGN_DATA) | \
+   INT_MASK(INT_DTLB_MISS) | INT_MASK(INT_DTLB_ACCESS))
+/* Bitwise OR of the INT_MASK() bits for the interrupts grouped as NON_SYNC. */
+#define NON_SYNC_INTERRUPTS \
+  (INT_MASK(INT_MEM_ERROR) | \
+   INT_MASK(INT_IDN_FIREWALL) | INT_MASK(INT_UDN_FIREWALL) | \
+   INT_MASK(INT_TILE_TIMER) | INT_MASK(INT_AUX_TILE_TIMER) | \
+   INT_MASK(INT_IDN_TIMER) | INT_MASK(INT_UDN_TIMER) | \
+   INT_MASK(INT_IDN_AVAIL) | INT_MASK(INT_UDN_AVAIL) | \
+   INT_MASK(INT_IPI_3) | INT_MASK(INT_IPI_2) | \
+   INT_MASK(INT_IPI_1) | INT_MASK(INT_IPI_0) | \
+   INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT) | \
+   INT_MASK(INT_INTCTRL_3) | INT_MASK(INT_INTCTRL_2) | \
+   INT_MASK(INT_INTCTRL_1) | INT_MASK(INT_INTCTRL_0) | \
+   INT_MASK(INT_BOOT_ACCESS) | INT_MASK(INT_WORLD_ACCESS) | \
+   INT_MASK(INT_I_ASID) | INT_MASK(INT_D_ASID) | \
+   INT_MASK(INT_DOUBLE_FAULT))
+#endif /* !__ASSEMBLER__ */
+#endif /* !__ARCH_INTERRUPTS_H__ */
diff --git a/arch/tile/include/arch/opcode.h b/arch/tile/include/arch/opcode.h
new file mode 100644
index 00000000..92d15229
--- /dev/null
+++ b/arch/tile/include/arch/opcode.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#if defined(__tilepro__)
+#include <arch/opcode_tilepro.h>
+#elif defined(__tilegx__)
+#include <arch/opcode_tilegx.h>
+#else
+#error Unexpected Tilera chip type
+#endif
diff --git a/arch/tile/include/arch/opcode_tilegx.h b/arch/tile/include/arch/opcode_tilegx.h
new file mode 100644
index 00000000..c14d02c8
--- /dev/null
+++ b/arch/tile/include/arch/opcode_tilegx.h
@@ -0,0 +1,1405 @@
+/* TILE-Gx opcode information.
+ *
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ *
+ *
+ *
+ *
+ */
+
+#ifndef __ARCH_OPCODE_H__
+#define __ARCH_OPCODE_H__
+
+#ifndef __ASSEMBLER__
+
+/* Raw bits of one instruction bundle. */
+typedef unsigned long long tilegx_bundle_bits;
+
+/* The top two bits (63:62) determine if a bundle is in the X encoding. */
+#define TILEGX_BUNDLE_MODE_MASK (((tilegx_bundle_bits)0x3) << 62)
+
+/* Sizing constants for TILE-Gx bundles and registers. */
+enum {
+  /* Most instructions a bundle can hold (2 for X, 3 for Y). */
+  TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE = 3,
+
+  /* Count of pipeline encodings: X0, X1, Y0, Y1, Y2. */
+  TILEGX_NUM_PIPELINE_ENCODINGS = 5,
+
+  /* log2 of TILEGX_BUNDLE_SIZE_IN_BYTES. */
+  TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES = 3,
+
+  /* Size of one bundle in bytes. */
+  TILEGX_BUNDLE_SIZE_IN_BYTES = (1 << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES),
+
+  /* log2 of TILEGX_BUNDLE_ALIGNMENT_IN_BYTES. */
+  TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3,
+
+  /* Bundles should be aligned modulo this number of bytes. */
+  TILEGX_BUNDLE_ALIGNMENT_IN_BYTES = (1 << TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES),
+
+  /* Number of registers (some are magic, such as network I/O). */
+  TILEGX_NUM_REGISTERS = 64,
+};
+
+/*
+ * Architecture-neutral "tile_" / "TILE_" spellings of the TILE-Gx names
+ * above, to simplify common code between architectures.
+ */
+
+typedef tilegx_bundle_bits tile_bundle_bits;
+#define TILE_BUNDLE_SIZE_IN_BYTES TILEGX_BUNDLE_SIZE_IN_BYTES
+#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES
+#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES
+
+/* 64-bit encoding of a { bpt ; nop } bundle. */
+#define TILEGX_BPT_BUNDLE 0x286A44AE51485000ULL
+
+/* Return bundle bits 17:12, the 6-bit BFEnd_X0 field. */
+static __inline unsigned int
+get_BFEnd_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 12) & 0x3f;
+}
+
+/* Return bundle bits 27:24, the 4-bit BFOpcodeExtension_X0 field. */
+static __inline unsigned int
+get_BFOpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 24) & 0xf;
+}
+
+/* Return bundle bits 23:18, the 6-bit BFStart_X0 field. */
+static __inline unsigned int
+get_BFStart_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 18) & 0x3f;
+}
+
+/*
+ * Return the 17-bit BrOff_X1 field, which is split in the bundle:
+ * result bits 5:0 come from bundle bits 36:31 and result bits 16:6
+ * from bundle bits 53:43.
+ */
+static __inline unsigned int
+get_BrOff_X1(tilegx_bundle_bits n)
+{
+  const unsigned int low = (unsigned int)(n >> 31) & 0x0000003f;
+  const unsigned int high = (unsigned int)(n >> 37) & 0x0001ffc0;
+  return low | high;
+}
+
+/* Return bundle bits 58:54, the 5-bit BrType_X1 field. */
+static __inline unsigned int
+get_BrType_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 54);
+  return shifted & 0x1f;
+}
+
+/*
+ * Return the 8-bit Dest_Imm8_X1 field, which is split in the bundle:
+ * result bits 5:0 come from bundle bits 36:31 and result bits 7:6
+ * from bundle bits 50:49.
+ */
+static __inline unsigned int
+get_Dest_Imm8_X1(tilegx_bundle_bits n)
+{
+  const unsigned int low = (unsigned int)(n >> 31) & 0x0000003f;
+  const unsigned int high = (unsigned int)(n >> 43) & 0x000000c0;
+  return low | high;
+}
+
+/* Return bundle bits 5:0, the 6-bit Dest_X0 field. */
+static __inline unsigned int
+get_Dest_X0(tilegx_bundle_bits num)
+{
+  return (unsigned int)num & 0x3f;
+}
+
+/* Return bundle bits 36:31, the 6-bit Dest_X1 field. */
+static __inline unsigned int
+get_Dest_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 31);
+  return shifted & 0x3f;
+}
+
+/* Return bundle bits 5:0, the 6-bit Dest_Y0 field. */
+static __inline unsigned int
+get_Dest_Y0(tilegx_bundle_bits num)
+{
+  return (unsigned int)num & 0x3f;
+}
+
+/* Return bundle bits 36:31, the 6-bit Dest_Y1 field. */
+static __inline unsigned int
+get_Dest_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 31);
+  return shifted & 0x3f;
+}
+
+/* Return bundle bits 27:12, the 16-bit Imm16_X0 field. */
+static __inline unsigned int
+get_Imm16_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 12) & 0xffff;
+}
+
+/* Return bundle bits 58:43, the 16-bit Imm16_X1 field. */
+static __inline unsigned int
+get_Imm16_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0xffff;
+}
+
+/* Return bundle bits 27:20, the 8-bit Imm8OpcodeExtension_X0 field. */
+static __inline unsigned int
+get_Imm8OpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 20) & 0xff;
+}
+
+/* Return bundle bits 58:51, the 8-bit Imm8OpcodeExtension_X1 field. */
+static __inline unsigned int
+get_Imm8OpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 51);
+  return shifted & 0xff;
+}
+
+/* Return bundle bits 19:12, the 8-bit Imm8_X0 field. */
+static __inline unsigned int
+get_Imm8_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 12) & 0xff;
+}
+
+/* Return bundle bits 50:43, the 8-bit Imm8_X1 field. */
+static __inline unsigned int
+get_Imm8_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0xff;
+}
+
+/* Return bundle bits 19:12, the 8-bit Imm8_Y0 field. */
+static __inline unsigned int
+get_Imm8_Y0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 12) & 0xff;
+}
+
+/* Return bundle bits 50:43, the 8-bit Imm8_Y1 field. */
+static __inline unsigned int
+get_Imm8_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0xff;
+}
+
+/* Return bundle bits 57:31, the 27-bit JumpOff_X1 field. */
+static __inline unsigned int
+get_JumpOff_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 31);
+  return shifted & 0x7ffffff;
+}
+
+/* Return bundle bit 58, the 1-bit JumpOpcodeExtension_X1 field. */
+static __inline unsigned int
+get_JumpOpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 58);
+  return shifted & 0x1;
+}
+
+/* Return bundle bits 50:37, the 14-bit MF_Imm14_X1 field. */
+static __inline unsigned int
+get_MF_Imm14_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 37);
+  return shifted & 0x3fff;
+}
+
+/*
+ * Return the 14-bit MT_Imm14_X1 field, which is split in the bundle:
+ * result bits 5:0 come from bundle bits 36:31 and result bits 13:6
+ * from bundle bits 50:43.
+ */
+static __inline unsigned int
+get_MT_Imm14_X1(tilegx_bundle_bits n)
+{
+  const unsigned int low = (unsigned int)(n >> 31) & 0x0000003f;
+  const unsigned int high = (unsigned int)(n >> 37) & 0x00003fc0;
+  return low | high;
+}
+
+/* Return bundle bits 63:62, the 2-bit Mode field. */
+static __inline unsigned int
+get_Mode(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 62);
+  return shifted & 0x3;
+}
+
+/* Return bundle bits 30:28, the 3-bit Opcode_X0 field. */
+static __inline unsigned int
+get_Opcode_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 28) & 0x7;
+}
+
+/* Return bundle bits 61:59, the 3-bit Opcode_X1 field. */
+static __inline unsigned int
+get_Opcode_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 59);
+  return shifted & 0x7;
+}
+
+/* Return bundle bits 30:27, the 4-bit Opcode_Y0 field. */
+static __inline unsigned int
+get_Opcode_Y0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 27) & 0xf;
+}
+
+/* Return bundle bits 61:58, the 4-bit Opcode_Y1 field. */
+static __inline unsigned int
+get_Opcode_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 58);
+  return shifted & 0xf;
+}
+
+/*
+ * Return the 2-bit Opcode_Y2 field, which is split in the bundle:
+ * result bit 0 comes from bundle bit 26 and result bit 1 from bundle
+ * bit 57.
+ */
+static __inline unsigned int
+get_Opcode_Y2(tilegx_bundle_bits n)
+{
+  const unsigned int low = (unsigned int)(n >> 26) & 0x00000001;
+  const unsigned int high = (unsigned int)(n >> 56) & 0x00000002;
+  return low | high;
+}
+
+/* Return bundle bits 27:18, the 10-bit RRROpcodeExtension_X0 field. */
+static __inline unsigned int
+get_RRROpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 18) & 0x3ff;
+}
+
+/* Return bundle bits 58:49, the 10-bit RRROpcodeExtension_X1 field. */
+static __inline unsigned int
+get_RRROpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 49);
+  return shifted & 0x3ff;
+}
+
+/* Return bundle bits 19:18, the 2-bit RRROpcodeExtension_Y0 field. */
+static __inline unsigned int
+get_RRROpcodeExtension_Y0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 18) & 0x3;
+}
+
+/* Return bundle bits 50:49, the 2-bit RRROpcodeExtension_Y1 field. */
+static __inline unsigned int
+get_RRROpcodeExtension_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 49);
+  return shifted & 0x3;
+}
+
+/* Return bundle bits 17:12, the 6-bit ShAmt_X0 field. */
+static __inline unsigned int
+get_ShAmt_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 12) & 0x3f;
+}
+
+/* Return bundle bits 48:43, the 6-bit ShAmt_X1 field. */
+static __inline unsigned int
+get_ShAmt_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0x3f;
+}
+
+/* Return bundle bits 17:12, the 6-bit ShAmt_Y0 field. */
+static __inline unsigned int
+get_ShAmt_Y0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 12) & 0x3f;
+}
+
+/* Return bundle bits 48:43, the 6-bit ShAmt_Y1 field. */
+static __inline unsigned int
+get_ShAmt_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0x3f;
+}
+
+/* Return bundle bits 27:18, the 10-bit ShiftOpcodeExtension_X0 field. */
+static __inline unsigned int
+get_ShiftOpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 18) & 0x3ff;
+}
+
+/* Return bundle bits 58:49, the 10-bit ShiftOpcodeExtension_X1 field. */
+static __inline unsigned int
+get_ShiftOpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 49);
+  return shifted & 0x3ff;
+}
+
+/* Return bundle bits 19:18, the 2-bit ShiftOpcodeExtension_Y0 field. */
+static __inline unsigned int
+get_ShiftOpcodeExtension_Y0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 18) & 0x3;
+}
+
+/* Return bundle bits 50:49, the 2-bit ShiftOpcodeExtension_Y1 field. */
+static __inline unsigned int
+get_ShiftOpcodeExtension_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 49);
+  return shifted & 0x3;
+}
+
+/* Return bundle bits 11:6, the 6-bit SrcA_X0 field. */
+static __inline unsigned int
+get_SrcA_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 6) & 0x3f;
+}
+
+/* Return bundle bits 42:37, the 6-bit SrcA_X1 field. */
+static __inline unsigned int
+get_SrcA_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 37);
+  return shifted & 0x3f;
+}
+
+/* Return bundle bits 11:6, the 6-bit SrcA_Y0 field. */
+static __inline unsigned int
+get_SrcA_Y0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 6) & 0x3f;
+}
+
+/* Return bundle bits 42:37, the 6-bit SrcA_Y1 field. */
+static __inline unsigned int
+get_SrcA_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 37);
+  return shifted & 0x3f;
+}
+
+/* Return bundle bits 25:20, the 6-bit SrcA_Y2 field. */
+static __inline unsigned int
+get_SrcA_Y2(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 20) & 0x3f;
+}
+
+/* Return bundle bits 56:51, the 6-bit SrcBDest_Y2 field. */
+static __inline unsigned int
+get_SrcBDest_Y2(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 51);
+  return shifted & 0x3f;
+}
+
+/* Return bundle bits 17:12, the 6-bit SrcB_X0 field. */
+static __inline unsigned int
+get_SrcB_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 12) & 0x3f;
+}
+
+/* Return bundle bits 48:43, the 6-bit SrcB_X1 field. */
+static __inline unsigned int
+get_SrcB_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0x3f;
+}
+
+/* Return bundle bits 17:12, the 6-bit SrcB_Y0 field. */
+static __inline unsigned int
+get_SrcB_Y0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 12) & 0x3f;
+}
+
+/* Return bundle bits 48:43, the 6-bit SrcB_Y1 field. */
+static __inline unsigned int
+get_SrcB_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0x3f;
+}
+
+/* Return bundle bits 17:12, the 6-bit UnaryOpcodeExtension_X0 field. */
+static __inline unsigned int
+get_UnaryOpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 12) & 0x3f;
+}
+
+/* Return bundle bits 48:43, the 6-bit UnaryOpcodeExtension_X1 field. */
+static __inline unsigned int
+get_UnaryOpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0x3f;
+}
+
+/* Return bundle bits 17:12, the 6-bit UnaryOpcodeExtension_Y0 field. */
+static __inline unsigned int
+get_UnaryOpcodeExtension_Y0(tilegx_bundle_bits num)
+{
+  return ((unsigned int)num >> 12) & 0x3f;
+}
+
+/* Return bundle bits 48:43, the 6-bit UnaryOpcodeExtension_Y1 field. */
+static __inline unsigned int
+get_UnaryOpcodeExtension_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0x3f;
+}
+
+
+/*
+ * Sign-extend the low num_bits bits of n to a full int.
+ *
+ * Bits of n above bit (num_bits - 1) are discarded and bit (num_bits - 1)
+ * is replicated into them.
+ *
+ * The left shift is performed on an unsigned value: left-shifting a signed
+ * int so that a 1 reaches or passes the sign bit is undefined behavior in
+ * C, and that is exactly what happens whenever the field is negative.
+ * The conversion back to int and the right shift of a negative value are
+ * implementation-defined; this relies on two's-complement conversion and
+ * an arithmetic right shift, as GCC provides.
+ *
+ * num_bits <= 0 selects no bits and yields 0; num_bits at or above the
+ * width of int returns n unchanged.  Both guards keep the shift count in
+ * range.
+ */
+static __inline int
+sign_extend(int n, int num_bits)
+{
+  const int width = (int)(sizeof(int) * 8);
+  int shift;
+
+  if (num_bits <= 0)
+    return 0;
+  if (num_bits >= width)
+    return n;
+
+  shift = width - num_bits;
+  return (int)((unsigned int)n << shift) >> shift;
+}
+
+
+
+/* Encode the low 6 bits of num into bundle bits 17:12 (BFEnd_X0). */
+static __inline tilegx_bundle_bits
+create_BFEnd_X0(int num)
+{
+  return ((unsigned int)num & 0x3f) << 12;
+}
+
+/* Encode the low 4 bits of num into bundle bits 27:24 (BFOpcodeExtension_X0). */
+static __inline tilegx_bundle_bits
+create_BFOpcodeExtension_X0(int num)
+{
+  return ((unsigned int)num & 0xf) << 24;
+}
+
+/* Encode the low 6 bits of num into bundle bits 23:18 (BFStart_X0). */
+static __inline tilegx_bundle_bits
+create_BFStart_X0(int num)
+{
+  return ((unsigned int)num & 0x3f) << 18;
+}
+
+/*
+ * Encode the low 17 bits of num as the split BrOff_X1 field: bits 5:0 of
+ * num go to bundle bits 36:31 and bits 16:6 go to bundle bits 53:43.
+ */
+static __inline tilegx_bundle_bits
+create_BrOff_X1(int num)
+{
+  const unsigned int bits = (unsigned int)num;
+  const tilegx_bundle_bits low = bits & 0x0000003f;
+  const tilegx_bundle_bits high = bits & 0x0001ffc0;
+  return (low << 31) | (high << 37);
+}
+
+/* Encode the low 5 bits of num into bundle bits 58:54 (BrType_X1). */
+static __inline tilegx_bundle_bits
+create_BrType_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x1f;
+  return field << 54;
+}
+
+/*
+ * Encode the low 8 bits of num as the split Dest_Imm8_X1 field: bits 5:0
+ * of num go to bundle bits 36:31 and bits 7:6 go to bundle bits 50:49.
+ */
+static __inline tilegx_bundle_bits
+create_Dest_Imm8_X1(int num)
+{
+  const unsigned int bits = (unsigned int)num;
+  const tilegx_bundle_bits low = bits & 0x0000003f;
+  const tilegx_bundle_bits high = bits & 0x000000c0;
+  return (low << 31) | (high << 43);
+}
+
+/* Encode the low 6 bits of num into bundle bits 5:0 (Dest_X0). */
+static __inline tilegx_bundle_bits
+create_Dest_X0(int num)
+{
+  return (unsigned int)num & 0x3f;
+}
+
+/* Encode the low 6 bits of num into bundle bits 36:31 (Dest_X1). */
+static __inline tilegx_bundle_bits
+create_Dest_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 31;
+}
+
+/* Encode the low 6 bits of num into bundle bits 5:0 (Dest_Y0). */
+static __inline tilegx_bundle_bits
+create_Dest_Y0(int num)
+{
+  return (unsigned int)num & 0x3f;
+}
+
+/* Encode the low 6 bits of num into bundle bits 36:31 (Dest_Y1). */
+static __inline tilegx_bundle_bits
+create_Dest_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 31;
+}
+
+/* Encode the low 16 bits of num into bundle bits 27:12 (Imm16_X0). */
+static __inline tilegx_bundle_bits
+create_Imm16_X0(int num)
+{
+  return ((unsigned int)num & 0xffff) << 12;
+}
+
+/* Encode the low 16 bits of num into bundle bits 58:43 (Imm16_X1). */
+static __inline tilegx_bundle_bits
+create_Imm16_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xffff;
+  return field << 43;
+}
+
+/* Encode the low 8 bits of num into bundle bits 27:20 (Imm8OpcodeExtension_X0). */
+static __inline tilegx_bundle_bits
+create_Imm8OpcodeExtension_X0(int num)
+{
+  return ((unsigned int)num & 0xff) << 20;
+}
+
+/* Encode the low 8 bits of num into bundle bits 58:51 (Imm8OpcodeExtension_X1). */
+static __inline tilegx_bundle_bits
+create_Imm8OpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xff;
+  return field << 51;
+}
+
+/* Encode the low 8 bits of num into bundle bits 19:12 (Imm8_X0). */
+static __inline tilegx_bundle_bits
+create_Imm8_X0(int num)
+{
+  return ((unsigned int)num & 0xff) << 12;
+}
+
+/* Encode the low 8 bits of num into bundle bits 50:43 (Imm8_X1). */
+static __inline tilegx_bundle_bits
+create_Imm8_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xff;
+  return field << 43;
+}
+
+/* Encode the low 8 bits of num into bundle bits 19:12 (Imm8_Y0). */
+static __inline tilegx_bundle_bits
+create_Imm8_Y0(int num)
+{
+  return ((unsigned int)num & 0xff) << 12;
+}
+
+/* Encode the low 8 bits of num into bundle bits 50:43 (Imm8_Y1). */
+static __inline tilegx_bundle_bits
+create_Imm8_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xff;
+  return field << 43;
+}
+
+/* Encode the low 27 bits of num into bundle bits 57:31 (JumpOff_X1). */
+static __inline tilegx_bundle_bits
+create_JumpOff_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x7ffffff;
+  return field << 31;
+}
+
+/* Encode the low bit of num into bundle bit 58 (JumpOpcodeExtension_X1). */
+static __inline tilegx_bundle_bits
+create_JumpOpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x1;
+  return field << 58;
+}
+
+/* Encode the low 14 bits of num into bundle bits 50:37 (MF_Imm14_X1). */
+static __inline tilegx_bundle_bits
+create_MF_Imm14_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fff;
+  return field << 37;
+}
+
+/*
+ * Encode the low 14 bits of num as the split MT_Imm14_X1 field: bits 5:0
+ * of num go to bundle bits 36:31 and bits 13:6 go to bundle bits 50:43.
+ */
+static __inline tilegx_bundle_bits
+create_MT_Imm14_X1(int num)
+{
+  const unsigned int bits = (unsigned int)num;
+  const tilegx_bundle_bits low = bits & 0x0000003f;
+  const tilegx_bundle_bits high = bits & 0x00003fc0;
+  return (low << 31) | (high << 37);
+}
+
+/* Encode the low 2 bits of num into bundle bits 63:62 (Mode). */
+static __inline tilegx_bundle_bits
+create_Mode(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3;
+  return field << 62;
+}
+
+/* Encode the low 3 bits of num into bundle bits 30:28 (Opcode_X0). */
+static __inline tilegx_bundle_bits
+create_Opcode_X0(int num)
+{
+  return ((unsigned int)num & 0x7) << 28;
+}
+
+/* Encode the low 3 bits of num into bundle bits 61:59 (Opcode_X1). */
+static __inline tilegx_bundle_bits
+create_Opcode_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x7;
+  return field << 59;
+}
+
+/* Encode the low 4 bits of num into bundle bits 30:27 (Opcode_Y0). */
+static __inline tilegx_bundle_bits
+create_Opcode_Y0(int num)
+{
+  return ((unsigned int)num & 0xf) << 27;
+}
+
+/* Encode the low 4 bits of num into bundle bits 61:58 (Opcode_Y1). */
+static __inline tilegx_bundle_bits
+create_Opcode_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xf;
+  return field << 58;
+}
+
+/*
+ * Encode the low 2 bits of num as the split Opcode_Y2 field: bit 0 of num
+ * goes to bundle bit 26 and bit 1 goes to bundle bit 57.
+ */
+static __inline tilegx_bundle_bits
+create_Opcode_Y2(int num)
+{
+  const unsigned int bits = (unsigned int)num;
+  const tilegx_bundle_bits low = bits & 0x00000001;
+  const tilegx_bundle_bits high = bits & 0x00000002;
+  return (low << 26) | (high << 56);
+}
+
+/* Encode the low 10 bits of num into bundle bits 27:18 (RRROpcodeExtension_X0). */
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_X0(int num)
+{
+  return ((unsigned int)num & 0x3ff) << 18;
+}
+
+/* Encode the low 10 bits of num into bundle bits 58:49 (RRROpcodeExtension_X1). */
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3ff;
+  return field << 49;
+}
+
+/* Encode the low 2 bits of num into bundle bits 19:18 (RRROpcodeExtension_Y0). */
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_Y0(int num)
+{
+  return ((unsigned int)num & 0x3) << 18;
+}
+
+/* Encode the low 2 bits of num into bundle bits 50:49 (RRROpcodeExtension_Y1). */
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3;
+  return field << 49;
+}
+
+/* Encode the low 6 bits of num into bundle bits 17:12 (ShAmt_X0). */
+static __inline tilegx_bundle_bits
+create_ShAmt_X0(int num)
+{
+  return ((unsigned int)num & 0x3f) << 12;
+}
+
+/* Encode the low 6 bits of num into bundle bits 48:43 (ShAmt_X1). */
+static __inline tilegx_bundle_bits
+create_ShAmt_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+/* Encode the low 6 bits of num into bundle bits 17:12 (ShAmt_Y0). */
+static __inline tilegx_bundle_bits
+create_ShAmt_Y0(int num)
+{
+  return ((unsigned int)num & 0x3f) << 12;
+}
+
+/* Encode the low 6 bits of num into bundle bits 48:43 (ShAmt_Y1). */
+static __inline tilegx_bundle_bits
+create_ShAmt_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+/* Encode the low 10 bits of num into bundle bits 27:18 (ShiftOpcodeExtension_X0). */
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_X0(int num)
+{
+  return ((unsigned int)num & 0x3ff) << 18;
+}
+
+/* Encode the low 10 bits of num into bundle bits 58:49 (ShiftOpcodeExtension_X1). */
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3ff;
+  return field << 49;
+}
+
+/* Encode the low 2 bits of num into bundle bits 19:18 (ShiftOpcodeExtension_Y0). */
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_Y0(int num)
+{
+  return ((unsigned int)num & 0x3) << 18;
+}
+
+/* Encode the low 2 bits of num into bundle bits 50:49 (ShiftOpcodeExtension_Y1). */
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3;
+  return field << 49;
+}
+
+/* Encode the low 6 bits of num into bundle bits 11:6 (SrcA_X0). */
+static __inline tilegx_bundle_bits
+create_SrcA_X0(int num)
+{
+  return ((unsigned int)num & 0x3f) << 6;
+}
+
+/* Encode the low 6 bits of num into bundle bits 42:37 (SrcA_X1). */
+static __inline tilegx_bundle_bits
+create_SrcA_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 37;
+}
+
+/* Encode the low 6 bits of num into bundle bits 11:6 (SrcA_Y0). */
+static __inline tilegx_bundle_bits
+create_SrcA_Y0(int num)
+{
+  return ((unsigned int)num & 0x3f) << 6;
+}
+
+/* Encode the low 6 bits of num into bundle bits 42:37 (SrcA_Y1). */
+static __inline tilegx_bundle_bits
+create_SrcA_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 37;
+}
+
+/* Encode the low 6 bits of num into bundle bits 25:20 (SrcA_Y2). */
+static __inline tilegx_bundle_bits
+create_SrcA_Y2(int num)
+{
+  return ((unsigned int)num & 0x3f) << 20;
+}
+
+/* Encode the low 6 bits of num into bundle bits 56:51 (SrcBDest_Y2). */
+static __inline tilegx_bundle_bits
+create_SrcBDest_Y2(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 51;
+}
+
+/* Encode the low 6 bits of num into bundle bits 17:12 (SrcB_X0). */
+static __inline tilegx_bundle_bits
+create_SrcB_X0(int num)
+{
+  return ((unsigned int)num & 0x3f) << 12;
+}
+
+/* Encode the low 6 bits of num into bundle bits 48:43 (SrcB_X1). */
+static __inline tilegx_bundle_bits
+create_SrcB_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+/* Encode the low 6 bits of num into bundle bits 17:12 (SrcB_Y0). */
+static __inline tilegx_bundle_bits
+create_SrcB_Y0(int num)
+{
+  return ((unsigned int)num & 0x3f) << 12;
+}
+
+/* Encode the low 6 bits of num into bundle bits 48:43 (SrcB_Y1). */
+static __inline tilegx_bundle_bits
+create_SrcB_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+/* Encode the low 6 bits of num into bundle bits 17:12 (UnaryOpcodeExtension_X0). */
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_X0(int num)
+{
+  return ((unsigned int)num & 0x3f) << 12;
+}
+
+/* Encode the low 6 bits of num into bundle bits 48:43 (UnaryOpcodeExtension_X1). */
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+/* Encode the low 6 bits of num into bundle bits 17:12 (UnaryOpcodeExtension_Y0). */
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_Y0(int num)
+{
+  return ((unsigned int)num & 0x3f) << 12;
+}
+
+/* Encode the low 6 bits of num into bundle bits 48:43 (UnaryOpcodeExtension_Y1). */
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+
+enum
+{
+ ADDI_IMM8_OPCODE_X0 = 1,
+ ADDI_IMM8_OPCODE_X1 = 1,
+ ADDI_OPCODE_Y0 = 0,
+ ADDI_OPCODE_Y1 = 1,
+ ADDLI_OPCODE_X0 = 1,
+ ADDLI_OPCODE_X1 = 0,
+ ADDXI_IMM8_OPCODE_X0 = 2,
+ ADDXI_IMM8_OPCODE_X1 = 2,
+ ADDXI_OPCODE_Y0 = 1,
+ ADDXI_OPCODE_Y1 = 2,
+ ADDXLI_OPCODE_X0 = 2,
+ ADDXLI_OPCODE_X1 = 1,
+ ADDXSC_RRR_0_OPCODE_X0 = 1,
+ ADDXSC_RRR_0_OPCODE_X1 = 1,
+ ADDX_RRR_0_OPCODE_X0 = 2,
+ ADDX_RRR_0_OPCODE_X1 = 2,
+ ADDX_RRR_0_OPCODE_Y0 = 0,
+ ADDX_SPECIAL_0_OPCODE_Y1 = 0,
+ ADD_RRR_0_OPCODE_X0 = 3,
+ ADD_RRR_0_OPCODE_X1 = 3,
+ ADD_RRR_0_OPCODE_Y0 = 1,
+ ADD_SPECIAL_0_OPCODE_Y1 = 1,
+ ANDI_IMM8_OPCODE_X0 = 3,
+ ANDI_IMM8_OPCODE_X1 = 3,
+ ANDI_OPCODE_Y0 = 2,
+ ANDI_OPCODE_Y1 = 3,
+ AND_RRR_0_OPCODE_X0 = 4,
+ AND_RRR_0_OPCODE_X1 = 4,
+ AND_RRR_5_OPCODE_Y0 = 0,
+ AND_RRR_5_OPCODE_Y1 = 0,
+ BEQZT_BRANCH_OPCODE_X1 = 16,
+ BEQZ_BRANCH_OPCODE_X1 = 17,
+ BFEXTS_BF_OPCODE_X0 = 4,
+ BFEXTU_BF_OPCODE_X0 = 5,
+ BFINS_BF_OPCODE_X0 = 6,
+ BF_OPCODE_X0 = 3,
+ BGEZT_BRANCH_OPCODE_X1 = 18,
+ BGEZ_BRANCH_OPCODE_X1 = 19,
+ BGTZT_BRANCH_OPCODE_X1 = 20,
+ BGTZ_BRANCH_OPCODE_X1 = 21,
+ BLBCT_BRANCH_OPCODE_X1 = 22,
+ BLBC_BRANCH_OPCODE_X1 = 23,
+ BLBST_BRANCH_OPCODE_X1 = 24,
+ BLBS_BRANCH_OPCODE_X1 = 25,
+ BLEZT_BRANCH_OPCODE_X1 = 26,
+ BLEZ_BRANCH_OPCODE_X1 = 27,
+ BLTZT_BRANCH_OPCODE_X1 = 28,
+ BLTZ_BRANCH_OPCODE_X1 = 29,
+ BNEZT_BRANCH_OPCODE_X1 = 30,
+ BNEZ_BRANCH_OPCODE_X1 = 31,
+ BRANCH_OPCODE_X1 = 2,
+ CMOVEQZ_RRR_0_OPCODE_X0 = 5,
+ CMOVEQZ_RRR_4_OPCODE_Y0 = 0,
+ CMOVNEZ_RRR_0_OPCODE_X0 = 6,
+ CMOVNEZ_RRR_4_OPCODE_Y0 = 1,
+ CMPEQI_IMM8_OPCODE_X0 = 4,
+ CMPEQI_IMM8_OPCODE_X1 = 4,
+ CMPEQI_OPCODE_Y0 = 3,
+ CMPEQI_OPCODE_Y1 = 4,
+ CMPEQ_RRR_0_OPCODE_X0 = 7,
+ CMPEQ_RRR_0_OPCODE_X1 = 5,
+ CMPEQ_RRR_3_OPCODE_Y0 = 0,
+ CMPEQ_RRR_3_OPCODE_Y1 = 2,
+ CMPEXCH4_RRR_0_OPCODE_X1 = 6,
+ CMPEXCH_RRR_0_OPCODE_X1 = 7,
+ CMPLES_RRR_0_OPCODE_X0 = 8,
+ CMPLES_RRR_0_OPCODE_X1 = 8,
+ CMPLES_RRR_2_OPCODE_Y0 = 0,
+ CMPLES_RRR_2_OPCODE_Y1 = 0,
+ CMPLEU_RRR_0_OPCODE_X0 = 9,
+ CMPLEU_RRR_0_OPCODE_X1 = 9,
+ CMPLEU_RRR_2_OPCODE_Y0 = 1,
+ CMPLEU_RRR_2_OPCODE_Y1 = 1,
+ CMPLTSI_IMM8_OPCODE_X0 = 5,
+ CMPLTSI_IMM8_OPCODE_X1 = 5,
+ CMPLTSI_OPCODE_Y0 = 4,
+ CMPLTSI_OPCODE_Y1 = 5,
+ CMPLTS_RRR_0_OPCODE_X0 = 10,
+ CMPLTS_RRR_0_OPCODE_X1 = 10,
+ CMPLTS_RRR_2_OPCODE_Y0 = 2,
+ CMPLTS_RRR_2_OPCODE_Y1 = 2,
+ CMPLTUI_IMM8_OPCODE_X0 = 6,
+ CMPLTUI_IMM8_OPCODE_X1 = 6,
+ CMPLTU_RRR_0_OPCODE_X0 = 11,
+ CMPLTU_RRR_0_OPCODE_X1 = 11,
+ CMPLTU_RRR_2_OPCODE_Y0 = 3,
+ CMPLTU_RRR_2_OPCODE_Y1 = 3,
+ CMPNE_RRR_0_OPCODE_X0 = 12,
+ CMPNE_RRR_0_OPCODE_X1 = 12,
+ CMPNE_RRR_3_OPCODE_Y0 = 1,
+ CMPNE_RRR_3_OPCODE_Y1 = 3,
+ CMULAF_RRR_0_OPCODE_X0 = 13,
+ CMULA_RRR_0_OPCODE_X0 = 14,
+ CMULFR_RRR_0_OPCODE_X0 = 15,
+ CMULF_RRR_0_OPCODE_X0 = 16,
+ CMULHR_RRR_0_OPCODE_X0 = 17,
+ CMULH_RRR_0_OPCODE_X0 = 18,
+ CMUL_RRR_0_OPCODE_X0 = 19,
+ CNTLZ_UNARY_OPCODE_X0 = 1,
+ CNTLZ_UNARY_OPCODE_Y0 = 1,
+ CNTTZ_UNARY_OPCODE_X0 = 2,
+ CNTTZ_UNARY_OPCODE_Y0 = 2,
+ CRC32_32_RRR_0_OPCODE_X0 = 20,
+ CRC32_8_RRR_0_OPCODE_X0 = 21,
+ DBLALIGN2_RRR_0_OPCODE_X0 = 22,
+ DBLALIGN2_RRR_0_OPCODE_X1 = 13,
+ DBLALIGN4_RRR_0_OPCODE_X0 = 23,
+ DBLALIGN4_RRR_0_OPCODE_X1 = 14,
+ DBLALIGN6_RRR_0_OPCODE_X0 = 24,
+ DBLALIGN6_RRR_0_OPCODE_X1 = 15,
+ DBLALIGN_RRR_0_OPCODE_X0 = 25,
+ DRAIN_UNARY_OPCODE_X1 = 1,
+ DTLBPR_UNARY_OPCODE_X1 = 2,
+ EXCH4_RRR_0_OPCODE_X1 = 16,
+ EXCH_RRR_0_OPCODE_X1 = 17,
+ FDOUBLE_ADDSUB_RRR_0_OPCODE_X0 = 26,
+ FDOUBLE_ADD_FLAGS_RRR_0_OPCODE_X0 = 27,
+ FDOUBLE_MUL_FLAGS_RRR_0_OPCODE_X0 = 28,
+ FDOUBLE_PACK1_RRR_0_OPCODE_X0 = 29,
+ FDOUBLE_PACK2_RRR_0_OPCODE_X0 = 30,
+ FDOUBLE_SUB_FLAGS_RRR_0_OPCODE_X0 = 31,
+ FDOUBLE_UNPACK_MAX_RRR_0_OPCODE_X0 = 32,
+ FDOUBLE_UNPACK_MIN_RRR_0_OPCODE_X0 = 33,
+ FETCHADD4_RRR_0_OPCODE_X1 = 18,
+ FETCHADDGEZ4_RRR_0_OPCODE_X1 = 19,
+ FETCHADDGEZ_RRR_0_OPCODE_X1 = 20,
+ FETCHADD_RRR_0_OPCODE_X1 = 21,
+ FETCHAND4_RRR_0_OPCODE_X1 = 22,
+ FETCHAND_RRR_0_OPCODE_X1 = 23,
+ FETCHOR4_RRR_0_OPCODE_X1 = 24,
+ FETCHOR_RRR_0_OPCODE_X1 = 25,
+ FINV_UNARY_OPCODE_X1 = 3,
+ FLUSHWB_UNARY_OPCODE_X1 = 4,
+ FLUSH_UNARY_OPCODE_X1 = 5,
+ FNOP_UNARY_OPCODE_X0 = 3,
+ FNOP_UNARY_OPCODE_X1 = 6,
+ FNOP_UNARY_OPCODE_Y0 = 3,
+ FNOP_UNARY_OPCODE_Y1 = 8,
+ FSINGLE_ADD1_RRR_0_OPCODE_X0 = 34,
+ FSINGLE_ADDSUB2_RRR_0_OPCODE_X0 = 35,
+ FSINGLE_MUL1_RRR_0_OPCODE_X0 = 36,
+ FSINGLE_MUL2_RRR_0_OPCODE_X0 = 37,
+ FSINGLE_PACK1_UNARY_OPCODE_X0 = 4,
+ FSINGLE_PACK1_UNARY_OPCODE_Y0 = 4,
+ FSINGLE_PACK2_RRR_0_OPCODE_X0 = 38,
+ FSINGLE_SUB1_RRR_0_OPCODE_X0 = 39,
+ ICOH_UNARY_OPCODE_X1 = 7,
+ ILL_UNARY_OPCODE_X1 = 8,
+ ILL_UNARY_OPCODE_Y1 = 9,
+ IMM8_OPCODE_X0 = 4,
+ IMM8_OPCODE_X1 = 3,
+ INV_UNARY_OPCODE_X1 = 9,
+ IRET_UNARY_OPCODE_X1 = 10,
+ JALRP_UNARY_OPCODE_X1 = 11,
+ JALRP_UNARY_OPCODE_Y1 = 10,
+ JALR_UNARY_OPCODE_X1 = 12,
+ JALR_UNARY_OPCODE_Y1 = 11,
+ JAL_JUMP_OPCODE_X1 = 0,
+ JRP_UNARY_OPCODE_X1 = 13,
+ JRP_UNARY_OPCODE_Y1 = 12,
+ JR_UNARY_OPCODE_X1 = 14,
+ JR_UNARY_OPCODE_Y1 = 13,
+ JUMP_OPCODE_X1 = 4,
+ J_JUMP_OPCODE_X1 = 1,
+ LD1S_ADD_IMM8_OPCODE_X1 = 7,
+ LD1S_OPCODE_Y2 = 0,
+ LD1S_UNARY_OPCODE_X1 = 15,
+ LD1U_ADD_IMM8_OPCODE_X1 = 8,
+ LD1U_OPCODE_Y2 = 1,
+ LD1U_UNARY_OPCODE_X1 = 16,
+ LD2S_ADD_IMM8_OPCODE_X1 = 9,
+ LD2S_OPCODE_Y2 = 2,
+ LD2S_UNARY_OPCODE_X1 = 17,
+ LD2U_ADD_IMM8_OPCODE_X1 = 10,
+ LD2U_OPCODE_Y2 = 3,
+ LD2U_UNARY_OPCODE_X1 = 18,
+ LD4S_ADD_IMM8_OPCODE_X1 = 11,
+ LD4S_OPCODE_Y2 = 1,
+ LD4S_UNARY_OPCODE_X1 = 19,
+ LD4U_ADD_IMM8_OPCODE_X1 = 12,
+ LD4U_OPCODE_Y2 = 2,
+ LD4U_UNARY_OPCODE_X1 = 20,
+ LDNA_UNARY_OPCODE_X1 = 21,
+ LDNT1S_ADD_IMM8_OPCODE_X1 = 13,
+ LDNT1S_UNARY_OPCODE_X1 = 22,
+ LDNT1U_ADD_IMM8_OPCODE_X1 = 14,
+ LDNT1U_UNARY_OPCODE_X1 = 23,
+ LDNT2S_ADD_IMM8_OPCODE_X1 = 15,
+ LDNT2S_UNARY_OPCODE_X1 = 24,
+ LDNT2U_ADD_IMM8_OPCODE_X1 = 16,
+ LDNT2U_UNARY_OPCODE_X1 = 25,
+ LDNT4S_ADD_IMM8_OPCODE_X1 = 17,
+ LDNT4S_UNARY_OPCODE_X1 = 26,
+ LDNT4U_ADD_IMM8_OPCODE_X1 = 18,
+ LDNT4U_UNARY_OPCODE_X1 = 27,
+ LDNT_ADD_IMM8_OPCODE_X1 = 19,
+ LDNT_UNARY_OPCODE_X1 = 28,
+ LD_ADD_IMM8_OPCODE_X1 = 20,
+ LD_OPCODE_Y2 = 3,
+ LD_UNARY_OPCODE_X1 = 29,
+ LNK_UNARY_OPCODE_X1 = 30,
+ LNK_UNARY_OPCODE_Y1 = 14,
+ LWNA_ADD_IMM8_OPCODE_X1 = 21,
+ MFSPR_IMM8_OPCODE_X1 = 22,
+ MF_UNARY_OPCODE_X1 = 31,
+ MM_BF_OPCODE_X0 = 7,
+ MNZ_RRR_0_OPCODE_X0 = 40,
+ MNZ_RRR_0_OPCODE_X1 = 26,
+ MNZ_RRR_4_OPCODE_Y0 = 2,
+ MNZ_RRR_4_OPCODE_Y1 = 2,
+ MODE_OPCODE_YA2 = 1,
+ MODE_OPCODE_YB2 = 2,
+ MODE_OPCODE_YC2 = 3,
+ MTSPR_IMM8_OPCODE_X1 = 23,
+ MULAX_RRR_0_OPCODE_X0 = 41,
+ MULAX_RRR_3_OPCODE_Y0 = 2,
+ MULA_HS_HS_RRR_0_OPCODE_X0 = 42,
+ MULA_HS_HS_RRR_9_OPCODE_Y0 = 0,
+ MULA_HS_HU_RRR_0_OPCODE_X0 = 43,
+ MULA_HS_LS_RRR_0_OPCODE_X0 = 44,
+ MULA_HS_LU_RRR_0_OPCODE_X0 = 45,
+ MULA_HU_HU_RRR_0_OPCODE_X0 = 46,
+ MULA_HU_HU_RRR_9_OPCODE_Y0 = 1,
+ MULA_HU_LS_RRR_0_OPCODE_X0 = 47,
+ MULA_HU_LU_RRR_0_OPCODE_X0 = 48,
+ MULA_LS_LS_RRR_0_OPCODE_X0 = 49,
+ MULA_LS_LS_RRR_9_OPCODE_Y0 = 2,
+ MULA_LS_LU_RRR_0_OPCODE_X0 = 50,
+ MULA_LU_LU_RRR_0_OPCODE_X0 = 51,
+ MULA_LU_LU_RRR_9_OPCODE_Y0 = 3,
+ MULX_RRR_0_OPCODE_X0 = 52,
+ MULX_RRR_3_OPCODE_Y0 = 3,
+ MUL_HS_HS_RRR_0_OPCODE_X0 = 53,
+ MUL_HS_HS_RRR_8_OPCODE_Y0 = 0,
+ MUL_HS_HU_RRR_0_OPCODE_X0 = 54,
+ MUL_HS_LS_RRR_0_OPCODE_X0 = 55,
+ MUL_HS_LU_RRR_0_OPCODE_X0 = 56,
+ MUL_HU_HU_RRR_0_OPCODE_X0 = 57,
+ MUL_HU_HU_RRR_8_OPCODE_Y0 = 1,
+ MUL_HU_LS_RRR_0_OPCODE_X0 = 58,
+ MUL_HU_LU_RRR_0_OPCODE_X0 = 59,
+ MUL_LS_LS_RRR_0_OPCODE_X0 = 60,
+ MUL_LS_LS_RRR_8_OPCODE_Y0 = 2,
+ MUL_LS_LU_RRR_0_OPCODE_X0 = 61,
+ MUL_LU_LU_RRR_0_OPCODE_X0 = 62,
+ MUL_LU_LU_RRR_8_OPCODE_Y0 = 3,
+ MZ_RRR_0_OPCODE_X0 = 63,
+ MZ_RRR_0_OPCODE_X1 = 27,
+ MZ_RRR_4_OPCODE_Y0 = 3,
+ MZ_RRR_4_OPCODE_Y1 = 3,
+ NAP_UNARY_OPCODE_X1 = 32,
+ NOP_UNARY_OPCODE_X0 = 5,
+ NOP_UNARY_OPCODE_X1 = 33,
+ NOP_UNARY_OPCODE_Y0 = 5,
+ NOP_UNARY_OPCODE_Y1 = 15,
+ NOR_RRR_0_OPCODE_X0 = 64,
+ NOR_RRR_0_OPCODE_X1 = 28,
+ NOR_RRR_5_OPCODE_Y0 = 1,
+ NOR_RRR_5_OPCODE_Y1 = 1,
+ ORI_IMM8_OPCODE_X0 = 7,
+ ORI_IMM8_OPCODE_X1 = 24,
+ OR_RRR_0_OPCODE_X0 = 65,
+ OR_RRR_0_OPCODE_X1 = 29,
+ OR_RRR_5_OPCODE_Y0 = 2,
+ OR_RRR_5_OPCODE_Y1 = 2,
+ PCNT_UNARY_OPCODE_X0 = 6,
+ PCNT_UNARY_OPCODE_Y0 = 6,
+ REVBITS_UNARY_OPCODE_X0 = 7,
+ REVBITS_UNARY_OPCODE_Y0 = 7,
+ REVBYTES_UNARY_OPCODE_X0 = 8,
+ REVBYTES_UNARY_OPCODE_Y0 = 8,
+ ROTLI_SHIFT_OPCODE_X0 = 1,
+ ROTLI_SHIFT_OPCODE_X1 = 1,
+ ROTLI_SHIFT_OPCODE_Y0 = 0,
+ ROTLI_SHIFT_OPCODE_Y1 = 0,
+ ROTL_RRR_0_OPCODE_X0 = 66,
+ ROTL_RRR_0_OPCODE_X1 = 30,
+ ROTL_RRR_6_OPCODE_Y0 = 0,
+ ROTL_RRR_6_OPCODE_Y1 = 0,
+ RRR_0_OPCODE_X0 = 5,
+ RRR_0_OPCODE_X1 = 5,
+ RRR_0_OPCODE_Y0 = 5,
+ RRR_0_OPCODE_Y1 = 6,
+ RRR_1_OPCODE_Y0 = 6,
+ RRR_1_OPCODE_Y1 = 7,
+ RRR_2_OPCODE_Y0 = 7,
+ RRR_2_OPCODE_Y1 = 8,
+ RRR_3_OPCODE_Y0 = 8,
+ RRR_3_OPCODE_Y1 = 9,
+ RRR_4_OPCODE_Y0 = 9,
+ RRR_4_OPCODE_Y1 = 10,
+ RRR_5_OPCODE_Y0 = 10,
+ RRR_5_OPCODE_Y1 = 11,
+ RRR_6_OPCODE_Y0 = 11,
+ RRR_6_OPCODE_Y1 = 12,
+ RRR_7_OPCODE_Y0 = 12,
+ RRR_7_OPCODE_Y1 = 13,
+ RRR_8_OPCODE_Y0 = 13,
+ RRR_9_OPCODE_Y0 = 14,
+ SHIFT_OPCODE_X0 = 6,
+ SHIFT_OPCODE_X1 = 6,
+ SHIFT_OPCODE_Y0 = 15,
+ SHIFT_OPCODE_Y1 = 14,
+ SHL16INSLI_OPCODE_X0 = 7,
+ SHL16INSLI_OPCODE_X1 = 7,
+ SHL1ADDX_RRR_0_OPCODE_X0 = 67,
+ SHL1ADDX_RRR_0_OPCODE_X1 = 31,
+ SHL1ADDX_RRR_7_OPCODE_Y0 = 1,
+ SHL1ADDX_RRR_7_OPCODE_Y1 = 1,
+ SHL1ADD_RRR_0_OPCODE_X0 = 68,
+ SHL1ADD_RRR_0_OPCODE_X1 = 32,
+ SHL1ADD_RRR_1_OPCODE_Y0 = 0,
+ SHL1ADD_RRR_1_OPCODE_Y1 = 0,
+ SHL2ADDX_RRR_0_OPCODE_X0 = 69,
+ SHL2ADDX_RRR_0_OPCODE_X1 = 33,
+ SHL2ADDX_RRR_7_OPCODE_Y0 = 2,
+ SHL2ADDX_RRR_7_OPCODE_Y1 = 2,
+ SHL2ADD_RRR_0_OPCODE_X0 = 70,
+ SHL2ADD_RRR_0_OPCODE_X1 = 34,
+ SHL2ADD_RRR_1_OPCODE_Y0 = 1,
+ SHL2ADD_RRR_1_OPCODE_Y1 = 1,
+ SHL3ADDX_RRR_0_OPCODE_X0 = 71,
+ SHL3ADDX_RRR_0_OPCODE_X1 = 35,
+ SHL3ADDX_RRR_7_OPCODE_Y0 = 3,
+ SHL3ADDX_RRR_7_OPCODE_Y1 = 3,
+ SHL3ADD_RRR_0_OPCODE_X0 = 72,
+ SHL3ADD_RRR_0_OPCODE_X1 = 36,
+ SHL3ADD_RRR_1_OPCODE_Y0 = 2,
+ SHL3ADD_RRR_1_OPCODE_Y1 = 2,
+ SHLI_SHIFT_OPCODE_X0 = 2,
+ SHLI_SHIFT_OPCODE_X1 = 2,
+ SHLI_SHIFT_OPCODE_Y0 = 1,
+ SHLI_SHIFT_OPCODE_Y1 = 1,
+ SHLXI_SHIFT_OPCODE_X0 = 3,
+ SHLXI_SHIFT_OPCODE_X1 = 3,
+ SHLX_RRR_0_OPCODE_X0 = 73,
+ SHLX_RRR_0_OPCODE_X1 = 37,
+ SHL_RRR_0_OPCODE_X0 = 74,
+ SHL_RRR_0_OPCODE_X1 = 38,
+ SHL_RRR_6_OPCODE_Y0 = 1,
+ SHL_RRR_6_OPCODE_Y1 = 1,
+ SHRSI_SHIFT_OPCODE_X0 = 4,
+ SHRSI_SHIFT_OPCODE_X1 = 4,
+ SHRSI_SHIFT_OPCODE_Y0 = 2,
+ SHRSI_SHIFT_OPCODE_Y1 = 2,
+ SHRS_RRR_0_OPCODE_X0 = 75,
+ SHRS_RRR_0_OPCODE_X1 = 39,
+ SHRS_RRR_6_OPCODE_Y0 = 2,
+ SHRS_RRR_6_OPCODE_Y1 = 2,
+ SHRUI_SHIFT_OPCODE_X0 = 5,
+ SHRUI_SHIFT_OPCODE_X1 = 5,
+ SHRUI_SHIFT_OPCODE_Y0 = 3,
+ SHRUI_SHIFT_OPCODE_Y1 = 3,
+ SHRUXI_SHIFT_OPCODE_X0 = 6,
+ SHRUXI_SHIFT_OPCODE_X1 = 6,
+ SHRUX_RRR_0_OPCODE_X0 = 76,
+ SHRUX_RRR_0_OPCODE_X1 = 40,
+ SHRU_RRR_0_OPCODE_X0 = 77,
+ SHRU_RRR_0_OPCODE_X1 = 41,
+ SHRU_RRR_6_OPCODE_Y0 = 3,
+ SHRU_RRR_6_OPCODE_Y1 = 3,
+ SHUFFLEBYTES_RRR_0_OPCODE_X0 = 78,
+ ST1_ADD_IMM8_OPCODE_X1 = 25,
+ ST1_OPCODE_Y2 = 0,
+ ST1_RRR_0_OPCODE_X1 = 42,
+ ST2_ADD_IMM8_OPCODE_X1 = 26,
+ ST2_OPCODE_Y2 = 1,
+ ST2_RRR_0_OPCODE_X1 = 43,
+ ST4_ADD_IMM8_OPCODE_X1 = 27,
+ ST4_OPCODE_Y2 = 2,
+ ST4_RRR_0_OPCODE_X1 = 44,
+ STNT1_ADD_IMM8_OPCODE_X1 = 28,
+ STNT1_RRR_0_OPCODE_X1 = 45,
+ STNT2_ADD_IMM8_OPCODE_X1 = 29,
+ STNT2_RRR_0_OPCODE_X1 = 46,
+ STNT4_ADD_IMM8_OPCODE_X1 = 30,
+ STNT4_RRR_0_OPCODE_X1 = 47,
+ STNT_ADD_IMM8_OPCODE_X1 = 31,
+ STNT_RRR_0_OPCODE_X1 = 48,
+ ST_ADD_IMM8_OPCODE_X1 = 32,
+ ST_OPCODE_Y2 = 3,
+ ST_RRR_0_OPCODE_X1 = 49,
+ SUBXSC_RRR_0_OPCODE_X0 = 79,
+ SUBXSC_RRR_0_OPCODE_X1 = 50,
+ SUBX_RRR_0_OPCODE_X0 = 80,
+ SUBX_RRR_0_OPCODE_X1 = 51,
+ SUBX_RRR_0_OPCODE_Y0 = 2,
+ SUBX_RRR_0_OPCODE_Y1 = 2,
+ SUB_RRR_0_OPCODE_X0 = 81,
+ SUB_RRR_0_OPCODE_X1 = 52,
+ SUB_RRR_0_OPCODE_Y0 = 3,
+ SUB_RRR_0_OPCODE_Y1 = 3,
+ SWINT0_UNARY_OPCODE_X1 = 34,
+ SWINT1_UNARY_OPCODE_X1 = 35,
+ SWINT2_UNARY_OPCODE_X1 = 36,
+ SWINT3_UNARY_OPCODE_X1 = 37,
+ TBLIDXB0_UNARY_OPCODE_X0 = 9,
+ TBLIDXB0_UNARY_OPCODE_Y0 = 9,
+ TBLIDXB1_UNARY_OPCODE_X0 = 10,
+ TBLIDXB1_UNARY_OPCODE_Y0 = 10,
+ TBLIDXB2_UNARY_OPCODE_X0 = 11,
+ TBLIDXB2_UNARY_OPCODE_Y0 = 11,
+ TBLIDXB3_UNARY_OPCODE_X0 = 12,
+ TBLIDXB3_UNARY_OPCODE_Y0 = 12,
+ UNARY_RRR_0_OPCODE_X0 = 82,
+ UNARY_RRR_0_OPCODE_X1 = 53,
+ UNARY_RRR_1_OPCODE_Y0 = 3,
+ UNARY_RRR_1_OPCODE_Y1 = 3,
+ V1ADDI_IMM8_OPCODE_X0 = 8,
+ V1ADDI_IMM8_OPCODE_X1 = 33,
+ V1ADDUC_RRR_0_OPCODE_X0 = 83,
+ V1ADDUC_RRR_0_OPCODE_X1 = 54,
+ V1ADD_RRR_0_OPCODE_X0 = 84,
+ V1ADD_RRR_0_OPCODE_X1 = 55,
+ V1ADIFFU_RRR_0_OPCODE_X0 = 85,
+ V1AVGU_RRR_0_OPCODE_X0 = 86,
+ V1CMPEQI_IMM8_OPCODE_X0 = 9,
+ V1CMPEQI_IMM8_OPCODE_X1 = 34,
+ V1CMPEQ_RRR_0_OPCODE_X0 = 87,
+ V1CMPEQ_RRR_0_OPCODE_X1 = 56,
+ V1CMPLES_RRR_0_OPCODE_X0 = 88,
+ V1CMPLES_RRR_0_OPCODE_X1 = 57,
+ V1CMPLEU_RRR_0_OPCODE_X0 = 89,
+ V1CMPLEU_RRR_0_OPCODE_X1 = 58,
+ V1CMPLTSI_IMM8_OPCODE_X0 = 10,
+ V1CMPLTSI_IMM8_OPCODE_X1 = 35,
+ V1CMPLTS_RRR_0_OPCODE_X0 = 90,
+ V1CMPLTS_RRR_0_OPCODE_X1 = 59,
+ V1CMPLTUI_IMM8_OPCODE_X0 = 11,
+ V1CMPLTUI_IMM8_OPCODE_X1 = 36,
+ V1CMPLTU_RRR_0_OPCODE_X0 = 91,
+ V1CMPLTU_RRR_0_OPCODE_X1 = 60,
+ V1CMPNE_RRR_0_OPCODE_X0 = 92,
+ V1CMPNE_RRR_0_OPCODE_X1 = 61,
+ V1DDOTPUA_RRR_0_OPCODE_X0 = 161,
+ V1DDOTPUSA_RRR_0_OPCODE_X0 = 93,
+ V1DDOTPUS_RRR_0_OPCODE_X0 = 94,
+ V1DDOTPU_RRR_0_OPCODE_X0 = 162,
+ V1DOTPA_RRR_0_OPCODE_X0 = 95,
+ V1DOTPUA_RRR_0_OPCODE_X0 = 163,
+ V1DOTPUSA_RRR_0_OPCODE_X0 = 96,
+ V1DOTPUS_RRR_0_OPCODE_X0 = 97,
+ V1DOTPU_RRR_0_OPCODE_X0 = 164,
+ V1DOTP_RRR_0_OPCODE_X0 = 98,
+ V1INT_H_RRR_0_OPCODE_X0 = 99,
+ V1INT_H_RRR_0_OPCODE_X1 = 62,
+ V1INT_L_RRR_0_OPCODE_X0 = 100,
+ V1INT_L_RRR_0_OPCODE_X1 = 63,
+ V1MAXUI_IMM8_OPCODE_X0 = 12,
+ V1MAXUI_IMM8_OPCODE_X1 = 37,
+ V1MAXU_RRR_0_OPCODE_X0 = 101,
+ V1MAXU_RRR_0_OPCODE_X1 = 64,
+ V1MINUI_IMM8_OPCODE_X0 = 13,
+ V1MINUI_IMM8_OPCODE_X1 = 38,
+ V1MINU_RRR_0_OPCODE_X0 = 102,
+ V1MINU_RRR_0_OPCODE_X1 = 65,
+ V1MNZ_RRR_0_OPCODE_X0 = 103,
+ V1MNZ_RRR_0_OPCODE_X1 = 66,
+ V1MULTU_RRR_0_OPCODE_X0 = 104,
+ V1MULUS_RRR_0_OPCODE_X0 = 105,
+ V1MULU_RRR_0_OPCODE_X0 = 106,
+ V1MZ_RRR_0_OPCODE_X0 = 107,
+ V1MZ_RRR_0_OPCODE_X1 = 67,
+ V1SADAU_RRR_0_OPCODE_X0 = 108,
+ V1SADU_RRR_0_OPCODE_X0 = 109,
+ V1SHLI_SHIFT_OPCODE_X0 = 7,
+ V1SHLI_SHIFT_OPCODE_X1 = 7,
+ V1SHL_RRR_0_OPCODE_X0 = 110,
+ V1SHL_RRR_0_OPCODE_X1 = 68,
+ V1SHRSI_SHIFT_OPCODE_X0 = 8,
+ V1SHRSI_SHIFT_OPCODE_X1 = 8,
+ V1SHRS_RRR_0_OPCODE_X0 = 111,
+ V1SHRS_RRR_0_OPCODE_X1 = 69,
+ V1SHRUI_SHIFT_OPCODE_X0 = 9,
+ V1SHRUI_SHIFT_OPCODE_X1 = 9,
+ V1SHRU_RRR_0_OPCODE_X0 = 112,
+ V1SHRU_RRR_0_OPCODE_X1 = 70,
+ V1SUBUC_RRR_0_OPCODE_X0 = 113,
+ V1SUBUC_RRR_0_OPCODE_X1 = 71,
+ V1SUB_RRR_0_OPCODE_X0 = 114,
+ V1SUB_RRR_0_OPCODE_X1 = 72,
+ V2ADDI_IMM8_OPCODE_X0 = 14,
+ V2ADDI_IMM8_OPCODE_X1 = 39,
+ V2ADDSC_RRR_0_OPCODE_X0 = 115,
+ V2ADDSC_RRR_0_OPCODE_X1 = 73,
+ V2ADD_RRR_0_OPCODE_X0 = 116,
+ V2ADD_RRR_0_OPCODE_X1 = 74,
+ V2ADIFFS_RRR_0_OPCODE_X0 = 117,
+ V2AVGS_RRR_0_OPCODE_X0 = 118,
+ V2CMPEQI_IMM8_OPCODE_X0 = 15,
+ V2CMPEQI_IMM8_OPCODE_X1 = 40,
+ V2CMPEQ_RRR_0_OPCODE_X0 = 119,
+ V2CMPEQ_RRR_0_OPCODE_X1 = 75,
+ V2CMPLES_RRR_0_OPCODE_X0 = 120,
+ V2CMPLES_RRR_0_OPCODE_X1 = 76,
+ V2CMPLEU_RRR_0_OPCODE_X0 = 121,
+ V2CMPLEU_RRR_0_OPCODE_X1 = 77,
+ V2CMPLTSI_IMM8_OPCODE_X0 = 16,
+ V2CMPLTSI_IMM8_OPCODE_X1 = 41,
+ V2CMPLTS_RRR_0_OPCODE_X0 = 122,
+ V2CMPLTS_RRR_0_OPCODE_X1 = 78,
+ V2CMPLTUI_IMM8_OPCODE_X0 = 17,
+ V2CMPLTUI_IMM8_OPCODE_X1 = 42,
+ V2CMPLTU_RRR_0_OPCODE_X0 = 123,
+ V2CMPLTU_RRR_0_OPCODE_X1 = 79,
+ V2CMPNE_RRR_0_OPCODE_X0 = 124,
+ V2CMPNE_RRR_0_OPCODE_X1 = 80,
+ V2DOTPA_RRR_0_OPCODE_X0 = 125,
+ V2DOTP_RRR_0_OPCODE_X0 = 126,
+ V2INT_H_RRR_0_OPCODE_X0 = 127,
+ V2INT_H_RRR_0_OPCODE_X1 = 81,
+ V2INT_L_RRR_0_OPCODE_X0 = 128,
+ V2INT_L_RRR_0_OPCODE_X1 = 82,
+ V2MAXSI_IMM8_OPCODE_X0 = 18,
+ V2MAXSI_IMM8_OPCODE_X1 = 43,
+ V2MAXS_RRR_0_OPCODE_X0 = 129,
+ V2MAXS_RRR_0_OPCODE_X1 = 83,
+ V2MINSI_IMM8_OPCODE_X0 = 19,
+ V2MINSI_IMM8_OPCODE_X1 = 44,
+ V2MINS_RRR_0_OPCODE_X0 = 130,
+ V2MINS_RRR_0_OPCODE_X1 = 84,
+ V2MNZ_RRR_0_OPCODE_X0 = 131,
+ V2MNZ_RRR_0_OPCODE_X1 = 85,
+ V2MULFSC_RRR_0_OPCODE_X0 = 132,
+ V2MULS_RRR_0_OPCODE_X0 = 133,
+ V2MULTS_RRR_0_OPCODE_X0 = 134,
+ V2MZ_RRR_0_OPCODE_X0 = 135,
+ V2MZ_RRR_0_OPCODE_X1 = 86,
+ V2PACKH_RRR_0_OPCODE_X0 = 136,
+ V2PACKH_RRR_0_OPCODE_X1 = 87,
+ V2PACKL_RRR_0_OPCODE_X0 = 137,
+ V2PACKL_RRR_0_OPCODE_X1 = 88,
+ V2PACKUC_RRR_0_OPCODE_X0 = 138,
+ V2PACKUC_RRR_0_OPCODE_X1 = 89,
+ V2SADAS_RRR_0_OPCODE_X0 = 139,
+ V2SADAU_RRR_0_OPCODE_X0 = 140,
+ V2SADS_RRR_0_OPCODE_X0 = 141,
+ V2SADU_RRR_0_OPCODE_X0 = 142,
+ V2SHLI_SHIFT_OPCODE_X0 = 10,
+ V2SHLI_SHIFT_OPCODE_X1 = 10,
+ V2SHLSC_RRR_0_OPCODE_X0 = 143,
+ V2SHLSC_RRR_0_OPCODE_X1 = 90,
+ V2SHL_RRR_0_OPCODE_X0 = 144,
+ V2SHL_RRR_0_OPCODE_X1 = 91,
+ V2SHRSI_SHIFT_OPCODE_X0 = 11,
+ V2SHRSI_SHIFT_OPCODE_X1 = 11,
+ V2SHRS_RRR_0_OPCODE_X0 = 145,
+ V2SHRS_RRR_0_OPCODE_X1 = 92,
+ V2SHRUI_SHIFT_OPCODE_X0 = 12,
+ V2SHRUI_SHIFT_OPCODE_X1 = 12,
+ V2SHRU_RRR_0_OPCODE_X0 = 146,
+ V2SHRU_RRR_0_OPCODE_X1 = 93,
+ V2SUBSC_RRR_0_OPCODE_X0 = 147,
+ V2SUBSC_RRR_0_OPCODE_X1 = 94,
+ V2SUB_RRR_0_OPCODE_X0 = 148,
+ V2SUB_RRR_0_OPCODE_X1 = 95,
+ V4ADDSC_RRR_0_OPCODE_X0 = 149,
+ V4ADDSC_RRR_0_OPCODE_X1 = 96,
+ V4ADD_RRR_0_OPCODE_X0 = 150,
+ V4ADD_RRR_0_OPCODE_X1 = 97,
+ V4INT_H_RRR_0_OPCODE_X0 = 151,
+ V4INT_H_RRR_0_OPCODE_X1 = 98,
+ V4INT_L_RRR_0_OPCODE_X0 = 152,
+ V4INT_L_RRR_0_OPCODE_X1 = 99,
+ V4PACKSC_RRR_0_OPCODE_X0 = 153,
+ V4PACKSC_RRR_0_OPCODE_X1 = 100,
+ V4SHLSC_RRR_0_OPCODE_X0 = 154,
+ V4SHLSC_RRR_0_OPCODE_X1 = 101,
+ V4SHL_RRR_0_OPCODE_X0 = 155,
+ V4SHL_RRR_0_OPCODE_X1 = 102,
+ V4SHRS_RRR_0_OPCODE_X0 = 156,
+ V4SHRS_RRR_0_OPCODE_X1 = 103,
+ V4SHRU_RRR_0_OPCODE_X0 = 157,
+ V4SHRU_RRR_0_OPCODE_X1 = 104,
+ V4SUBSC_RRR_0_OPCODE_X0 = 158,
+ V4SUBSC_RRR_0_OPCODE_X1 = 105,
+ V4SUB_RRR_0_OPCODE_X0 = 159,
+ V4SUB_RRR_0_OPCODE_X1 = 106,
+ WH64_UNARY_OPCODE_X1 = 38,
+ XORI_IMM8_OPCODE_X0 = 20,
+ XORI_IMM8_OPCODE_X1 = 45,
+ XOR_RRR_0_OPCODE_X0 = 160,
+ XOR_RRR_0_OPCODE_X1 = 107,
+ XOR_RRR_5_OPCODE_Y0 = 3,
+ XOR_RRR_5_OPCODE_Y1 = 3
+};
+
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ARCH_OPCODE_H__ */
diff --git a/arch/tile/include/arch/opcode_tilepro.h b/arch/tile/include/arch/opcode_tilepro.h
new file mode 100644
index 00000000..71b763b8
--- /dev/null
+++ b/arch/tile/include/arch/opcode_tilepro.h
@@ -0,0 +1,1471 @@
+/* TILEPro opcode information.
+ *
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ *
+ *
+ *
+ *
+ */
+
+#ifndef __ARCH_OPCODE_H__
+#define __ARCH_OPCODE_H__
+
+#ifndef __ASSEMBLER__
+
+/* Integer type that holds the raw bits of one TILEPro bundle. */
+typedef unsigned long long tilepro_bundle_bits;
+
+/* Bit 63 of a bundle: set when the bundle is in the Y encoding. */
+#define TILEPRO_BUNDLE_Y_ENCODING_MASK (((tilepro_bundle_bits)1) << 63)
+
+enum {
+ /* Upper bound on instructions per bundle: X bundles hold 2, Y bundles 3. */
+ TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE = 3,
+
+ /* Count of distinct pipeline encodings: X0, X1, Y0, Y1 and Y2. */
+ TILEPRO_NUM_PIPELINE_ENCODINGS = 5,
+
+ /* log2(TILEPRO_BUNDLE_SIZE_IN_BYTES). */
+ TILEPRO_LOG2_BUNDLE_SIZE_IN_BYTES = 3,
+
+ /* Size of one instruction bundle, in bytes. */
+ TILEPRO_BUNDLE_SIZE_IN_BYTES = (1 << TILEPRO_LOG2_BUNDLE_SIZE_IN_BYTES),
+
+ /* log2(TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES). */
+ TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3,
+
+ /* Required byte alignment of a bundle. */
+ TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES =
+  (1 << TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES),
+
+ /* log2(TILEPRO_SN_INSTRUCTION_SIZE_IN_BYTES). */
+ TILEPRO_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES = 1,
+
+ /* Size of one static network instruction, in bytes. */
+ TILEPRO_SN_INSTRUCTION_SIZE_IN_BYTES =
+  (1 << TILEPRO_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES),
+
+ /* Register count; some are special (e.g. network I/O registers). */
+ TILEPRO_NUM_REGISTERS = 64,
+
+ /* Static network register count. */
+ TILEPRO_NUM_SN_REGISTERS = 4
+};
+
+/* Architecture-neutral "tile_" spellings of the TILEPro names, so that
+ code shared between architectures does not have to name TILEPro. */
+
+typedef tilepro_bundle_bits tile_bundle_bits;
+#define TILE_BUNDLE_SIZE_IN_BYTES TILEPRO_BUNDLE_SIZE_IN_BYTES
+#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES
+#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES
+
+/* The 64-bit encoding of the bundle { bpt ; nop }. */
+#define TILEPRO_BPT_BUNDLE 0x400b3cae70166000ULL
+
+/* Return the 10-bit BrOff_SN field, stored in bits 9..0 of num. */
+static __inline unsigned int
+get_BrOff_SN(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return low_word & 0x3ff;
+}
+
+/* Return the BrOff_X1 field, reassembled from two pieces of the bundle. */
+static __inline unsigned int
+get_BrOff_X1(tilepro_bundle_bits n)
+{
+ /* Result bits 14..0 come from bundle bits 57..43. */
+ const unsigned int low = (unsigned int)(n >> 43) & 0x00007fff;
+ /* Result bits 16..15 come from bundle bits 36..35. */
+ const unsigned int high = (unsigned int)(n >> 20) & 0x00018000;
+
+ return low | high;
+}
+
+/* Return the 4-bit BrType_X1 field, stored in bits 34..31 of n. */
+static __inline unsigned int
+get_BrType_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 31);
+
+ return shifted & 0xf;
+}
+
+/* Return the Dest_Imm8_X1 field, reassembled from two pieces of the bundle. */
+static __inline unsigned int
+get_Dest_Imm8_X1(tilepro_bundle_bits n)
+{
+ /* Result bits 5..0 come from bundle bits 36..31. */
+ const unsigned int low = (unsigned int)(n >> 31) & 0x0000003f;
+ /* Result bits 7..6 come from bundle bits 50..49. */
+ const unsigned int high = (unsigned int)(n >> 43) & 0x000000c0;
+
+ return low | high;
+}
+
+/* Return the 2-bit Dest_SN field, stored in bits 3..2 of num. */
+static __inline unsigned int
+get_Dest_SN(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 2) & 0x3;
+}
+
+/* Return the 6-bit Dest_X0 field, stored in bits 5..0 of num. */
+static __inline unsigned int
+get_Dest_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return low_word & 0x3f;
+}
+
+/* Return the 6-bit Dest_X1 field, stored in bits 36..31 of n. */
+static __inline unsigned int
+get_Dest_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 31);
+
+ return shifted & 0x3f;
+}
+
+/* Return the 6-bit Dest_Y0 field, stored in bits 5..0 of num. */
+static __inline unsigned int
+get_Dest_Y0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return low_word & 0x3f;
+}
+
+/* Return the 6-bit Dest_Y1 field, stored in bits 36..31 of n. */
+static __inline unsigned int
+get_Dest_Y1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 31);
+
+ return shifted & 0x3f;
+}
+
+/* Return the 16-bit Imm16_X0 field, stored in bits 27..12 of num. */
+static __inline unsigned int
+get_Imm16_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 12) & 0xffff;
+}
+
+/* Return the 16-bit Imm16_X1 field, stored in bits 58..43 of n. */
+static __inline unsigned int
+get_Imm16_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 43);
+
+ return shifted & 0xffff;
+}
+
+/* Return the 8-bit Imm8_SN field, stored in bits 7..0 of num. */
+static __inline unsigned int
+get_Imm8_SN(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return low_word & 0xff;
+}
+
+/* Return the 8-bit Imm8_X0 field, stored in bits 19..12 of num. */
+static __inline unsigned int
+get_Imm8_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 12) & 0xff;
+}
+
+/* Return the 8-bit Imm8_X1 field, stored in bits 50..43 of n. */
+static __inline unsigned int
+get_Imm8_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 43);
+
+ return shifted & 0xff;
+}
+
+/* Return the 8-bit Imm8_Y0 field, stored in bits 19..12 of num. */
+static __inline unsigned int
+get_Imm8_Y0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 12) & 0xff;
+}
+
+/* Return the 8-bit Imm8_Y1 field, stored in bits 50..43 of n. */
+static __inline unsigned int
+get_Imm8_Y1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 43);
+
+ return shifted & 0xff;
+}
+
+/* Return the 7-bit ImmOpcodeExtension_X0 field (bits 26..20 of num). */
+static __inline unsigned int
+get_ImmOpcodeExtension_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 20) & 0x7f;
+}
+
+/* Return the 7-bit ImmOpcodeExtension_X1 field (bits 57..51 of n). */
+static __inline unsigned int
+get_ImmOpcodeExtension_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 51);
+
+ return shifted & 0x7f;
+}
+
+/* Return the 2-bit ImmRROpcodeExtension_SN field (bits 9..8 of num). */
+static __inline unsigned int
+get_ImmRROpcodeExtension_SN(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 8) & 0x3;
+}
+
+/* Return the JOffLong_X1 field, reassembled from five pieces of the bundle. */
+static __inline unsigned int
+get_JOffLong_X1(tilepro_bundle_bits n)
+{
+ unsigned int off;
+
+ off = (unsigned int)(n >> 43) & 0x00007fff;  /* [14:0]  <- bundle [57:43] */
+ off |= (unsigned int)(n >> 20) & 0x00018000; /* [16:15] <- bundle [36:35] */
+ off |= (unsigned int)(n >> 14) & 0x001e0000; /* [20:17] <- bundle [34:31] */
+ off |= (unsigned int)(n >> 16) & 0x07e00000; /* [26:21] <- bundle [42:37] */
+ off |= (unsigned int)(n >> 31) & 0x18000000; /* [28:27] <- bundle [59:58] */
+
+ return off;
+}
+
+/* Return the JOff_X1 field, reassembled from five pieces of the bundle. */
+static __inline unsigned int
+get_JOff_X1(tilepro_bundle_bits n)
+{
+ unsigned int off;
+
+ off = (unsigned int)(n >> 43) & 0x00007fff;  /* [14:0]  <- bundle [57:43] */
+ off |= (unsigned int)(n >> 20) & 0x00018000; /* [16:15] <- bundle [36:35] */
+ off |= (unsigned int)(n >> 14) & 0x001e0000; /* [20:17] <- bundle [34:31] */
+ off |= (unsigned int)(n >> 16) & 0x07e00000; /* [26:21] <- bundle [42:37] */
+ off |= (unsigned int)(n >> 31) & 0x08000000; /* [27]    <- bundle [58] */
+
+ return off;
+}
+
+/* Return the MF_Imm15_X1 field, reassembled from two pieces of the bundle. */
+static __inline unsigned int
+get_MF_Imm15_X1(tilepro_bundle_bits n)
+{
+ /* Result bits 13..0 come from bundle bits 50..37. */
+ const unsigned int low = (unsigned int)(n >> 37) & 0x00003fff;
+ /* Result bit 14 comes from bundle bit 58. */
+ const unsigned int high = (unsigned int)(n >> 44) & 0x00004000;
+
+ return low | high;
+}
+
+/* Return the 5-bit MMEnd_X0 field, stored in bits 22..18 of num. */
+static __inline unsigned int
+get_MMEnd_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 18) & 0x1f;
+}
+
+/* Return the 5-bit MMEnd_X1 field, stored in bits 53..49 of n. */
+static __inline unsigned int
+get_MMEnd_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 49);
+
+ return shifted & 0x1f;
+}
+
+/* Return the 5-bit MMStart_X0 field, stored in bits 27..23 of num. */
+static __inline unsigned int
+get_MMStart_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 23) & 0x1f;
+}
+
+/* Return the 5-bit MMStart_X1 field, stored in bits 58..54 of n. */
+static __inline unsigned int
+get_MMStart_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 54);
+
+ return shifted & 0x1f;
+}
+
+/* Return the MT_Imm15_X1 field, reassembled from three pieces of the bundle. */
+static __inline unsigned int
+get_MT_Imm15_X1(tilepro_bundle_bits n)
+{
+ unsigned int imm;
+
+ imm = (unsigned int)(n >> 31) & 0x0000003f;  /* [5:0]  <- bundle [36:31] */
+ imm |= (unsigned int)(n >> 37) & 0x00003fc0; /* [13:6] <- bundle [50:43] */
+ imm |= (unsigned int)(n >> 44) & 0x00004000; /* [14]   <- bundle [58] */
+
+ return imm;
+}
+
+/* Return the 1-bit Mode field, stored in bit 63 of n. */
+static __inline unsigned int
+get_Mode(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 63);
+
+ return shifted & 0x1;
+}
+
+/* Return the 4-bit NoRegOpcodeExtension_SN field (bits 3..0 of num). */
+static __inline unsigned int
+get_NoRegOpcodeExtension_SN(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return low_word & 0xf;
+}
+
+/* Return the 6-bit Opcode_SN field, stored in bits 15..10 of num. */
+static __inline unsigned int
+get_Opcode_SN(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 10) & 0x3f;
+}
+
+/* Return the 3-bit Opcode_X0 field, stored in bits 30..28 of num. */
+static __inline unsigned int
+get_Opcode_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 28) & 0x7;
+}
+
+/* Return the 4-bit Opcode_X1 field, stored in bits 62..59 of n. */
+static __inline unsigned int
+get_Opcode_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 59);
+
+ return shifted & 0xf;
+}
+
+/* Return the 4-bit Opcode_Y0 field, stored in bits 30..27 of num. */
+static __inline unsigned int
+get_Opcode_Y0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 27) & 0xf;
+}
+
+/* Return the 4-bit Opcode_Y1 field, stored in bits 62..59 of n. */
+static __inline unsigned int
+get_Opcode_Y1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 59);
+
+ return shifted & 0xf;
+}
+
+/* Return the 3-bit Opcode_Y2 field, stored in bits 58..56 of n. */
+static __inline unsigned int
+get_Opcode_Y2(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 56);
+
+ return shifted & 0x7;
+}
+
+/* Return the 4-bit RROpcodeExtension_SN field (bits 7..4 of num). */
+static __inline unsigned int
+get_RROpcodeExtension_SN(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 4) & 0xf;
+}
+
+/* Return the 9-bit RRROpcodeExtension_X0 field (bits 26..18 of num). */
+static __inline unsigned int
+get_RRROpcodeExtension_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 18) & 0x1ff;
+}
+
+/* Return the 9-bit RRROpcodeExtension_X1 field (bits 57..49 of n). */
+static __inline unsigned int
+get_RRROpcodeExtension_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 49);
+
+ return shifted & 0x1ff;
+}
+
+/* Return the 2-bit RRROpcodeExtension_Y0 field (bits 19..18 of num). */
+static __inline unsigned int
+get_RRROpcodeExtension_Y0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 18) & 0x3;
+}
+
+/* Return the 2-bit RRROpcodeExtension_Y1 field (bits 50..49 of n). */
+static __inline unsigned int
+get_RRROpcodeExtension_Y1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 49);
+
+ return shifted & 0x3;
+}
+
+/* Return the 10-bit RouteOpcodeExtension_SN field (bits 9..0 of num). */
+static __inline unsigned int
+get_RouteOpcodeExtension_SN(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return low_word & 0x3ff;
+}
+
+/* Return the 1-bit S_X0 field, stored in bit 27 of num. */
+static __inline unsigned int
+get_S_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 27) & 0x1;
+}
+
+/* Return the 1-bit S_X1 field, stored in bit 58 of n. */
+static __inline unsigned int
+get_S_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 58);
+
+ return shifted & 0x1;
+}
+
+/* Return the 5-bit ShAmt_X0 field, stored in bits 16..12 of num. */
+static __inline unsigned int
+get_ShAmt_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 12) & 0x1f;
+}
+
+/* Return the 5-bit ShAmt_X1 field, stored in bits 47..43 of n. */
+static __inline unsigned int
+get_ShAmt_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 43);
+
+ return shifted & 0x1f;
+}
+
+/* Return the 5-bit ShAmt_Y0 field, stored in bits 16..12 of num. */
+static __inline unsigned int
+get_ShAmt_Y0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 12) & 0x1f;
+}
+
+/* Return the 5-bit ShAmt_Y1 field, stored in bits 47..43 of n. */
+static __inline unsigned int
+get_ShAmt_Y1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 43);
+
+ return shifted & 0x1f;
+}
+
+/* Return the 6-bit SrcA_X0 field, stored in bits 11..6 of num. */
+static __inline unsigned int
+get_SrcA_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 6) & 0x3f;
+}
+
+/* Return the 6-bit SrcA_X1 field, stored in bits 42..37 of n. */
+static __inline unsigned int
+get_SrcA_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 37);
+
+ return shifted & 0x3f;
+}
+
+/* Return the 6-bit SrcA_Y0 field, stored in bits 11..6 of num. */
+static __inline unsigned int
+get_SrcA_Y0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 6) & 0x3f;
+}
+
+/* Return the 6-bit SrcA_Y1 field, stored in bits 42..37 of n. */
+static __inline unsigned int
+get_SrcA_Y1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 37);
+
+ return shifted & 0x3f;
+}
+
+/*
+ * Return the 6-bit SrcA_Y2 field, which is split across the bundle:
+ * result bit 0 comes from bundle bit 26 and result bits 5..1 come from
+ * bundle bits 55..51.
+ */
+static __inline unsigned int
+get_SrcA_Y2(tilepro_bundle_bits n)
+{
+ /* Narrow each piece explicitly, as the other multi-part getters do,
+    rather than relying on an implicit 64-to-32-bit conversion of the
+    whole expression on return. */
+ return (((unsigned int)(n >> 26)) & 0x00000001) |
+  (((unsigned int)(n >> 50)) & 0x0000003e);
+}
+
+/* Return the 6-bit SrcBDest_Y2 field, stored in bits 25..20 of num. */
+static __inline unsigned int
+get_SrcBDest_Y2(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 20) & 0x3f;
+}
+
+/* Return the 6-bit SrcB_X0 field, stored in bits 17..12 of num. */
+static __inline unsigned int
+get_SrcB_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 12) & 0x3f;
+}
+
+/* Return the 6-bit SrcB_X1 field, stored in bits 48..43 of n. */
+static __inline unsigned int
+get_SrcB_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 43);
+
+ return shifted & 0x3f;
+}
+
+/* Return the 6-bit SrcB_Y0 field, stored in bits 17..12 of num. */
+static __inline unsigned int
+get_SrcB_Y0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 12) & 0x3f;
+}
+
+/* Return the 6-bit SrcB_Y1 field, stored in bits 48..43 of n. */
+static __inline unsigned int
+get_SrcB_Y1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 43);
+
+ return shifted & 0x3f;
+}
+
+/* Return the 2-bit Src_SN field, stored in bits 1..0 of num. */
+static __inline unsigned int
+get_Src_SN(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return low_word & 0x3;
+}
+
+/* Return the 5-bit UnOpcodeExtension_X0 field (bits 16..12 of num). */
+static __inline unsigned int
+get_UnOpcodeExtension_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 12) & 0x1f;
+}
+
+/* Return the 5-bit UnOpcodeExtension_X1 field (bits 47..43 of n). */
+static __inline unsigned int
+get_UnOpcodeExtension_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 43);
+
+ return shifted & 0x1f;
+}
+
+/* Return the 5-bit UnOpcodeExtension_Y0 field (bits 16..12 of num). */
+static __inline unsigned int
+get_UnOpcodeExtension_Y0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 12) & 0x1f;
+}
+
+/* Return the 5-bit UnOpcodeExtension_Y1 field (bits 47..43 of n). */
+static __inline unsigned int
+get_UnOpcodeExtension_Y1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 43);
+
+ return shifted & 0x1f;
+}
+
+/* Return the 10-bit UnShOpcodeExtension_X0 field (bits 26..17 of num). */
+static __inline unsigned int
+get_UnShOpcodeExtension_X0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 17) & 0x3ff;
+}
+
+/* Return the 10-bit UnShOpcodeExtension_X1 field (bits 57..48 of n). */
+static __inline unsigned int
+get_UnShOpcodeExtension_X1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 48);
+
+ return shifted & 0x3ff;
+}
+
+/* Return the 3-bit UnShOpcodeExtension_Y0 field (bits 19..17 of num). */
+static __inline unsigned int
+get_UnShOpcodeExtension_Y0(tilepro_bundle_bits num)
+{
+ const unsigned int low_word = (unsigned int)num;
+
+ return (low_word >> 17) & 0x7;
+}
+
+/* Return the 3-bit UnShOpcodeExtension_Y1 field (bits 50..48 of n). */
+static __inline unsigned int
+get_UnShOpcodeExtension_Y1(tilepro_bundle_bits n)
+{
+ const unsigned int shifted = (unsigned int)(n >> 48);
+
+ return shifted & 0x7;
+}
+
+
+/*
+ * Sign-extend the low num_bits bits of n to a full int.
+ *
+ * Bits of n above the field are ignored; bit (num_bits - 1) is treated
+ * as the sign bit.  For example sign_extend(0xff, 8) is -1 and
+ * sign_extend(0x7f, 8) is 127.
+ *
+ * The work is done in unsigned arithmetic so that no signed value is
+ * ever shifted into or out of the sign bit, which ISO C leaves
+ * undefined (left shift) or implementation-defined (right shift).
+ * A num_bits outside 1..(width of int - 1) leaves no field to extend,
+ * so n is returned unchanged instead of shifting by an invalid count.
+ */
+static __inline int
+sign_extend(int n, int num_bits)
+{
+ const int width = (int)(sizeof(int) * 8);
+ unsigned int sign_bit;
+ unsigned int field;
+
+ if (num_bits <= 0 || num_bits >= width)
+  return n;
+
+ sign_bit = 1u << (num_bits - 1);
+ /* Keep only the num_bits-wide field. */
+ field = (unsigned int)n & ((sign_bit << 1) - 1);
+ /* Flipping the sign bit and subtracting it again maps the field
+    onto the range [-2^(num_bits-1), 2^(num_bits-1) - 1]; both
+    operands fit in an int, so the subtraction cannot overflow. */
+ return (int)(field ^ sign_bit) - (int)sign_bit;
+}
+
+
+
+/* Encode the low 10 bits of num as the BrOff_SN field (bits 9..0). */
+static __inline tilepro_bundle_bits
+create_BrOff_SN(int num)
+{
+ const unsigned int field = (unsigned int)num & 0x3ff;
+
+ return field;
+}
+
+/* Encode num as the BrOff_X1 field, which is split into two pieces. */
+static __inline tilepro_bundle_bits
+create_BrOff_X1(int num)
+{
+ const tilepro_bundle_bits v = (unsigned int)num;
+
+ /* Value bits 14..0 go to bundle bits 57..43, bits 16..15 to 36..35. */
+ return ((v & 0x00007fff) << 43) |
+  ((v & 0x00018000) << 20);
+}
+
+/* Encode the low 4 bits of num as the BrType_X1 field (bits 34..31). */
+static __inline tilepro_bundle_bits
+create_BrType_X1(int num)
+{
+ const tilepro_bundle_bits field = (unsigned int)num & 0xf;
+
+ return field << 31;
+}
+
+/* Encode num as the Dest_Imm8_X1 field, which is split into two pieces. */
+static __inline tilepro_bundle_bits
+create_Dest_Imm8_X1(int num)
+{
+ const tilepro_bundle_bits v = (unsigned int)num;
+
+ /* Value bits 5..0 go to bundle bits 36..31, bits 7..6 to 50..49. */
+ return ((v & 0x0000003f) << 31) |
+  ((v & 0x000000c0) << 43);
+}
+
+/* Encode the low 2 bits of num as the Dest_SN field (bits 3..2). */
+static __inline tilepro_bundle_bits
+create_Dest_SN(int num)
+{
+ const unsigned int field = (unsigned int)num & 0x3;
+
+ return field << 2;
+}
+
+/* Encode the low 6 bits of num as the Dest_X0 field (bits 5..0). */
+static __inline tilepro_bundle_bits
+create_Dest_X0(int num)
+{
+ const unsigned int field = (unsigned int)num & 0x3f;
+
+ return field;
+}
+
+/* Encode the low 6 bits of num as the Dest_X1 field (bits 36..31). */
+static __inline tilepro_bundle_bits
+create_Dest_X1(int num)
+{
+ const tilepro_bundle_bits field = (unsigned int)num & 0x3f;
+
+ return field << 31;
+}
+
+/* Encode the low 6 bits of num as the Dest_Y0 field (bits 5..0). */
+static __inline tilepro_bundle_bits
+create_Dest_Y0(int num)
+{
+ const unsigned int field = (unsigned int)num & 0x3f;
+
+ return field;
+}
+
+/* Encode the low 6 bits of num as the Dest_Y1 field (bits 36..31). */
+static __inline tilepro_bundle_bits
+create_Dest_Y1(int num)
+{
+ const tilepro_bundle_bits field = (unsigned int)num & 0x3f;
+
+ return field << 31;
+}
+
+/* Encode the low 16 bits of num as the Imm16_X0 field (bits 27..12). */
+static __inline tilepro_bundle_bits
+create_Imm16_X0(int num)
+{
+ const unsigned int field = (unsigned int)num & 0xffff;
+
+ return field << 12;
+}
+
+/* Encode the low 16 bits of num as the Imm16_X1 field (bits 58..43). */
+static __inline tilepro_bundle_bits
+create_Imm16_X1(int num)
+{
+ const tilepro_bundle_bits field = (unsigned int)num & 0xffff;
+
+ return field << 43;
+}
+
+/* Encode the low 8 bits of num as the Imm8_SN field (bits 7..0). */
+static __inline tilepro_bundle_bits
+create_Imm8_SN(int num)
+{
+ const unsigned int field = (unsigned int)num & 0xff;
+
+ return field;
+}
+
+/* Encode the low 8 bits of num as the Imm8_X0 field (bits 19..12). */
+static __inline tilepro_bundle_bits
+create_Imm8_X0(int num)
+{
+ const unsigned int field = (unsigned int)num & 0xff;
+
+ return field << 12;
+}
+
+/* Encode the low 8 bits of num as the Imm8_X1 field (bits 50..43). */
+static __inline tilepro_bundle_bits
+create_Imm8_X1(int num)
+{
+ const tilepro_bundle_bits field = (unsigned int)num & 0xff;
+
+ return field << 43;
+}
+
+/* Encode the low 8 bits of num as the Imm8_Y0 field (bits 19..12). */
+static __inline tilepro_bundle_bits
+create_Imm8_Y0(int num)
+{
+ const unsigned int field = (unsigned int)num & 0xff;
+
+ return field << 12;
+}
+
+/* Encode the low 8 bits of num as the Imm8_Y1 field (bits 50..43). */
+static __inline tilepro_bundle_bits
+create_Imm8_Y1(int num)
+{
+ const tilepro_bundle_bits field = (unsigned int)num & 0xff;
+
+ return field << 43;
+}
+
+/* Encode the low 7 bits of num as ImmOpcodeExtension_X0 (bits 26..20). */
+static __inline tilepro_bundle_bits
+create_ImmOpcodeExtension_X0(int num)
+{
+ const unsigned int field = (unsigned int)num & 0x7f;
+
+ return field << 20;
+}
+
+/* Encode the low 7 bits of num as ImmOpcodeExtension_X1 (bits 57..51). */
+static __inline tilepro_bundle_bits
+create_ImmOpcodeExtension_X1(int num)
+{
+ const tilepro_bundle_bits field = (unsigned int)num & 0x7f;
+
+ return field << 51;
+}
+
+/* Encode the low 2 bits of num as ImmRROpcodeExtension_SN (bits 9..8). */
+static __inline tilepro_bundle_bits
+create_ImmRROpcodeExtension_SN(int num)
+{
+ const unsigned int field = (unsigned int)num & 0x3;
+
+ return field << 8;
+}
+
+/* Encode num as the JOffLong_X1 field, which is split into five pieces. */
+static __inline tilepro_bundle_bits
+create_JOffLong_X1(int num)
+{
+ const tilepro_bundle_bits v = (unsigned int)num;
+ tilepro_bundle_bits bits;
+
+ bits = (v & 0x00007fff) << 43;  /* value [14:0]  -> bundle [57:43] */
+ bits |= (v & 0x00018000) << 20; /* value [16:15] -> bundle [36:35] */
+ bits |= (v & 0x001e0000) << 14; /* value [20:17] -> bundle [34:31] */
+ bits |= (v & 0x07e00000) << 16; /* value [26:21] -> bundle [42:37] */
+ bits |= (v & 0x18000000) << 31; /* value [28:27] -> bundle [59:58] */
+
+ return bits;
+}
+
+/* Encode num as the JOff_X1 field, which is split into five pieces. */
+static __inline tilepro_bundle_bits
+create_JOff_X1(int num)
+{
+ const tilepro_bundle_bits v = (unsigned int)num;
+ tilepro_bundle_bits bits;
+
+ bits = (v & 0x00007fff) << 43;  /* value [14:0]  -> bundle [57:43] */
+ bits |= (v & 0x00018000) << 20; /* value [16:15] -> bundle [36:35] */
+ bits |= (v & 0x001e0000) << 14; /* value [20:17] -> bundle [34:31] */
+ bits |= (v & 0x07e00000) << 16; /* value [26:21] -> bundle [42:37] */
+ bits |= (v & 0x08000000) << 31; /* value [27]    -> bundle [58] */
+
+ return bits;
+}
+
+/* Encode num as the MF_Imm15_X1 field, which is split into two pieces. */
+static __inline tilepro_bundle_bits
+create_MF_Imm15_X1(int num)
+{
+ const tilepro_bundle_bits v = (unsigned int)num;
+
+ /* Value bits 13..0 go to bundle bits 50..37, bit 14 to bit 58. */
+ return ((v & 0x00003fff) << 37) |
+  ((v & 0x00004000) << 44);
+}
+
+/* Encode the low 5 bits of num as the MMEnd_X0 field (bits 22..18). */
+static __inline tilepro_bundle_bits
+create_MMEnd_X0(int num)
+{
+ const unsigned int field = (unsigned int)num & 0x1f;
+
+ return field << 18;
+}
+
+/* Encode the low 5 bits of num as the MMEnd_X1 field (bits 53..49). */
+static __inline tilepro_bundle_bits
+create_MMEnd_X1(int num)
+{
+ const tilepro_bundle_bits field = (unsigned int)num & 0x1f;
+
+ return field << 49;
+}
+
+/* Encode the low 5 bits of num as the MMStart_X0 field (bits 27..23). */
+static __inline tilepro_bundle_bits
+create_MMStart_X0(int num)
+{
+ const unsigned int field = (unsigned int)num & 0x1f;
+
+ return field << 23;
+}
+
+/* Encode the low 5 bits of num as the MMStart_X1 field (bits 58..54). */
+static __inline tilepro_bundle_bits
+create_MMStart_X1(int num)
+{
+ const tilepro_bundle_bits field = (unsigned int)num & 0x1f;
+
+ return field << 54;
+}
+
+/* Encode num as the MT_Imm15_X1 field, which is split into three pieces. */
+static __inline tilepro_bundle_bits
+create_MT_Imm15_X1(int num)
+{
+ const tilepro_bundle_bits v = (unsigned int)num;
+ tilepro_bundle_bits bits;
+
+ bits = (v & 0x0000003f) << 31;  /* value [5:0]  -> bundle [36:31] */
+ bits |= (v & 0x00003fc0) << 37; /* value [13:6] -> bundle [50:43] */
+ bits |= (v & 0x00004000) << 44; /* value [14]   -> bundle [58] */
+
+ return bits;
+}
+
+/* Encode the low bit of num as the Mode field (bit 63). */
+static __inline tilepro_bundle_bits
+create_Mode(int num)
+{
+ const tilepro_bundle_bits field = (unsigned int)num & 0x1;
+
+ return field << 63;
+}
+
+/* Encode the low 4 bits of num as NoRegOpcodeExtension_SN (bits 3..0). */
+static __inline tilepro_bundle_bits
+create_NoRegOpcodeExtension_SN(int num)
+{
+ const unsigned int field = (unsigned int)num & 0xf;
+
+ return field;
+}
+
+/* Encode the low 6 bits of num as the Opcode_SN field (bits 15..10). */
+static __inline tilepro_bundle_bits
+create_Opcode_SN(int num)
+{
+ const unsigned int field = (unsigned int)num & 0x3f;
+
+ return field << 10;
+}
+
+/* Encode the low 3 bits of num as the Opcode_X0 field (bits 30..28). */
+static __inline tilepro_bundle_bits
+create_Opcode_X0(int num)
+{
+ const unsigned int field = (unsigned int)num & 0x7;
+
+ return field << 28;
+}
+
+/* Encode the low 4 bits of num as the Opcode_X1 field (bits 62..59). */
+static __inline tilepro_bundle_bits
+create_Opcode_X1(int num)
+{
+ const tilepro_bundle_bits field = (unsigned int)num & 0xf;
+
+ return field << 59;
+}
+
+/* Keep the low 4 bits of num and shift them left by 27. */
+static __inline tilepro_bundle_bits
+create_Opcode_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xf;
+  return field << 27;
+}
+
+/* Keep the low 4 bits of num and shift them left by 59 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_Opcode_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0xf;
+  return field << 59;
+}
+
+/* Keep the low 3 bits of num and shift them left by 56 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_Opcode_Y2(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x7;
+  return field << 56;
+}
+
+/* Keep the low 4 bits of num and shift them left by 4. */
+static __inline tilepro_bundle_bits
+create_RROpcodeExtension_SN(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xf;
+  return field << 4;
+}
+
+/* Keep the low 9 bits of num and shift them left by 18. */
+static __inline tilepro_bundle_bits
+create_RRROpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1ff;
+  return field << 18;
+}
+
+/* Keep the low 9 bits of num and shift them left by 49 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_RRROpcodeExtension_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1ff;
+  return field << 49;
+}
+
+/* Keep the low 2 bits of num and shift them left by 18. */
+static __inline tilepro_bundle_bits
+create_RRROpcodeExtension_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3;
+  return field << 18;
+}
+
+/* Keep the low 2 bits of num and shift them left by 49 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_RRROpcodeExtension_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3;
+  return field << 49;
+}
+
+/* Keep the low 10 bits of num; the field starts at bit 0, so no shift
+   is needed. */
+static __inline tilepro_bundle_bits
+create_RouteOpcodeExtension_SN(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3ff;
+  return field;
+}
+
+/* Keep the lowest bit of num and move it to bit 27. */
+static __inline tilepro_bundle_bits
+create_S_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1;
+  return field << 27;
+}
+
+/* Keep the lowest bit of num and move it to bit 58 (done in the wide
+   bundle type). */
+static __inline tilepro_bundle_bits
+create_S_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1;
+  return field << 58;
+}
+
+/* Keep the low 5 bits of num and shift them left by 12. */
+static __inline tilepro_bundle_bits
+create_ShAmt_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1f;
+  return field << 12;
+}
+
+/* Keep the low 5 bits of num and shift them left by 43 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_ShAmt_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1f;
+  return field << 43;
+}
+
+/* Keep the low 5 bits of num and shift them left by 12. */
+static __inline tilepro_bundle_bits
+create_ShAmt_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1f;
+  return field << 12;
+}
+
+/* Keep the low 5 bits of num and shift them left by 43 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_ShAmt_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1f;
+  return field << 43;
+}
+
+/* Keep the low 6 bits of num and shift them left by 6. */
+static __inline tilepro_bundle_bits
+create_SrcA_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 6;
+}
+
+/* Keep the low 6 bits of num and shift them left by 37 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_SrcA_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 37;
+}
+
+/* Keep the low 6 bits of num and shift them left by 6. */
+static __inline tilepro_bundle_bits
+create_SrcA_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 6;
+}
+
+/* Keep the low 6 bits of num and shift them left by 37 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_SrcA_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 37;
+}
+
+/* Split a 6-bit value in two: bit 0 is shifted left by 26 and
+   bits 1..5 are shifted left by 50. */
+static __inline tilepro_bundle_bits
+create_SrcA_Y2(int num)
+{
+  const unsigned int reg = (unsigned int)num;
+  const unsigned int bit0 = reg & 0x00000001;
+  const tilepro_bundle_bits upper5 = reg & 0x0000003e;
+  return (bit0 << 26) | (upper5 << 50);
+}
+
+/* Keep the low 6 bits of num and shift them left by 20. */
+static __inline tilepro_bundle_bits
+create_SrcBDest_Y2(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 20;
+}
+
+/* Keep the low 6 bits of num and shift them left by 12. */
+static __inline tilepro_bundle_bits
+create_SrcB_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 12;
+}
+
+/* Keep the low 6 bits of num and shift them left by 43 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_SrcB_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+/* Keep the low 6 bits of num and shift them left by 12. */
+static __inline tilepro_bundle_bits
+create_SrcB_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 12;
+}
+
+/* Keep the low 6 bits of num and shift them left by 43 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_SrcB_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+/* Keep the low 2 bits of num; the field starts at bit 0, so no shift
+   is needed. */
+static __inline tilepro_bundle_bits
+create_Src_SN(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3;
+  return field;
+}
+
+/* Keep the low 5 bits of num and shift them left by 12. */
+static __inline tilepro_bundle_bits
+create_UnOpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1f;
+  return field << 12;
+}
+
+/* Keep the low 5 bits of num and shift them left by 43 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_UnOpcodeExtension_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1f;
+  return field << 43;
+}
+
+/* Keep the low 5 bits of num and shift them left by 12. */
+static __inline tilepro_bundle_bits
+create_UnOpcodeExtension_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1f;
+  return field << 12;
+}
+
+/* Keep the low 5 bits of num and shift them left by 43 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_UnOpcodeExtension_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1f;
+  return field << 43;
+}
+
+/* Keep the low 10 bits of num and shift them left by 17. */
+static __inline tilepro_bundle_bits
+create_UnShOpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3ff;
+  return field << 17;
+}
+
+/* Keep the low 10 bits of num and shift them left by 48 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_UnShOpcodeExtension_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3ff;
+  return field << 48;
+}
+
+/* Keep the low 3 bits of num and shift them left by 17. */
+static __inline tilepro_bundle_bits
+create_UnShOpcodeExtension_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x7;
+  return field << 17;
+}
+
+/* Keep the low 3 bits of num and shift them left by 48 (done in the
+   wide bundle type). */
+static __inline tilepro_bundle_bits
+create_UnShOpcodeExtension_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x7;
+  return field << 48;
+}
+
+
+enum /* Numeric opcode and opcode-extension values.  Every name ends in
+ * _X0, _X1, _Y0, _Y1, _Y2 or _SN -- presumably the slot the value is
+ * encoded for, matching the create_Opcode_* helpers (confirm).  Values
+ * are only unique among names with the same middle part and suffix. */
+{
+ ADDBS_U_SPECIAL_0_OPCODE_X0 = 98,
+ ADDBS_U_SPECIAL_0_OPCODE_X1 = 68,
+ ADDB_SPECIAL_0_OPCODE_X0 = 1,
+ ADDB_SPECIAL_0_OPCODE_X1 = 1,
+ ADDHS_SPECIAL_0_OPCODE_X0 = 99,
+ ADDHS_SPECIAL_0_OPCODE_X1 = 69,
+ ADDH_SPECIAL_0_OPCODE_X0 = 2,
+ ADDH_SPECIAL_0_OPCODE_X1 = 2,
+ ADDIB_IMM_0_OPCODE_X0 = 1,
+ ADDIB_IMM_0_OPCODE_X1 = 1,
+ ADDIH_IMM_0_OPCODE_X0 = 2,
+ ADDIH_IMM_0_OPCODE_X1 = 2,
+ ADDI_IMM_0_OPCODE_X0 = 3,
+ ADDI_IMM_0_OPCODE_X1 = 3,
+ ADDI_IMM_1_OPCODE_SN = 1,
+ ADDI_OPCODE_Y0 = 9,
+ ADDI_OPCODE_Y1 = 7,
+ ADDLIS_OPCODE_X0 = 1,
+ ADDLIS_OPCODE_X1 = 2,
+ ADDLI_OPCODE_X0 = 2,
+ ADDLI_OPCODE_X1 = 3,
+ ADDS_SPECIAL_0_OPCODE_X0 = 96,
+ ADDS_SPECIAL_0_OPCODE_X1 = 66,
+ ADD_SPECIAL_0_OPCODE_X0 = 3,
+ ADD_SPECIAL_0_OPCODE_X1 = 3,
+ ADD_SPECIAL_0_OPCODE_Y0 = 0,
+ ADD_SPECIAL_0_OPCODE_Y1 = 0,
+ ADIFFB_U_SPECIAL_0_OPCODE_X0 = 4,
+ ADIFFH_SPECIAL_0_OPCODE_X0 = 5,
+ ANDI_IMM_0_OPCODE_X0 = 1,
+ ANDI_IMM_0_OPCODE_X1 = 4,
+ ANDI_OPCODE_Y0 = 10,
+ ANDI_OPCODE_Y1 = 8,
+ AND_SPECIAL_0_OPCODE_X0 = 6,
+ AND_SPECIAL_0_OPCODE_X1 = 4,
+ AND_SPECIAL_2_OPCODE_Y0 = 0,
+ AND_SPECIAL_2_OPCODE_Y1 = 0,
+ AULI_OPCODE_X0 = 3,
+ AULI_OPCODE_X1 = 4,
+ AVGB_U_SPECIAL_0_OPCODE_X0 = 7,
+ AVGH_SPECIAL_0_OPCODE_X0 = 8,
+ BBNST_BRANCH_OPCODE_X1 = 15,
+ BBNS_BRANCH_OPCODE_X1 = 14,
+ BBNS_OPCODE_SN = 63,
+ BBST_BRANCH_OPCODE_X1 = 13,
+ BBS_BRANCH_OPCODE_X1 = 12,
+ BBS_OPCODE_SN = 62,
+ BGEZT_BRANCH_OPCODE_X1 = 7,
+ BGEZ_BRANCH_OPCODE_X1 = 6,
+ BGEZ_OPCODE_SN = 61,
+ BGZT_BRANCH_OPCODE_X1 = 5,
+ BGZ_BRANCH_OPCODE_X1 = 4,
+ BGZ_OPCODE_SN = 58,
+ BITX_UN_0_SHUN_0_OPCODE_X0 = 1,
+ BITX_UN_0_SHUN_0_OPCODE_Y0 = 1,
+ BLEZT_BRANCH_OPCODE_X1 = 11,
+ BLEZ_BRANCH_OPCODE_X1 = 10,
+ BLEZ_OPCODE_SN = 59,
+ BLZT_BRANCH_OPCODE_X1 = 9,
+ BLZ_BRANCH_OPCODE_X1 = 8,
+ BLZ_OPCODE_SN = 60,
+ BNZT_BRANCH_OPCODE_X1 = 3,
+ BNZ_BRANCH_OPCODE_X1 = 2,
+ BNZ_OPCODE_SN = 57,
+ BPT_NOREG_RR_IMM_0_OPCODE_SN = 1,
+ BRANCH_OPCODE_X1 = 5,
+ BYTEX_UN_0_SHUN_0_OPCODE_X0 = 2,
+ BYTEX_UN_0_SHUN_0_OPCODE_Y0 = 2,
+ BZT_BRANCH_OPCODE_X1 = 1,
+ BZ_BRANCH_OPCODE_X1 = 0,
+ BZ_OPCODE_SN = 56,
+ CLZ_UN_0_SHUN_0_OPCODE_X0 = 3,
+ CLZ_UN_0_SHUN_0_OPCODE_Y0 = 3,
+ CRC32_32_SPECIAL_0_OPCODE_X0 = 9,
+ CRC32_8_SPECIAL_0_OPCODE_X0 = 10,
+ CTZ_UN_0_SHUN_0_OPCODE_X0 = 4,
+ CTZ_UN_0_SHUN_0_OPCODE_Y0 = 4,
+ DRAIN_UN_0_SHUN_0_OPCODE_X1 = 1,
+ DTLBPR_UN_0_SHUN_0_OPCODE_X1 = 2,
+ DWORD_ALIGN_SPECIAL_0_OPCODE_X0 = 95,
+ FINV_UN_0_SHUN_0_OPCODE_X1 = 3,
+ FLUSH_UN_0_SHUN_0_OPCODE_X1 = 4,
+ FNOP_NOREG_RR_IMM_0_OPCODE_SN = 3,
+ FNOP_UN_0_SHUN_0_OPCODE_X0 = 5,
+ FNOP_UN_0_SHUN_0_OPCODE_X1 = 5,
+ FNOP_UN_0_SHUN_0_OPCODE_Y0 = 5,
+ FNOP_UN_0_SHUN_0_OPCODE_Y1 = 1,
+ HALT_NOREG_RR_IMM_0_OPCODE_SN = 0,
+ ICOH_UN_0_SHUN_0_OPCODE_X1 = 6,
+ ILL_UN_0_SHUN_0_OPCODE_X1 = 7,
+ ILL_UN_0_SHUN_0_OPCODE_Y1 = 2,
+ IMM_0_OPCODE_SN = 0,
+ IMM_0_OPCODE_X0 = 4,
+ IMM_0_OPCODE_X1 = 6,
+ IMM_1_OPCODE_SN = 1,
+ IMM_OPCODE_0_X0 = 5,
+ INTHB_SPECIAL_0_OPCODE_X0 = 11,
+ INTHB_SPECIAL_0_OPCODE_X1 = 5,
+ INTHH_SPECIAL_0_OPCODE_X0 = 12,
+ INTHH_SPECIAL_0_OPCODE_X1 = 6,
+ INTLB_SPECIAL_0_OPCODE_X0 = 13,
+ INTLB_SPECIAL_0_OPCODE_X1 = 7,
+ INTLH_SPECIAL_0_OPCODE_X0 = 14,
+ INTLH_SPECIAL_0_OPCODE_X1 = 8,
+ INV_UN_0_SHUN_0_OPCODE_X1 = 8,
+ IRET_UN_0_SHUN_0_OPCODE_X1 = 9,
+ JALB_OPCODE_X1 = 13,
+ JALF_OPCODE_X1 = 12,
+ JALRP_SPECIAL_0_OPCODE_X1 = 9,
+ JALRR_IMM_1_OPCODE_SN = 3,
+ JALR_RR_IMM_0_OPCODE_SN = 5,
+ JALR_SPECIAL_0_OPCODE_X1 = 10,
+ JB_OPCODE_X1 = 11,
+ JF_OPCODE_X1 = 10,
+ JRP_SPECIAL_0_OPCODE_X1 = 11,
+ JRR_IMM_1_OPCODE_SN = 2,
+ JR_RR_IMM_0_OPCODE_SN = 4,
+ JR_SPECIAL_0_OPCODE_X1 = 12,
+ LBADD_IMM_0_OPCODE_X1 = 22,
+ LBADD_U_IMM_0_OPCODE_X1 = 23,
+ LB_OPCODE_Y2 = 0,
+ LB_UN_0_SHUN_0_OPCODE_X1 = 10,
+ LB_U_OPCODE_Y2 = 1,
+ LB_U_UN_0_SHUN_0_OPCODE_X1 = 11,
+ LHADD_IMM_0_OPCODE_X1 = 24,
+ LHADD_U_IMM_0_OPCODE_X1 = 25,
+ LH_OPCODE_Y2 = 2,
+ LH_UN_0_SHUN_0_OPCODE_X1 = 12,
+ LH_U_OPCODE_Y2 = 3,
+ LH_U_UN_0_SHUN_0_OPCODE_X1 = 13,
+ LNK_SPECIAL_0_OPCODE_X1 = 13,
+ LWADD_IMM_0_OPCODE_X1 = 26,
+ LWADD_NA_IMM_0_OPCODE_X1 = 27,
+ LW_NA_UN_0_SHUN_0_OPCODE_X1 = 24,
+ LW_OPCODE_Y2 = 4,
+ LW_UN_0_SHUN_0_OPCODE_X1 = 14,
+ MAXB_U_SPECIAL_0_OPCODE_X0 = 15,
+ MAXB_U_SPECIAL_0_OPCODE_X1 = 14,
+ MAXH_SPECIAL_0_OPCODE_X0 = 16,
+ MAXH_SPECIAL_0_OPCODE_X1 = 15,
+ MAXIB_U_IMM_0_OPCODE_X0 = 4,
+ MAXIB_U_IMM_0_OPCODE_X1 = 5,
+ MAXIH_IMM_0_OPCODE_X0 = 5,
+ MAXIH_IMM_0_OPCODE_X1 = 6,
+ MFSPR_IMM_0_OPCODE_X1 = 7,
+ MF_UN_0_SHUN_0_OPCODE_X1 = 15,
+ MINB_U_SPECIAL_0_OPCODE_X0 = 17,
+ MINB_U_SPECIAL_0_OPCODE_X1 = 16,
+ MINH_SPECIAL_0_OPCODE_X0 = 18,
+ MINH_SPECIAL_0_OPCODE_X1 = 17,
+ MINIB_U_IMM_0_OPCODE_X0 = 6,
+ MINIB_U_IMM_0_OPCODE_X1 = 8,
+ MINIH_IMM_0_OPCODE_X0 = 7,
+ MINIH_IMM_0_OPCODE_X1 = 9,
+ MM_OPCODE_X0 = 6,
+ MM_OPCODE_X1 = 7,
+ MNZB_SPECIAL_0_OPCODE_X0 = 19,
+ MNZB_SPECIAL_0_OPCODE_X1 = 18,
+ MNZH_SPECIAL_0_OPCODE_X0 = 20,
+ MNZH_SPECIAL_0_OPCODE_X1 = 19,
+ MNZ_SPECIAL_0_OPCODE_X0 = 21,
+ MNZ_SPECIAL_0_OPCODE_X1 = 20,
+ MNZ_SPECIAL_1_OPCODE_Y0 = 0,
+ MNZ_SPECIAL_1_OPCODE_Y1 = 1,
+ MOVEI_IMM_1_OPCODE_SN = 0,
+ MOVE_RR_IMM_0_OPCODE_SN = 8,
+ MTSPR_IMM_0_OPCODE_X1 = 10,
+ MULHHA_SS_SPECIAL_0_OPCODE_X0 = 22,
+ MULHHA_SS_SPECIAL_7_OPCODE_Y0 = 0,
+ MULHHA_SU_SPECIAL_0_OPCODE_X0 = 23,
+ MULHHA_UU_SPECIAL_0_OPCODE_X0 = 24,
+ MULHHA_UU_SPECIAL_7_OPCODE_Y0 = 1,
+ MULHHSA_UU_SPECIAL_0_OPCODE_X0 = 25,
+ MULHH_SS_SPECIAL_0_OPCODE_X0 = 26,
+ MULHH_SS_SPECIAL_6_OPCODE_Y0 = 0,
+ MULHH_SU_SPECIAL_0_OPCODE_X0 = 27,
+ MULHH_UU_SPECIAL_0_OPCODE_X0 = 28,
+ MULHH_UU_SPECIAL_6_OPCODE_Y0 = 1,
+ MULHLA_SS_SPECIAL_0_OPCODE_X0 = 29,
+ MULHLA_SU_SPECIAL_0_OPCODE_X0 = 30,
+ MULHLA_US_SPECIAL_0_OPCODE_X0 = 31,
+ MULHLA_UU_SPECIAL_0_OPCODE_X0 = 32,
+ MULHLSA_UU_SPECIAL_0_OPCODE_X0 = 33,
+ MULHLSA_UU_SPECIAL_5_OPCODE_Y0 = 0,
+ MULHL_SS_SPECIAL_0_OPCODE_X0 = 34,
+ MULHL_SU_SPECIAL_0_OPCODE_X0 = 35,
+ MULHL_US_SPECIAL_0_OPCODE_X0 = 36,
+ MULHL_UU_SPECIAL_0_OPCODE_X0 = 37,
+ MULLLA_SS_SPECIAL_0_OPCODE_X0 = 38,
+ MULLLA_SS_SPECIAL_7_OPCODE_Y0 = 2,
+ MULLLA_SU_SPECIAL_0_OPCODE_X0 = 39,
+ MULLLA_UU_SPECIAL_0_OPCODE_X0 = 40,
+ MULLLA_UU_SPECIAL_7_OPCODE_Y0 = 3,
+ MULLLSA_UU_SPECIAL_0_OPCODE_X0 = 41,
+ MULLL_SS_SPECIAL_0_OPCODE_X0 = 42,
+ MULLL_SS_SPECIAL_6_OPCODE_Y0 = 2,
+ MULLL_SU_SPECIAL_0_OPCODE_X0 = 43,
+ MULLL_UU_SPECIAL_0_OPCODE_X0 = 44,
+ MULLL_UU_SPECIAL_6_OPCODE_Y0 = 3,
+ MVNZ_SPECIAL_0_OPCODE_X0 = 45,
+ MVNZ_SPECIAL_1_OPCODE_Y0 = 1,
+ MVZ_SPECIAL_0_OPCODE_X0 = 46,
+ MVZ_SPECIAL_1_OPCODE_Y0 = 2,
+ MZB_SPECIAL_0_OPCODE_X0 = 47,
+ MZB_SPECIAL_0_OPCODE_X1 = 21,
+ MZH_SPECIAL_0_OPCODE_X0 = 48,
+ MZH_SPECIAL_0_OPCODE_X1 = 22,
+ MZ_SPECIAL_0_OPCODE_X0 = 49,
+ MZ_SPECIAL_0_OPCODE_X1 = 23,
+ MZ_SPECIAL_1_OPCODE_Y0 = 3,
+ MZ_SPECIAL_1_OPCODE_Y1 = 2,
+ NAP_UN_0_SHUN_0_OPCODE_X1 = 16,
+ NOP_NOREG_RR_IMM_0_OPCODE_SN = 2,
+ NOP_UN_0_SHUN_0_OPCODE_X0 = 6,
+ NOP_UN_0_SHUN_0_OPCODE_X1 = 17,
+ NOP_UN_0_SHUN_0_OPCODE_Y0 = 6,
+ NOP_UN_0_SHUN_0_OPCODE_Y1 = 3,
+ NOREG_RR_IMM_0_OPCODE_SN = 0,
+ NOR_SPECIAL_0_OPCODE_X0 = 50,
+ NOR_SPECIAL_0_OPCODE_X1 = 24,
+ NOR_SPECIAL_2_OPCODE_Y0 = 1,
+ NOR_SPECIAL_2_OPCODE_Y1 = 1,
+ ORI_IMM_0_OPCODE_X0 = 8,
+ ORI_IMM_0_OPCODE_X1 = 11,
+ ORI_OPCODE_Y0 = 11,
+ ORI_OPCODE_Y1 = 9,
+ OR_SPECIAL_0_OPCODE_X0 = 51,
+ OR_SPECIAL_0_OPCODE_X1 = 25,
+ OR_SPECIAL_2_OPCODE_Y0 = 2,
+ OR_SPECIAL_2_OPCODE_Y1 = 2,
+ PACKBS_U_SPECIAL_0_OPCODE_X0 = 103,
+ PACKBS_U_SPECIAL_0_OPCODE_X1 = 73,
+ PACKHB_SPECIAL_0_OPCODE_X0 = 52,
+ PACKHB_SPECIAL_0_OPCODE_X1 = 26,
+ PACKHS_SPECIAL_0_OPCODE_X0 = 102,
+ PACKHS_SPECIAL_0_OPCODE_X1 = 72,
+ PACKLB_SPECIAL_0_OPCODE_X0 = 53,
+ PACKLB_SPECIAL_0_OPCODE_X1 = 27,
+ PCNT_UN_0_SHUN_0_OPCODE_X0 = 7,
+ PCNT_UN_0_SHUN_0_OPCODE_Y0 = 7,
+ RLI_SHUN_0_OPCODE_X0 = 1,
+ RLI_SHUN_0_OPCODE_X1 = 1,
+ RLI_SHUN_0_OPCODE_Y0 = 1,
+ RLI_SHUN_0_OPCODE_Y1 = 1,
+ RL_SPECIAL_0_OPCODE_X0 = 54,
+ RL_SPECIAL_0_OPCODE_X1 = 28,
+ RL_SPECIAL_3_OPCODE_Y0 = 0,
+ RL_SPECIAL_3_OPCODE_Y1 = 0,
+ RR_IMM_0_OPCODE_SN = 0,
+ S1A_SPECIAL_0_OPCODE_X0 = 55,
+ S1A_SPECIAL_0_OPCODE_X1 = 29,
+ S1A_SPECIAL_0_OPCODE_Y0 = 1,
+ S1A_SPECIAL_0_OPCODE_Y1 = 1,
+ S2A_SPECIAL_0_OPCODE_X0 = 56,
+ S2A_SPECIAL_0_OPCODE_X1 = 30,
+ S2A_SPECIAL_0_OPCODE_Y0 = 2,
+ S2A_SPECIAL_0_OPCODE_Y1 = 2,
+ S3A_SPECIAL_0_OPCODE_X0 = 57,
+ S3A_SPECIAL_0_OPCODE_X1 = 31,
+ S3A_SPECIAL_5_OPCODE_Y0 = 1,
+ S3A_SPECIAL_5_OPCODE_Y1 = 1,
+ SADAB_U_SPECIAL_0_OPCODE_X0 = 58,
+ SADAH_SPECIAL_0_OPCODE_X0 = 59,
+ SADAH_U_SPECIAL_0_OPCODE_X0 = 60,
+ SADB_U_SPECIAL_0_OPCODE_X0 = 61,
+ SADH_SPECIAL_0_OPCODE_X0 = 62,
+ SADH_U_SPECIAL_0_OPCODE_X0 = 63,
+ SBADD_IMM_0_OPCODE_X1 = 28,
+ SB_OPCODE_Y2 = 5,
+ SB_SPECIAL_0_OPCODE_X1 = 32,
+ SEQB_SPECIAL_0_OPCODE_X0 = 64,
+ SEQB_SPECIAL_0_OPCODE_X1 = 33,
+ SEQH_SPECIAL_0_OPCODE_X0 = 65,
+ SEQH_SPECIAL_0_OPCODE_X1 = 34,
+ SEQIB_IMM_0_OPCODE_X0 = 9,
+ SEQIB_IMM_0_OPCODE_X1 = 12,
+ SEQIH_IMM_0_OPCODE_X0 = 10,
+ SEQIH_IMM_0_OPCODE_X1 = 13,
+ SEQI_IMM_0_OPCODE_X0 = 11,
+ SEQI_IMM_0_OPCODE_X1 = 14,
+ SEQI_OPCODE_Y0 = 12,
+ SEQI_OPCODE_Y1 = 10,
+ SEQ_SPECIAL_0_OPCODE_X0 = 66,
+ SEQ_SPECIAL_0_OPCODE_X1 = 35,
+ SEQ_SPECIAL_5_OPCODE_Y0 = 2,
+ SEQ_SPECIAL_5_OPCODE_Y1 = 2,
+ SHADD_IMM_0_OPCODE_X1 = 29,
+ SHL8II_IMM_0_OPCODE_SN = 3,
+ SHLB_SPECIAL_0_OPCODE_X0 = 67,
+ SHLB_SPECIAL_0_OPCODE_X1 = 36,
+ SHLH_SPECIAL_0_OPCODE_X0 = 68,
+ SHLH_SPECIAL_0_OPCODE_X1 = 37,
+ SHLIB_SHUN_0_OPCODE_X0 = 2,
+ SHLIB_SHUN_0_OPCODE_X1 = 2,
+ SHLIH_SHUN_0_OPCODE_X0 = 3,
+ SHLIH_SHUN_0_OPCODE_X1 = 3,
+ SHLI_SHUN_0_OPCODE_X0 = 4,
+ SHLI_SHUN_0_OPCODE_X1 = 4,
+ SHLI_SHUN_0_OPCODE_Y0 = 2,
+ SHLI_SHUN_0_OPCODE_Y1 = 2,
+ SHL_SPECIAL_0_OPCODE_X0 = 69,
+ SHL_SPECIAL_0_OPCODE_X1 = 38,
+ SHL_SPECIAL_3_OPCODE_Y0 = 1,
+ SHL_SPECIAL_3_OPCODE_Y1 = 1,
+ SHR1_RR_IMM_0_OPCODE_SN = 9,
+ SHRB_SPECIAL_0_OPCODE_X0 = 70,
+ SHRB_SPECIAL_0_OPCODE_X1 = 39,
+ SHRH_SPECIAL_0_OPCODE_X0 = 71,
+ SHRH_SPECIAL_0_OPCODE_X1 = 40,
+ SHRIB_SHUN_0_OPCODE_X0 = 5,
+ SHRIB_SHUN_0_OPCODE_X1 = 5,
+ SHRIH_SHUN_0_OPCODE_X0 = 6,
+ SHRIH_SHUN_0_OPCODE_X1 = 6,
+ SHRI_SHUN_0_OPCODE_X0 = 7,
+ SHRI_SHUN_0_OPCODE_X1 = 7,
+ SHRI_SHUN_0_OPCODE_Y0 = 3,
+ SHRI_SHUN_0_OPCODE_Y1 = 3,
+ SHR_SPECIAL_0_OPCODE_X0 = 72,
+ SHR_SPECIAL_0_OPCODE_X1 = 41,
+ SHR_SPECIAL_3_OPCODE_Y0 = 2,
+ SHR_SPECIAL_3_OPCODE_Y1 = 2,
+ SHUN_0_OPCODE_X0 = 7,
+ SHUN_0_OPCODE_X1 = 8,
+ SHUN_0_OPCODE_Y0 = 13,
+ SHUN_0_OPCODE_Y1 = 11,
+ SH_OPCODE_Y2 = 6,
+ SH_SPECIAL_0_OPCODE_X1 = 42,
+ SLTB_SPECIAL_0_OPCODE_X0 = 73,
+ SLTB_SPECIAL_0_OPCODE_X1 = 43,
+ SLTB_U_SPECIAL_0_OPCODE_X0 = 74,
+ SLTB_U_SPECIAL_0_OPCODE_X1 = 44,
+ SLTEB_SPECIAL_0_OPCODE_X0 = 75,
+ SLTEB_SPECIAL_0_OPCODE_X1 = 45,
+ SLTEB_U_SPECIAL_0_OPCODE_X0 = 76,
+ SLTEB_U_SPECIAL_0_OPCODE_X1 = 46,
+ SLTEH_SPECIAL_0_OPCODE_X0 = 77,
+ SLTEH_SPECIAL_0_OPCODE_X1 = 47,
+ SLTEH_U_SPECIAL_0_OPCODE_X0 = 78,
+ SLTEH_U_SPECIAL_0_OPCODE_X1 = 48,
+ SLTE_SPECIAL_0_OPCODE_X0 = 79,
+ SLTE_SPECIAL_0_OPCODE_X1 = 49,
+ SLTE_SPECIAL_4_OPCODE_Y0 = 0,
+ SLTE_SPECIAL_4_OPCODE_Y1 = 0,
+ SLTE_U_SPECIAL_0_OPCODE_X0 = 80,
+ SLTE_U_SPECIAL_0_OPCODE_X1 = 50,
+ SLTE_U_SPECIAL_4_OPCODE_Y0 = 1,
+ SLTE_U_SPECIAL_4_OPCODE_Y1 = 1,
+ SLTH_SPECIAL_0_OPCODE_X0 = 81,
+ SLTH_SPECIAL_0_OPCODE_X1 = 51,
+ SLTH_U_SPECIAL_0_OPCODE_X0 = 82,
+ SLTH_U_SPECIAL_0_OPCODE_X1 = 52,
+ SLTIB_IMM_0_OPCODE_X0 = 12,
+ SLTIB_IMM_0_OPCODE_X1 = 15,
+ SLTIB_U_IMM_0_OPCODE_X0 = 13,
+ SLTIB_U_IMM_0_OPCODE_X1 = 16,
+ SLTIH_IMM_0_OPCODE_X0 = 14,
+ SLTIH_IMM_0_OPCODE_X1 = 17,
+ SLTIH_U_IMM_0_OPCODE_X0 = 15,
+ SLTIH_U_IMM_0_OPCODE_X1 = 18,
+ SLTI_IMM_0_OPCODE_X0 = 16,
+ SLTI_IMM_0_OPCODE_X1 = 19,
+ SLTI_OPCODE_Y0 = 14,
+ SLTI_OPCODE_Y1 = 12,
+ SLTI_U_IMM_0_OPCODE_X0 = 17,
+ SLTI_U_IMM_0_OPCODE_X1 = 20,
+ SLTI_U_OPCODE_Y0 = 15,
+ SLTI_U_OPCODE_Y1 = 13,
+ SLT_SPECIAL_0_OPCODE_X0 = 83,
+ SLT_SPECIAL_0_OPCODE_X1 = 53,
+ SLT_SPECIAL_4_OPCODE_Y0 = 2,
+ SLT_SPECIAL_4_OPCODE_Y1 = 2,
+ SLT_U_SPECIAL_0_OPCODE_X0 = 84,
+ SLT_U_SPECIAL_0_OPCODE_X1 = 54,
+ SLT_U_SPECIAL_4_OPCODE_Y0 = 3,
+ SLT_U_SPECIAL_4_OPCODE_Y1 = 3,
+ SNEB_SPECIAL_0_OPCODE_X0 = 85,
+ SNEB_SPECIAL_0_OPCODE_X1 = 55,
+ SNEH_SPECIAL_0_OPCODE_X0 = 86,
+ SNEH_SPECIAL_0_OPCODE_X1 = 56,
+ SNE_SPECIAL_0_OPCODE_X0 = 87,
+ SNE_SPECIAL_0_OPCODE_X1 = 57,
+ SNE_SPECIAL_5_OPCODE_Y0 = 3,
+ SNE_SPECIAL_5_OPCODE_Y1 = 3,
+ SPECIAL_0_OPCODE_X0 = 0,
+ SPECIAL_0_OPCODE_X1 = 1,
+ SPECIAL_0_OPCODE_Y0 = 1,
+ SPECIAL_0_OPCODE_Y1 = 1,
+ SPECIAL_1_OPCODE_Y0 = 2,
+ SPECIAL_1_OPCODE_Y1 = 2,
+ SPECIAL_2_OPCODE_Y0 = 3,
+ SPECIAL_2_OPCODE_Y1 = 3,
+ SPECIAL_3_OPCODE_Y0 = 4,
+ SPECIAL_3_OPCODE_Y1 = 4,
+ SPECIAL_4_OPCODE_Y0 = 5,
+ SPECIAL_4_OPCODE_Y1 = 5,
+ SPECIAL_5_OPCODE_Y0 = 6,
+ SPECIAL_5_OPCODE_Y1 = 6,
+ SPECIAL_6_OPCODE_Y0 = 7,
+ SPECIAL_7_OPCODE_Y0 = 8,
+ SRAB_SPECIAL_0_OPCODE_X0 = 88,
+ SRAB_SPECIAL_0_OPCODE_X1 = 58,
+ SRAH_SPECIAL_0_OPCODE_X0 = 89,
+ SRAH_SPECIAL_0_OPCODE_X1 = 59,
+ SRAIB_SHUN_0_OPCODE_X0 = 8,
+ SRAIB_SHUN_0_OPCODE_X1 = 8,
+ SRAIH_SHUN_0_OPCODE_X0 = 9,
+ SRAIH_SHUN_0_OPCODE_X1 = 9,
+ SRAI_SHUN_0_OPCODE_X0 = 10,
+ SRAI_SHUN_0_OPCODE_X1 = 10,
+ SRAI_SHUN_0_OPCODE_Y0 = 4,
+ SRAI_SHUN_0_OPCODE_Y1 = 4,
+ SRA_SPECIAL_0_OPCODE_X0 = 90,
+ SRA_SPECIAL_0_OPCODE_X1 = 60,
+ SRA_SPECIAL_3_OPCODE_Y0 = 3,
+ SRA_SPECIAL_3_OPCODE_Y1 = 3,
+ SUBBS_U_SPECIAL_0_OPCODE_X0 = 100,
+ SUBBS_U_SPECIAL_0_OPCODE_X1 = 70,
+ SUBB_SPECIAL_0_OPCODE_X0 = 91,
+ SUBB_SPECIAL_0_OPCODE_X1 = 61,
+ SUBHS_SPECIAL_0_OPCODE_X0 = 101,
+ SUBHS_SPECIAL_0_OPCODE_X1 = 71,
+ SUBH_SPECIAL_0_OPCODE_X0 = 92,
+ SUBH_SPECIAL_0_OPCODE_X1 = 62,
+ SUBS_SPECIAL_0_OPCODE_X0 = 97,
+ SUBS_SPECIAL_0_OPCODE_X1 = 67,
+ SUB_SPECIAL_0_OPCODE_X0 = 93,
+ SUB_SPECIAL_0_OPCODE_X1 = 63,
+ SUB_SPECIAL_0_OPCODE_Y0 = 3,
+ SUB_SPECIAL_0_OPCODE_Y1 = 3,
+ SWADD_IMM_0_OPCODE_X1 = 30,
+ SWINT0_UN_0_SHUN_0_OPCODE_X1 = 18,
+ SWINT1_UN_0_SHUN_0_OPCODE_X1 = 19,
+ SWINT2_UN_0_SHUN_0_OPCODE_X1 = 20,
+ SWINT3_UN_0_SHUN_0_OPCODE_X1 = 21,
+ SW_OPCODE_Y2 = 7,
+ SW_SPECIAL_0_OPCODE_X1 = 64,
+ TBLIDXB0_UN_0_SHUN_0_OPCODE_X0 = 8,
+ TBLIDXB0_UN_0_SHUN_0_OPCODE_Y0 = 8,
+ TBLIDXB1_UN_0_SHUN_0_OPCODE_X0 = 9,
+ TBLIDXB1_UN_0_SHUN_0_OPCODE_Y0 = 9,
+ TBLIDXB2_UN_0_SHUN_0_OPCODE_X0 = 10,
+ TBLIDXB2_UN_0_SHUN_0_OPCODE_Y0 = 10,
+ TBLIDXB3_UN_0_SHUN_0_OPCODE_X0 = 11,
+ TBLIDXB3_UN_0_SHUN_0_OPCODE_Y0 = 11,
+ TNS_UN_0_SHUN_0_OPCODE_X1 = 22,
+ UN_0_SHUN_0_OPCODE_X0 = 11,
+ UN_0_SHUN_0_OPCODE_X1 = 11,
+ UN_0_SHUN_0_OPCODE_Y0 = 5,
+ UN_0_SHUN_0_OPCODE_Y1 = 5,
+ WH64_UN_0_SHUN_0_OPCODE_X1 = 23,
+ XORI_IMM_0_OPCODE_X0 = 2,
+ XORI_IMM_0_OPCODE_X1 = 21,
+ XOR_SPECIAL_0_OPCODE_X0 = 94,
+ XOR_SPECIAL_0_OPCODE_X1 = 65,
+ XOR_SPECIAL_2_OPCODE_Y0 = 3,
+ XOR_SPECIAL_2_OPCODE_Y1 = 3
+};
+
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ARCH_OPCODE_H__ */
diff --git a/arch/tile/include/arch/sim.h b/arch/tile/include/arch/sim.h
new file mode 100644
index 00000000..e54b7b05
--- /dev/null
+++ b/arch/tile/include/arch/sim.h
@@ -0,0 +1,643 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file
+ *
+ * Provides an API for controlling the simulator at runtime.
+ */
+
+/**
+ * @addtogroup arch_sim
+ * @{
+ *
+ * An API for controlling the simulator at runtime.
+ *
+ * The simulator's behavior can be modified while it is running.
+ * For example, human-readable trace output can be enabled and disabled
+ * around code of interest.
+ *
+ * There are two ways to modify simulator behavior:
+ * programmatically, by calling various sim_* functions, and
+ * interactively, by entering commands like "sim set functional true"
+ * at the tile-monitor prompt. Typing "sim help" at that prompt provides
+ * a list of interactive commands.
+ *
+ * All interactive commands can also be executed programmatically by
+ * passing a string to the sim_command function.
+ */
+
+#ifndef __ARCH_SIM_H__
+#define __ARCH_SIM_H__
+
+#include <arch/sim_def.h>
+#include <arch/abi.h>
+
+#ifndef __ASSEMBLER__
+
+#include <arch/spr_def.h>
+
+
+/**
+ * Report whether the current program is running under a simulator,
+ * rather than on real hardware. If running on hardware, other "sim_xxx()"
+ * calls have no useful effect.
+ *
+ * @return 1 if the SIM_CONTROL SPR reads back as nonzero, otherwise 0.
+ */
+static __inline int
+sim_is_simulator(void)
+{
+  /* Declared with __inline, like the other helpers in this header, so the
+     header still compiles where plain "inline" is not a keyword
+     (for example gcc -ansi). */
+  return __insn_mfspr(SPR_SIM_CONTROL) != 0;
+}
+
+
+/**
+ * Checkpoint the simulator state to a checkpoint file.
+ *
+ * The checkpoint file name is either the default or the name specified
+ * on the command line with "--checkpoint-file".
+ *
+ * Implemented as a single write of SIM_CONTROL_CHECKPOINT to the
+ * SIM_CONTROL SPR; nothing is returned to the caller.
+ */
+static __inline void
+sim_checkpoint(void)
+{
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_CHECKPOINT);
+}
+
+
+/**
+ * Report whether or not various kinds of simulator tracing are enabled.
+ *
+ * The value is read from the SIM_CONTROL SPR and masked with
+ * SIM_TRACE_FLAG_MASK.
+ *
+ * @return The bitwise OR of these values:
+ *
+ * SIM_TRACE_CYCLES (--trace-cycles),
+ * SIM_TRACE_ROUTER (--trace-router),
+ * SIM_TRACE_REGISTER_WRITES (--trace-register-writes),
+ * SIM_TRACE_DISASM (--trace-disasm),
+ * SIM_TRACE_STALL_INFO (--trace-stall-info),
+ * SIM_TRACE_MEMORY_CONTROLLER (--trace-memory-controller),
+ * SIM_TRACE_L2_CACHE (--trace-l2),
+ * SIM_TRACE_LINES (--trace-lines).
+ */
+static __inline unsigned int
+sim_get_tracing(void)
+{
+ return __insn_mfspr(SPR_SIM_CONTROL) & SIM_TRACE_FLAG_MASK;
+}
+
+
+/**
+ * Turn on or off different kinds of simulator tracing.
+ *
+ * The mask is converted with SIM_TRACE_SPR_ARG() and written to the
+ * SIM_CONTROL SPR.
+ *
+ * @param mask Either one of these special values:
+ *
+ * SIM_TRACE_NONE (turns off tracing),
+ * SIM_TRACE_ALL (turns on all possible tracing),
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_TRACE_CYCLES (--trace-cycles),
+ * SIM_TRACE_ROUTER (--trace-router),
+ * SIM_TRACE_REGISTER_WRITES (--trace-register-writes),
+ * SIM_TRACE_DISASM (--trace-disasm),
+ * SIM_TRACE_STALL_INFO (--trace-stall-info),
+ * SIM_TRACE_MEMORY_CONTROLLER (--trace-memory-controller),
+ * SIM_TRACE_L2_CACHE (--trace-l2),
+ * SIM_TRACE_LINES (--trace-lines).
+ */
+static __inline void
+sim_set_tracing(unsigned int mask)
+{
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_TRACE_SPR_ARG(mask));
+}
+
+
+/**
+ * Request dumping of different kinds of simulator state.
+ *
+ * The mask is converted with SIM_DUMP_SPR_ARG() and written to the
+ * SIM_CONTROL SPR.
+ *
+ * @param mask Either this special value:
+ *
+ * SIM_DUMP_ALL (dump all known state),
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_DUMP_REGS (the register file),
+ * SIM_DUMP_SPRS (the SPRs),
+ * SIM_DUMP_ITLB (the iTLB),
+ * SIM_DUMP_DTLB (the dTLB),
+ * SIM_DUMP_L1I (the L1 I-cache),
+ * SIM_DUMP_L1D (the L1 D-cache),
+ * SIM_DUMP_L2 (the L2 cache),
+ * SIM_DUMP_SNREGS (the switch register file),
+ * SIM_DUMP_SNITLB (the switch iTLB),
+ * SIM_DUMP_SNL1I (the switch L1 I-cache),
+ * SIM_DUMP_BACKTRACE (the current backtrace).
+ */
+static __inline void
+sim_dump(unsigned int mask)
+{
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_DUMP_SPR_ARG(mask));
+}
+
+
+/**
+ * Print a string to the simulator stdout.
+ *
+ * Each character is sent as its own SIM_CONTROL_PUTC request; the
+ * sequence is then terminated with SIM_PUTC_FLUSH_BINARY (whereas
+ * sim_print_string() terminates with SIM_PUTC_FLUSH_STRING).
+ *
+ * @param str The NUL-terminated string to be written.
+ */
+static __inline void
+sim_print(const char* str)
+{
+  for ( ; *str != '\0'; str++)
+  {
+    /* Convert through unsigned char so that, where plain char is signed,
+       bytes >= 0x80 are not sign-extended into the upper bits and a
+       negative int is never left-shifted. */
+    const int ch = (unsigned char)*str;
+    __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+                 (ch << _SIM_CONTROL_OPERATOR_BITS));
+  }
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+               (SIM_PUTC_FLUSH_BINARY << _SIM_CONTROL_OPERATOR_BITS));
+}
+
+
+/**
+ * Print a string to the simulator stdout.
+ *
+ * Each character is sent as its own SIM_CONTROL_PUTC request; the
+ * sequence is then terminated with SIM_PUTC_FLUSH_STRING.
+ *
+ * @param str The NUL-terminated string to be written (a newline is
+ * automatically added).
+ */
+static __inline void
+sim_print_string(const char* str)
+{
+  for ( ; *str != '\0'; str++)
+  {
+    /* Convert through unsigned char so that, where plain char is signed,
+       bytes >= 0x80 are not sign-extended into the upper bits and a
+       negative int is never left-shifted. */
+    const int ch = (unsigned char)*str;
+    __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+                 (ch << _SIM_CONTROL_OPERATOR_BITS));
+  }
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+               (SIM_PUTC_FLUSH_STRING << _SIM_CONTROL_OPERATOR_BITS));
+}
+
+
+/**
+ * Execute a simulator command string.
+ *
+ * Type 'sim help' at the tile-monitor prompt to learn what commands
+ * are available. Note the use of the tile-monitor "sim" command to
+ * pass commands to the simulator.
+ *
+ * The argument to sim_command() does not include the leading "sim"
+ * prefix used at the tile-monitor prompt; for example, you might call
+ * sim_command("trace disasm").
+ *
+ * The string is sent one character per SIM_CONTROL_COMMAND request,
+ * and the terminating NUL is sent as well.
+ *
+ * @param str The NUL-terminated command string.
+ */
+static __inline void
+sim_command(const char* str)
+{
+  int c;
+  do
+  {
+    /* Convert through unsigned char so that, where plain char is signed,
+       bytes >= 0x80 are not sign-extended into the upper bits and a
+       negative int is never left-shifted. */
+    c = (unsigned char)*str++;
+    __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_COMMAND |
+                 (c << _SIM_CONTROL_OPERATOR_BITS));
+  }
+  while (c);
+}
+
+
+
+#ifndef __DOXYGEN__
+
+/**
+ * The underlying implementation of "_sim_syscall()".
+ *
+ * We use extra "and" instructions to ensure that all the values
+ * we are passing to the simulator are actually valid in the registers
+ * (i.e. returned from memory) prior to the SIM_CONTROL spr.
+ *
+ * _sim_syscall0() passes only the request word "val", so its asm has no
+ * extra "and". The result is read through the same "R00" constraint
+ * that carries val in (presumably register r0 -- confirm against the
+ * compiler's constraint documentation).
+ */
+static __inline long _sim_syscall0(int val)
+{
+ long result;
+ __asm__ __volatile__ ("mtspr SIM_CONTROL, r0"
+ : "=R00" (result) : "R00" (val));
+ return result;
+}
+
+static __inline long _sim_syscall1(int val, long arg1)
+{ /* One-argument form: val plus arg1. */
+ long result; /* written by the asm through the "=R00" output */
+ __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }" /* the "and" reads r1 in the mtspr bundle */
+ : "=R00" (result) : "R00" (val), "R01" (arg1));
+ return result;
+}
+
+static __inline long _sim_syscall2(int val, long arg1, long arg2)
+{ /* Two-argument form: val plus arg1 and arg2. */
+ long result; /* written by the asm through the "=R00" output */
+ __asm__ __volatile__ ("{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" /* one "and" reads both r1 and r2 in the mtspr bundle */
+ : "=R00" (result)
+ : "R00" (val), "R01" (arg1), "R02" (arg2));
+ return result;
+}
+
+/* Note that _sim_syscall3() and higher are technically at risk of
+ receiving an interrupt right before the mtspr bundle, in which case
+ the register values for arguments 3 and up may still be in flight
+ to the core from a stack frame reload. (Those arguments are read by
+ an "and" in a separate bundle that precedes the mtspr bundle.) */
+
+static __inline long _sim_syscall3(int val, long arg1, long arg2, long arg3)
+{ /* Three-argument form: val plus arg1..arg3. */
+ long result; /* written by the asm through the "=R00" output */
+ __asm__ __volatile__ ("{ and zero, r3, r3 };" /* first bundle: reads r3 only */
+ "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" /* second bundle: reads r1, r2 and issues the mtspr */
+ : "=R00" (result)
+ : "R00" (val), "R01" (arg1), "R02" (arg2),
+ "R03" (arg3));
+ return result;
+}
+
+static __inline long _sim_syscall4(int val, long arg1, long arg2, long arg3,
+ long arg4)
+{ /* Four-argument form: val plus arg1..arg4. */
+ long result; /* written by the asm through the "=R00" output */
+ __asm__ __volatile__ ("{ and zero, r3, r4 };" /* first bundle: reads r3 and r4 */
+ "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" /* second bundle: reads r1, r2 and issues the mtspr */
+ : "=R00" (result)
+ : "R00" (val), "R01" (arg1), "R02" (arg2),
+ "R03" (arg3), "R04" (arg4));
+ return result;
+}
+
+static __inline long _sim_syscall5(int val, long arg1, long arg2, long arg3,
+ long arg4, long arg5)
+{ /* Five-argument form: val plus arg1..arg5. */
+ long result; /* written by the asm through the "=R00" output */
+ __asm__ __volatile__ ("{ and zero, r3, r4; and zero, r5, r5 };" /* first bundle: two "and"s read r3, r4 and r5 */
+ "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" /* second bundle: reads r1, r2 and issues the mtspr */
+ : "=R00" (result)
+ : "R00" (val), "R01" (arg1), "R02" (arg2),
+ "R03" (arg3), "R04" (arg4), "R05" (arg5));
+ return result;
+}
+
+/**
+ * Make a special syscall to the simulator itself, if running under
+ * simulation. This is used as the implementation of other functions
+ * and should not be used outside this file.
+ *
+ * The request word is built by shifting syscall_num left by
+ * _SIM_CONTROL_OPERATOR_BITS and OR-ing in SIM_CONTROL_SYSCALL.
+ *
+ * @param syscall_num The simulator syscall number.
+ * @param nr The number of additional arguments provided. It is
+ * token-pasted onto "_sim_syscall" to select the helper, so it must be
+ * written as a literal digit.
+ * @param args The additional arguments themselves, if any.
+ *
+ * @return Varies by syscall.
+ */
+#define _sim_syscall(syscall_num, nr, args...) \
+ _sim_syscall##nr( \
+ ((syscall_num) << _SIM_CONTROL_OPERATOR_BITS) | SIM_CONTROL_SYSCALL, \
+ ##args)
+
+
+/* Values for the "access_mask" parameters below. Each value is a
+ distinct bit, so presumably they may be OR-ed together (confirm). */
+#define SIM_WATCHPOINT_READ 1
+#define SIM_WATCHPOINT_WRITE 2
+#define SIM_WATCHPOINT_EXECUTE 4
+
+
+/**
+ * Issue the SIM_SYSCALL_ADD_WATCHPOINT simulator syscall.
+ *
+ * All five parameters are forwarded unchanged, in declaration order, as
+ * the syscall arguments; access_mask takes the SIM_WATCHPOINT_* values.
+ *
+ * @return The value the syscall hands back, converted to int.
+ */
+static __inline int
+sim_add_watchpoint(unsigned int process_id,
+                   unsigned long address,
+                   unsigned long size,
+                   unsigned int access_mask,
+                   unsigned long user_data)
+{
+  const long status = _sim_syscall(SIM_SYSCALL_ADD_WATCHPOINT, 5,
+                                   process_id, address, size,
+                                   access_mask, user_data);
+  return (int)status;
+}
+
+
+/**
+ * Issue the SIM_SYSCALL_REMOVE_WATCHPOINT simulator syscall.
+ *
+ * All five parameters are forwarded unchanged, in declaration order, as
+ * the syscall arguments; access_mask takes the SIM_WATCHPOINT_* values.
+ *
+ * @return The value the syscall hands back, converted to int.
+ */
+static __inline int
+sim_remove_watchpoint(unsigned int process_id,
+                      unsigned long address,
+                      unsigned long size,
+                      unsigned int access_mask,
+                      unsigned long user_data)
+{
+  const long status = _sim_syscall(SIM_SYSCALL_REMOVE_WATCHPOINT, 5,
+                                   process_id, address, size,
+                                   access_mask, user_data);
+  return (int)status;
+}
+
+
+/**
+ * Return value from sim_query_watchpoint(): the three values that the
+ * query syscall produces, returned together by value.
+ */
+struct SimQueryWatchpointStatus
+{
+ /**
+ * 0 if a watchpoint fired, 1 if no watchpoint fired, or -1 for
+ * error (meaning a bad process_id).
+ */
+ int syscall_status;
+
+ /**
+ * The address of the watchpoint that fired (this is the address
+ * passed to sim_add_watchpoint, not an address within that range
+ * that actually triggered the watchpoint).
+ */
+ unsigned long address;
+
+ /** The arbitrary user_data installed by sim_add_watchpoint. */
+ unsigned long user_data;
+};
+
+
+static __inline struct SimQueryWatchpointStatus
+sim_query_watchpoint(unsigned int process_id)
+{ /* Issues SIM_SYSCALL_QUERY_WATCHPOINT for process_id and returns the three values the asm reads back. */
+ struct SimQueryWatchpointStatus status; /* every field is an asm output below */
+ long val = SIM_CONTROL_SYSCALL |
+ (SIM_SYSCALL_QUERY_WATCHPOINT << _SIM_CONTROL_OPERATOR_BITS); /* same request-word layout as _sim_syscall() */
+ __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }" /* hand-written rather than _sim_syscall1(): three outputs are needed */
+ : "=R00" (status.syscall_status),
+ "=R01" (status.address),
+ "=R02" (status.user_data)
+ : "R00" (val), "R01" (process_id));
+ return status;
+}
+
+
+/*
+ * On the simulator, confirm lines have been evicted everywhere.
+ *
+ * With __LP64__ defined, pa and length are passed as two syscall
+ * arguments.  Otherwise pa is split into (long)pa and (long)(pa >> 32),
+ * which follow a dummy first argument of 0, with length last.
+ */
+static __inline void
+sim_validate_lines_evicted(unsigned long long pa, unsigned long length)
+{
+#ifdef __LP64__
+  _sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED, 2, pa, length);
+#else
+  const long pa_low = (long)(pa);
+  const long pa_high = (long)(pa >> 32);
+  _sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED, 4,
+               0 /* dummy */, pa_low, pa_high, length);
+#endif
+}
+
+
+/* Ask the simulator (SIM_SYSCALL_QUERY_CPU_SPEED, no arguments) for the
+   current CPU speed, in cycles per second, and return it. */
+static __inline long
+sim_query_cpu_speed(void)
+{
+  const long cycles_per_second = _sim_syscall(SIM_SYSCALL_QUERY_CPU_SPEED, 0);
+  return cycles_per_second;
+}
+
+#endif /* !__DOXYGEN__ */
+
+
+
+
+/**
+ * Modify the shaping parameters of a shim.
+ *
+ * @param shim The shim to modify. One of:
+ * SIM_CONTROL_SHAPING_GBE_0
+ * SIM_CONTROL_SHAPING_GBE_1
+ * SIM_CONTROL_SHAPING_GBE_2
+ * SIM_CONTROL_SHAPING_GBE_3
+ * SIM_CONTROL_SHAPING_XGBE_0
+ * SIM_CONTROL_SHAPING_XGBE_1
+ *
+ * @param type The type of shaping. This should be the same type of
+ * shaping that is already in place on the shim. One of:
+ * SIM_CONTROL_SHAPING_MULTIPLIER
+ * SIM_CONTROL_SHAPING_PPS
+ * SIM_CONTROL_SHAPING_BPS
+ *
+ * @param units The magnitude of the rate. One of:
+ * SIM_CONTROL_SHAPING_UNITS_SINGLE
+ * SIM_CONTROL_SHAPING_UNITS_KILO
+ * SIM_CONTROL_SHAPING_UNITS_MEGA
+ * SIM_CONTROL_SHAPING_UNITS_GIGA
+ *
+ * @param rate The new rate. It must fit in
+ * SIM_CONTROL_SHAPING_RATE_BITS bits; otherwise this function returns 1
+ * without writing the SIM_CONTROL SPR, so the shaping is not changed.
+ *
+ * @return 0 if the request was written to the simulator, or 1 if rate
+ * does not fit in SIM_CONTROL_SHAPING_RATE_BITS bits.
+ */
+static __inline int
+sim_set_shaping(unsigned shim,
+                unsigned type,
+                unsigned units,
+                unsigned rate)
+{
+  const unsigned rate_field_mask = (1 << SIM_CONTROL_SHAPING_RATE_BITS) - 1;
+
+  /* Any bit set outside the rate field means the value is too large. */
+  if ((rate & ~rate_field_mask) != 0)
+    return 1;
+
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_SHAPING_SPR_ARG(shim, type, units, rate));
+  return 0;
+}
+
+#ifdef __tilegx__
+
+/**
+ * Enable a set of mPIPE links. Pass a -1 link_mask to enable all links.
+ *
+ * Writes SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE to SPR_SIM_CONTROL,
+ * combined with the fields below and an enable bit of 1 (shifted by 16).
+ *
+ * @param mpipe The mPIPE shim number; placed in the value shifted by 8.
+ * @param link_mask Mask of links to enable; widened to uint_reg_t and
+ * placed in the value shifted by 32.
+ */
+static __inline void
+sim_enable_mpipe_links(unsigned mpipe, unsigned long link_mask)
+{
+ __insn_mtspr(SPR_SIM_CONTROL,
+ (SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE |
+ (mpipe << 8) | (1 << 16) | ((uint_reg_t)link_mask << 32)));
+}
+
+/**
+ * Disable a set of mPIPE links. Pass a -1 link_mask to disable all links.
+ *
+ * Writes SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE to SPR_SIM_CONTROL,
+ * combined with the fields below and an enable bit of 0 (shifted by 16).
+ *
+ * @param mpipe The mPIPE shim number; placed in the value shifted by 8.
+ * @param link_mask Mask of links to disable; widened to uint_reg_t and
+ * placed in the value shifted by 32.
+ */
+static __inline void
+sim_disable_mpipe_links(unsigned mpipe, unsigned long link_mask)
+{
+ __insn_mtspr(SPR_SIM_CONTROL,
+ (SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE |
+ (mpipe << 8) | (0 << 16) | ((uint_reg_t)link_mask << 32)));
+}
+
+#endif /* __tilegx__ */
+
+
+/*
+ * An API for changing "functional" mode.
+ *
+ * sim_enable_functional() writes SIM_CONTROL_ENABLE_FUNCTIONAL to
+ * SPR_SIM_CONTROL; sim_disable_functional() writes
+ * SIM_CONTROL_DISABLE_FUNCTIONAL. Both are macros that expand to a bare
+ * __insn_mtspr() call, so the caller supplies the trailing semicolon.
+ */
+
+#ifndef __DOXYGEN__
+
+#define sim_enable_functional() \
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_ENABLE_FUNCTIONAL)
+
+#define sim_disable_functional() \
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_DISABLE_FUNCTIONAL)
+
+#endif /* !__DOXYGEN__ */
+
+
+/*
+ * Profiler support.
+ */
+
+/**
+ * Turn profiling on for the current task.
+ *
+ * Done by writing SIM_CONTROL_PROFILER_ENABLE to SPR_SIM_CONTROL.
+ *
+ * Note that this has no effect if run in an environment without
+ * profiling support (thus, the proper flags to the simulator must
+ * be supplied).
+ */
+static __inline void
+sim_profiler_enable(void)
+{
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_ENABLE);
+}
+
+
+/**
+ * Turn profiling off for the current task.
+ *
+ * Done by writing SIM_CONTROL_PROFILER_DISABLE to SPR_SIM_CONTROL.
+ */
+static __inline void
+sim_profiler_disable(void)
+{
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_DISABLE);
+}
+
+
+/**
+ * Turn profiling on or off for the current task.
+ *
+ * @param enabled If nonzero, turns on profiling (writes
+ * SIM_CONTROL_PROFILER_ENABLE to SPR_SIM_CONTROL). If zero, turns it off
+ * (writes SIM_CONTROL_PROFILER_DISABLE).
+ *
+ * Note that this has no effect if run in an environment without
+ * profiling support (thus, the proper flags to the simulator must
+ * be supplied).
+ */
+static __inline void
+sim_profiler_set_enabled(int enabled)
+{
+ int val =
+ enabled ? SIM_CONTROL_PROFILER_ENABLE : SIM_CONTROL_PROFILER_DISABLE;
+ __insn_mtspr(SPR_SIM_CONTROL, val);
+}
+
+
+/**
+ * Return true if and only if profiling is currently enabled
+ * for the current task.
+ *
+ * This returns false even if sim_profiler_enable() was called
+ * if the current execution environment does not support profiling.
+ *
+ * @return 1 if the SIM_PROFILER_ENABLED_MASK bit is set in the value read
+ * from SPR_SIM_CONTROL, 0 otherwise.
+ */
+static __inline int
+sim_profiler_is_enabled(void)
+{
+ return ((__insn_mfspr(SPR_SIM_CONTROL) & SIM_PROFILER_ENABLED_MASK) != 0);
+}
+
+
+/**
+ * Reset profiling counters to zero for the current task.
+ *
+ * Done by writing SIM_CONTROL_PROFILER_CLEAR to SPR_SIM_CONTROL.
+ *
+ * Resetting can be done while profiling is enabled. It does not affect
+ * the chip-wide profiling counters.
+ */
+static __inline void
+sim_profiler_clear(void)
+{
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_CLEAR);
+}
+
+
+/**
+ * Enable specified chip-level profiling counters.
+ *
+ * Does not affect the per-task profiling counters.
+ *
+ * @param mask Either this special value:
+ *
+ * SIM_CHIP_ALL (enables all chip-level components).
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_CHIP_MEMCTL (enable all memory controllers)
+ * SIM_CHIP_XAUI (enable all XAUI controllers)
+ * SIM_CHIP_PCIE (enable the PCIe interface)
+ * SIM_CHIP_MPIPE (enable all MPIPE controllers)
+ * SIM_CHIP_TRIO (enable the TRIO interface)
+ *
+ * The mask is encoded with SIM_PROFILER_CHIP_ENABLE_SPR_ARG() and the
+ * result is written to SPR_SIM_CONTROL.
+ */
+static __inline void
+sim_profiler_chip_enable(unsigned int mask)
+{
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_ENABLE_SPR_ARG(mask));
+}
+
+
+/**
+ * Disable specified chip-level profiling counters.
+ *
+ * Does not affect the per-task profiling counters.
+ *
+ * @param mask Either this special value:
+ *
+ * SIM_CHIP_ALL (disables all chip-level components).
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_CHIP_MEMCTL (disable all memory controllers)
+ * SIM_CHIP_XAUI (disable all XAUI controllers)
+ * SIM_CHIP_PCIE (disable the PCIe interface)
+ * SIM_CHIP_MPIPE (disable all MPIPE controllers)
+ * SIM_CHIP_TRIO (disable the TRIO interface)
+ *
+ * The mask is encoded with SIM_PROFILER_CHIP_DISABLE_SPR_ARG() and the
+ * result is written to SPR_SIM_CONTROL.
+ */
+static __inline void
+sim_profiler_chip_disable(unsigned int mask)
+{
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_DISABLE_SPR_ARG(mask));
+}
+
+
+/**
+ * Reset specified chip-level profiling counters to zero.
+ *
+ * Does not affect the per-task profiling counters.
+ *
+ * @param mask Either this special value:
+ *
+ * SIM_CHIP_ALL (clears all chip-level components).
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_CHIP_MEMCTL (clear all memory controllers)
+ * SIM_CHIP_XAUI (clear all XAUI controllers)
+ * SIM_CHIP_PCIE (clear the PCIe interface)
+ * SIM_CHIP_MPIPE (clear all MPIPE controllers)
+ * SIM_CHIP_TRIO (clear the TRIO interface)
+ *
+ * The mask is encoded with SIM_PROFILER_CHIP_CLEAR_SPR_ARG() and the
+ * result is written to SPR_SIM_CONTROL.
+ */
+static __inline void
+sim_profiler_chip_clear(unsigned int mask)
+{
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_CLEAR_SPR_ARG(mask));
+}
+
+
+/*
+ * Event support.
+ *
+ * sim_event_begin(x) writes x to SPR_EVENT_BEGIN and sim_event_end(x)
+ * writes x to SPR_EVENT_END. Both functions have an empty body unless
+ * __tile__ is defined and __NO_EVENT_SPR__ is not.
+ */
+
+#ifndef __DOXYGEN__
+
+static __inline void
+sim_event_begin(unsigned int x)
+{
+#if defined(__tile__) && !defined(__NO_EVENT_SPR__)
+ __insn_mtspr(SPR_EVENT_BEGIN, x);
+#endif /* __tile__ && !__NO_EVENT_SPR__ */
+}
+
+static __inline void
+sim_event_end(unsigned int x)
+{
+#if defined(__tile__) && !defined(__NO_EVENT_SPR__)
+ __insn_mtspr(SPR_EVENT_END, x);
+#endif /* __tile__ && !__NO_EVENT_SPR__ */
+}
+
+#endif /* !__DOXYGEN__ */
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* !__ARCH_SIM_H__ */
+
+/** @} */
diff --git a/arch/tile/include/arch/sim_def.h b/arch/tile/include/arch/sim_def.h
new file mode 100644
index 00000000..4b44a2b6
--- /dev/null
+++ b/arch/tile/include/arch/sim_def.h
@@ -0,0 +1,505 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file
+ *
+ * Some low-level simulator definitions.
+ */
+
+#ifndef __ARCH_SIM_DEF_H__
+#define __ARCH_SIM_DEF_H__
+
+
+/**
+ * Internal: the low bits of the SIM_CONTROL_* SPR values specify
+ * the operation to perform, and the remaining bits are
+ * an operation-specific parameter (often unused).
+ */
+#define _SIM_CONTROL_OPERATOR_BITS 8
+
+
+/*
+ * Values which can be written to SPR_SIM_CONTROL.
+ */
+
+/** If written to SPR_SIM_CONTROL, stops profiling. */
+#define SIM_CONTROL_PROFILER_DISABLE 0
+
+/** If written to SPR_SIM_CONTROL, starts profiling. */
+#define SIM_CONTROL_PROFILER_ENABLE 1
+
+/** If written to SPR_SIM_CONTROL, clears profiling counters. */
+#define SIM_CONTROL_PROFILER_CLEAR 2
+
+/** If written to SPR_SIM_CONTROL, checkpoints the simulator. */
+#define SIM_CONTROL_CHECKPOINT 3
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8),
+ * sets the tracing mask to the given mask. See "sim_set_tracing()".
+ */
+#define SIM_CONTROL_SET_TRACING 4
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8),
+ * dumps the requested items of machine state to the log.
+ */
+#define SIM_CONTROL_DUMP 5
+
+/** If written to SPR_SIM_CONTROL, clears chip-level profiling counters. */
+#define SIM_CONTROL_PROFILER_CHIP_CLEAR 6
+
+/** If written to SPR_SIM_CONTROL, disables chip-level profiling. */
+#define SIM_CONTROL_PROFILER_CHIP_DISABLE 7
+
+/** If written to SPR_SIM_CONTROL, enables chip-level profiling. */
+#define SIM_CONTROL_PROFILER_CHIP_ENABLE 8
+
+/** If written to SPR_SIM_CONTROL, enables chip-level functional mode. */
+#define SIM_CONTROL_ENABLE_FUNCTIONAL 9
+
+/** If written to SPR_SIM_CONTROL, disables chip-level functional mode. */
+#define SIM_CONTROL_DISABLE_FUNCTIONAL 10
+
+/**
+ * If written to SPR_SIM_CONTROL, enables chip-level functional mode.
+ * All tiles must perform this write for functional mode to be enabled.
+ * Ignored in naked boot mode unless --functional is specified.
+ * WARNING: Only the hypervisor startup code should use this!
+ */
+#define SIM_CONTROL_ENABLE_FUNCTIONAL_BARRIER 11
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * writes a string directly to the simulator output. Written to once for
+ * each character in the string, plus a final NUL. Instead of NUL,
+ * you can also use "SIM_PUTC_FLUSH_STRING" or "SIM_PUTC_FLUSH_BINARY".
+ */
+/* ISSUE: Document the meaning of "newline", and the handling of NUL. */
+#define SIM_CONTROL_PUTC 12
+
+/**
+ * If written to SPR_SIM_CONTROL, clears the --grind-coherence state for
+ * this core. This is intended to be used before a loop that will
+ * invalidate the cache by loading new data and evicting all current data.
+ * Generally speaking, this API should only be used by system code.
+ */
+#define SIM_CONTROL_GRINDER_CLEAR 13
+
+/** If written to SPR_SIM_CONTROL, shuts down the simulator. */
+#define SIM_CONTROL_SHUTDOWN 14
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+ * indicates that a fork syscall just created the given process.
+ */
+#define SIM_CONTROL_OS_FORK 15
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+ * indicates that an exit syscall was just executed by the given process.
+ */
+#define SIM_CONTROL_OS_EXIT 16
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+ * indicates that the OS just switched to the given process.
+ */
+#define SIM_CONTROL_OS_SWITCH 17
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * indicates that an exec syscall was just executed. Written to once for
+ * each character in the executable name, plus a final NUL.
+ */
+#define SIM_CONTROL_OS_EXEC 18
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * indicates that an interpreter (PT_INTERP) was loaded. Written to once
+ * for each character in "ADDR:PATH", plus a final NUL, where "ADDR" is a
+ * hex load address starting with "0x", and "PATH" is the executable name.
+ */
+#define SIM_CONTROL_OS_INTERP 19
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * indicates that a dll was loaded. Written to once for each character
+ * in "ADDR:PATH", plus a final NUL, where "ADDR" is a hexadecimal load
+ * address starting with "0x", and "PATH" is the executable name.
+ */
+#define SIM_CONTROL_DLOPEN 20
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * indicates that a dll was unloaded. Written to once for each character
+ * in "ADDR", plus a final NUL, where "ADDR" is a hexadecimal load
+ * address starting with "0x".
+ */
+#define SIM_CONTROL_DLCLOSE 21
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a flag (shifted by 8),
+ * indicates whether to allow data reads to remotely-cached
+ * dirty cache lines to be cached locally without grinder warnings or
+ * assertions (used by Linux kernel fast memcpy).
+ */
+#define SIM_CONTROL_ALLOW_MULTIPLE_CACHING 22
+
+/** If written to SPR_SIM_CONTROL, enables memory tracing. */
+#define SIM_CONTROL_ENABLE_MEM_LOGGING 23
+
+/** If written to SPR_SIM_CONTROL, disables memory tracing. */
+#define SIM_CONTROL_DISABLE_MEM_LOGGING 24
+
+/**
+ * If written to SPR_SIM_CONTROL, changes the shaping parameters of one of
+ * the gbe or xgbe shims. Must specify the shim id, the type, the units, and
+ * the rate, as defined in SIM_SHAPING_SPR_ARG.
+ */
+#define SIM_CONTROL_SHAPING 25
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * requests that a simulator command be executed. Written to once for each
+ * character in the command, plus a final NUL.
+ */
+#define SIM_CONTROL_COMMAND 26
+
+/**
+ * If written to SPR_SIM_CONTROL, indicates that the simulated system
+ * is panicking, to allow debugging via --debug-on-panic.
+ */
+#define SIM_CONTROL_PANIC 27
+
+/**
+ * If written to SPR_SIM_CONTROL, triggers a simulator syscall.
+ * See "sim_syscall()" for more info.
+ */
+#define SIM_CONTROL_SYSCALL 32
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+ * provides the pid that subsequent SIM_CONTROL_OS_FORK writes should
+ * use as the pid, rather than the default previous SIM_CONTROL_OS_SWITCH.
+ */
+#define SIM_CONTROL_OS_FORK_PARENT 33
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
+ * (shifted by 8), clears the pending magic data section. The cleared
+ * pending magic data section and any subsequently appended magic bytes
+ * will only take effect when the classifier blast programmer is run.
+ */
+#define SIM_CONTROL_CLEAR_MPIPE_MAGIC_BYTES 34
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
+ * (shifted by 8) and a byte of data (shifted by 16), appends that byte
+ * to the shim's pending magic data section. The pending magic data
+ * section takes effect when the classifier blast programmer is run.
+ */
+#define SIM_CONTROL_APPEND_MPIPE_MAGIC_BYTE 35
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
+ * (shifted by 8), an enable=1/disable=0 bit (shifted by 16), and a
+ * mask of links (shifted by 32), enable or disable the corresponding
+ * mPIPE links.
+ */
+#define SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE 36
+
+
+/*
+ * Syscall numbers for use with "sim_syscall()".
+ */
+
+/** Syscall number for sim_add_watchpoint(). */
+#define SIM_SYSCALL_ADD_WATCHPOINT 2
+
+/** Syscall number for sim_remove_watchpoint(). */
+#define SIM_SYSCALL_REMOVE_WATCHPOINT 3
+
+/** Syscall number for sim_query_watchpoint(). */
+#define SIM_SYSCALL_QUERY_WATCHPOINT 4
+
+/**
+ * Syscall number that asserts that the cache lines whose 64-bit PA
+ * is passed as the second argument to sim_syscall(), and over a
+ * range passed as the third argument, are no longer in cache.
+ * The simulator raises an error if this is not the case.
+ */
+#define SIM_SYSCALL_VALIDATE_LINES_EVICTED 5
+
+/** Syscall number for sim_query_cpu_speed(). */
+#define SIM_SYSCALL_QUERY_CPU_SPEED 6
+
+
+/*
+ * Bit masks which can be shifted by 8, combined with
+ * SIM_CONTROL_SET_TRACING, and written to SPR_SIM_CONTROL.
+ */
+
+/**
+ * @addtogroup arch_sim
+ * @{
+ */
+
+/** Enable --trace-cycle when passed to sim_set_tracing(). */
+#define SIM_TRACE_CYCLES 0x01
+
+/** Enable --trace-router when passed to sim_set_tracing(). */
+#define SIM_TRACE_ROUTER 0x02
+
+/** Enable --trace-register-writes when passed to sim_set_tracing(). */
+#define SIM_TRACE_REGISTER_WRITES 0x04
+
+/** Enable --trace-disasm when passed to sim_set_tracing(). */
+#define SIM_TRACE_DISASM 0x08
+
+/** Enable --trace-stall-info when passed to sim_set_tracing(). */
+#define SIM_TRACE_STALL_INFO 0x10
+
+/** Enable --trace-memory-controller when passed to sim_set_tracing(). */
+#define SIM_TRACE_MEMORY_CONTROLLER 0x20
+
+/** Enable --trace-l2 when passed to sim_set_tracing(). */
+#define SIM_TRACE_L2_CACHE 0x40
+
+/** Enable --trace-lines when passed to sim_set_tracing(). */
+#define SIM_TRACE_LINES 0x80
+
+/** Turn off all tracing when passed to sim_set_tracing(). */
+#define SIM_TRACE_NONE 0
+
+/** Turn on all tracing when passed to sim_set_tracing(). */
+#define SIM_TRACE_ALL (-1)
+
+/** @} */
+
+/**
+ * Computes the value to write to SPR_SIM_CONTROL to set tracing flags.
+ *
+ * The mask (an OR of SIM_TRACE_xxx bits) is shifted left by
+ * _SIM_CONTROL_OPERATOR_BITS and combined with SIM_CONTROL_SET_TRACING.
+ *
+ * NOTE(review): passing the negative SIM_TRACE_ALL directly left-shifts a
+ * negative int, which ISO C leaves undefined - confirm the toolchain's
+ * behavior or pass an unsigned mask.
+ */
+#define SIM_TRACE_SPR_ARG(mask) \
+ (SIM_CONTROL_SET_TRACING | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+
+/*
+ * Bit masks which can be shifted by 8, combined with
+ * SIM_CONTROL_DUMP, and written to SPR_SIM_CONTROL.
+ */
+
+/**
+ * @addtogroup arch_sim
+ * @{
+ */
+
+/** Dump the general-purpose registers. */
+#define SIM_DUMP_REGS 0x001
+
+/** Dump the SPRs. */
+#define SIM_DUMP_SPRS 0x002
+
+/** Dump the ITLB. */
+#define SIM_DUMP_ITLB 0x004
+
+/** Dump the DTLB. */
+#define SIM_DUMP_DTLB 0x008
+
+/** Dump the L1 I-cache. */
+#define SIM_DUMP_L1I 0x010
+
+/** Dump the L1 D-cache. */
+#define SIM_DUMP_L1D 0x020
+
+/** Dump the L2 cache. */
+#define SIM_DUMP_L2 0x040
+
+/** Dump the switch registers. */
+#define SIM_DUMP_SNREGS 0x080
+
+/** Dump the switch ITLB. */
+#define SIM_DUMP_SNITLB 0x100
+
+/** Dump the switch L1 I-cache. */
+#define SIM_DUMP_SNL1I 0x200
+
+/** Dump the current backtrace. */
+#define SIM_DUMP_BACKTRACE 0x400
+
+/** Only dump valid lines in caches. */
+#define SIM_DUMP_VALID_LINES 0x800
+
+/** Dump everything that is dumpable. */
+#define SIM_DUMP_ALL (-1 & ~SIM_DUMP_VALID_LINES)
+
+/** @} */
+
+/**
+ * Computes the value to write to SPR_SIM_CONTROL to dump machine state.
+ *
+ * The mask (an OR of SIM_DUMP_xxx bits) is shifted left by
+ * _SIM_CONTROL_OPERATOR_BITS and combined with SIM_CONTROL_DUMP.
+ *
+ * NOTE(review): SIM_DUMP_ALL is a negative int, and left-shifting a
+ * negative int is undefined in ISO C - confirm the toolchain's behavior
+ * or pass an unsigned mask.
+ */
+#define SIM_DUMP_SPR_ARG(mask) \
+ (SIM_CONTROL_DUMP | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+
+/*
+ * Bit masks which can be shifted by 8, combined with
+ * SIM_CONTROL_PROFILER_CHIP_xxx, and written to SPR_SIM_CONTROL.
+ */
+
+/**
+ * @addtogroup arch_sim
+ * @{
+ */
+
+/** Use with SIM_PROFILER_CHIP_xxx to control the memory controllers. */
+#define SIM_CHIP_MEMCTL 0x001
+
+/** Use with SIM_PROFILER_CHIP_xxx to control the XAUI interface. */
+#define SIM_CHIP_XAUI 0x002
+
+/** Use with SIM_PROFILER_CHIP_xxx to control the PCIe interface. */
+#define SIM_CHIP_PCIE 0x004
+
+/** Use with SIM_PROFILER_CHIP_xxx to control the MPIPE interface. */
+#define SIM_CHIP_MPIPE 0x008
+
+/** Use with SIM_PROFILER_CHIP_xxx to control the TRIO interface. */
+#define SIM_CHIP_TRIO 0x010
+
+/** Reference all chip devices. */
+#define SIM_CHIP_ALL (-1)
+
+/** @} */
+
+/**
+ * Computes the value to write to SPR_SIM_CONTROL to clear chip statistics:
+ * the mask (an OR of SIM_CHIP_xxx bits) shifted left by
+ * _SIM_CONTROL_OPERATOR_BITS, combined with SIM_CONTROL_PROFILER_CHIP_CLEAR.
+ */
+#define SIM_PROFILER_CHIP_CLEAR_SPR_ARG(mask) \
+ (SIM_CONTROL_PROFILER_CHIP_CLEAR | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+/**
+ * Computes the value to write to SPR_SIM_CONTROL to disable chip statistics:
+ * the mask (an OR of SIM_CHIP_xxx bits) shifted left by
+ * _SIM_CONTROL_OPERATOR_BITS, combined with SIM_CONTROL_PROFILER_CHIP_DISABLE.
+ */
+#define SIM_PROFILER_CHIP_DISABLE_SPR_ARG(mask) \
+ (SIM_CONTROL_PROFILER_CHIP_DISABLE | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+/**
+ * Computes the value to write to SPR_SIM_CONTROL to enable chip statistics:
+ * the mask (an OR of SIM_CHIP_xxx bits) shifted left by
+ * _SIM_CONTROL_OPERATOR_BITS, combined with SIM_CONTROL_PROFILER_CHIP_ENABLE.
+ */
+#define SIM_PROFILER_CHIP_ENABLE_SPR_ARG(mask) \
+ (SIM_CONTROL_PROFILER_CHIP_ENABLE | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+
+
+/* Shim bitrate controls. */
+
+/** The number of bits used to store the shim id. */
+#define SIM_CONTROL_SHAPING_SHIM_ID_BITS 3
+
+/**
+ * @addtogroup arch_sim
+ * @{
+ */
+
+/** Change the gbe 0 bitrate. */
+#define SIM_CONTROL_SHAPING_GBE_0 0x0
+
+/** Change the gbe 1 bitrate. */
+#define SIM_CONTROL_SHAPING_GBE_1 0x1
+
+/** Change the gbe 2 bitrate. */
+#define SIM_CONTROL_SHAPING_GBE_2 0x2
+
+/** Change the gbe 3 bitrate. */
+#define SIM_CONTROL_SHAPING_GBE_3 0x3
+
+/** Change the xgbe 0 bitrate. */
+#define SIM_CONTROL_SHAPING_XGBE_0 0x4
+
+/** Change the xgbe 1 bitrate. */
+#define SIM_CONTROL_SHAPING_XGBE_1 0x5
+
+/** The number of bits used to store the type of shaping. */
+#define SIM_CONTROL_SHAPING_TYPE_BITS 2
+
+/** Control the multiplier. */
+#define SIM_CONTROL_SHAPING_MULTIPLIER 0
+
+/** Control the PPS. */
+#define SIM_CONTROL_SHAPING_PPS 1
+
+/** Control the BPS. */
+#define SIM_CONTROL_SHAPING_BPS 2
+
+/** The number of bits for the units for the shaping parameter. */
+#define SIM_CONTROL_SHAPING_UNITS_BITS 2
+
+/** Provide a number in single units. */
+#define SIM_CONTROL_SHAPING_UNITS_SINGLE 0
+
+/** Provide a number in kilo units. */
+#define SIM_CONTROL_SHAPING_UNITS_KILO 1
+
+/** Provide a number in mega units. */
+#define SIM_CONTROL_SHAPING_UNITS_MEGA 2
+
+/** Provide a number in giga units. */
+#define SIM_CONTROL_SHAPING_UNITS_GIGA 3
+
+/** @} */
+
+/**
+ * How many bits are available for the rate: what remains of a 32-bit
+ * value after the operator, shim id, type and units fields. With the
+ * widths defined in this file that is 32 - (8 + 3 + 2 + 2) = 17.
+ */
+#define SIM_CONTROL_SHAPING_RATE_BITS \
+ (32 - (_SIM_CONTROL_OPERATOR_BITS + \
+ SIM_CONTROL_SHAPING_SHIM_ID_BITS + \
+ SIM_CONTROL_SHAPING_TYPE_BITS + \
+ SIM_CONTROL_SHAPING_UNITS_BITS))
+
+/**
+ * Computes the value to write to SPR_SIM_CONTROL to change a bitrate.
+ *
+ * The parameter word is built from, low bits first: shim, then type
+ * (shifted past the shim id bits), then units (shifted past shim id and
+ * type bits), then rate (shifted past shim id, type and units bits).
+ * That word is shifted left by _SIM_CONTROL_OPERATOR_BITS and combined
+ * with SIM_CONTROL_SHAPING. No argument is masked to its field width.
+ */
+#define SIM_SHAPING_SPR_ARG(shim, type, units, rate) \
+ (SIM_CONTROL_SHAPING | \
+ ((shim) | \
+ ((type) << (SIM_CONTROL_SHAPING_SHIM_ID_BITS)) | \
+ ((units) << (SIM_CONTROL_SHAPING_SHIM_ID_BITS + \
+ SIM_CONTROL_SHAPING_TYPE_BITS)) | \
+ ((rate) << (SIM_CONTROL_SHAPING_SHIM_ID_BITS + \
+ SIM_CONTROL_SHAPING_TYPE_BITS + \
+ SIM_CONTROL_SHAPING_UNITS_BITS))) << _SIM_CONTROL_OPERATOR_BITS)
+
+
+/*
+ * Values returned when reading SPR_SIM_CONTROL.
+ * ISSUE: These names should share a longer common prefix.
+ */
+
+/**
+ * When reading SPR_SIM_CONTROL, the mask of simulator tracing bits
+ * (SIM_TRACE_xxx values).
+ */
+#define SIM_TRACE_FLAG_MASK 0xFFFF
+
+/** When reading SPR_SIM_CONTROL, the mask for whether profiling is enabled. */
+#define SIM_PROFILER_ENABLED_MASK 0x10000
+
+
+/*
+ * Special arguments for "SIM_CONTROL_PUTC".
+ */
+
+/**
+ * Flag value for forcing a PUTC string-flush, including
+ * coordinate/cycle prefix and newline.
+ */
+#define SIM_PUTC_FLUSH_STRING 0x100
+
+/**
+ * Flag value for forcing a PUTC binary-data-flush, which skips the
+ * prefix and does not append a newline.
+ */
+#define SIM_PUTC_FLUSH_BINARY 0x101
+
+
+#endif /* __ARCH_SIM_DEF_H__ */
diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h
new file mode 100644
index 00000000..d6ba449b
--- /dev/null
+++ b/arch/tile/include/arch/spr_def.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Include the proper base SPR definition file. */
+#ifdef __tilegx__
+#include <arch/spr_def_64.h>
+#else
+#include <arch/spr_def_32.h>
+#endif
+
+#ifdef __KERNEL__
+
+/*
+ * In addition to including the proper base SPR definition file, depending
+ * on machine architecture, this file defines several macros which allow
+ * kernel code to use protection-level dependent SPRs without worrying
+ * about which PL it's running at. In these macros, the PL that the SPR
+ * or interrupt number applies to is replaced by K.
+ */
+
+#if CONFIG_KERNEL_PL != 1 && CONFIG_KERNEL_PL != 2
+#error CONFIG_KERNEL_PL must be 1 or 2
+#endif
+
+/*
+ * Concatenate 4 preprocessor tokens (any of which may be empty).
+ *
+ * Use _concat4(), not __concat4(): the extra level of expansion lets a
+ * macro argument such as CONFIG_KERNEL_PL be replaced by its value before
+ * the ## operators paste the pieces together.
+ */
+#define __concat4(a, b, c, d) a ## b ## c ## d
+#define _concat4(a, b, c, d) __concat4(a, b, c, d)
+
+#ifdef __tilegx__
+
+/*
+ * TILE-Gx dependent, protection-level dependent SPRs.
+ *
+ * Each name is formed by pasting the value of CONFIG_KERNEL_PL onto the
+ * PL-specific prefix: with CONFIG_KERNEL_PL defined as 1, SPR_IPI_MASK_K
+ * becomes SPR_IPI_MASK_1 and SPR_SINGLE_STEP_EN_K_K becomes
+ * SPR_SINGLE_STEP_EN_1_1.
+ */
+
+#define SPR_INTERRUPT_MASK_K \
+ _concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL,,)
+#define SPR_INTERRUPT_MASK_SET_K \
+ _concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL,,)
+#define SPR_INTERRUPT_MASK_RESET_K \
+ _concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL,,)
+#define SPR_INTERRUPT_VECTOR_BASE_K \
+ _concat4(SPR_INTERRUPT_VECTOR_BASE_, CONFIG_KERNEL_PL,,)
+
+#define SPR_IPI_MASK_K \
+ _concat4(SPR_IPI_MASK_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_MASK_RESET_K \
+ _concat4(SPR_IPI_MASK_RESET_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_MASK_SET_K \
+ _concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_EVENT_K \
+ _concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_EVENT_RESET_K \
+ _concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_EVENT_SET_K \
+ _concat4(SPR_IPI_EVENT_SET_, CONFIG_KERNEL_PL,,)
+#define INT_IPI_K \
+ _concat4(INT_IPI_, CONFIG_KERNEL_PL,,)
+
+#define SPR_SINGLE_STEP_CONTROL_K \
+ _concat4(SPR_SINGLE_STEP_CONTROL_, CONFIG_KERNEL_PL,,)
+#define SPR_SINGLE_STEP_EN_K_K \
+ _concat4(SPR_SINGLE_STEP_EN_, CONFIG_KERNEL_PL, _, CONFIG_KERNEL_PL)
+#define INT_SINGLE_STEP_K \
+ _concat4(INT_SINGLE_STEP_, CONFIG_KERNEL_PL,,)
+
+#else
+
+/*
+ * TILEPro dependent, protection-level dependent SPRs.
+ *
+ * Each interrupt-mask register comes as a _0/_1 pair; the value of
+ * CONFIG_KERNEL_PL is pasted in ahead of that suffix, so with
+ * CONFIG_KERNEL_PL defined as 1, SPR_INTERRUPT_MASK_K_0 becomes
+ * SPR_INTERRUPT_MASK_1_0.
+ */
+
+#define SPR_INTERRUPT_MASK_K_0 \
+ _concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL, _0,)
+#define SPR_INTERRUPT_MASK_K_1 \
+ _concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL, _1,)
+#define SPR_INTERRUPT_MASK_SET_K_0 \
+ _concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL, _0,)
+#define SPR_INTERRUPT_MASK_SET_K_1 \
+ _concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL, _1,)
+#define SPR_INTERRUPT_MASK_RESET_K_0 \
+ _concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL, _0,)
+#define SPR_INTERRUPT_MASK_RESET_K_1 \
+ _concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL, _1,)
+
+#endif
+
+/*
+ * Generic protection-level dependent SPRs, defined for both the TILE-Gx
+ * and TILEPro branches above.
+ *
+ * The value of CONFIG_KERNEL_PL replaces the K in each name, so with
+ * CONFIG_KERNEL_PL defined as 1, SPR_SYSTEM_SAVE_K_0 becomes
+ * SPR_SYSTEM_SAVE_1_0 and SPR_INTCTRL_K_STATUS becomes
+ * SPR_INTCTRL_1_STATUS.
+ */
+
+#define SPR_SYSTEM_SAVE_K_0 \
+ _concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _0,)
+#define SPR_SYSTEM_SAVE_K_1 \
+ _concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _1,)
+#define SPR_SYSTEM_SAVE_K_2 \
+ _concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _2,)
+#define SPR_SYSTEM_SAVE_K_3 \
+ _concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _3,)
+#define SPR_EX_CONTEXT_K_0 \
+ _concat4(SPR_EX_CONTEXT_, CONFIG_KERNEL_PL, _0,)
+#define SPR_EX_CONTEXT_K_1 \
+ _concat4(SPR_EX_CONTEXT_, CONFIG_KERNEL_PL, _1,)
+#define SPR_INTCTRL_K_STATUS \
+ _concat4(SPR_INTCTRL_, CONFIG_KERNEL_PL, _STATUS,)
+#define INT_INTCTRL_K \
+ _concat4(INT_INTCTRL_, CONFIG_KERNEL_PL,,)
+
+#endif /* __KERNEL__ */
diff --git a/arch/tile/include/arch/spr_def_32.h b/arch/tile/include/arch/spr_def_32.h
new file mode 100644
index 00000000..bbc1f4c9
--- /dev/null
+++ b/arch/tile/include/arch/spr_def_32.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __DOXYGEN__
+
+#ifndef __ARCH_SPR_DEF_H__
+#define __ARCH_SPR_DEF_H__
+
+#define SPR_AUX_PERF_COUNT_0 0x6005
+#define SPR_AUX_PERF_COUNT_1 0x6006
+#define SPR_AUX_PERF_COUNT_CTL 0x6007
+#define SPR_AUX_PERF_COUNT_STS 0x6008
+#define SPR_CYCLE_HIGH 0x4e06
+#define SPR_CYCLE_LOW 0x4e07
+#define SPR_DMA_BYTE 0x3900
+#define SPR_DMA_CHUNK_SIZE 0x3901
+#define SPR_DMA_CTR 0x3902
+#define SPR_DMA_CTR__REQUEST_MASK 0x1
+#define SPR_DMA_CTR__SUSPEND_MASK 0x2
+#define SPR_DMA_DST_ADDR 0x3903
+#define SPR_DMA_DST_CHUNK_ADDR 0x3904
+#define SPR_DMA_SRC_ADDR 0x3905
+#define SPR_DMA_SRC_CHUNK_ADDR 0x3906
+#define SPR_DMA_STATUS__DONE_MASK 0x1
+#define SPR_DMA_STATUS__BUSY_MASK 0x2
+#define SPR_DMA_STATUS__RUNNING_MASK 0x10
+#define SPR_DMA_STRIDE 0x3907
+#define SPR_DMA_USER_STATUS 0x3908
+#define SPR_DONE 0x4e08
+#define SPR_EVENT_BEGIN 0x4e0d
+#define SPR_EVENT_END 0x4e0e
+#define SPR_EX_CONTEXT_0_0 0x4a05
+#define SPR_EX_CONTEXT_0_1 0x4a06
+#define SPR_EX_CONTEXT_0_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_0_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_0_1__PL_MASK 0x3
+#define SPR_EX_CONTEXT_0_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_0_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_0_1__ICS_MASK 0x4
+#define SPR_EX_CONTEXT_1_0 0x4805
+#define SPR_EX_CONTEXT_1_1 0x4806
+#define SPR_EX_CONTEXT_1_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_1_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_1_1__PL_MASK 0x3
+#define SPR_EX_CONTEXT_1_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_1_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_1_1__ICS_MASK 0x4
+#define SPR_EX_CONTEXT_2_0 0x4605
+#define SPR_EX_CONTEXT_2_1 0x4606
+#define SPR_EX_CONTEXT_2_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_2_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_2_1__PL_MASK 0x3
+#define SPR_EX_CONTEXT_2_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4
+#define SPR_FAIL 0x4e09
+#define SPR_INTCTRL_0_STATUS 0x4a07
+#define SPR_INTCTRL_1_STATUS 0x4807
+#define SPR_INTCTRL_2_STATUS 0x4607
+#define SPR_INTERRUPT_CRITICAL_SECTION 0x4e0a
+#define SPR_INTERRUPT_MASK_0_0 0x4a08
+#define SPR_INTERRUPT_MASK_0_1 0x4a09
+#define SPR_INTERRUPT_MASK_1_0 0x4809
+#define SPR_INTERRUPT_MASK_1_1 0x480a
+#define SPR_INTERRUPT_MASK_2_0 0x4608
+#define SPR_INTERRUPT_MASK_2_1 0x4609
+#define SPR_INTERRUPT_MASK_RESET_0_0 0x4a0a
+#define SPR_INTERRUPT_MASK_RESET_0_1 0x4a0b
+#define SPR_INTERRUPT_MASK_RESET_1_0 0x480b
+#define SPR_INTERRUPT_MASK_RESET_1_1 0x480c
+#define SPR_INTERRUPT_MASK_RESET_2_0 0x460a
+#define SPR_INTERRUPT_MASK_RESET_2_1 0x460b
+#define SPR_INTERRUPT_MASK_SET_0_0 0x4a0c
+#define SPR_INTERRUPT_MASK_SET_0_1 0x4a0d
+#define SPR_INTERRUPT_MASK_SET_1_0 0x480d
+#define SPR_INTERRUPT_MASK_SET_1_1 0x480e
+#define SPR_INTERRUPT_MASK_SET_2_0 0x460c
+#define SPR_INTERRUPT_MASK_SET_2_1 0x460d
+#define SPR_MPL_DMA_CPL_SET_0 0x5800
+#define SPR_MPL_DMA_CPL_SET_1 0x5801
+#define SPR_MPL_DMA_CPL_SET_2 0x5802
+#define SPR_MPL_DMA_NOTIFY_SET_0 0x3800
+#define SPR_MPL_DMA_NOTIFY_SET_1 0x3801
+#define SPR_MPL_DMA_NOTIFY_SET_2 0x3802
+#define SPR_MPL_INTCTRL_0_SET_0 0x4a00
+#define SPR_MPL_INTCTRL_0_SET_1 0x4a01
+#define SPR_MPL_INTCTRL_0_SET_2 0x4a02
+#define SPR_MPL_INTCTRL_1_SET_0 0x4800
+#define SPR_MPL_INTCTRL_1_SET_1 0x4801
+#define SPR_MPL_INTCTRL_1_SET_2 0x4802
+#define SPR_MPL_INTCTRL_2_SET_0 0x4600
+#define SPR_MPL_INTCTRL_2_SET_1 0x4601
+#define SPR_MPL_INTCTRL_2_SET_2 0x4602
+#define SPR_MPL_SN_ACCESS_SET_0 0x0800
+#define SPR_MPL_SN_ACCESS_SET_1 0x0801
+#define SPR_MPL_SN_ACCESS_SET_2 0x0802
+#define SPR_MPL_SN_CPL_SET_0 0x5a00
+#define SPR_MPL_SN_CPL_SET_1 0x5a01
+#define SPR_MPL_SN_CPL_SET_2 0x5a02
+#define SPR_MPL_SN_FIREWALL_SET_0 0x2c00
+#define SPR_MPL_SN_FIREWALL_SET_1 0x2c01
+#define SPR_MPL_SN_FIREWALL_SET_2 0x2c02
+#define SPR_MPL_SN_NOTIFY_SET_0 0x2a00
+#define SPR_MPL_SN_NOTIFY_SET_1 0x2a01
+#define SPR_MPL_SN_NOTIFY_SET_2 0x2a02
+#define SPR_MPL_UDN_ACCESS_SET_0 0x0c00
+#define SPR_MPL_UDN_ACCESS_SET_1 0x0c01
+#define SPR_MPL_UDN_ACCESS_SET_2 0x0c02
+#define SPR_MPL_UDN_AVAIL_SET_0 0x4000
+#define SPR_MPL_UDN_AVAIL_SET_1 0x4001
+#define SPR_MPL_UDN_AVAIL_SET_2 0x4002
+#define SPR_MPL_UDN_CA_SET_0 0x3c00
+#define SPR_MPL_UDN_CA_SET_1 0x3c01
+#define SPR_MPL_UDN_CA_SET_2 0x3c02
+#define SPR_MPL_UDN_COMPLETE_SET_0 0x1400
+#define SPR_MPL_UDN_COMPLETE_SET_1 0x1401
+#define SPR_MPL_UDN_COMPLETE_SET_2 0x1402
+#define SPR_MPL_UDN_FIREWALL_SET_0 0x3000
+#define SPR_MPL_UDN_FIREWALL_SET_1 0x3001
+#define SPR_MPL_UDN_FIREWALL_SET_2 0x3002
+#define SPR_MPL_UDN_REFILL_SET_0 0x1000
+#define SPR_MPL_UDN_REFILL_SET_1 0x1001
+#define SPR_MPL_UDN_REFILL_SET_2 0x1002
+#define SPR_MPL_UDN_TIMER_SET_0 0x3600
+#define SPR_MPL_UDN_TIMER_SET_1 0x3601
+#define SPR_MPL_UDN_TIMER_SET_2 0x3602
+#define SPR_MPL_WORLD_ACCESS_SET_0 0x4e00
+#define SPR_MPL_WORLD_ACCESS_SET_1 0x4e01
+#define SPR_MPL_WORLD_ACCESS_SET_2 0x4e02
+#define SPR_PASS 0x4e0b
+#define SPR_PERF_COUNT_0 0x4205
+#define SPR_PERF_COUNT_1 0x4206
+#define SPR_PERF_COUNT_CTL 0x4207
+#define SPR_PERF_COUNT_DN_CTL 0x4210
+#define SPR_PERF_COUNT_STS 0x4208
+#define SPR_PROC_STATUS 0x4f00
+#define SPR_SIM_CONTROL 0x4e0c
+#define SPR_SNCTL 0x0805
+#define SPR_SNCTL__FRZFABRIC_MASK 0x1
+#define SPR_SNCTL__FRZPROC_MASK 0x2
+#define SPR_SNPC 0x080b
+#define SPR_SNSTATIC 0x080c
+#define SPR_SYSTEM_SAVE_0_0 0x4b00
+#define SPR_SYSTEM_SAVE_0_1 0x4b01
+#define SPR_SYSTEM_SAVE_0_2 0x4b02
+#define SPR_SYSTEM_SAVE_0_3 0x4b03
+#define SPR_SYSTEM_SAVE_1_0 0x4900
+#define SPR_SYSTEM_SAVE_1_1 0x4901
+#define SPR_SYSTEM_SAVE_1_2 0x4902
+#define SPR_SYSTEM_SAVE_1_3 0x4903
+#define SPR_SYSTEM_SAVE_2_0 0x4700
+#define SPR_SYSTEM_SAVE_2_1 0x4701
+#define SPR_SYSTEM_SAVE_2_2 0x4702
+#define SPR_SYSTEM_SAVE_2_3 0x4703
+#define SPR_TILE_COORD 0x4c17
+#define SPR_TILE_RTF_HWM 0x4e10
+#define SPR_TILE_TIMER_CONTROL 0x3205
+#define SPR_TILE_WRITE_PENDING 0x4e0f
+#define SPR_UDN_AVAIL_EN 0x4005
+#define SPR_UDN_CA_DATA 0x0d00
+#define SPR_UDN_DATA_AVAIL 0x0d03
+#define SPR_UDN_DEADLOCK_TIMEOUT 0x3606
+#define SPR_UDN_DEMUX_CA_COUNT 0x0c05
+#define SPR_UDN_DEMUX_COUNT_0 0x0c06
+#define SPR_UDN_DEMUX_COUNT_1 0x0c07
+#define SPR_UDN_DEMUX_COUNT_2 0x0c08
+#define SPR_UDN_DEMUX_COUNT_3 0x0c09
+#define SPR_UDN_DEMUX_CTL 0x0c0a
+#define SPR_UDN_DEMUX_QUEUE_SEL 0x0c0c
+#define SPR_UDN_DEMUX_STATUS 0x0c0d
+#define SPR_UDN_DEMUX_WRITE_FIFO 0x0c0e
+#define SPR_UDN_DIRECTION_PROTECT 0x3005
+#define SPR_UDN_REFILL_EN 0x1005
+#define SPR_UDN_SP_FIFO_DATA 0x0c11
+#define SPR_UDN_SP_FIFO_SEL 0x0c12
+#define SPR_UDN_SP_FREEZE 0x0c13
+#define SPR_UDN_SP_FREEZE__SP_FRZ_MASK 0x1
+#define SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK 0x2
+#define SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK 0x4
+#define SPR_UDN_SP_STATE 0x0c14
+#define SPR_UDN_TAG_0 0x0c15
+#define SPR_UDN_TAG_1 0x0c16
+#define SPR_UDN_TAG_2 0x0c17
+#define SPR_UDN_TAG_3 0x0c18
+#define SPR_UDN_TAG_VALID 0x0c19
+#define SPR_UDN_TILE_COORD 0x0c1a
+
+#endif /* !defined(__ARCH_SPR_DEF_H__) */
+
+#endif /* !defined(__DOXYGEN__) */
diff --git a/arch/tile/include/arch/spr_def_64.h b/arch/tile/include/arch/spr_def_64.h
new file mode 100644
index 00000000..cd3e5f95
--- /dev/null
+++ b/arch/tile/include/arch/spr_def_64.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __DOXYGEN__
+
+#ifndef __ARCH_SPR_DEF_H__
+#define __ARCH_SPR_DEF_H__
+
+#define SPR_AUX_PERF_COUNT_0 0x2105
+#define SPR_AUX_PERF_COUNT_1 0x2106
+#define SPR_AUX_PERF_COUNT_CTL 0x2107
+#define SPR_AUX_PERF_COUNT_STS 0x2108
+#define SPR_CMPEXCH_VALUE 0x2780
+#define SPR_CYCLE 0x2781
+#define SPR_DONE 0x2705
+#define SPR_DSTREAM_PF 0x2706
+#define SPR_EVENT_BEGIN 0x2782
+#define SPR_EVENT_END 0x2783
+/*
+ * EX_CONTEXT_<n>_0 / EX_CONTEXT_<n>_1 register numbers for n = 0..2,
+ * each followed by field descriptors for the _1 register.  For a field F,
+ * __F_SHIFT is its bit position, __F_RMASK is the field mask once the
+ * value has been shifted right, and __F_MASK is the mask in place
+ * (RMASK << SHIFT: PL is 0x3 << 0 = 0x3, ICS is 0x1 << 2 = 0x4).
+ * NOTE(review): PL and ICS presumably mean "protection level" and
+ * "interrupt critical section" -- confirm against the chip documentation.
+ */
+#define SPR_EX_CONTEXT_0_0 0x2580
+#define SPR_EX_CONTEXT_0_1 0x2581
+#define SPR_EX_CONTEXT_0_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_0_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_0_1__PL_MASK 0x3
+#define SPR_EX_CONTEXT_0_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_0_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_0_1__ICS_MASK 0x4
+#define SPR_EX_CONTEXT_1_0 0x2480
+#define SPR_EX_CONTEXT_1_1 0x2481
+#define SPR_EX_CONTEXT_1_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_1_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_1_1__PL_MASK 0x3
+#define SPR_EX_CONTEXT_1_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_1_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_1_1__ICS_MASK 0x4
+#define SPR_EX_CONTEXT_2_0 0x2380
+#define SPR_EX_CONTEXT_2_1 0x2381
+#define SPR_EX_CONTEXT_2_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_2_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_2_1__PL_MASK 0x3
+#define SPR_EX_CONTEXT_2_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4
+#define SPR_FAIL 0x2707
+#define SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK 0x1
+#define SPR_INTCTRL_0_STATUS 0x2505
+#define SPR_INTCTRL_1_STATUS 0x2405
+#define SPR_INTCTRL_2_STATUS 0x2305
+#define SPR_INTERRUPT_CRITICAL_SECTION 0x2708
+#define SPR_INTERRUPT_MASK_0 0x2506
+#define SPR_INTERRUPT_MASK_1 0x2406
+#define SPR_INTERRUPT_MASK_2 0x2306
+#define SPR_INTERRUPT_MASK_RESET_0 0x2507
+#define SPR_INTERRUPT_MASK_RESET_1 0x2407
+#define SPR_INTERRUPT_MASK_RESET_2 0x2307
+#define SPR_INTERRUPT_MASK_SET_0 0x2508
+#define SPR_INTERRUPT_MASK_SET_1 0x2408
+#define SPR_INTERRUPT_MASK_SET_2 0x2308
+#define SPR_INTERRUPT_VECTOR_BASE_0 0x2509
+#define SPR_INTERRUPT_VECTOR_BASE_1 0x2409
+#define SPR_INTERRUPT_VECTOR_BASE_2 0x2309
+#define SPR_INTERRUPT_VECTOR_BASE_3 0x2209
+#define SPR_IPI_EVENT_0 0x1f05
+#define SPR_IPI_EVENT_1 0x1e05
+#define SPR_IPI_EVENT_2 0x1d05
+#define SPR_IPI_EVENT_RESET_0 0x1f06
+#define SPR_IPI_EVENT_RESET_1 0x1e06
+#define SPR_IPI_EVENT_RESET_2 0x1d06
+#define SPR_IPI_EVENT_SET_0 0x1f07
+#define SPR_IPI_EVENT_SET_1 0x1e07
+#define SPR_IPI_EVENT_SET_2 0x1d07
+#define SPR_IPI_MASK_0 0x1f08
+#define SPR_IPI_MASK_1 0x1e08
+#define SPR_IPI_MASK_2 0x1d08
+#define SPR_IPI_MASK_RESET_0 0x1f09
+#define SPR_IPI_MASK_RESET_1 0x1e09
+#define SPR_IPI_MASK_RESET_2 0x1d09
+#define SPR_IPI_MASK_SET_0 0x1f0a
+#define SPR_IPI_MASK_SET_1 0x1e0a
+#define SPR_IPI_MASK_SET_2 0x1d0a
+#define SPR_MPL_AUX_TILE_TIMER_SET_0 0x1700
+#define SPR_MPL_AUX_TILE_TIMER_SET_1 0x1701
+#define SPR_MPL_AUX_TILE_TIMER_SET_2 0x1702
+#define SPR_MPL_INTCTRL_0_SET_0 0x2500
+#define SPR_MPL_INTCTRL_0_SET_1 0x2501
+#define SPR_MPL_INTCTRL_0_SET_2 0x2502
+#define SPR_MPL_INTCTRL_1_SET_0 0x2400
+#define SPR_MPL_INTCTRL_1_SET_1 0x2401
+#define SPR_MPL_INTCTRL_1_SET_2 0x2402
+#define SPR_MPL_INTCTRL_2_SET_0 0x2300
+#define SPR_MPL_INTCTRL_2_SET_1 0x2301
+#define SPR_MPL_INTCTRL_2_SET_2 0x2302
+#define SPR_MPL_UDN_ACCESS_SET_0 0x0b00
+#define SPR_MPL_UDN_ACCESS_SET_1 0x0b01
+#define SPR_MPL_UDN_ACCESS_SET_2 0x0b02
+#define SPR_MPL_UDN_AVAIL_SET_0 0x1b00
+#define SPR_MPL_UDN_AVAIL_SET_1 0x1b01
+#define SPR_MPL_UDN_AVAIL_SET_2 0x1b02
+#define SPR_MPL_UDN_COMPLETE_SET_0 0x0600
+#define SPR_MPL_UDN_COMPLETE_SET_1 0x0601
+#define SPR_MPL_UDN_COMPLETE_SET_2 0x0602
+#define SPR_MPL_UDN_FIREWALL_SET_0 0x1500
+#define SPR_MPL_UDN_FIREWALL_SET_1 0x1501
+#define SPR_MPL_UDN_FIREWALL_SET_2 0x1502
+#define SPR_MPL_UDN_TIMER_SET_0 0x1900
+#define SPR_MPL_UDN_TIMER_SET_1 0x1901
+#define SPR_MPL_UDN_TIMER_SET_2 0x1902
+#define SPR_MPL_WORLD_ACCESS_SET_0 0x2700
+#define SPR_MPL_WORLD_ACCESS_SET_1 0x2701
+#define SPR_MPL_WORLD_ACCESS_SET_2 0x2702
+#define SPR_PASS 0x2709
+#define SPR_PERF_COUNT_0 0x2005
+#define SPR_PERF_COUNT_1 0x2006
+#define SPR_PERF_COUNT_CTL 0x2007
+#define SPR_PERF_COUNT_DN_CTL 0x2008
+#define SPR_PERF_COUNT_STS 0x2009
+#define SPR_PROC_STATUS 0x2784
+#define SPR_SIM_CONTROL 0x2785
+#define SPR_SINGLE_STEP_CONTROL_0 0x0405
+#define SPR_SINGLE_STEP_CONTROL_0__CANCELED_MASK 0x1
+#define SPR_SINGLE_STEP_CONTROL_0__INHIBIT_MASK 0x2
+#define SPR_SINGLE_STEP_CONTROL_1 0x0305
+#define SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK 0x1
+#define SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK 0x2
+#define SPR_SINGLE_STEP_CONTROL_2 0x0205
+#define SPR_SINGLE_STEP_CONTROL_2__CANCELED_MASK 0x1
+#define SPR_SINGLE_STEP_CONTROL_2__INHIBIT_MASK 0x2
+#define SPR_SINGLE_STEP_EN_0_0 0x250a
+#define SPR_SINGLE_STEP_EN_0_1 0x240a
+#define SPR_SINGLE_STEP_EN_0_2 0x230a
+#define SPR_SINGLE_STEP_EN_1_0 0x250b
+#define SPR_SINGLE_STEP_EN_1_1 0x240b
+#define SPR_SINGLE_STEP_EN_1_2 0x230b
+#define SPR_SINGLE_STEP_EN_2_0 0x250c
+#define SPR_SINGLE_STEP_EN_2_1 0x240c
+#define SPR_SINGLE_STEP_EN_2_2 0x230c
+#define SPR_SYSTEM_SAVE_0_0 0x2582
+#define SPR_SYSTEM_SAVE_0_1 0x2583
+#define SPR_SYSTEM_SAVE_0_2 0x2584
+#define SPR_SYSTEM_SAVE_0_3 0x2585
+#define SPR_SYSTEM_SAVE_1_0 0x2482
+#define SPR_SYSTEM_SAVE_1_1 0x2483
+#define SPR_SYSTEM_SAVE_1_2 0x2484
+#define SPR_SYSTEM_SAVE_1_3 0x2485
+#define SPR_SYSTEM_SAVE_2_0 0x2382
+#define SPR_SYSTEM_SAVE_2_1 0x2383
+#define SPR_SYSTEM_SAVE_2_2 0x2384
+#define SPR_SYSTEM_SAVE_2_3 0x2385
+#define SPR_TILE_COORD 0x270b
+#define SPR_TILE_RTF_HWM 0x270c
+#define SPR_TILE_TIMER_CONTROL 0x1605
+#define SPR_UDN_AVAIL_EN 0x1b05
+#define SPR_UDN_DATA_AVAIL 0x0b80
+#define SPR_UDN_DEADLOCK_TIMEOUT 0x1906
+#define SPR_UDN_DEMUX_COUNT_0 0x0b05
+#define SPR_UDN_DEMUX_COUNT_1 0x0b06
+#define SPR_UDN_DEMUX_COUNT_2 0x0b07
+#define SPR_UDN_DEMUX_COUNT_3 0x0b08
+#define SPR_UDN_DIRECTION_PROTECT 0x1505
+
+#endif /* !defined(__ARCH_SPR_DEF_H__) */
+
+#endif /* !defined(__DOXYGEN__) */
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
new file mode 100644
index 00000000..0bb42642
--- /dev/null
+++ b/arch/tile/include/asm/Kbuild
@@ -0,0 +1,44 @@
+include include/asm-generic/Kbuild.asm
+
+header-y += ../arch/
+
+header-y += ucontext.h
+header-y += hardwall.h
+
+generic-y += bug.h
+generic-y += bugs.h
+generic-y += cputime.h
+generic-y += device.h
+generic-y += div64.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipc.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += local.h
+generic-y += module.h
+generic-y += msgbuf.h
+generic-y += mutex.h
+generic-y += param.h
+generic-y += parport.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += scatterlist.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
+generic-y += xor.h
diff --git a/arch/tile/include/asm/asm-offsets.h b/arch/tile/include/asm/asm-offsets.h
new file mode 100644
index 00000000..d370ee36
--- /dev/null
+++ b/arch/tile/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
new file mode 100644
index 00000000..f2461429
--- /dev/null
+++ b/arch/tile/include/asm/atomic.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Atomic primitives.
+ */
+
+#ifndef _ASM_TILE_ATOMIC_H
+#define _ASM_TILE_ATOMIC_H
+
+#include <asm/cmpxchg.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - fetch the current value of an atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Returns v->counter, loaded through ACCESS_ONCE().
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+	int snapshot = ACCESS_ONCE(v->counter);
+
+	return snapshot;
+}
+
+/**
+ * atomic_sub_return - subtract integer and return
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns @v - @i.
+ * Expands to atomic_add_return() of the negated value; the negation is
+ * cast to int before it is passed on.
+ */
+#define atomic_sub_return(i, v) atomic_add_return((int)(-(i)), (v))
+
+/**
+ * atomic_sub - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v.
+ */
+#define atomic_sub(i, v) atomic_add((int)(-(i)), (v))
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns true if the result is
+ * zero, or false for all other cases.
+ */
+#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
+
+/**
+ * atomic_inc_return - increment memory and return
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1 and returns the new value.
+ */
+#define atomic_inc_return(v) atomic_add_return(1, (v))
+
+/**
+ * atomic_dec_return - decrement memory and return
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and returns the new value.
+ */
+#define atomic_dec_return(v) atomic_sub_return(1, (v))
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+#define atomic_inc(v) atomic_add(1, (v))
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1.
+ */
+#define atomic_dec(v) atomic_sub(1, (v))
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and returns true if the result is 0.
+ */
+#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
+
+/**
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1 and returns true if the result is 0.
+ */
+#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns true if the result is
+ * negative, or false when the result is greater than or equal to zero.
+ */
+#define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0)
+
+#endif /* __ASSEMBLY__ */
+
+#ifndef __tilegx__
+#include <asm/atomic_32.h>
+#else
+#include <asm/atomic_64.h>
+#endif
+
+#endif /* _ASM_TILE_ATOMIC_H */
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
new file mode 100644
index 00000000..54d1da82
--- /dev/null
+++ b/arch/tile/include/asm/atomic_32.h
@@ -0,0 +1,324 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Do not include directly; use <linux/atomic.h>.
+ */
+
+#ifndef _ASM_TILE_ATOMIC_32_H
+#define _ASM_TILE_ATOMIC_32_H
+
+#include <asm/barrier.h>
+#include <arch/chip.h>
+
+#ifndef __ASSEMBLY__
+
+/* Tile-specific routines to support <linux/atomic.h>. */
+int _atomic_xchg(atomic_t *v, int n);
+int _atomic_xchg_add(atomic_t *v, int i);
+int _atomic_xchg_add_unless(atomic_t *v, int a, int u);
+int _atomic_cmpxchg(atomic_t *v, int o, int n);
+
+/**
+ * atomic_xchg - atomically swap a new value into an atomic variable
+ * @v: pointer of type atomic_t
+ * @n: integer value to store in memory
+ *
+ * Issues smp_mb(), then stores @n into @v through _atomic_xchg() and
+ * returns the old value of @v.
+ */
+static inline int atomic_xchg(atomic_t *v, int n)
+{
+	int previous;
+
+	smp_mb();  /* barrier for proper semantics */
+	previous = _atomic_xchg(v, n);
+	return previous;
+}
+
+/**
+ * atomic_cmpxchg - conditionally replace the value of an atomic variable
+ * @v: pointer of type atomic_t
+ * @o: value that @v is expected to hold
+ * @n: value to write to @v if it holds @o
+ *
+ * If @v holds @o it is atomically replaced by @n.  The old value at @v
+ * is returned in either case.
+ */
+static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
+{
+	int found;
+
+	smp_mb();  /* barrier for proper semantics */
+	found = _atomic_cmpxchg(v, o, n);
+	return found;
+}
+
+/**
+ * atomic_add - add an integer to an atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v.  Nothing is returned and no smp_mb() is
+ * issued here.
+ */
+static inline void atomic_add(int i, atomic_t *v)
+{
+	/* Only the update is wanted; the helper's return value is dropped. */
+	(void)_atomic_xchg_add(v, i);
+}
+
+/**
+ * atomic_add_return - add an integer and return the result
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns @i + @v, computed as the value
+ * handed back by _atomic_xchg_add() plus @i.
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	int before;
+
+	smp_mb();  /* barrier for proper semantics */
+	before = _atomic_xchg_add(v, i);
+	return before + i;
+}
+
+/**
+ * __atomic_add_unless - add to an atomic variable unless it holds a value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to @v...
+ * @u: ...unless @v is equal to @u.
+ *
+ * Atomically adds @a to @v, provided @v was not already @u.
+ * The old value of @v is returned.
+ */
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int before;
+
+	smp_mb();  /* barrier for proper semantics */
+	before = _atomic_xchg_add_unless(v, a, u);
+	return before;
+}
+
+/**
+ * atomic_set - assign a value to an atomic variable
+ * @v: pointer of type atomic_t
+ * @n: required value
+ *
+ * Atomically sets the value of @v to @n.
+ *
+ * A raw store is not sufficient for atomic_set(): it would be lost if it
+ * landed between the load and the store of one of the other atomic ops,
+ * so the assignment goes through _atomic_xchg() instead.
+ */
+static inline void atomic_set(atomic_t *v, int n)
+{
+	/* The previous value returned by the exchange is not needed. */
+	(void)_atomic_xchg(v, n);
+}
+
+/* A 64bit atomic type */
+
+typedef struct {
+ u64 __aligned(8) counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(val) { (val) }
+
+u64 _atomic64_xchg(atomic64_t *v, u64 n);
+u64 _atomic64_xchg_add(atomic64_t *v, u64 i);
+u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u);
+u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n);
+
+/**
+ * atomic64_read - fetch the current value of a 64-bit atomic variable
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically reads the value of @v by adding zero to it.
+ */
+static inline u64 atomic64_read(const atomic64_t *v)
+{
+	/*
+	 * Both 32-bit halves must be read consistently, which takes an
+	 * atomic op.  Dropping const is safe because the atomic support
+	 * routines do not write to memory if the value has not been
+	 * modified.
+	 */
+	atomic64_t *target = (atomic64_t *)v;
+
+	return _atomic64_xchg_add(target, 0);
+}
+
+/**
+ * atomic64_xchg - atomically swap a new value into a 64-bit atomic variable
+ * @v: pointer of type atomic64_t
+ * @n: integer value to store in memory
+ *
+ * Issues smp_mb(), then stores @n into @v through _atomic64_xchg() and
+ * returns the old value of @v.
+ */
+static inline u64 atomic64_xchg(atomic64_t *v, u64 n)
+{
+	u64 previous;
+
+	smp_mb();  /* barrier for proper semantics */
+	previous = _atomic64_xchg(v, n);
+	return previous;
+}
+
+/**
+ * atomic64_cmpxchg - conditionally replace a 64-bit atomic variable
+ * @v: pointer of type atomic64_t
+ * @o: value that @v is expected to hold
+ * @n: value to write to @v if it holds @o
+ *
+ * If @v holds @o it is atomically replaced by @n.  The old value at @v
+ * is returned in either case.
+ */
+static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
+{
+	u64 found;
+
+	smp_mb();  /* barrier for proper semantics */
+	found = _atomic64_cmpxchg(v, o, n);
+	return found;
+}
+
+/**
+ * atomic64_add - add an integer to a 64-bit atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically adds @i to @v.  Nothing is returned and no smp_mb() is
+ * issued here.
+ */
+static inline void atomic64_add(u64 i, atomic64_t *v)
+{
+	/* Only the update is wanted; the helper's return value is dropped. */
+	(void)_atomic64_xchg_add(v, i);
+}
+
+/**
+ * atomic64_add_return - add an integer and return the result
+ * @i: integer value to add
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically adds @i to @v and returns @i + @v, computed as the value
+ * handed back by _atomic64_xchg_add() plus @i.
+ */
+static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
+{
+	u64 before;
+
+	smp_mb();  /* barrier for proper semantics */
+	before = _atomic64_xchg_add(v, i);
+	return before + i;
+}
+
+/**
+ * atomic64_add_unless - add to a 64-bit atomic unless it holds a value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to @v...
+ * @u: ...unless @v is equal to @u.
+ *
+ * Atomically adds @a to @v, provided @v was not already @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
+{
+	u64 before;
+
+	smp_mb();  /* barrier for proper semantics */
+	before = _atomic64_xchg_add_unless(v, a, u);
+	return before != u;
+}
+
+/**
+ * atomic64_set - assign a value to a 64-bit atomic variable
+ * @v: pointer of type atomic64_t
+ * @n: required value
+ *
+ * Atomically sets the value of @v to @n.
+ *
+ * A raw store is not sufficient for atomic64_set(): it would be lost if
+ * it landed between the load and the store of one of the other atomic
+ * ops, so the assignment goes through _atomic64_xchg() instead.
+ */
+static inline void atomic64_set(atomic64_t *v, u64 n)
+{
+	/* The previous value returned by the exchange is not needed. */
+	(void)_atomic64_xchg(v, n);
+}
+
+/*
+ * atomic64_add_negative - add @a to @v and report whether the sum is negative.
+ *
+ * atomic64_add_return() returns an unsigned u64 here, so the sum must be
+ * viewed as a signed 64-bit value before it is compared with zero;
+ * comparing the u64 directly could never be true.
+ */
+#define atomic64_add_negative(a, v) \
+	((long long)atomic64_add_return((a), (v)) < 0)
+#define atomic64_inc(v) atomic64_add(1LL, (v))
+#define atomic64_inc_return(v) atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
+/*
+ * Subtraction is addition of the negated amount.  The amount is widened
+ * to u64 before it is negated: negating first would turn an unsigned
+ * 32-bit argument such as 1U into 0xffffffff, which would then be
+ * zero-extended to u64 and added rather than subtracted.
+ */
+#define atomic64_sub_return(i, v) atomic64_add_return(-(u64)(i), (v))
+#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
+#define atomic64_sub(i, v) atomic64_add(-(u64)(i), (v))
+#define atomic64_dec(v) atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
+
+/*
+ * We need to barrier before modifying the word, since the _atomic_xxx()
+ * routines just tns the lock and then read/modify/write the word.
+ * But after the word is updated, the routine issues an "mf" before returning,
+ * and since it's a function call, we don't even need a compiler barrier.
+ */
+#define smp_mb__before_atomic_dec() smp_mb()
+#define smp_mb__before_atomic_inc() smp_mb()
+#define smp_mb__after_atomic_dec() do { } while (0)
+#define smp_mb__after_atomic_inc() do { } while (0)
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Internal definitions only beyond this point.
+ */
+
+/*
+ * Preprocessor predicate, for use in #if only: nonzero when CONFIG_SMP
+ * is defined and CHIP_HAS_CBOX_HOME_MAP() is zero.
+ *
+ * CONFIG_SMP is tested with #ifdef rather than with "defined" inside the
+ * macro body, because a "defined" operator that appears as the result of
+ * macro expansion in an #if has undefined behavior in ISO C.
+ */
+#ifdef CONFIG_SMP
+#define ATOMIC_LOCKS_FOUND_VIA_TABLE() (!CHIP_HAS_CBOX_HOME_MAP())
+#else
+#define ATOMIC_LOCKS_FOUND_VIA_TABLE() 0
+#endif
+
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+
+/* Number of entries in atomic_lock_ptr[]. */
+#define ATOMIC_HASH_L1_SHIFT 6
+#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT)
+
+/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */
+#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2)
+#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT)
+
+#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+/*
+ * Number of atomic locks in atomic_locks[]. Must be a power of two.
+ * There is no reason for more than PAGE_SIZE / 8 entries, since that
+ * is the maximum number of pointer bits we can use to index this.
+ * And we cannot have more than PAGE_SIZE / 4, since this has to
+ * fit on a single page and each entry takes 4 bytes.
+ */
+#define ATOMIC_HASH_SHIFT (PAGE_SHIFT - 3)
+#define ATOMIC_HASH_SIZE (1 << ATOMIC_HASH_SHIFT)
+
+#ifndef __ASSEMBLY__
+extern int atomic_locks[];
+#endif
+
+#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+/*
+ * All the code that may fault while holding an atomic lock must
+ * place the pointer to the lock in ATOMIC_LOCK_REG so the fault code
+ * can correctly release and reacquire the lock. Note that we
+ * mention the register number in a comment in "lib/atomic_asm_32.S" to help
+ * keep assembly coders from using this register by mistake, so if it
+ * is changed here, change that comment as well.
+ */
+#define ATOMIC_LOCK_REG 20
+#define ATOMIC_LOCK_REG_NAME r20
+
+#ifndef __ASSEMBLY__
+/* Called from setup to initialize a hash table to point to per_cpu locks. */
+void __init_atomic_per_cpu(void);
+
+#ifdef CONFIG_SMP
+/* Support releasing the atomic lock in do_page_fault_ics(). */
+void __atomic_fault_unlock(int *lock_ptr);
+#endif
+
+/* Private helper routines in lib/atomic_asm_32.S */
+extern struct __get_user __atomic_cmpxchg(volatile int *p,
+ int *lock, int o, int n);
+extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
+ int *lock, int o, int n);
+extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
+extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n);
+extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n);
+extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n);
+extern u64 __atomic64_xchg_add_unless(volatile u64 *p,
+ int *lock, u64 o, u64 n);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_ATOMIC_32_H */
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
new file mode 100644
index 00000000..f4500c68
--- /dev/null
+++ b/arch/tile/include/asm/atomic_64.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Do not include directly; use <linux/atomic.h>.
+ */
+
+#ifndef _ASM_TILE_ATOMIC_64_H
+#define _ASM_TILE_ATOMIC_64_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/barrier.h>
+#include <arch/spr_def.h>
+
+/* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */
+
+#define atomic_set(v, i) ((v)->counter = (i))
+
+/*
+ * The smp_mb() operations throughout are to support the fact that
+ * Linux requires memory barriers before and after the operation,
+ * on any routine which updates memory and returns a value.
+ */
+
+/*
+ * Compare-and-exchange on the 32-bit counter: the expected value @o is
+ * written to SPR_CMPEXCH_VALUE, then __insn_cmpexch4() is issued with the
+ * replacement @n.  Returns whatever __insn_cmpexch4() yields.
+ */
+static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
+{
+	int observed;
+
+	__insn_mtspr(SPR_CMPEXCH_VALUE, o);
+	smp_mb();  /* barrier for proper semantics */
+	observed = __insn_cmpexch4((void *)&v->counter, n);
+	smp_mb();  /* barrier for proper semantics */
+	return observed;
+}
+
+/*
+ * Exchange @n into the 32-bit counter with __insn_exch4(), bracketed by
+ * smp_mb() on both sides, and return what the instruction yields.
+ */
+static inline int atomic_xchg(atomic_t *v, int n)
+{
+	int observed;
+
+	smp_mb();  /* barrier for proper semantics */
+	observed = __insn_exch4((void *)&v->counter, n);
+	smp_mb();  /* barrier for proper semantics */
+	return observed;
+}
+
+/* Add @i to the 32-bit counter; no barrier is issued and nothing is returned. */
+static inline void atomic_add(int i, atomic_t *v)
+{
+	/* The value produced by the fetch-and-add is deliberately unused. */
+	(void)__insn_fetchadd4((void *)&v->counter, i);
+}
+
+/*
+ * Add @i to the 32-bit counter and return the value __insn_fetchadd4()
+ * yields plus @i.
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	int sum;
+
+	smp_mb();  /* barrier for proper semantics */
+	sum = i + __insn_fetchadd4((void *)&v->counter, i);
+	barrier();  /* the addition of i above will wait on memory */
+	return sum;
+}
+
+/*
+ * Add @a to the counter unless it holds @u, retrying atomic_cmpxchg()
+ * until it succeeds or the counter is seen to hold @u.  Returns the last
+ * value observed in the counter.
+ */
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int seen = v->counter;
+
+	while (seen != u) {
+		int expected = seen;
+
+		seen = atomic_cmpxchg(v, expected, expected + a);
+		if (seen == expected)
+			break;  /* the exchange took effect */
+	}
+	return seen;
+}
+
+/* Now the true 64-bit operations. */
+
+#define ATOMIC64_INIT(i) { (i) }
+
+#define atomic64_read(v) ((v)->counter)
+#define atomic64_set(v, i) ((v)->counter = (i))
+
+/*
+ * Compare-and-exchange on the 64-bit counter: after an smp_mb(), the
+ * expected value @o is written to SPR_CMPEXCH_VALUE and __insn_cmpexch()
+ * is issued with the replacement @n.  Returns what the instruction yields.
+ */
+static inline long atomic64_cmpxchg(atomic64_t *v, long o, long n)
+{
+	long observed;
+
+	smp_mb();  /* barrier for proper semantics */
+	__insn_mtspr(SPR_CMPEXCH_VALUE, o);
+	observed = __insn_cmpexch((void *)&v->counter, n);
+	smp_mb();  /* barrier for proper semantics */
+	return observed;
+}
+
+/*
+ * Exchange @n into the 64-bit counter with __insn_exch(), bracketed by
+ * smp_mb() on both sides, and return what the instruction yields.
+ */
+static inline long atomic64_xchg(atomic64_t *v, long n)
+{
+	long observed;
+
+	smp_mb();  /* barrier for proper semantics */
+	observed = __insn_exch((void *)&v->counter, n);
+	smp_mb();  /* barrier for proper semantics */
+	return observed;
+}
+
+/* Add @i to the 64-bit counter; no barrier is issued and nothing is returned. */
+static inline void atomic64_add(long i, atomic64_t *v)
+{
+	/* The value produced by the fetch-and-add is deliberately unused. */
+	(void)__insn_fetchadd((void *)&v->counter, i);
+}
+
+/*
+ * Add @i to the 64-bit counter and return the value __insn_fetchadd()
+ * yields plus @i.
+ *
+ * The intermediate is held in a long, matching the return type; holding
+ * it in an int would truncate the 64-bit result to its low 32 bits.
+ */
+static inline long atomic64_add_return(long i, atomic64_t *v)
+{
+	long val;
+
+	smp_mb();  /* barrier for proper semantics */
+	val = __insn_fetchadd((void *)&v->counter, i) + i;
+	barrier();  /* the "+ i" above will wait on memory */
+	return val;
+}
+
+/*
+ * Add @a to the 64-bit counter unless it holds @u, retrying
+ * atomic64_cmpxchg() until it succeeds or the counter is seen to hold @u.
+ * Returns nonzero if the last value observed was not @u, zero otherwise.
+ */
+static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
+{
+	long seen = v->counter;
+
+	while (seen != u) {
+		long expected = seen;
+
+		seen = atomic64_cmpxchg(v, expected, expected + a);
+		if (seen == expected)
+			break;  /* the exchange took effect */
+	}
+	return seen != u;
+}
+
+/*
+ * Subtraction is addition of the negated amount.  The amount is converted
+ * to long before it is negated: negating first would turn an unsigned int
+ * argument such as 1U into 0xffffffff, which would then be widened to a
+ * positive long and added rather than subtracted.
+ */
+#define atomic64_sub_return(i, v) atomic64_add_return(-(long)(i), (v))
+#define atomic64_sub(i, v) atomic64_add(-(long)(i), (v))
+#define atomic64_inc_return(v) atomic64_add_return(1, (v))
+#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
+#define atomic64_inc(v) atomic64_add(1, (v))
+#define atomic64_dec(v) atomic64_sub(1, (v))
+
+#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
+#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0)
+#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0)
+#define atomic64_add_negative(i, v) (atomic64_add_return((i), (v)) < 0)
+
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+
+/* Atomic dec and inc don't implement barrier, so provide them if needed. */
+#define smp_mb__before_atomic_dec() smp_mb()
+#define smp_mb__after_atomic_dec() smp_mb()
+#define smp_mb__before_atomic_inc() smp_mb()
+#define smp_mb__after_atomic_inc() smp_mb()
+
+/* Define this to indicate that cmpxchg is an efficient operation. */
+#define __HAVE_ARCH_CMPXCHG
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_ATOMIC_64_H */
diff --git a/arch/tile/include/asm/auxvec.h b/arch/tile/include/asm/auxvec.h
new file mode 100644
index 00000000..1d393edb
--- /dev/null
+++ b/arch/tile/include/asm/auxvec.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_AUXVEC_H
+#define _ASM_TILE_AUXVEC_H
+
+/* No extensions to auxvec */
+
+#endif /* _ASM_TILE_AUXVEC_H */
diff --git a/arch/tile/include/asm/backtrace.h b/arch/tile/include/asm/backtrace.h
new file mode 100644
index 00000000..bd5399a6
--- /dev/null
+++ b/arch/tile/include/asm/backtrace.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_BACKTRACE_H
+#define _ASM_TILE_BACKTRACE_H
+
+#include <linux/types.h>
+
+/* Reads 'size' bytes from 'address' and writes the data to 'result'.
+ * Returns true if successful, else false (e.g. memory not readable).
+ */
+typedef bool (*BacktraceMemoryReader)(void *result,
+ unsigned long address,
+ unsigned int size,
+ void *extra);
+
+typedef struct {
+ /* Current PC. */
+ unsigned long pc;
+
+ /* Current stack pointer value. */
+ unsigned long sp;
+
+ /* Current frame pointer value (i.e. caller's stack pointer) */
+ unsigned long fp;
+
+ /* Internal use only: caller's PC for first frame. */
+ unsigned long initial_frame_caller_pc;
+
+ /* Internal use only: callback to read memory. */
+ BacktraceMemoryReader read_memory_func;
+
+ /* Internal use only: arbitrary argument to read_memory_func. */
+ void *read_memory_func_extra;
+
+} BacktraceIterator;
+
+
+typedef enum {
+
+ /* We have no idea what the caller's pc is. */
+ PC_LOC_UNKNOWN,
+
+ /* The caller's pc is currently in lr. */
+ PC_LOC_IN_LR,
+
+ /* The caller's pc can be found by dereferencing the caller's sp. */
+ PC_LOC_ON_STACK
+
+} CallerPCLocation;
+
+
+typedef enum {
+
+ /* We have no idea what the caller's sp is. */
+ SP_LOC_UNKNOWN,
+
+ /* The caller's sp is currently in r52. */
+ SP_LOC_IN_R52,
+
+ /* The caller's sp can be found by adding a certain constant
+ * to the current value of sp.
+ */
+ SP_LOC_OFFSET
+
+} CallerSPLocation;
+
+
+/* Bit values ORed into CALLER_* values for info ops. */
+enum {
+ /* Setting the low bit on any of these values means the info op
+ * applies only to one bundle ago.
+ */
+ ONE_BUNDLE_AGO_FLAG = 1,
+
+ /* Setting this bit on a CALLER_SP_* value means the PC is in LR.
+ * If not set, PC is on the stack.
+ */
+ PC_IN_LR_FLAG = 2,
+
+ /* This many of the low bits of a CALLER_SP_* value are for the
+ * flag bits above.
+ */
+ NUM_INFO_OP_FLAGS = 2,
+
+ /* We cannot have one in the memory pipe so this is the maximum. */
+ MAX_INFO_OPS_PER_BUNDLE = 2
+};
+
+
+/* Internal constants used to define 'info' operands. */
+enum {
+ /* 0 and 1 are reserved, as are all negative numbers. */
+
+ CALLER_UNKNOWN_BASE = 2,
+
+ CALLER_SP_IN_R52_BASE = 4,
+
+ CALLER_SP_OFFSET_BASE = 8,
+};
+
+
+/* Current backtracer state describing where it thinks the caller is. */
+typedef struct {
+ /*
+ * Public fields
+ */
+
+ /* How do we find the caller's PC? */
+ CallerPCLocation pc_location : 8;
+
+ /* How do we find the caller's SP? */
+ CallerSPLocation sp_location : 8;
+
+ /* If sp_location == SP_LOC_OFFSET, then caller_sp == sp +
+ * loc->sp_offset. Else this field is undefined.
+ */
+ uint16_t sp_offset;
+
+ /* Is the most recently visited bundle a terminating bundle? */
+ bool at_terminating_bundle;
+
+ /*
+ * Private fields
+ */
+
+ /* Will the forward scanner see someone clobbering sp
+ * (i.e. changing it with something other than addi sp, sp, N)?
+ */
+ bool sp_clobber_follows;
+
+ /* Operand to next "visible" info op (no more than one bundle past
+ * the next terminating bundle), or -32768 (the minimum int16_t
+ * value) if none.
+ */
+ int16_t next_info_operand;
+
+ /* Is the info op in next_info_operand in the very next bundle? */
+ bool is_next_info_operand_adjacent;
+
+} CallerLocation;
+
+extern void backtrace_init(BacktraceIterator *state,
+ BacktraceMemoryReader read_memory_func,
+ void *read_memory_func_extra,
+ unsigned long pc, unsigned long lr,
+ unsigned long sp, unsigned long r52);
+
+
+extern bool backtrace_next(BacktraceIterator *state);
+
+#endif /* _ASM_TILE_BACKTRACE_H */
diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
new file mode 100644
index 00000000..990a217a
--- /dev/null
+++ b/arch/tile/include/asm/barrier.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_BARRIER_H
+#define _ASM_TILE_BARRIER_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <arch/chip.h>
+#include <arch/spr_def.h>
+#include <asm/timex.h>
+
+/*
+ * read_barrier_depends - Flush all pending reads that subsequent reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier. All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data returned by
+ * any of the preceding reads. This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies. See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * b = 2;
+ * memory_barrier();
+ * p = &b; q = p;
+ * read_barrier_depends();
+ * d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends(). However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * a = 2;
+ * memory_barrier();
+ * b = 3; y = b;
+ * read_barrier_depends();
+ * x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b". Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
+ * in cases like this where there are no data dependencies.
+ */
+#define read_barrier_depends() do { } while (0)
+
+#define __sync() __insn_mf()
+
+#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
+#include <hv/syscall_public.h>
+/*
+ * Issue an uncacheable load to each memory controller, then
+ * wait until those loads have completed.
+ *
+ * Implemented as a "swint2" instruction with HV_SYS_fence_incoherent
+ * loaded into r10 (the "R10" input constraint); r10 is also declared as
+ * an output so the compiler knows it is overwritten.
+ * NOTE(review): presumably this is a hypervisor fast call, given the
+ * <hv/syscall_public.h> include -- confirm.
+ */
+static inline void __mb_incoherent(void)
+{
+ long clobber_r10;
+ /* r0-r9 and r11-r29 are listed as clobbered by the call. */
+ asm volatile("swint2"
+ : "=R10" (clobber_r10)
+ : "R10" (HV_SYS_fence_incoherent)
+ : "r0", "r1", "r2", "r3", "r4",
+ "r5", "r6", "r7", "r8", "r9",
+ "r11", "r12", "r13", "r14",
+ "r15", "r16", "r17", "r18", "r19",
+ "r20", "r21", "r22", "r23", "r24",
+ "r25", "r26", "r27", "r28", "r29");
+}
+#endif
+
+/*
+ * Fence to guarantee visibility of stores to incoherent memory.
+ *
+ * An "mf" is always issued first. When CHIP_HAS_MF_WAITS_FOR_VICTIMS()
+ * is false, more work follows: if the chip has SPR_TILE_WRITE_PENDING,
+ * poll it for up to WRITE_TIMEOUT_CYCLES cycles and return as soon as
+ * it reads zero; otherwise (or on timeout) fall back to
+ * __mb_incoherent().
+ */
+static inline void
+mb_incoherent(void)
+{
+ __insn_mf();
+
+#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
+ {
+#if CHIP_HAS_TILE_WRITE_PENDING()
+ const unsigned long WRITE_TIMEOUT_CYCLES = 400;
+ unsigned long start = get_cycles_low();
+ do {
+ /* Nothing pending any more: the fence is complete. */
+ if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0)
+ return;
+ } while ((get_cycles_low() - start) < WRITE_TIMEOUT_CYCLES);
+#endif /* CHIP_HAS_TILE_WRITE_PENDING() */
+ /* Polling unavailable or timed out: use the slow path. */
+ (void) __mb_incoherent();
+ }
+#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */
+}
+
+/*
+ * The "fast" write, read and full barriers all expand to __sync();
+ * the I/O barrier expands to mb_incoherent().
+ */
+#define fast_wmb() __sync()
+#define fast_rmb() __sync()
+#define fast_mb() __sync()
+#define fast_iob() mb_incoherent()
+
+/* The generic barrier names are aliases for the fast_*() variants. */
+#define wmb() fast_wmb()
+#define rmb() fast_rmb()
+#define mb() fast_mb()
+#define iob() fast_iob()
+
+/*
+ * With CONFIG_SMP the smp_*() barriers are the real barriers above;
+ * without it they reduce to a compiler barrier(), and the
+ * data-dependency barrier to nothing.
+ */
+#ifdef CONFIG_SMP
+#define smp_mb() mb()
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#define smp_read_barrier_depends() read_barrier_depends()
+#else
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_read_barrier_depends() do { } while (0)
+#endif
+
+/* Assign "value" to "var", then issue a full mb(). */
+#define set_mb(var, value) \
+ do { var = value; mb(); } while (0)
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_TILE_BARRIER_H */
diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h
new file mode 100644
index 00000000..bd186c4e
--- /dev/null
+++ b/arch/tile/include/asm/bitops.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright 1992, Linus Torvalds.
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_BITOPS_H
+#define _ASM_TILE_BITOPS_H
+
+#include <linux/types.h>
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#ifdef __tilegx__
+#include <asm/bitops_64.h>
+#else
+#include <asm/bitops_32.h>
+#endif
+
+/**
+ * __ffs - index of the least significant set bit
+ * @word: The word to search
+ *
+ * The result is undefined when @word is 0, so callers must test for
+ * zero before calling.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+	const int lowest_set = __builtin_ctzl(word);
+
+	return lowest_set;
+}
+
+/**
+ * ffz - index of the least significant clear bit
+ * @word: The word to search
+ *
+ * The result is undefined when every bit of @word is set, so callers
+ * must test against ~0UL before calling.
+ */
+static inline unsigned long ffz(unsigned long word)
+{
+	const unsigned long inverted = ~word;
+
+	return __builtin_ctzl(inverted);
+}
+
+/**
+ * __fls - index of the most significant set bit
+ * @word: The word to search
+ *
+ * The result is undefined when @word is 0, so callers must test for
+ * zero before calling.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	/* Index of the top bit of an unsigned long. */
+	const unsigned long top_index = sizeof(word) * 8 - 1;
+
+	return top_index - __builtin_clzl(word);
+}
+
+/**
+ * ffs - find first set bit in word
+ * @x: the word to search
+ *
+ * Follows the libc / compiler-builtin ffs convention rather than the
+ * zero-based convention of the other bitops: the result is 0 when @x
+ * is 0, otherwise the 1-based position of the least significant set
+ * bit.
+ */
+static inline int ffs(int x)
+{
+	/* Zero has no set bit; otherwise convert the 0-based index. */
+	return x ? __builtin_ctz(x) + 1 : 0;
+}
+
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @w: the word to search
+ *
+ * Returns 0 if @w is 0, otherwise the 1-based position of the most
+ * significant set bit (the top bit is at position 64).
+ *
+ * Zero is tested explicitly because the result of __builtin_clzll(0)
+ * is undefined.
+ */
+static inline int fls64(__u64 w)
+{
+	if (w == 0)
+		return 0;
+	return (sizeof(__u64) * 8) - __builtin_clzll(w);
+}
+
+/**
+ * fls - find last set bit in word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffs, but returns the position of the most significant set bit.
+ *
+ * fls(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 32.
+ */
+static inline int fls(int x)
+{
+	/* Zero-extend to 64 bits so fls64() sees only the low 32 bits. */
+	return fls64((unsigned int) x);
+}
+
+/* Number of set bits in the full 32-bit value @w. */
+static inline unsigned int __arch_hweight32(unsigned int w)
+{
+	const int set_bits = __builtin_popcount(w);
+
+	return set_bits;
+}
+
+/* Number of set bits in the low 16 bits of @w; higher bits are ignored. */
+static inline unsigned int __arch_hweight16(unsigned int w)
+{
+	const unsigned int low16 = w & 0xffff;
+
+	return __builtin_popcount(low16);
+}
+
+/* Number of set bits in the low 8 bits of @w; higher bits are ignored. */
+static inline unsigned int __arch_hweight8(unsigned int w)
+{
+	const unsigned int low8 = w & 0xff;
+
+	return __builtin_popcount(low8);
+}
+
+/* Number of set bits in the 64-bit value @w. */
+static inline unsigned long __arch_hweight64(__u64 w)
+{
+	const int set_bits = __builtin_popcountll(w);
+
+	return set_bits;
+}
+
+#include <asm-generic/bitops/const_hweight.h>
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/find.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/le.h>
+
+#endif /* _ASM_TILE_BITOPS_H */
diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h
new file mode 100644
index 00000000..ddc4c1ef
--- /dev/null
+++ b/arch/tile/include/asm/bitops_32.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_BITOPS_32_H
+#define _ASM_TILE_BITOPS_32_H
+
+#include <linux/compiler.h>
+#include <linux/atomic.h>
+
+/*
+ * Tile-specific routines to support <asm/bitops.h>.
+ *
+ * Only declared here. The test_and_*_bit() helpers in this header
+ * derive the old bit from the return value, so each routine presumably
+ * returns the previous contents of *p -- confirm against the
+ * out-of-line definitions.
+ */
+unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask);
+
+/**
+ * set_bit - atomically turn on one bit of a bitmap
+ * @nr: index of the bit to set
+ * @addr: base address of the bitmap
+ *
+ * Atomic, and may not be reordered; use __set_bit() when the atomic
+ * guarantees are not needed. @nr may be almost arbitrarily large: the
+ * word that holds the bit is located with BIT_WORD(), so the operation
+ * is not restricted to a single-word quantity.
+ */
+static inline void set_bit(unsigned nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *word = addr + BIT_WORD(nr);
+
+	_atomic_or(word, BIT_MASK(nr));
+}
+
+/**
+ * clear_bit - atomically turn off one bit of a bitmap
+ * @nr: index of the bit to clear
+ * @addr: base address of the bitmap
+ *
+ * Atomic, and may not be reordered; use __clear_bit() when the atomic
+ * guarantees are not needed. @nr may be almost arbitrarily large: the
+ * word that holds the bit is located with BIT_WORD().
+ *
+ * clear_bit() may not contain a memory barrier, so when it is used for
+ * locking, call smp_mb__before_clear_bit() and/or
+ * smp_mb__after_clear_bit() to make the change visible on other cpus.
+ */
+static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *word = addr + BIT_WORD(nr);
+
+	_atomic_andn(word, BIT_MASK(nr));
+}
+
+/**
+ * change_bit - atomically toggle one bit of a bitmap
+ * @nr: index of the bit to flip
+ * @addr: base address of the bitmap
+ *
+ * Atomic, and may not be reordered; use __change_bit() when the atomic
+ * guarantees are not needed. @nr may be almost arbitrarily large: the
+ * word that holds the bit is located with BIT_WORD().
+ */
+static inline void change_bit(unsigned nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *word = addr + BIT_WORD(nr);
+
+	_atomic_xor(word, BIT_MASK(nr));
+}
+
+/**
+ * test_and_set_bit - set a bit and report whether it was already set
+ * @nr: index of the bit to set
+ * @addr: base address of the bitmap
+ *
+ * Atomic, cannot be reordered, and implies a memory barrier.
+ * Returns nonzero if the bit was set before the call.
+ */
+static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr)
+{
+	const unsigned long bit = BIT_MASK(nr);
+	volatile unsigned long *word = addr + BIT_WORD(nr);
+	unsigned long previous;
+
+	smp_mb();  /* barrier for proper semantics */
+	previous = _atomic_or(word, bit);
+	return (previous & bit) != 0;
+}
+
+/**
+ * test_and_clear_bit - clear a bit and report whether it was set
+ * @nr: index of the bit to clear
+ * @addr: base address of the bitmap
+ *
+ * Atomic, cannot be reordered, and implies a memory barrier.
+ * Returns nonzero if the bit was set before the call.
+ */
+static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr)
+{
+	const unsigned long bit = BIT_MASK(nr);
+	volatile unsigned long *word = addr + BIT_WORD(nr);
+	unsigned long previous;
+
+	smp_mb();  /* barrier for proper semantics */
+	previous = _atomic_andn(word, bit);
+	return (previous & bit) != 0;
+}
+
+/**
+ * test_and_change_bit - toggle a bit and report its previous state
+ * @nr: index of the bit to flip
+ * @addr: base address of the bitmap
+ *
+ * Atomic, cannot be reordered, and implies a memory barrier.
+ * Returns nonzero if the bit was set before the call.
+ */
+static inline int test_and_change_bit(unsigned nr,
+				      volatile unsigned long *addr)
+{
+	const unsigned long bit = BIT_MASK(nr);
+	volatile unsigned long *word = addr + BIT_WORD(nr);
+	unsigned long previous;
+
+	smp_mb();  /* barrier for proper semantics */
+	previous = _atomic_xor(word, bit);
+	return (previous & bit) != 0;
+}
+
+/* See discussion at smp_mb__before_atomic_dec() in <asm/atomic_32.h>. */
+#define smp_mb__before_clear_bit() smp_mb()
+#define smp_mb__after_clear_bit() do {} while (0)
+
+#include <asm-generic/bitops/ext2-atomic.h>
+
+#endif /* _ASM_TILE_BITOPS_32_H */
diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h
new file mode 100644
index 00000000..60b87ee5
--- /dev/null
+++ b/arch/tile/include/asm/bitops_64.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_BITOPS_64_H
+#define _ASM_TILE_BITOPS_64_H
+
+#include <linux/compiler.h>
+#include <linux/atomic.h>
+
+/* See <asm/bitops.h> for API comments. */
+
+/* Set bit @nr of the bitmap at @addr with a single __insn_fetchor(). */
+static inline void set_bit(unsigned nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *word = addr + nr / BITS_PER_LONG;
+
+	__insn_fetchor((void *)word, 1UL << (nr % BITS_PER_LONG));
+}
+
+/* Clear bit @nr of the bitmap at @addr with a single __insn_fetchand(). */
+static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
+{
+	/* Every bit except the target stays set in the AND operand. */
+	const unsigned long keep = ~(1UL << (nr % BITS_PER_LONG));
+
+	__insn_fetchand((void *)(addr + nr / BITS_PER_LONG), keep);
+}
+
+#define smp_mb__before_clear_bit() smp_mb()
+#define smp_mb__after_clear_bit() smp_mb()
+
+
+/*
+ * Toggle bit @nr of the bitmap at @addr.
+ *
+ * Implemented as a compare-and-exchange retry loop: keep proposing
+ * "current value with the bit flipped" until atomic64_cmpxchg() reports
+ * that the word still held the value we based the proposal on.
+ */
+static inline void change_bit(unsigned nr, volatile unsigned long *addr)
+{
+	const unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+	unsigned long expected, seen;
+
+	addr += nr / BITS_PER_LONG;
+	expected = *addr;
+	for (;;) {
+		seen = atomic64_cmpxchg((atomic64_t *)addr,
+					expected, expected ^ mask);
+		if (seen == expected)
+			break;
+		expected = seen;
+	}
+}
+
+
+/*
+ * The test_and_xxx_bit() routines require a memory fence before we
+ * start the operation, and after the operation completes. We use
+ * smp_mb() before, and rely on the "!= 0" comparison, plus a compiler
+ * barrier(), to block until the atomic op is complete.
+ */
+
+/*
+ * Set bit @nr of the bitmap at @addr with __insn_fetchor() and return
+ * nonzero if that bit was set in the value the fetch-or handed back.
+ */
+static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr)
+{
+ int val;
+ unsigned long mask = (1UL << (nr % BITS_PER_LONG));
+ smp_mb(); /* barrier for proper semantics */
+ val = (__insn_fetchor((void *)(addr + nr / BITS_PER_LONG), mask)
+ & mask) != 0;
+ /* Consuming the result above, then barrier(), ends the operation. */
+ barrier();
+ return val;
+}
+
+
+/*
+ * Clear bit @nr of the bitmap at @addr with __insn_fetchand() and return
+ * nonzero if that bit was set in the value the fetch-and handed back.
+ */
+static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr)
+{
+ int val;
+ unsigned long mask = (1UL << (nr % BITS_PER_LONG));
+ smp_mb(); /* barrier for proper semantics */
+ val = (__insn_fetchand((void *)(addr + nr / BITS_PER_LONG), ~mask)
+ & mask) != 0;
+ /* Consuming the result above, then barrier(), ends the operation. */
+ barrier();
+ return val;
+}
+
+
+/*
+ * Toggle bit @nr of the bitmap at @addr and return nonzero if the bit
+ * was set beforehand.
+ *
+ * Uses a compare-and-exchange retry loop; when it exits, "expected" is
+ * the value the word held immediately before the successful exchange.
+ */
+static inline int test_and_change_bit(unsigned nr,
+				      volatile unsigned long *addr)
+{
+	const unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+	unsigned long expected, seen;
+
+	addr += nr / BITS_PER_LONG;
+	expected = *addr;
+	for (;;) {
+		seen = atomic64_cmpxchg((atomic64_t *)addr,
+					expected, expected ^ mask);
+		if (seen == expected)
+			break;
+		expected = seen;
+	}
+	return (expected & mask) != 0;
+}
+
+#include <asm-generic/bitops/ext2-atomic-setbit.h>
+
+#endif /* _ASM_TILE_BITOPS_64_H */
diff --git a/arch/tile/include/asm/bitsperlong.h b/arch/tile/include/asm/bitsperlong.h
new file mode 100644
index 00000000..58c771f2
--- /dev/null
+++ b/arch/tile/include/asm/bitsperlong.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_BITSPERLONG_H
+#define _ASM_TILE_BITSPERLONG_H
+
+#ifdef __LP64__
+# define __BITS_PER_LONG 64
+#else
+# define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* _ASM_TILE_BITSPERLONG_H */
diff --git a/arch/tile/include/asm/byteorder.h b/arch/tile/include/asm/byteorder.h
new file mode 100644
index 00000000..9558416d
--- /dev/null
+++ b/arch/tile/include/asm/byteorder.h
@@ -0,0 +1 @@
+#include <linux/byteorder/little_endian.h>
diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
new file mode 100644
index 00000000..392e5333
--- /dev/null
+++ b/arch/tile/include/asm/cache.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_CACHE_H
+#define _ASM_TILE_CACHE_H
+
+#include <arch/chip.h>
+
+/* bytes per L1 data cache line */
+#define L1_CACHE_SHIFT CHIP_L1D_LOG_LINE_SIZE()
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
+/* bytes per L2 cache line */
+#define L2_CACHE_SHIFT CHIP_L2_LOG_LINE_SIZE()
+#define L2_CACHE_BYTES (1 << L2_CACHE_SHIFT)
+/* Round x up to the next multiple of L2_CACHE_BYTES. */
+#define L2_CACHE_ALIGN(x) (((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES)
+
+/*
+ * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN.
+ */
+#ifndef __tilegx__
+#define ARCH_DMA_MINALIGN L2_CACHE_BYTES
+#endif
+
+/* use the cache line size for the L2, which is where it counts */
+#define SMP_CACHE_BYTES_SHIFT L2_CACHE_SHIFT
+#define SMP_CACHE_BYTES L2_CACHE_BYTES
+#define INTERNODE_CACHE_SHIFT L2_CACHE_SHIFT
+#define INTERNODE_CACHE_BYTES L2_CACHE_BYTES
+
+/* Group together read-mostly things to avoid cache false sharing */
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+/*
+ * Attribute for data that is kept read/write coherent until the end of
+ * initialization, then bumped to read/only incoherent for performance.
+ */
+#define __write_once __attribute__((__section__(".w1data")))
+
+#endif /* _ASM_TILE_CACHE_H */
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
new file mode 100644
index 00000000..0fc63c48
--- /dev/null
+++ b/arch/tile/include/asm/cacheflush.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_CACHEFLUSH_H
+#define _ASM_TILE_CACHEFLUSH_H
+
+#include <arch/chip.h>
+
+/* Keep includes the same across arches. */
+#include <linux/mm.h>
+#include <linux/cache.h>
+#include <arch/icache.h>
+
+/* Caches are physically-indexed and so don't need special treatment */
+#define flush_cache_all() do { } while (0)
+#define flush_cache_mm(mm) do { } while (0)
+#define flush_cache_dup_mm(mm) do { } while (0)
+#define flush_cache_range(vma, start, end) do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define flush_dcache_page(page) do { } while (0)
+#define flush_dcache_mmap_lock(mapping) do { } while (0)
+#define flush_dcache_mmap_unlock(mapping) do { } while (0)
+#define flush_cache_vmap(start, end) do { } while (0)
+#define flush_cache_vunmap(start, end) do { } while (0)
+#define flush_icache_page(vma, pg) do { } while (0)
+#define flush_icache_user_range(vma, pg, adr, len) do { } while (0)
+
+/* Flush the icache just on this cpu */
+extern void __flush_icache_range(unsigned long start, unsigned long end);
+
+/* Flush the entire icache on this cpu. */
+#define __flush_icache() __flush_icache_range(0, CHIP_L1I_CACHE_SIZE())
+
+#ifdef CONFIG_SMP
+/*
+ * When the kernel writes to its own text we need to do an SMP
+ * broadcast to make the L1I coherent everywhere. This includes
+ * module load and single step.
+ */
+extern void flush_icache_range(unsigned long start, unsigned long end);
+#else
+#define flush_icache_range __flush_icache_range
+#endif
+
+/*
+ * Copy @len bytes from @src to @dst and, when the VMA is executable,
+ * flush the icache for the destination range.
+ *
+ * We could carefully update only tiles that are running this process,
+ * and rely on the icache flush done on every context switch to avoid
+ * extra work here. For now this stays conservative and just does a
+ * global icache flush.
+ */
+static inline void copy_to_user_page(struct vm_area_struct *vma,
+				     struct page *page, unsigned long vaddr,
+				     void *dst, void *src, int len)
+{
+	const unsigned long start = (unsigned long) dst;
+
+	memcpy(dst, src, len);
+
+	/* Non-executable mappings need no icache maintenance. */
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+
+	flush_icache_range(start, start + len);
+}
+
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+ memcpy((dst), (src), (len))
+
+/*
+ * Invalidate a VA range. The range is widened to L2 cacheline
+ * boundaries: the start is rounded down and the end rounded up.
+ *
+ * Note that on TILE64, __inv_buffer() actually flushes modified
+ * cache lines in addition to invalidating them, i.e., it's the
+ * same as __finv_buffer().
+ */
+static inline void __inv_buffer(void *buffer, size_t size)
+{
+	char *const end = (char *)L2_CACHE_ALIGN((long)buffer + size);
+	char *line;
+
+	for (line = (char *)((long)buffer & -L2_CACHE_BYTES);
+	     line < end;
+	     line += CHIP_INV_STRIDE())
+		__insn_inv(line);
+}
+
+/*
+ * Flush a VA range. The range is widened to L2 cacheline boundaries:
+ * the start is rounded down and the end rounded up.
+ */
+static inline void __flush_buffer(void *buffer, size_t size)
+{
+	char *const end = (char *)L2_CACHE_ALIGN((long)buffer + size);
+	char *line;
+
+	for (line = (char *)((long)buffer & -L2_CACHE_BYTES);
+	     line < end;
+	     line += CHIP_FLUSH_STRIDE())
+		__insn_flush(line);
+}
+
+/*
+ * Flush and invalidate a VA range. The range is widened to L2 cacheline
+ * boundaries: the start is rounded down and the end rounded up.
+ */
+static inline void __finv_buffer(void *buffer, size_t size)
+{
+	char *const end = (char *)L2_CACHE_ALIGN((long)buffer + size);
+	char *line;
+
+	for (line = (char *)((long)buffer & -L2_CACHE_BYTES);
+	     line < end;
+	     line += CHIP_FINV_STRIDE())
+		__insn_finv(line);
+}
+
+
+/* Invalidate a VA range and wait for it to be complete. */
+static inline void inv_buffer(void *buffer, size_t size)
+{
+ __inv_buffer(buffer, size);
+ mb();
+}
+
+/*
+ * Flush a locally-homecached VA range and wait for the evicted
+ * cachelines to hit memory.
+ */
+static inline void flush_buffer_local(void *buffer, size_t size)
+{
+ __flush_buffer(buffer, size);
+ mb_incoherent();
+}
+
+/*
+ * Flush and invalidate a locally-homecached VA range and wait for the
+ * evicted cachelines to hit memory.
+ */
+static inline void finv_buffer_local(void *buffer, size_t size)
+{
+ __finv_buffer(buffer, size);
+ mb_incoherent();
+}
+
+/*
+ * Flush and invalidate a VA range that is homed remotely, waiting
+ * until the memory controller holds the flushed values. If "hfh" is
+ * true, we will do a more expensive flush involving additional loads
+ * to make sure we have touched all the possible home cpus of a buffer
+ * that is homed with "hash for home".
+ */
+void finv_buffer_remote(void *buffer, size_t size, int hfh);
+
+/*
+ * On SMP systems, when the scheduler does migration-cost autodetection,
+ * it needs a way to flush as much of the CPU's caches as possible:
+ *
+ * TODO: fill this in!
+ */
+static inline void sched_cacheflush(void)
+{
+}
+
+#endif /* _ASM_TILE_CACHEFLUSH_H */
diff --git a/arch/tile/include/asm/checksum.h b/arch/tile/include/asm/checksum.h
new file mode 100644
index 00000000..a120766c
--- /dev/null
+++ b/arch/tile/include/asm/checksum.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_CHECKSUM_H
+#define _ASM_TILE_CHECKSUM_H
+
+#include <asm-generic/checksum.h>
+
+/*
+ * Allow us to provide a more optimized do_csum().
+ *
+ * NOTE(review): the self-referential #define presumably lets generic
+ * code detect (via #ifndef do_csum) that an arch version exists --
+ * confirm against the generic checksum implementation.
+ */
+__wsum do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+
+#endif /* _ASM_TILE_CHECKSUM_H */
diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h
new file mode 100644
index 00000000..276f067e
--- /dev/null
+++ b/arch/tile/include/asm/cmpxchg.h
@@ -0,0 +1,73 @@
+/*
+ * cmpxchg.h -- forked from asm/atomic.h with this copyright:
+ *
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _ASM_TILE_CMPXCHG_H
+#define _ASM_TILE_CMPXCHG_H
+
+#ifndef __ASSEMBLY__
+
+/* Nonexistent functions intended to cause link errors. */
+extern unsigned long __xchg_called_with_bad_pointer(void);
+extern unsigned long __cmpxchg_called_with_bad_pointer(void);
+
+/*
+ * xchg - exchange the object at @ptr with @x.
+ *
+ * Dispatches on sizeof(*(ptr)): 4-byte objects go through atomic_xchg()
+ * and 8-byte objects through atomic64_xchg(); the macro evaluates to
+ * whatever that call returns, cast back to the type of *(ptr). Any
+ * other size calls __xchg_called_with_bad_pointer(), which is
+ * deliberately never defined so the mistake shows up at link time.
+ *
+ * NOTE(review): the typeof((x)-(x)) and typeof(__x-__x) casts appear
+ * intended to route the value through an integer type so that pointer
+ * operands convert cleanly -- confirm.
+ */
+#define xchg(ptr, x) \
+ ({ \
+ typeof(*(ptr)) __x; \
+ switch (sizeof(*(ptr))) { \
+ case 4: \
+ __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \
+ (atomic_t *)(ptr), \
+ (u32)(typeof((x)-(x)))(x)); \
+ break; \
+ case 8: \
+ __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \
+ (atomic64_t *)(ptr), \
+ (u64)(typeof((x)-(x)))(x)); \
+ break; \
+ default: \
+ __xchg_called_with_bad_pointer(); \
+ } \
+ __x; \
+ })
+
+/*
+ * cmpxchg - compare-and-exchange the object at @ptr (old @o, new @n).
+ *
+ * Dispatches on sizeof(*(ptr)): 4-byte objects go through
+ * atomic_cmpxchg() and 8-byte objects through atomic64_cmpxchg(); the
+ * macro evaluates to whatever that call returns, cast back to the type
+ * of *(ptr). Any other size calls __cmpxchg_called_with_bad_pointer(),
+ * which is deliberately never defined so the mistake shows up at link
+ * time.
+ *
+ * NOTE(review): the typeof((o)-(o)) style casts appear intended to
+ * route the values through an integer type so that pointer operands
+ * convert cleanly -- confirm.
+ */
+#define cmpxchg(ptr, o, n) \
+ ({ \
+ typeof(*(ptr)) __x; \
+ switch (sizeof(*(ptr))) { \
+ case 4: \
+ __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \
+ (atomic_t *)(ptr), \
+ (u32)(typeof((o)-(o)))(o), \
+ (u32)(typeof((n)-(n)))(n)); \
+ break; \
+ case 8: \
+ __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \
+ (atomic64_t *)(ptr), \
+ (u64)(typeof((o)-(o)))(o), \
+ (u64)(typeof((n)-(n)))(n)); \
+ break; \
+ default: \
+ __cmpxchg_called_with_bad_pointer(); \
+ } \
+ __x; \
+ })
+
+#define tas(ptr) (xchg((ptr), 1))
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_TILE_CMPXCHG_H */
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
new file mode 100644
index 00000000..4b4b2896
--- /dev/null
+++ b/arch/tile/include/asm/compat.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_COMPAT_H
+#define _ASM_TILE_COMPAT_H
+
+/*
+ * Architecture specific compatibility types
+ */
+#include <linux/types.h>
+#include <linux/sched.h>
+
+#define COMPAT_USER_HZ 100
+
+/* "long" and pointer-based types are different. */
+typedef s32 compat_long_t;
+typedef u32 compat_ulong_t;
+typedef u32 compat_size_t;
+typedef s32 compat_ssize_t;
+typedef s32 compat_off_t;
+typedef s32 compat_time_t;
+typedef s32 compat_clock_t;
+typedef u32 compat_ino_t;
+typedef u32 compat_caddr_t;
+typedef u32 compat_uptr_t;
+
+/*
+ * Many types are "int" or otherwise the same, so the compat names are
+ * plain aliases for the native kernel types.
+ */
+typedef __kernel_pid_t compat_pid_t;
+typedef __kernel_uid_t __compat_uid_t;
+typedef __kernel_gid_t __compat_gid_t;
+typedef __kernel_uid32_t __compat_uid32_t;
+/* Alias the gid type (not the uid type) to mirror the pairs above. */
+typedef __kernel_gid32_t __compat_gid32_t;
+typedef __kernel_mode_t compat_mode_t;
+typedef __kernel_dev_t compat_dev_t;
+typedef __kernel_loff_t compat_loff_t;
+typedef __kernel_nlink_t compat_nlink_t;
+typedef __kernel_ipc_pid_t compat_ipc_pid_t;
+typedef __kernel_daddr_t compat_daddr_t;
+typedef __kernel_fsid_t compat_fsid_t;
+typedef __kernel_timer_t compat_timer_t;
+typedef __kernel_key_t compat_key_t;
+typedef int compat_int_t;
+typedef s64 compat_s64;
+typedef uint compat_uint_t;
+typedef u64 compat_u64;
+
+/* We use the same register dump format in 32-bit images. */
+typedef unsigned long compat_elf_greg_t;
+#define COMPAT_ELF_NGREG (sizeof(struct pt_regs) / sizeof(compat_elf_greg_t))
+typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
+
+struct compat_timespec {
+ compat_time_t tv_sec;
+ s32 tv_nsec;
+};
+
+struct compat_timeval {
+ compat_time_t tv_sec;
+ s32 tv_usec;
+};
+
+#define compat_stat stat
+#define compat_statfs statfs
+
+struct compat_sysctl {
+ unsigned int name;
+ int nlen;
+ unsigned int oldval;
+ unsigned int oldlenp;
+ unsigned int newval;
+ unsigned int newlen;
+ unsigned int __unused[4];
+};
+
+
+struct compat_flock {
+ short l_type;
+ short l_whence;
+ compat_off_t l_start;
+ compat_off_t l_len;
+ compat_pid_t l_pid;
+};
+
+#define F_GETLK64 12 /* using 'struct flock64' */
+#define F_SETLK64 13
+#define F_SETLKW64 14
+
+struct compat_flock64 {
+ short l_type;
+ short l_whence;
+ compat_loff_t l_start;
+ compat_loff_t l_len;
+ compat_pid_t l_pid;
+};
+
+#define COMPAT_RLIM_INFINITY 0xffffffff
+
+#define _COMPAT_NSIG 64
+#define _COMPAT_NSIG_BPW 32
+
+typedef u32 compat_sigset_word;
+
+#define COMPAT_OFF_T_MAX 0x7fffffff
+#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL
+
+struct compat_ipc64_perm {
+ compat_key_t key;
+ __compat_uid32_t uid;
+ __compat_gid32_t gid;
+ __compat_uid32_t cuid;
+ __compat_gid32_t cgid;
+ unsigned short mode;
+ unsigned short __pad1;
+ unsigned short seq;
+ unsigned short __pad2;
+ compat_ulong_t unused1;
+ compat_ulong_t unused2;
+};
+
+struct compat_semid64_ds {
+ struct compat_ipc64_perm sem_perm;
+ compat_time_t sem_otime;
+ compat_ulong_t __unused1;
+ compat_time_t sem_ctime;
+ compat_ulong_t __unused2;
+ compat_ulong_t sem_nsems;
+ compat_ulong_t __unused3;
+ compat_ulong_t __unused4;
+};
+
+struct compat_msqid64_ds {
+ struct compat_ipc64_perm msg_perm;
+ compat_time_t msg_stime;
+ compat_ulong_t __unused1;
+ compat_time_t msg_rtime;
+ compat_ulong_t __unused2;
+ compat_time_t msg_ctime;
+ compat_ulong_t __unused3;
+ compat_ulong_t msg_cbytes;
+ compat_ulong_t msg_qnum;
+ compat_ulong_t msg_qbytes;
+ compat_pid_t msg_lspid;
+ compat_pid_t msg_lrpid;
+ compat_ulong_t __unused4;
+ compat_ulong_t __unused5;
+};
+
+struct compat_shmid64_ds {
+ struct compat_ipc64_perm shm_perm;
+ compat_size_t shm_segsz;
+ compat_time_t shm_atime;
+ compat_ulong_t __unused1;
+ compat_time_t shm_dtime;
+ compat_ulong_t __unused2;
+ compat_time_t shm_ctime;
+ compat_ulong_t __unused3;
+ compat_pid_t shm_cpid;
+ compat_pid_t shm_lpid;
+ compat_ulong_t shm_nattch;
+ compat_ulong_t __unused4;
+ compat_ulong_t __unused5;
+};
+
+/*
+ * Turn a 32-bit pointer value passed in from user mode into a __user
+ * pointer, sign-extending it to the native width.
+ *
+ * This should not be used for syscall parameters; just declare them
+ * as pointers, because the syscall entry code will have appropriately
+ * converted them already.
+ */
+
+static inline void __user *compat_ptr(compat_uptr_t uptr)
+{
+	const long extended = (s32)uptr;
+
+	return (void __user *)extended;
+}
+
+/* Truncate a __user pointer to its low 32 bits as a compat pointer value. */
+static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+{
+	const unsigned long address = (unsigned long)uptr;
+
+	return (u32)address;
+}
+
+/*
+ * Sign-extend when storing a kernel pointer to a user's ptregs: keep
+ * the low 32 bits of the pointer and widen them back as a signed value.
+ */
+static inline unsigned long ptr_to_compat_reg(void __user *uptr)
+{
+	const long raw = (long __force)uptr;
+	const int low32 = (int)raw;
+
+	return (long)low32;
+}
+
+/*
+ * Return a user-space address @len bytes below the current task's
+ * saved user stack pointer.
+ */
+static inline void __user *arch_compat_alloc_user_space(long len)
+{
+	struct pt_regs *ptregs = task_pt_regs(current);
+	void __user *user_sp = (void __user *)ptregs->sp;
+
+	return user_sp - len;
+}
+
+/* Nonzero when TS_COMPAT is set in the current thread's status word. */
+static inline int is_compat_task(void)
+{
+	const int compat_bits = current_thread_info()->status & TS_COMPAT;
+
+	return compat_bits;
+}
+
+extern int compat_setup_rt_frame(int sig, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *set,
+ struct pt_regs *regs);
+
+/* Compat syscalls. */
+struct compat_sigaction;
+struct compat_siginfo;
+struct compat_sigaltstack;
+long compat_sys_execve(const char __user *path,
+ compat_uptr_t __user *argv,
+ compat_uptr_t __user *envp, struct pt_regs *);
+long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
+ struct compat_sigaction __user *oact,
+ size_t sigsetsize);
+long compat_sys_rt_sigqueueinfo(int pid, int sig,
+ struct compat_siginfo __user *uinfo);
+long compat_sys_rt_sigreturn(struct pt_regs *);
+long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
+ struct compat_sigaltstack __user *uoss_ptr,
+ struct pt_regs *);
+long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high);
+long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high);
+long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count,
+ u32 dummy, u32 low, u32 high);
+long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count,
+ u32 dummy, u32 low, u32 high);
+long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len);
+long compat_sys_sync_file_range2(int fd, unsigned int flags,
+ u32 offset_lo, u32 offset_hi,
+ u32 nbytes_lo, u32 nbytes_hi);
+long compat_sys_fallocate(int fd, int mode,
+ u32 offset_lo, u32 offset_hi,
+ u32 len_lo, u32 len_hi);
+long compat_sys_sched_rr_get_interval(compat_pid_t pid,
+ struct compat_timespec __user *interval);
+
+/* Tilera Linux syscalls that don't have "compat" versions. */
+#define compat_sys_flush_cache sys_flush_cache
+
+/* These are the intvec_64.S trampolines. */
+long _compat_sys_execve(const char __user *path,
+ const compat_uptr_t __user *argv,
+ const compat_uptr_t __user *envp);
+long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
+ struct compat_sigaltstack __user *uoss_ptr);
+long _compat_sys_rt_sigreturn(void);
+
+#endif /* _ASM_TILE_COMPAT_H */
diff --git a/arch/tile/include/asm/current.h b/arch/tile/include/asm/current.h
new file mode 100644
index 00000000..da21acf0
--- /dev/null
+++ b/arch/tile/include/asm/current.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_CURRENT_H
+#define _ASM_TILE_CURRENT_H
+
+#include <linux/thread_info.h>
+
+struct task_struct;
+
+/*
+ * Return the task_struct pointer stored in the current thread_info.
+ * The "current" macro defined right after this function expands to a
+ * call to it.
+ */
+static inline struct task_struct *get_current(void)
+{
+ return current_thread_info()->task;
+}
+#define current get_current()
+
+/* Return a usable "task_struct" pointer even if the real one is corrupt. */
+struct task_struct *validate_current(void);
+
+#endif /* _ASM_TILE_CURRENT_H */
diff --git a/arch/tile/include/asm/delay.h b/arch/tile/include/asm/delay.h
new file mode 100644
index 00000000..97b0e69e
--- /dev/null
+++ b/arch/tile/include/asm/delay.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_DELAY_H
+#define _ASM_TILE_DELAY_H
+
+/* Undefined functions to get compile-time errors. */
+extern void __bad_udelay(void);
+extern void __bad_ndelay(void);
+
+extern void __udelay(unsigned long usecs);
+extern void __ndelay(unsigned long nsecs);
+extern void __delay(unsigned long loops);
+
+/*
+ * Delay for "n" microseconds. A compile-time-constant "n" larger than
+ * 20000 references __bad_udelay(); any other constant "n" is multiplied
+ * by 1000 and handed to __ndelay(). A non-constant "n" is passed to
+ * __udelay() without any range check.
+ */
+#define udelay(n) (__builtin_constant_p(n) ? \
+ ((n) > 20000 ? __bad_udelay() : __ndelay((n) * 1000)) : \
+ __udelay(n))
+
+/*
+ * Delay for "n" nanoseconds. A compile-time-constant "n" larger than
+ * 20000 references __bad_ndelay(); every other value is passed to
+ * __ndelay().
+ */
+#define ndelay(n) (__builtin_constant_p(n) ? \
+ ((n) > 20000 ? __bad_ndelay() : __ndelay(n)) : \
+ __ndelay(n))
+
+#endif /* _ASM_TILE_DELAY_H */
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
new file mode 100644
index 00000000..eaa06d17
--- /dev/null
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_DMA_MAPPING_H
+#define _ASM_TILE_DMA_MAPPING_H
+
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/cache.h>
+#include <linux/io.h>
+
+/*
+ * Note that on x86 and powerpc, there is a "struct dma_mapping_ops"
+ * that is used for all the DMA operations. For now, we don't have an
+ * equivalent on tile, because we only have a single way of doing DMA.
+ * (Tilera bug 7994 to use dma_mapping_ops.)
+ */
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
+ enum dma_data_direction);
+extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction);
+extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction);
+extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nhwentries, enum dma_data_direction);
+extern dma_addr_t dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction);
+extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
+ size_t size, enum dma_data_direction);
+extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+ int nelems, enum dma_data_direction);
+extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+ int nelems, enum dma_data_direction);
+
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag);
+
+void dma_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle);
+
+extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t,
+ enum dma_data_direction);
+extern void dma_sync_single_for_device(struct device *, dma_addr_t,
+ size_t, enum dma_data_direction);
+extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t,
+ unsigned long offset, size_t,
+ enum dma_data_direction);
+extern void dma_sync_single_range_for_device(struct device *, dma_addr_t,
+ unsigned long offset, size_t,
+ enum dma_data_direction);
+extern void dma_cache_sync(struct device *dev, void *vaddr, size_t,
+ enum dma_data_direction);
+
+/*
+ * Always reports "no error" (0); both arguments are ignored, so a failed
+ * mapping cannot be detected through this helper.
+ */
+static inline int
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ return 0;
+}
+
+/* Always reports the mask as supported (1); both arguments are ignored. */
+static inline int
+dma_supported(struct device *dev, u64 mask)
+{
+ return 1;
+}
+
+/*
+ * Store "mask" as the device's DMA mask.
+ *
+ * Returns -EIO when the device has no dma_mask storage or when
+ * dma_supported() rejects the mask; returns 0 once the mask is stored.
+ */
+static inline int
+dma_set_mask(struct device *dev, u64 mask)
+{
+	/* Nowhere to store the mask. */
+	if (!dev->dma_mask)
+		return -EIO;
+
+	if (!dma_supported(dev, mask))
+		return -EIO;
+
+	*dev->dma_mask = mask;
+	return 0;
+}
+
+#endif /* _ASM_TILE_DMA_MAPPING_H */
diff --git a/arch/tile/include/asm/dma.h b/arch/tile/include/asm/dma.h
new file mode 100644
index 00000000..12a7ca16
--- /dev/null
+++ b/arch/tile/include/asm/dma.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_DMA_H
+#define _ASM_TILE_DMA_H
+
+#include <asm-generic/dma.h>
+
+/* Needed by drivers/pci/quirks.c */
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#endif
+
+#endif /* _ASM_TILE_DMA_H */
diff --git a/arch/tile/include/asm/edac.h b/arch/tile/include/asm/edac.h
new file mode 100644
index 00000000..87fc83ee
--- /dev/null
+++ b/arch/tile/include/asm/edac.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_EDAC_H
+#define _ASM_TILE_EDAC_H
+
+/*
+ * ECC atomic, DMA, SMP and interrupt safe scrub function.
+ * This implementation is a no-op: "va" and "size" are ignored.
+ */
+
+static inline void atomic_scrub(void *va, u32 size)
+{
+ /*
+ * There is nothing to be done here because CE is
+ * corrected by the mshim.
+ */
+ return;
+}
+
+#endif /* _ASM_TILE_EDAC_H */
diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
new file mode 100644
index 00000000..623a6bb7
--- /dev/null
+++ b/arch/tile/include/asm/elf.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_ELF_H
+#define _ASM_TILE_ELF_H
+
+/*
+ * ELF register definitions.
+ */
+
+#include <arch/chip.h>
+
+#include <linux/ptrace.h>
+#include <asm/byteorder.h>
+#include <asm/page.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+#define EM_TILE64 187
+#define EM_TILEPRO 188
+#define EM_TILEGX 191
+
+/* Provide a nominal data structure. */
+#define ELF_NFPREG 0
+typedef double elf_fpreg_t;
+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+
+#ifdef __tilegx__
+#define ELF_CLASS ELFCLASS64
+#else
+#define ELF_CLASS ELFCLASS32
+#endif
+#define ELF_DATA ELFDATA2LSB
+
+/*
+ * There seems to be a bug in how compat_binfmt_elf.c works: it
+ * #undefs ELF_ARCH, but it is then used in binfmt_elf.c for fill_note_info().
+ * Hack around this by providing an enum value of ELF_ARCH.
+ */
+enum { ELF_ARCH = CHIP_ELF_TYPE() };
+#define ELF_ARCH ELF_ARCH
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) \
+ ((x)->e_ident[EI_CLASS] == ELF_CLASS && \
+ (x)->e_machine == CHIP_ELF_TYPE())
+
+/* The module loader only handles a few relocation types. */
+#ifndef __tilegx__
+#define R_TILE_32 1
+#define R_TILE_JOFFLONG_X1 15
+#define R_TILE_IMM16_X0_LO 25
+#define R_TILE_IMM16_X1_LO 26
+#define R_TILE_IMM16_X0_HA 29
+#define R_TILE_IMM16_X1_HA 30
+#else
+#define R_TILEGX_64 1
+#define R_TILEGX_JUMPOFF_X1 21
+#define R_TILEGX_IMM16_X0_HW0 36
+#define R_TILEGX_IMM16_X1_HW0 37
+#define R_TILEGX_IMM16_X0_HW1 38
+#define R_TILEGX_IMM16_X1_HW1 39
+#define R_TILEGX_IMM16_X0_HW2_LAST 48
+#define R_TILEGX_IMM16_X1_HW2_LAST 49
+#endif
+
+/* Use standard page size for core dumps. */
+#define ELF_EXEC_PAGESIZE PAGE_SIZE
+
+/*
+ * This is the location that an ET_DYN program is loaded if exec'ed. Typical
+ * use of this is to invoke "./ld.so someprog" to test out a new version of
+ * the loader. We need to make sure that it is out of the way of the program
+ * that it will "exec", and that there is sufficient room for the brk.
+ */
+#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
+
+#define ELF_CORE_COPY_REGS(_dest, _regs) \
+ memcpy((char *) &_dest, (char *) _regs, \
+ sizeof(struct pt_regs));
+
+/* No additional FP registers to copy. */
+#define ELF_CORE_COPY_FPREGS(t, fpu) 0
+
+/*
+ * This yields a mask that user programs can use to figure out what
+ * instruction set this CPU supports. This could be done in user space,
+ * but it's not easy, and we've already done it here.
+ */
+#define ELF_HWCAP (0)
+
+/*
+ * This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization. This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+ */
+#define ELF_PLATFORM (NULL)
+
+extern void elf_plat_init(struct pt_regs *regs, unsigned long load_addr);
+
+#define ELF_PLAT_INIT(_r, load_addr) elf_plat_init(_r, load_addr)
+
+extern int dump_task_regs(struct task_struct *, elf_gregset_t *);
+#define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs)
+
+/* Tilera Linux has no personalities currently, so no need to do anything. */
+#define SET_PERSONALITY(ex) do { } while (0)
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+/* Support auto-mapping of the user interrupt vectors. */
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+ int executable_stack);
+#ifdef CONFIG_COMPAT
+
+#define COMPAT_ELF_PLATFORM "tilegx-m32"
+
+/*
+ * "Compat" binaries have the same machine type, but 32-bit class,
+ * since they're not a separate machine type, but just a 32-bit
+ * variant of the standard 64-bit architecture.
+ */
+#define compat_elf_check_arch(x) \
+ ((x)->e_ident[EI_CLASS] == ELFCLASS32 && \
+ (x)->e_machine == CHIP_ELF_TYPE())
+
+/*
+ * Set the initial pc and sp in "regs" for a compat task, converting each
+ * address with ptr_to_compat_reg(). "regs" is parenthesized so that any
+ * pointer-valued expression (not just a plain identifier) may be passed.
+ */
+#define compat_start_thread(regs, ip, usp) do { \
+	(regs)->pc = ptr_to_compat_reg((void *)(ip)); \
+	(regs)->sp = ptr_to_compat_reg((void *)(usp)); \
+	} while (0)
+
+/*
+ * Use SET_PERSONALITY to indicate compatibility via TS_COMPAT.
+ */
+#undef SET_PERSONALITY
+#define SET_PERSONALITY(ex) \
+do { \
+ current->personality = PER_LINUX; \
+ current_thread_info()->status &= ~TS_COMPAT; \
+} while (0)
+#define COMPAT_SET_PERSONALITY(ex) \
+do { \
+ current->personality = PER_LINUX_32BIT; \
+ current_thread_info()->status |= TS_COMPAT; \
+} while (0)
+
+#define COMPAT_ELF_ET_DYN_BASE (0xffffffff / 3 * 2)
+
+#endif /* CONFIG_COMPAT */
+
+#endif /* _ASM_TILE_ELF_H */
diff --git a/arch/tile/include/asm/exec.h b/arch/tile/include/asm/exec.h
new file mode 100644
index 00000000..a714e195
--- /dev/null
+++ b/arch/tile/include/asm/exec.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_EXEC_H
+#define _ASM_TILE_EXEC_H
+
+#define arch_align_stack(x) (x)
+
+#endif /* _ASM_TILE_EXEC_H */
diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h
new file mode 100644
index 00000000..c66f7933
--- /dev/null
+++ b/arch/tile/include/asm/fixmap.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 1998 Ingo Molnar
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_FIXMAP_H
+#define _ASM_TILE_FIXMAP_H
+
+#include <asm/page.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/kernel.h>
+#ifdef CONFIG_HIGHMEM
+#include <linux/threads.h>
+#include <asm/kmap_types.h>
+#endif
+
+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the end of supervisor virtual memory backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * these 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages. (or larger if used with an increment
+ * higher than 1) use set_fixmap(idx, phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ *
+ * We don't bother with a FIX_HOLE since above the fixmaps
+ * is unmapped memory in any case.
+ */
+enum fixed_addresses {
+#ifdef CONFIG_HIGHMEM
+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+ __end_of_permanent_fixed_addresses,
+
+ /*
+ * Temporary boot-time mappings, used before ioremap() is functional.
+ * Not currently needed by the Tile architecture.
+ */
+#define NR_FIX_BTMAPS 0
+#if NR_FIX_BTMAPS
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
+ __end_of_fixed_addresses
+#else
+ __end_of_fixed_addresses = __end_of_permanent_fixed_addresses
+#endif
+};
+
+extern void __set_fixmap(enum fixed_addresses idx,
+ unsigned long phys, pgprot_t flags);
+
+#define set_fixmap(idx, phys) \
+ __set_fixmap(idx, phys, PAGE_KERNEL)
+#define clear_fixmap(idx) \
+ __set_fixmap(idx, 0, __pgprot(0))
+
+#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE)
+#define FIXADDR_BOOT_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_BOOT_SIZE)
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static __always_inline unsigned long fix_to_virt(const unsigned int idx)
+{
+ /*
+ * this branch gets completely eliminated after inlining,
+ * except when someone tries to use fixaddr indices in an
+ * illegal way. (such as mixing up address types or using
+ * out-of-range indices).
+ *
+ * If it doesn't get removed, the linker will complain
+ * loudly with a reasonably clear error message..
+ */
+ if (idx >= __end_of_fixed_addresses)
+ __this_fixmap_does_not_exist();
+
+ return __fix_to_virt(idx);
+}
+
+/*
+ * 'address to index' translation.
+ *
+ * __fix_to_virt(0) is FIXADDR_TOP itself and FIXADDR_START is defined as
+ * FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE, so the page that starts at
+ * FIXADDR_TOP belongs to the fixmap area. The valid range is therefore
+ * [FIXADDR_START, FIXADDR_TOP + PAGE_SIZE); anything outside it is a bug.
+ */
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+	BUG_ON(vaddr >= FIXADDR_TOP + PAGE_SIZE || vaddr < FIXADDR_START);
+	return __virt_to_fix(vaddr);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_FIXMAP_H */
diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h
new file mode 100644
index 00000000..461459b0
--- /dev/null
+++ b/arch/tile/include/asm/ftrace.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_FTRACE_H
+#define _ASM_TILE_FTRACE_H
+
+/* empty */
+
+#endif /* _ASM_TILE_FTRACE_H */
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
new file mode 100644
index 00000000..d03ec124
--- /dev/null
+++ b/arch/tile/include/asm/futex.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * These routines make two important assumptions:
+ *
+ * 1. atomic_t is really an int and can be freely cast back and forth
+ * (validated in __init_atomic_per_cpu).
+ *
+ * 2. userspace uses sys_cmpxchg() for all atomic operations, thus using
+ * the same locking convention that all the kernel atomic routines use.
+ */
+
+#ifndef _ASM_TILE_FUTEX_H
+#define _ASM_TILE_FUTEX_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+
+extern struct __get_user futex_set(u32 __user *v, int i);
+extern struct __get_user futex_add(u32 __user *v, int n);
+extern struct __get_user futex_or(u32 __user *v, int n);
+extern struct __get_user futex_andn(u32 __user *v, int n);
+extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n);
+
+#ifndef __tilegx__
+extern struct __get_user futex_xor(u32 __user *v, int n);
+#else
+/*
+ * On tilegx futex_xor() is built here from a compare-and-exchange loop:
+ * read the user word, then retry futex_cmpxchg() until it either reports
+ * an error or returns the same value the xor was computed from.
+ * The result of the last __get_user_4()/futex_cmpxchg() call is returned.
+ */
+static inline struct __get_user futex_xor(u32 __user *uaddr, int n)
+{
+ struct __get_user asm_ret = __get_user_4(uaddr);
+ if (!asm_ret.err) {
+ int oldval, newval;
+ do {
+ /* Recompute from the most recently observed value. */
+ oldval = asm_ret.val;
+ newval = oldval ^ n;
+ asm_ret = futex_cmpxchg(uaddr, oldval, newval);
+ } while (asm_ret.err == 0 && oldval != asm_ret.val);
+ }
+ return asm_ret;
+}
+#endif
+
+/*
+ * Decode "encoded_op", apply the selected operation to the user word at
+ * "uaddr" with page faults disabled, then compare the value handed back
+ * by the futex_*() helper (presumably the word's previous value) against
+ * the encoded comparison argument.
+ *
+ * Returns the 0/1 result of the comparison on success, -EFAULT if
+ * access_ok() rejects "uaddr", -ENOSYS for an unknown operation or
+ * comparison code, or the error reported by the futex_*() helper.
+ */
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	/* The shift pairs sign-extend the two 12-bit argument fields. */
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int ret;
+	struct __get_user asm_ret;
+
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	pagefault_disable();
+	switch (op) {
+	case FUTEX_OP_SET:
+		asm_ret = futex_set(uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		asm_ret = futex_add(uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		asm_ret = futex_or(uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		asm_ret = futex_andn(uaddr, oparg);
+		break;
+	case FUTEX_OP_XOR:
+		asm_ret = futex_xor(uaddr, oparg);
+		break;
+	default:
+		/* Only .err is set here; .val must not be read. */
+		asm_ret.err = -ENOSYS;
+		break;
+	}
+	pagefault_enable();
+
+	ret = asm_ret.err;
+
+	if (!ret) {
+		/*
+		 * Compare as a signed int, the same type as cmparg. Using
+		 * asm_ret.val directly would turn the ordered comparisons
+		 * into unsigned ones if that field has an unsigned type,
+		 * giving wrong answers for negative operands.
+		 */
+		int oldval = asm_ret.val;
+
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ:
+			ret = (oldval == cmparg);
+			break;
+		case FUTEX_OP_CMP_NE:
+			ret = (oldval != cmparg);
+			break;
+		case FUTEX_OP_CMP_LT:
+			ret = (oldval < cmparg);
+			break;
+		case FUTEX_OP_CMP_GE:
+			ret = (oldval >= cmparg);
+			break;
+		case FUTEX_OP_CMP_LE:
+			ret = (oldval <= cmparg);
+			break;
+		case FUTEX_OP_CMP_GT:
+			ret = (oldval > cmparg);
+			break;
+		default:
+			ret = -ENOSYS;
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Compare-and-exchange on the user word at "uaddr" via futex_cmpxchg().
+ *
+ * Returns -EFAULT without touching *uval if access_ok() rejects "uaddr".
+ * Otherwise stores the value reported by futex_cmpxchg() in *uval (this
+ * happens even when an error is reported) and returns its error code.
+ */
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
+{
+ struct __get_user asm_ret;
+
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+ return -EFAULT;
+
+ asm_ret = futex_cmpxchg(uaddr, oldval, newval);
+ *uval = asm_ret.val;
+ return asm_ret.err;
+}
+
+#ifndef __tilegx__
+/* Return failure from the atomic wrappers. */
+struct __get_user __atomic_bad_address(int __user *addr);
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_FUTEX_H */
diff --git a/arch/tile/include/asm/hardirq.h b/arch/tile/include/asm/hardirq.h
new file mode 100644
index 00000000..822390f9
--- /dev/null
+++ b/arch/tile/include/asm/hardirq.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_HARDIRQ_H
+#define _ASM_TILE_HARDIRQ_H
+
+#include <linux/threads.h>
+#include <linux/cache.h>
+
+#include <asm/irq.h>
+
+/*
+ * Interrupt bookkeeping record, aligned to a cache line. Besides the
+ * softirq-pending word and an idle timestamp it carries one counter per
+ * kind of hard interrupt (presumably incremented by the matching
+ * handlers -- the updates are not in this header).
+ */
+typedef struct {
+ unsigned int __softirq_pending;
+ long idle_timestamp;
+
+ /* Hard interrupt statistics. */
+ unsigned int irq_timer_count;
+ unsigned int irq_syscall_count;
+ unsigned int irq_resched_count;
+ unsigned int irq_hv_flush_count;
+ unsigned int irq_call_count;
+ unsigned int irq_hv_msg_count;
+ unsigned int irq_dev_intr_count;
+
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+
+#define __ARCH_IRQ_STAT
+#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
+
+#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
+
+#define HARDIRQ_BITS 8
+
+#endif /* _ASM_TILE_HARDIRQ_H */
diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h
new file mode 100644
index 00000000..2ac42284
--- /dev/null
+++ b/arch/tile/include/asm/hardwall.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Provide methods for the HARDWALL_FILE for accessing the UDN.
+ */
+
+#ifndef _ASM_TILE_HARDWALL_H
+#define _ASM_TILE_HARDWALL_H
+
+#include <linux/ioctl.h>
+
+#define HARDWALL_IOCTL_BASE 0xa2
+
+/*
+ * The HARDWALL_CREATE() ioctl is a macro with a "size" argument.
+ * The resulting ioctl value is passed to the kernel in conjunction
+ * with a pointer to a little-endian bitmask of cpus, which must be
+ * physically in a rectangular configuration on the chip.
+ * The "size" is the number of bytes of cpu mask data.
+ */
+#define _HARDWALL_CREATE 1
+#define HARDWALL_CREATE(size) \
+ _IOC(_IOC_READ, HARDWALL_IOCTL_BASE, _HARDWALL_CREATE, (size))
+
+#define _HARDWALL_ACTIVATE 2
+#define HARDWALL_ACTIVATE \
+ _IO(HARDWALL_IOCTL_BASE, _HARDWALL_ACTIVATE)
+
+#define _HARDWALL_DEACTIVATE 3
+#define HARDWALL_DEACTIVATE \
+ _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE)
+
+#define _HARDWALL_GET_ID 4
+#define HARDWALL_GET_ID \
+ _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID)
+
+#ifndef __KERNEL__
+
+/* This is the canonical name expected by userspace. */
+#define HARDWALL_FILE "/dev/hardwall"
+
+#else
+
+/*
+ * /proc hooks for hardwall.
+ *
+ * Both structure types are only used through pointers, so forward
+ * declarations are enough and keep this header usable without pulling
+ * in the full definitions first. Without CONFIG_HARDWALL the init hook
+ * collapses to an empty inline.
+ */
+struct proc_dir_entry;
+struct task_struct;
+#ifdef CONFIG_HARDWALL
+void proc_tile_hardwall_init(struct proc_dir_entry *root);
+int proc_pid_hardwall(struct task_struct *task, char *buffer);
+#else
+static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {}
+#endif
+
+#endif
+
+#endif /* _ASM_TILE_HARDWALL_H */
diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h
new file mode 100644
index 00000000..fc8429a3
--- /dev/null
+++ b/arch/tile/include/asm/highmem.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 1999 Gerhard Wichert, Siemens AG
+ * Gerhard.Wichert@pdb.siemens.de
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Used in CONFIG_HIGHMEM systems for memory pages which
+ * are not addressable by direct kernel virtual addresses.
+ *
+ */
+
+#ifndef _ASM_TILE_HIGHMEM_H
+#define _ASM_TILE_HIGHMEM_H
+
+#include <linux/interrupt.h>
+#include <linux/threads.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *pkmap_page_table;
+
+/*
+ * Ordering is:
+ *
+ * FIXADDR_TOP
+ * fixed_addresses
+ * FIXADDR_START
+ * temp fixed addresses
+ * FIXADDR_BOOT_START
+ * Persistent kmap area
+ * PKMAP_BASE
+ * VMALLOC_END
+ * Vmalloc area
+ * VMALLOC_START
+ * high_memory
+ */
+#define LAST_PKMAP_MASK (LAST_PKMAP-1)
+/*
+ * Convert between an address in the persistent kmap area and its slot
+ * number. Macro arguments are parenthesized so that compound
+ * expressions can be passed safely.
+ */
+#define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT)
+#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))
+
+void *kmap_high(struct page *page);
+void kunmap_high(struct page *page);
+void *kmap(struct page *page);
+void kunmap(struct page *page);
+void *kmap_fix_kpte(struct page *page, int finished);
+
+/* This macro is used only in map_new_virtual() to map "page". */
+#define kmap_prot page_to_kpgprot(page)
+
+void *kmap_atomic(struct page *page);
+void __kunmap_atomic(void *kvaddr);
+void *kmap_atomic_pfn(unsigned long pfn);
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
+struct page *kmap_atomic_to_page(void *ptr);
+void *kmap_atomic_prot(struct page *page, pgprot_t prot);
+void kmap_atomic_fix_kpte(struct page *page, int finished);
+
+#define flush_cache_kmaps() do { } while (0)
+
+#endif /* _ASM_TILE_HIGHMEM_H */
diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h
new file mode 100644
index 00000000..a8243865
--- /dev/null
+++ b/arch/tile/include/asm/homecache.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Handle issues around the Tile "home cache" model of coherence.
+ */
+
+#ifndef _ASM_TILE_HOMECACHE_H
+#define _ASM_TILE_HOMECACHE_H
+
+#include <asm/page.h>
+#include <linux/cpumask.h>
+
+struct page;
+struct task_struct;
+struct vm_area_struct;
+struct zone;
+
+/*
+ * Special (negative) "home" values. Each replacement list is
+ * parenthesized so the constants behave as single operands in any
+ * expression.
+ */
+
+/*
+ * Coherence point for the page is its memory controller.
+ * It is not present in any cache (L1 or L2).
+ */
+#define PAGE_HOME_UNCACHED (-1)
+
+/*
+ * Is this page immutable (unwritable) and thus able to be cached more
+ * widely than would otherwise be possible? On tile64 this means we
+ * mark the PTE to cache locally; on tilepro it means we have "nc" set.
+ */
+#define PAGE_HOME_IMMUTABLE (-2)
+
+/*
+ * Each cpu considers its own cache to be the home for the page,
+ * which makes it incoherent.
+ */
+#define PAGE_HOME_INCOHERENT (-3)
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+/* Home for the page is distributed via hash-for-home. */
+#define PAGE_HOME_HASH (-4)
+#endif
+
+/* Homing is unknown or unspecified. Not valid for page_home(). */
+#define PAGE_HOME_UNKNOWN (-5)
+
+/* Home on the current cpu. Not valid for page_home(). */
+#define PAGE_HOME_HERE (-6)
+
+/* Support wrapper to use instead of explicit hv_flush_remote(). */
+extern void flush_remote(unsigned long cache_pfn, unsigned long cache_length,
+ const struct cpumask *cache_cpumask,
+ HV_VirtAddr tlb_va, unsigned long tlb_length,
+ unsigned long tlb_pgsize,
+ const struct cpumask *tlb_cpumask,
+ HV_Remote_ASID *asids, int asidcount);
+
+/* Set homing-related bits in a PTE (can also pass a pgprot_t). */
+extern pte_t pte_set_home(pte_t pte, int home);
+
+/* Do a cache eviction on the specified cpus. */
+extern void homecache_evict(const struct cpumask *mask);
+
+/*
+ * Change a kernel page's homecache. It must not be mapped in user space.
+ * If !CONFIG_HOMECACHE, only usable on LOWMEM, and can only be called when
+ * no other cpu can reference the page, and causes a full-chip cache/TLB flush.
+ */
+extern void homecache_change_page_home(struct page *, int order, int home);
+
+/*
+ * Flush a page out of whatever cache(s) it is in.
+ * This is more than just finv, since it properly handles waiting
+ * for the data to reach memory on tilepro, but it can be quite
+ * heavyweight, particularly on hash-for-home memory.
+ */
+extern void homecache_flush_cache(struct page *, int order);
+
+/*
+ * Allocate a page with the given GFP flags, home, and optionally
+ * node. These routines are actually just wrappers around the normal
+ * alloc_pages() / alloc_pages_node() functions, which set and clear
+ * a per-cpu variable to communicate with homecache_new_kernel_page().
+ * If !CONFIG_HOMECACHE, uses homecache_change_page_home().
+ */
+extern struct page *homecache_alloc_pages(gfp_t gfp_mask,
+ unsigned int order, int home);
+extern struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
+ unsigned int order, int home);
+#define homecache_alloc_page(gfp_mask, home) \
+ homecache_alloc_pages(gfp_mask, 0, home)
+
+/*
+ * Counterparts of the homecache_alloc_*() routines. With full
+ * homecaching these simply pass through to free_pages(); with
+ * !CONFIG_HOMECACHE they first use homecache_change_page_home() to put
+ * the home back to the default before the page goes back to the
+ * allocator.
+ *
+ * homecache_free_page() is the order-0 shorthand; its argument is
+ * forwarded as the "addr" argument of homecache_free_pages().
+ */
+void homecache_free_pages(unsigned long addr, unsigned int order);
+#define homecache_free_page(addr) \
+	homecache_free_pages((addr), 0)
+
+
+
+/*
+ * Report the page home for LOWMEM pages by examining their kernel PTE,
+ * or for highmem pages as the default home.
+ */
+extern int page_home(struct page *);
+
+#define homecache_migrate_kthread() do {} while (0)
+
+#define homecache_kpte_lock() 0
+#define homecache_kpte_unlock(flags) do {} while (0)
+
+
+#endif /* _ASM_TILE_HOMECACHE_H */
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
new file mode 100644
index 00000000..d396d180
--- /dev/null
+++ b/arch/tile/include/asm/hugetlb.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_HUGETLB_H
+#define _ASM_TILE_HUGETLB_H
+
+#include <asm/page.h>
+
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+ unsigned long addr,
+ unsigned long len) {
+ return 0;
+}
+
+/*
+ * If the arch doesn't supply something else, assume that hugepage
+ * size aligned regions are ok without further preparation.
+ */
+static inline int prepare_hugepage_range(struct file *file,
+ unsigned long addr, unsigned long len)
+{
+ /* Complement of the huge page mask for this file's hstate. */
+ unsigned long offset_bits = ~huge_page_mask(hstate_file(file));
+ /* Reject the range unless both start and length are aligned. */
+ if ((addr | len) & offset_bits)
+ return -EINVAL;
+ return 0;
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+ unsigned long addr, unsigned long end,
+ unsigned long floor,
+ unsigned long ceiling)
+{
+ free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ set_pte(ptep, pte);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ ptep_clear_flush(vma, addr, ptep);
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+ return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+ return pte_wrprotect(pte);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
+{
+ return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+ return *ptep;
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+ return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+#endif /* _ASM_TILE_HUGETLB_H */
diff --git a/arch/tile/include/asm/hv_driver.h b/arch/tile/include/asm/hv_driver.h
new file mode 100644
index 00000000..ad614de8
--- /dev/null
+++ b/arch/tile/include/asm/hv_driver.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * This header defines a wrapper interface for managing hypervisor
+ * device calls that will result in an interrupt at some later time.
+ * In particular, this provides wrappers for hv_dev_preada() and
+ * hv_dev_pwritea().
+ */
+
+#ifndef _ASM_TILE_HV_DRIVER_H
+#define _ASM_TILE_HV_DRIVER_H
+
+#include <hv/hypervisor.h>
+
+struct hv_driver_cb;
+
+/* A callback to be invoked when an operation completes. */
+typedef void hv_driver_callback_t(struct hv_driver_cb *cb, __hv32 result);
+
+/*
+ * A structure to hold information about an outstanding call.
+ * The driver must allocate a separate structure for each call.
+ */
+struct hv_driver_cb {
+ hv_driver_callback_t *callback; /* Function to call on interrupt. */
+ void *dev; /* Driver-specific state variable. */
+};
+
+/* Wrapper for invoking hv_dev_preada(). */
+static inline int
+tile_hv_dev_preada(int devhdl, __hv32 flags, __hv32 sgl_len,
+ HV_SGL sgl[/* sgl_len */], __hv64 offset,
+ struct hv_driver_cb *callback)
+{
+ return hv_dev_preada(devhdl, flags, sgl_len, sgl,
+ offset, (HV_IntArg)callback);
+}
+
+/* Wrapper for invoking hv_dev_pwritea(). */
+static inline int
+tile_hv_dev_pwritea(int devhdl, __hv32 flags, __hv32 sgl_len,
+ HV_SGL sgl[/* sgl_len */], __hv64 offset,
+ struct hv_driver_cb *callback)
+{
+ return hv_dev_pwritea(devhdl, flags, sgl_len, sgl,
+ offset, (HV_IntArg)callback);
+}
+
+
+#endif /* _ASM_TILE_HV_DRIVER_H */
diff --git a/arch/tile/include/asm/hw_irq.h b/arch/tile/include/asm/hw_irq.h
new file mode 100644
index 00000000..4fac5fbf
--- /dev/null
+++ b/arch/tile/include/asm/hw_irq.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_HW_IRQ_H
+#define _ASM_TILE_HW_IRQ_H
+
+#endif /* _ASM_TILE_HW_IRQ_H */
diff --git a/arch/tile/include/asm/ide.h b/arch/tile/include/asm/ide.h
new file mode 100644
index 00000000..3c6f2ed8
--- /dev/null
+++ b/arch/tile/include/asm/ide.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_IDE_H
+#define _ASM_TILE_IDE_H
+
+/* For IDE on PCI */
+#define MAX_HWIFS 10
+
+#define ide_default_io_ctl(base) (0)
+
+#include <asm-generic/ide_iops.h>
+
+#endif /* _ASM_TILE_IDE_H */
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
new file mode 100644
index 00000000..d2152deb
--- /dev/null
+++ b/arch/tile/include/asm/io.h
@@ -0,0 +1,305 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_IO_H
+#define _ASM_TILE_IO_H
+
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <asm/page.h>
+
+#define IO_SPACE_LIMIT 0xfffffffful
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access.
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer (identity here).
+ */
+#define xlate_dev_kmem_ptr(p) (p)
+
+/*
+ * Change "struct page" to physical address.
+ */
+#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
+
+/*
+ * Some places try to pass in an loff_t for PHYSADDR (?!), so we cast it to
+ * long before casting it to a pointer to avoid compiler warnings.
+ */
+#if CHIP_HAS_MMIO()
+extern void __iomem *ioremap(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
+ pgprot_t pgprot);
+extern void iounmap(volatile void __iomem *addr);
+#else
+#define ioremap(physaddr, size) ((void __iomem *)(unsigned long)(physaddr))
+#define iounmap(addr) ((void)0)
+#endif
+
+#define ioremap_nocache(physaddr, size) ioremap(physaddr, size)
+#define ioremap_wc(physaddr, size) ioremap(physaddr, size)
+#define ioremap_writethrough(physaddr, size) ioremap(physaddr, size)
+#define ioremap_fullcache(physaddr, size) ioremap(physaddr, size)
+
+#define mmiowb()
+
+/* Conversion between virtual and physical mappings. */
+#define mm_ptov(addr) ((void *)phys_to_virt(addr))
+#define mm_vtop(addr) ((unsigned long)virt_to_phys(addr))
+
+#ifdef CONFIG_PCI
+
+extern u8 _tile_readb(unsigned long addr);
+extern u16 _tile_readw(unsigned long addr);
+extern u32 _tile_readl(unsigned long addr);
+extern u64 _tile_readq(unsigned long addr);
+extern void _tile_writeb(u8 val, unsigned long addr);
+extern void _tile_writew(u16 val, unsigned long addr);
+extern void _tile_writel(u32 val, unsigned long addr);
+extern void _tile_writeq(u64 val, unsigned long addr);
+
+#else
+
+/*
+ * The Tile architecture does not support IOMEM unless PCI is enabled.
+ * Unfortunately we can't yet simply not declare these methods,
+ * since some generic code that compiles into the kernel, but
+ * we never run, uses them unconditionally.
+ */
+
+static inline int iomem_panic(void)
+{
+ panic("readb/writeb and friends do not exist on tile without PCI");
+ return 0;
+}
+
+static inline u8 _tile_readb(unsigned long addr)
+{
+ return iomem_panic();
+}
+
+static inline u16 _tile_readw(unsigned long addr)
+{
+ return iomem_panic();
+}
+
+static inline u32 _tile_readl(unsigned long addr)
+{
+ return iomem_panic();
+}
+
+static inline u64 _tile_readq(unsigned long addr)
+{
+ return iomem_panic();
+}
+
+static inline void _tile_writeb(u8 val, unsigned long addr)
+{
+ iomem_panic();
+}
+
+static inline void _tile_writew(u16 val, unsigned long addr)
+{
+ iomem_panic();
+}
+
+static inline void _tile_writel(u32 val, unsigned long addr)
+{
+ iomem_panic();
+}
+
+static inline void _tile_writeq(u64 val, unsigned long addr)
+{
+ iomem_panic();
+}
+
+#endif
+
+#define readb(addr) _tile_readb((unsigned long)(addr))
+#define readw(addr) _tile_readw((unsigned long)(addr))
+#define readl(addr) _tile_readl((unsigned long)(addr))
+#define readq(addr) _tile_readq((unsigned long)(addr))
+#define writeb(val, addr) _tile_writeb((val), (unsigned long)(addr))
+#define writew(val, addr) _tile_writew((val), (unsigned long)(addr))
+#define writel(val, addr) _tile_writel((val), (unsigned long)(addr))
+#define writeq(val, addr) _tile_writeq((val), (unsigned long)(addr))
+
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+#define __raw_readq readq
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+#define __raw_writeq writeq
+
+#define readb_relaxed readb
+#define readw_relaxed readw
+#define readl_relaxed readl
+#define readq_relaxed readq
+
+#define ioread8 readb
+#define ioread16 readw
+#define ioread32 readl
+#define ioread64 readq
+#define iowrite8 writeb
+#define iowrite16 writew
+#define iowrite32 writel
+#define iowrite64 writeq
+
+static inline void memset_io(void *dst, int val, size_t len)
+{
+ size_t x; /* same type as len: no signed/unsigned compare */
+ u32 fill = (val & 0xff) * 0x01010101U; /* low byte of val, replicated 4x */
+ BUG_ON((unsigned long)dst & 0x3);
+ for (x = 0; x < len; x += 4) /* whole 32-bit words; len rounds up to 4 */
+ writel(fill, dst + x);
+}
+
+static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
+ size_t len)
+{
+ size_t x; /* same type as len: no signed/unsigned compare */
+ BUG_ON((unsigned long)src & 0x3);
+ for (x = 0; x < len; x += 4) /* whole 32-bit words; len rounds up to 4 */
+ *(u32 *)(dst + x) = readl(src + x);
+}
+
+static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
+ size_t len)
+{
+ size_t x; /* same type as len: no signed/unsigned compare */
+ BUG_ON((unsigned long)dst & 0x3);
+ for (x = 0; x < len; x += 4) /* whole 32-bit words; len rounds up to 4 */
+ writel(*(const u32 *)(src + x), dst + x);
+}
+
+/*
+ * The Tile architecture does not support IOPORT, even with PCI.
+ * Unfortunately we can't yet simply not declare these methods,
+ * since some generic code that compiles into the kernel, but
+ * we never run, uses them unconditionally.
+ */
+
+static inline long ioport_panic(void)
+{
+ panic("inb/outb and friends do not exist on tile");
+ return 0;
+}
+
+static inline void __iomem *ioport_map(unsigned long port, unsigned int len)
+{
+ pr_info("ioport_map: mapping IO resources is unsupported on tile.\n");
+ return NULL;
+}
+
+static inline void ioport_unmap(void __iomem *addr)
+{
+ ioport_panic();
+}
+
+static inline u8 inb(unsigned long addr)
+{
+ return ioport_panic();
+}
+
+static inline u16 inw(unsigned long addr)
+{
+ return ioport_panic();
+}
+
+static inline u32 inl(unsigned long addr)
+{
+ return ioport_panic();
+}
+
+static inline void outb(u8 b, unsigned long addr)
+{
+ ioport_panic();
+}
+
+static inline void outw(u16 b, unsigned long addr)
+{
+ ioport_panic();
+}
+
+static inline void outl(u32 b, unsigned long addr)
+{
+ ioport_panic();
+}
+
+#define inb_p(addr) inb(addr)
+#define inw_p(addr) inw(addr)
+#define inl_p(addr) inl(addr)
+#define outb_p(x, addr) outb((x), (addr))
+#define outw_p(x, addr) outw((x), (addr))
+#define outl_p(x, addr) outl((x), (addr))
+
+static inline void insb(unsigned long addr, void *buffer, int count)
+{
+ ioport_panic();
+}
+
+static inline void insw(unsigned long addr, void *buffer, int count)
+{
+ ioport_panic();
+}
+
+static inline void insl(unsigned long addr, void *buffer, int count)
+{
+ ioport_panic();
+}
+
+static inline void outsb(unsigned long addr, const void *buffer, int count)
+{
+ ioport_panic();
+}
+
+static inline void outsw(unsigned long addr, const void *buffer, int count)
+{
+ ioport_panic();
+}
+
+static inline void outsl(unsigned long addr, const void *buffer, int count)
+{
+ ioport_panic();
+}
+
+#define ioread16be(addr) be16_to_cpu(ioread16(addr))
+#define ioread32be(addr) be32_to_cpu(ioread32(addr))
+#define iowrite16be(v, addr) iowrite16(cpu_to_be16(v), (addr))
+#define iowrite32be(v, addr) iowrite32(cpu_to_be32(v), (addr))
+
+#define ioread8_rep(p, dst, count) \
+ insb((unsigned long) (p), (dst), (count))
+#define ioread16_rep(p, dst, count) \
+ insw((unsigned long) (p), (dst), (count))
+#define ioread32_rep(p, dst, count) \
+ insl((unsigned long) (p), (dst), (count))
+
+#define iowrite8_rep(p, src, count) \
+ outsb((unsigned long) (p), (src), (count))
+#define iowrite16_rep(p, src, count) \
+ outsw((unsigned long) (p), (src), (count))
+#define iowrite32_rep(p, src, count) \
+ outsl((unsigned long) (p), (src), (count))
+
+#define virt_to_bus virt_to_phys
+#define bus_to_virt phys_to_virt
+
+#endif /* _ASM_TILE_IO_H */
diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h
new file mode 100644
index 00000000..33cff9a3
--- /dev/null
+++ b/arch/tile/include/asm/irq.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_IRQ_H
+#define _ASM_TILE_IRQ_H
+
+#include <linux/hardirq.h>
+
+/* The hypervisor interface provides 32 IRQs. */
+#define NR_IRQS 32
+
+/* IRQ numbers used for linux IPIs. */
+#define IRQ_RESCHEDULE 0
+
+#define irq_canonicalize(irq) (irq)
+
+void ack_bad_irq(unsigned int irq);
+
+/*
+ * Different ways of handling interrupts. Tile interrupts are always
+ * per-cpu; there is no global interrupt controller to implement
+ * enable/disable. Most onboard devices can send their interrupts to
+ * many tiles at the same time, and Tile-specific drivers know how to
+ * deal with this.
+ *
+ * However, generic devices (usually PCIE based, sometimes GPIO)
+ * expect that interrupts will fire on a single core at a time and
+ * that the irq can be enabled or disabled from any core at any time.
+ * We implement this by directing such interrupts to a single core.
+ *
+ * One added wrinkle is that PCI interrupts can be either
+ * hardware-cleared (legacy interrupts) or software cleared (MSI).
+ * Other generic device systems (GPIO) are always software-cleared.
+ *
+ * The enums below are used by drivers for onboard devices, including
+ * the internals of PCI root complex and GPIO. They allow the driver
+ * to tell the generic irq code what kind of interrupt is mapped to a
+ * particular IRQ number.
+ */
+enum {
+ /* per-cpu interrupt; use enable/disable_percpu_irq() to mask */
+ TILE_IRQ_PERCPU,
+ /* global interrupt, hardware responsible for clearing. */
+ TILE_IRQ_HW_CLEAR,
+ /* global interrupt, software responsible for clearing. */
+ TILE_IRQ_SW_CLEAR,
+};
+
+
+/*
+ * Paravirtualized drivers should call this when they dynamically
+ * allocate a new IRQ or discover an IRQ that was pre-allocated by the
+ * hypervisor for use with their particular device. This gives the
+ * IRQ subsystem an opportunity to do interrupt-type-specific
+ * initialization.
+ *
+ * ISSUE: We should modify this API so that registering anything
+ * except percpu interrupts also requires providing callback methods
+ * for enabling and disabling the interrupt. This would allow the
+ * generic IRQ code to proxy enable/disable_irq() calls back into the
+ * PCI subsystem, which in turn could enable or disable the interrupt
+ * at the PCI shim.
+ */
+void tile_irq_activate(unsigned int irq, int tile_irq_type);
+
+void setup_irq_regs(void);
+
+#endif /* _ASM_TILE_IRQ_H */
diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
new file mode 100644
index 00000000..5db0ce54
--- /dev/null
+++ b/arch/tile/include/asm/irqflags.h
@@ -0,0 +1,282 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_IRQFLAGS_H
+#define _ASM_TILE_IRQFLAGS_H
+
+#include <arch/interrupts.h>
+#include <arch/chip.h>
+
+#if !defined(__tilegx__) && defined(__ASSEMBLY__)
+
+/*
+ * The set of interrupts we want to allow when interrupts are nominally
+ * disabled. The remainder are effectively "NMI" interrupts from
+ * the point of view of the generic Linux code. Note that synchronous
+ * interrupts (aka "non-queued") are not blocked by the mask in any case.
+ */
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+ (~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT)))
+#else
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+ (~(INT_MASK_HI(INT_PERF_COUNT)))
+#endif
+
+#else
+
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+#define LINUX_MASKABLE_INTERRUPTS \
+ (~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT)))
+#else
+#define LINUX_MASKABLE_INTERRUPTS \
+ (~(INT_MASK(INT_PERF_COUNT)))
+#endif
+
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* NOTE: we can't include <linux/percpu.h> due to #include dependencies. */
+#include <asm/percpu.h>
+#include <arch/spr_def.h>
+
+/* Set and clear kernel interrupt masks. */
+#if CHIP_HAS_SPLIT_INTR_MASK()
+#if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 || INT_MEM_ERROR >= 32
+# error Fix assumptions about which word various interrupts are in
+#endif
+#define interrupt_mask_set(n) do { \
+ int __n = (n); \
+ unsigned int __mask = 1U << (__n & 0x1f); \
+ if (__n < 32) \
+ __insn_mtspr(SPR_INTERRUPT_MASK_SET_K_0, __mask); \
+ else \
+ __insn_mtspr(SPR_INTERRUPT_MASK_SET_K_1, __mask); \
+} while (0)
+#define interrupt_mask_reset(n) do { \
+ int __n = (n); \
+ unsigned int __mask = 1U << (__n & 0x1f); \
+ if (__n < 32) \
+ __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, __mask); \
+ else \
+ __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, __mask); \
+} while (0)
+#define interrupt_mask_check(n) ({ \
+ int __n = (n); \
+ (((__n < 32) ? \
+ __insn_mfspr(SPR_INTERRUPT_MASK_K_0) : \
+ __insn_mfspr(SPR_INTERRUPT_MASK_K_1)) \
+ >> (__n & 0x1f)) & 1; \
+})
+#define interrupt_mask_set_mask(mask) do { \
+ unsigned long long __m = (mask); \
+ __insn_mtspr(SPR_INTERRUPT_MASK_SET_K_0, (unsigned long)(__m)); \
+ __insn_mtspr(SPR_INTERRUPT_MASK_SET_K_1, (unsigned long)(__m>>32)); \
+} while (0)
+#define interrupt_mask_reset_mask(mask) do { \
+ unsigned long long __m = (mask); \
+ __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, (unsigned long)(__m)); \
+ __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, (unsigned long)(__m>>32)); \
+} while (0)
+#else
+#define interrupt_mask_set(n) \
+ __insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (1UL << (n)))
+#define interrupt_mask_reset(n) \
+ __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (1UL << (n)))
+#define interrupt_mask_check(n) \
+ ((__insn_mfspr(SPR_INTERRUPT_MASK_K) >> (n)) & 1)
+#define interrupt_mask_set_mask(mask) \
+ __insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (mask))
+#define interrupt_mask_reset_mask(mask) \
+ __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (mask))
+#endif
+
+/*
+ * The set of interrupts we want active if irqs are enabled.
+ * Note that in particular, the tile timer interrupt comes and goes
+ * from this set, since we have no other way to turn off the timer.
+ * Likewise, INTCTRL_K is removed and re-added during device
+ * interrupts, as is the hardwall UDN_FIREWALL interrupt.
+ * We use a low bit (MEM_ERROR) as our sentinel value and make sure it
+ * is always claimed as an "active interrupt" so we can query that bit
+ * to know our current state.
+ */
+DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
+#define INITIAL_INTERRUPTS_ENABLED INT_MASK(INT_MEM_ERROR)
+
+/* Disable interrupts. */
+#define arch_local_irq_disable() \
+ interrupt_mask_set_mask(LINUX_MASKABLE_INTERRUPTS)
+
+/* Disable all interrupts, including NMIs; all 64 mask bits are set. */
+#define arch_local_irq_disable_all() \
+ interrupt_mask_set_mask(-1ULL)
+
+/* Re-enable all maskable interrupts. */
+#define arch_local_irq_enable() \
+ interrupt_mask_reset_mask(__get_cpu_var(interrupts_enabled_mask))
+
+/* Disable or enable interrupts based on flag argument. */
+#define arch_local_irq_restore(disabled) do { \
+ if (disabled) \
+ arch_local_irq_disable(); \
+ else \
+ arch_local_irq_enable(); \
+} while (0)
+
+/* Return true if "flags" argument means interrupts are disabled. */
+#define arch_irqs_disabled_flags(flags) ((flags) != 0)
+
+/* Return true if interrupts are currently disabled. */
+#define arch_irqs_disabled() interrupt_mask_check(INT_MEM_ERROR)
+
+/* Save whether interrupts are currently disabled. */
+#define arch_local_save_flags() arch_irqs_disabled()
+
+/* Save whether interrupts are currently disabled, then disable them. */
+#define arch_local_irq_save() ({ \
+ unsigned long __flags = arch_local_save_flags(); \
+ arch_local_irq_disable(); \
+ __flags; })
+
+/* Prevent the given interrupt from being enabled next time we enable irqs. */
+#define arch_local_irq_mask(interrupt) \
+ (__get_cpu_var(interrupts_enabled_mask) &= ~INT_MASK(interrupt))
+
+/* Prevent the given interrupt from being enabled immediately. */
+#define arch_local_irq_mask_now(interrupt) do { \
+ arch_local_irq_mask(interrupt); \
+ interrupt_mask_set(interrupt); \
+} while (0)
+
+/* Allow the given interrupt to be enabled next time we enable irqs. */
+#define arch_local_irq_unmask(interrupt) \
+ (__get_cpu_var(interrupts_enabled_mask) |= INT_MASK(interrupt))
+
+/* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */
+#define arch_local_irq_unmask_now(interrupt) do { \
+ arch_local_irq_unmask(interrupt); \
+ if (!irqs_disabled()) \
+ interrupt_mask_reset(interrupt); \
+} while (0)
+
+#else /* __ASSEMBLY__ */
+
+/* We provide a somewhat more restricted set for assembly. */
+
+#ifdef __tilegx__
+
+#if INT_MEM_ERROR != 0
+# error Fix IRQS_DISABLED() macro
+#endif
+
+/* Return 0 or 1 to indicate whether interrupts are currently disabled. */
+#define IRQS_DISABLED(tmp) \
+ mfspr tmp, SPR_INTERRUPT_MASK_K; \
+ andi tmp, tmp, 1
+
+/* Load up a pointer to &interrupts_enabled_mask. */
+#define GET_INTERRUPTS_ENABLED_MASK_PTR(reg) \
+ moveli reg, hw2_last(interrupts_enabled_mask); \
+ shl16insli reg, reg, hw1(interrupts_enabled_mask); \
+ shl16insli reg, reg, hw0(interrupts_enabled_mask); \
+ add reg, reg, tp
+
+/* Disable interrupts. */
+#define IRQ_DISABLE(tmp0, tmp1) \
+ moveli tmp0, hw2_last(LINUX_MASKABLE_INTERRUPTS); \
+ shl16insli tmp0, tmp0, hw1(LINUX_MASKABLE_INTERRUPTS); \
+ shl16insli tmp0, tmp0, hw0(LINUX_MASKABLE_INTERRUPTS); \
+ mtspr SPR_INTERRUPT_MASK_SET_K, tmp0
+
+/* Disable ALL synchronous interrupts (used by NMI entry). */
+#define IRQ_DISABLE_ALL(tmp) \
+ movei tmp, -1; \
+ mtspr SPR_INTERRUPT_MASK_SET_K, tmp
+
+/* Enable interrupts. */
+#define IRQ_ENABLE(tmp0, tmp1) \
+ GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \
+ ld tmp0, tmp0; \
+ mtspr SPR_INTERRUPT_MASK_RESET_K, tmp0
+
+#else /* !__tilegx__ */
+
+/*
+ * Return 0 or 1 to indicate whether interrupts are currently disabled.
+ * Note that it's important that we use a bit from the "low" mask word,
+ * since when we are enabling, that is the word we write first, so if we
+ * are interrupted after only writing half of the mask, the interrupt
+ * handler will correctly observe that we have interrupts enabled, and
+ * will enable interrupts itself on return from the interrupt handler
+ * (making the original code's write of the "high" mask word idempotent).
+ */
+#define IRQS_DISABLED(tmp) \
+ mfspr tmp, SPR_INTERRUPT_MASK_K_0; \
+ shri tmp, tmp, INT_MEM_ERROR; \
+ andi tmp, tmp, 1
+
+/* Load up a pointer to &interrupts_enabled_mask. */
+#define GET_INTERRUPTS_ENABLED_MASK_PTR(reg) \
+ moveli reg, lo16(interrupts_enabled_mask); \
+ auli reg, reg, ha16(interrupts_enabled_mask); \
+ add reg, reg, tp
+
+/* Disable interrupts. */
+#define IRQ_DISABLE(tmp0, tmp1) \
+ { \
+ movei tmp0, -1; \
+ moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI) \
+ }; \
+ { \
+ mtspr SPR_INTERRUPT_MASK_SET_K_0, tmp0; \
+ auli tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS_HI) \
+ }; \
+ mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp1
+
+/* Disable ALL synchronous interrupts (used by NMI entry). */
+#define IRQ_DISABLE_ALL(tmp) \
+ movei tmp, -1; \
+ mtspr SPR_INTERRUPT_MASK_SET_K_0, tmp; \
+ mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp
+
+/* Enable interrupts. */
+#define IRQ_ENABLE(tmp0, tmp1) \
+ GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \
+ { \
+ lw tmp0, tmp0; \
+ addi tmp1, tmp0, 4 \
+ }; \
+ lw tmp1, tmp1; \
+ mtspr SPR_INTERRUPT_MASK_RESET_K_0, tmp0; \
+ mtspr SPR_INTERRUPT_MASK_RESET_K_1, tmp1
+#endif
+
+/*
+ * Do the CPU's IRQ-state tracing from assembly code. We call a
+ * C function, but almost everywhere we do, we don't mind clobbering
+ * all the caller-saved registers.
+ */
+#ifdef CONFIG_TRACE_IRQFLAGS
+# define TRACE_IRQS_ON jal trace_hardirqs_on
+# define TRACE_IRQS_OFF jal trace_hardirqs_off
+#else
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_TILE_IRQFLAGS_H */
diff --git a/arch/tile/include/asm/kexec.h b/arch/tile/include/asm/kexec.h
new file mode 100644
index 00000000..c11a6cc7
--- /dev/null
+++ b/arch/tile/include/asm/kexec.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * based on kexec.h from other architectures in linux-2.6.18
+ */
+
+#ifndef _ASM_TILE_KEXEC_H
+#define _ASM_TILE_KEXEC_H
+
+#include <asm/page.h>
+
+/* Maximum physical address we can use pages from. */
+#define KEXEC_SOURCE_MEMORY_LIMIT TASK_SIZE
+/* Maximum address we can reach in physical address mode. */
+#define KEXEC_DESTINATION_MEMORY_LIMIT TASK_SIZE
+/* Maximum address we can use for the control code buffer. */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
+
+/*
+ * We don't bother to provide a unique identifier, since we can only
+ * reboot with a single type of kernel image anyway.
+ */
+#define KEXEC_ARCH KEXEC_ARCH_DEFAULT
+
+/* Use the tile override for the page allocator. */
+struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order);
+#define kimage_alloc_pages_arch kimage_alloc_pages_arch
+
+#define MAX_NOTE_BYTES 1024
+
+/* Defined in arch/tile/kernel/relocate_kernel.S */
+extern const unsigned char relocate_new_kernel[];
+extern const unsigned long relocate_new_kernel_size;
+extern void relocate_new_kernel_end(void);
+
+/* Provide a dummy definition to avoid build failures. */
+static inline void crash_setup_regs(struct pt_regs *n, struct pt_regs *o)
+{
+}
+
+#endif /* _ASM_TILE_KEXEC_H */
diff --git a/arch/tile/include/asm/kmap_types.h b/arch/tile/include/asm/kmap_types.h
new file mode 100644
index 00000000..3d0f2024
--- /dev/null
+++ b/arch/tile/include/asm/kmap_types.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_KMAP_TYPES_H
+#define _ASM_TILE_KMAP_TYPES_H
+
+/*
+ * In 32-bit TILE Linux we have to balance the desire to have a lot of
+ * nested atomic mappings with the fact that large page sizes and many
+ * processors chew up address space quickly. In a typical
+ * 64-processor, 64KB-page layout build, making KM_TYPE_NR one larger
+ * adds 4MB of required address-space. For now we leave KM_TYPE_NR
+ * set to depth 8.
+ */
+enum km_type {
+ KM_TYPE_NR = 8
+};
+
+/*
+ * We provide dummy definitions of all the stray values that used to be
+ * required for kmap_atomic() and no longer are.
+ */
+enum {
+ KM_BOUNCE_READ,
+ KM_SKB_SUNRPC_DATA,
+ KM_SKB_DATA_SOFTIRQ,
+ KM_USER0,
+ KM_USER1,
+ KM_BIO_SRC_IRQ,
+ KM_BIO_DST_IRQ,
+ KM_PTE0,
+ KM_PTE1,
+ KM_IRQ0,
+ KM_IRQ1,
+ KM_SOFTIRQ0,
+ KM_SOFTIRQ1,
+ KM_SYNC_ICACHE,
+ KM_SYNC_DCACHE,
+ KM_UML_USERCOPY,
+ KM_IRQ_PTE,
+ KM_NMI,
+ KM_NMI_PTE,
+ KM_KDB
+};
+
+#endif /* _ASM_TILE_KMAP_TYPES_H */
diff --git a/arch/tile/include/asm/linkage.h b/arch/tile/include/asm/linkage.h
new file mode 100644
index 00000000..e121c397
--- /dev/null
+++ b/arch/tile/include/asm/linkage.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_LINKAGE_H
+#define _ASM_TILE_LINKAGE_H
+
+#include <feedback.h>
+
+#define __ALIGN .align 8
+
+/*
+ * The STD_ENTRY and STD_ENDPROC macros put the function in a
+ * self-named .text.foo section, and if linker feedback collection
+ * is enabled, add a suitable call to the feedback collection code.
+ * STD_ENTRY_SECTION lets you specify a non-standard section name.
+ */
+
+#define STD_ENTRY(name) \
+ .pushsection .text.##name, "ax"; \
+ ENTRY(name); \
+ FEEDBACK_ENTER(name)
+
+#define STD_ENTRY_SECTION(name, section) \
+ .pushsection section, "ax"; \
+ ENTRY(name); \
+ FEEDBACK_ENTER_EXPLICIT(name, section, .Lend_##name - name)
+
+#define STD_ENDPROC(name) \
+ ENDPROC(name); \
+ .Lend_##name:; \
+ .popsection
+
+/* Create a file-static function entry set up for feedback gathering. */
+#define STD_ENTRY_LOCAL(name) \
+ .pushsection .text.##name, "ax"; \
+ ALIGN; \
+ name:; \
+ FEEDBACK_ENTER(name)
+
+#endif /* _ASM_TILE_LINKAGE_H */
diff --git a/arch/tile/include/asm/memprof.h b/arch/tile/include/asm/memprof.h
new file mode 100644
index 00000000..359949be
--- /dev/null
+++ b/arch/tile/include/asm/memprof.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * The hypervisor's memory controller profiling infrastructure allows
+ * the programmer to find out what fraction of the available memory
+ * bandwidth is being consumed at each memory controller. The
+ * profiler provides start, stop, and clear operations to allow
+ * profiling over a specific time window, as well as an interface for
+ * reading the most recent profile values.
+ *
+ * This header declares IOCTL codes necessary to control memprof.
+ */
+#ifndef _ASM_TILE_MEMPROF_H
+#define _ASM_TILE_MEMPROF_H
+
+#include <linux/ioctl.h>
+
+#define MEMPROF_IOCTL_TYPE 0xB4
+#define MEMPROF_IOCTL_START _IO(MEMPROF_IOCTL_TYPE, 0)
+#define MEMPROF_IOCTL_STOP _IO(MEMPROF_IOCTL_TYPE, 1)
+#define MEMPROF_IOCTL_CLEAR _IO(MEMPROF_IOCTL_TYPE, 2)
+
+#endif /* _ASM_TILE_MEMPROF_H */
diff --git a/arch/tile/include/asm/mman.h b/arch/tile/include/asm/mman.h
new file mode 100644
index 00000000..81b8fc34
--- /dev/null
+++ b/arch/tile/include/asm/mman.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_MMAN_H
+#define _ASM_TILE_MMAN_H
+
+#include <asm-generic/mman-common.h>
+#include <arch/chip.h>
+
+/* Standard Linux flags */
+
+#define MAP_POPULATE 0x0040 /* populate (prefault) pagetables */
+#define MAP_NONBLOCK 0x0080 /* do not block on IO */
+#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
+#define MAP_STACK MAP_GROWSDOWN /* provide convenience alias */
+#define MAP_LOCKED 0x0200 /* pages are locked */
+#define MAP_NORESERVE 0x0400 /* don't check for reservations */
+#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
+#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
+#define MAP_HUGETLB 0x4000 /* create a huge page mapping */
+
+
+/*
+ * Flags for mlockall
+ */
+#define MCL_CURRENT 1 /* lock all current mappings */
+#define MCL_FUTURE 2 /* lock all future mappings */
+
+
+#endif /* _ASM_TILE_MMAN_H */
diff --git a/arch/tile/include/asm/mmu.h b/arch/tile/include/asm/mmu.h
new file mode 100644
index 00000000..92f94c77
--- /dev/null
+++ b/arch/tile/include/asm/mmu.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_MMU_H
+#define _ASM_TILE_MMU_H
+
+/* Capture any arch- and mm-specific information. */
+struct mm_context {
+ /*
+ * Written while holding the mmap_sem semaphore. Readers do not
+ * take the semaphore; they rely on the read being atomic and on
+ * the value having been set conservatively.
+ */
+ unsigned int priority_cached;
+};
+
+typedef struct mm_context mm_context_t;
+
+void leave_mm(int cpu);
+
+#endif /* _ASM_TILE_MMU_H */
diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h
new file mode 100644
index 00000000..15fb2464
--- /dev/null
+++ b/arch/tile/include/asm/mmu_context.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_MMU_CONTEXT_H
+#define _ASM_TILE_MMU_CONTEXT_H
+
+#include <linux/smp.h>
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
+#include <asm-generic/mm_hooks.h>
+
+/* No per-mm state is set up here for tsk or mm; always returns 0. */
+static inline int
+init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+ return 0;
+}
+
+/*
+ * Install the page table rooted at pgdir by calling hv_install_context()
+ * with the physical address of pgdir, prot, asid and HV_CTX_DIRECTIO.
+ * Panics if the call returns a negative value.
+ *
+ * Note that arch/tile/kernel/head.S also calls hv_install_context().
+ */
+static inline void __install_page_table(pgd_t *pgdir, int asid, pgprot_t prot)
+{
+ /* FIXME: DIRECTIO should not always be set. */
+ int rc = hv_install_context(__pa(pgdir), prot, asid, HV_CTX_DIRECTIO);
+ if (rc < 0)
+ panic("hv_install_context failed: %d", rc);
+}
+
+/*
+ * Install pgdir with the given asid, using as the "prot" argument the
+ * PTE that virt_to_pte() returns for the address of pgdir itself.
+ */
+static inline void install_page_table(pgd_t *pgdir, int asid)
+{
+ pte_t *ptep = virt_to_pte(NULL, (unsigned long)pgdir);
+ __install_page_table(pgdir, asid, *ptep);
+}
+
+/*
+ * "Lazy" TLB mode is entered when we are switching to a kernel task,
+ * which borrows the mm of the previous task. The goal of this
+ * optimization is to avoid having to install a new page table. On
+ * early x86 machines (where the concept originated) you couldn't do
+ * anything short of a full page table install for invalidation, so
+ * handling a remote TLB invalidate required doing a page table
+ * re-install. Someone clearly decided that it was silly to keep
+ * doing this while in "lazy" TLB mode, so the optimization involves
+ * installing the swapper page table instead the first time one
+ * occurs, and clearing the cpu out of cpu_vm_mask, so the cpu running
+ * the kernel task doesn't need to take any more interrupts. At that
+ * point it's then necessary to explicitly reinstall it when context
+ * switching back to the original mm.
+ *
+ * On Tile, we have to do a page-table install whenever DMA is enabled,
+ * so in that case lazy mode doesn't help anyway. And more generally,
+ * we have efficient per-page TLB shootdown, and don't expect to spend
+ * that much time in kernel tasks in general, so just leaving the
+ * kernel task borrowing the old page table, but handling TLB
+ * shootdowns, is a reasonable thing to do. And importantly, this
+ * lets us use the hypervisor's internal APIs for TLB shootdown, which
+ * means we don't have to worry about having TLB shootdowns blocked
+ * when Linux is disabling interrupts; see the page migration code for
+ * an example of where it's important for TLB shootdowns to complete
+ * even when interrupts are disabled at the Linux level.
+ */
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *t)
+{
+#if CHIP_HAS_TILE_DMA()
+ /*
+ * We have to do an "identity" page table switch in order to
+ * clear any pending DMA interrupts.
+ */
+ if (current->thread.tile_dma_state.enabled)
+ install_page_table(mm->pgd, __get_cpu_var(current_asid));
+#endif
+}
+
+/*
+ * Switch this cpu from address space "prev" to address space "next".
+ * Nothing is done when both refer to the same mm.
+ */
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	int cpu;
+	int asid;
+
+	/* Same mm on both sides: no work to do. */
+	if (unlikely(prev == next))
+		return;
+
+	cpu = smp_processor_id();
+
+	/* Pick new ASID; going past max_asid restarts at min_asid with a flush. */
+	asid = __get_cpu_var(current_asid) + 1;
+	if (asid > max_asid) {
+		asid = min_asid;
+		local_flush_tlb();
+	}
+	__get_cpu_var(current_asid) = asid;
+
+	/* Clear cpu from the old mm, and set it in the new one. */
+	cpumask_clear_cpu(cpu, mm_cpumask(prev));
+	cpumask_set_cpu(cpu, mm_cpumask(next));
+
+	/* Re-load page tables */
+	install_page_table(next->pgd, asid);
+
+	/* See how we should set the red/black cache info */
+	check_mm_caching(prev, next);
+
+	/*
+	 * Since we're changing to a new mm, we have to flush
+	 * the icache in case some physical page now being mapped
+	 * has subsequently been repurposed and has new code.
+	 */
+	__flush_icache();
+}
+
+/* Thin wrapper: calls switch_mm() with a NULL task pointer. */
+static inline void activate_mm(struct mm_struct *prev_mm,
+ struct mm_struct *next_mm)
+{
+ switch_mm(prev_mm, next_mm, NULL);
+}
+
+#define destroy_context(mm) do { } while (0)
+#define deactivate_mm(tsk, mm) do { } while (0)
+
+#endif /* _ASM_TILE_MMU_CONTEXT_H */
diff --git a/arch/tile/include/asm/mmzone.h b/arch/tile/include/asm/mmzone.h
new file mode 100644
index 00000000..9d3dbce8
--- /dev/null
+++ b/arch/tile/include/asm/mmzone.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_MMZONE_H
+#define _ASM_TILE_MMZONE_H
+
+extern struct pglist_data node_data[];
+#define NODE_DATA(nid) (&node_data[nid])
+
+extern void get_memcfg_numa(void);
+
+#ifdef CONFIG_DISCONTIGMEM
+
+#include <asm/page.h>
+
+/*
+ * Generally, memory ranges are always doled out by the hypervisor in
+ * fixed-size, power-of-two increments. That would make computing the node
+ * very easy. We could just take a couple high bits of the PA, which
+ * denote the memory shim, and we'd be done. However, when we're doing
+ * memory striping, this may not be true; PAs with different high bit
+ * values might be in the same node. Thus, we keep a lookup table to
+ * translate the high bits of the PFN to the node number.
+ */
+extern int highbits_to_node[];
+
+static inline int pfn_to_nid(unsigned long pfn)
+{
+ return highbits_to_node[__pfn_to_highbits(pfn)];
+}
+
+#define kern_addr_valid(kaddr) virt_addr_valid((void *)kaddr)
+
+/*
+ * Return nonzero if pfn lies below the end of the node that its high
+ * bits map to, and zero otherwise (including when pfn_to_nid() yields a
+ * negative node id).
+ *
+ * pfn is taken as an unsigned long, matching pfn_to_nid() and the
+ * FLATMEM pfn_valid(), so that a PFN is never truncated or sign-extended
+ * on the way in.
+ */
+static inline int pfn_valid(unsigned long pfn)
+{
+	int nid = pfn_to_nid(pfn);
+
+	if (nid < 0)
+		return 0;
+	return pfn < node_end_pfn(nid);
+}
+
+/* Information on the NUMA nodes that we compute early */
+extern unsigned long node_start_pfn[];
+extern unsigned long node_end_pfn[];
+extern unsigned long node_memmap_pfn[];
+extern unsigned long node_percpu_pfn[];
+extern unsigned long node_free_pfn[];
+#ifdef CONFIG_HIGHMEM
+extern unsigned long node_lowmem_end_pfn[];
+#endif
+#ifdef CONFIG_PCI
+extern unsigned long pci_reserve_start_pfn;
+extern unsigned long pci_reserve_end_pfn;
+#endif
+
+#endif /* CONFIG_DISCONTIGMEM */
+
+#endif /* _ASM_TILE_MMZONE_H */
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
new file mode 100644
index 00000000..db93518f
--- /dev/null
+++ b/arch/tile/include/asm/page.h
@@ -0,0 +1,336 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_PAGE_H
+#define _ASM_TILE_PAGE_H
+
+#include <linux/const.h>
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+
+/* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
+#define PAGE_SHIFT HV_LOG2_PAGE_SIZE_SMALL
+#define HPAGE_SHIFT HV_LOG2_PAGE_SIZE_LARGE
+
+#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
+#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
+
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+
+/*
+ * If the Kconfig doesn't specify, set a maximum zone order that
+ * is enough so that we can create huge pages from small pages given
+ * the respective sizes of the two page types. See <linux/mmzone.h>.
+ */
+#ifndef CONFIG_FORCE_MAX_ZONEORDER
+#define CONFIG_FORCE_MAX_ZONEORDER (HPAGE_SHIFT - PAGE_SHIFT + 1)
+#endif
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/string.h>
+
+struct page;
+
+static inline void clear_page(void *page)
+{
+ memset(page, 0, PAGE_SIZE);
+}
+
+static inline void copy_page(void *to, void *from)
+{
+ memcpy(to, from, PAGE_SIZE);
+}
+
+static inline void clear_user_page(void *page, unsigned long vaddr,
+ struct page *pg)
+{
+ clear_page(page);
+}
+
+static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
+ struct page *topage)
+{
+ copy_page(to, from);
+}
+
+/*
+ * Hypervisor page tables are made of the same basic structure.
+ */
+
+typedef HV_PTE pte_t;
+typedef HV_PTE pgd_t;
+typedef HV_PTE pgprot_t;
+
+/*
+ * User L2 page tables are managed as one L2 page table per page,
+ * because we use the page allocator for them. This keeps the allocation
+ * simple and makes it potentially useful to implement HIGHPTE at some point.
+ * However, it's also inefficient, since L2 page tables are much smaller
+ * than pages (currently 2KB vs 64KB). So we should revisit this.
+ */
+typedef struct page *pgtable_t;
+
+/* Must be a macro since it is used to create constants. */
+#define __pgprot(val) hv_pte(val)
+
+/* Rarely-used initializers, typically with a "zero" value. */
+#define __pte(x) hv_pte(x)
+#define __pgd(x) hv_pte(x)
+
+static inline u64 pgprot_val(pgprot_t pgprot)
+{
+ return hv_pte_val(pgprot);
+}
+
+static inline u64 pte_val(pte_t pte)
+{
+ return hv_pte_val(pte);
+}
+
+static inline u64 pgd_val(pgd_t pgd)
+{
+ return hv_pte_val(pgd);
+}
+
+#ifdef __tilegx__
+
+typedef HV_PTE pmd_t;
+
+#define __pmd(x) hv_pte(x)
+
+static inline u64 pmd_val(pmd_t pmd)
+{
+ return hv_pte_val(pmd);
+}
+
+#endif
+
+/*
+ * Return the allocation order for "size" bytes, i.e. the smallest n
+ * such that (PAGE_SIZE << n) >= size.  Sizes from 1 to PAGE_SIZE give
+ * order 0.  A size of 0 wraps around in (size - 1) and yields
+ * BITS_PER_LONG - PAGE_SHIFT, as it always has.
+ */
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+	unsigned long pages = (size - 1) >> PAGE_SHIFT;
+
+	/* __builtin_clzl(0) has an undefined result, so don't call it. */
+	if (pages == 0)
+		return 0;
+	return BITS_PER_LONG - __builtin_clzl(pages);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
+
+#define HUGE_MAX_HSTATE 2
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+
+/* Each memory controller has PAs distinct in their high bits. */
+#define NR_PA_HIGHBIT_SHIFT (CHIP_PA_WIDTH() - CHIP_LOG_NUM_MSHIMS())
+#define NR_PA_HIGHBIT_VALUES (1 << CHIP_LOG_NUM_MSHIMS())
+#define __pa_to_highbits(pa) ((phys_addr_t)(pa) >> NR_PA_HIGHBIT_SHIFT)
+#define __pfn_to_highbits(pfn) ((pfn) >> (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT))
+
+#ifdef __tilegx__
+
+/*
+ * We reserve the lower half of memory for user-space programs, and the
+ * upper half for system code. We re-map all of physical memory in the
+ * upper half, which takes a quarter of our VA space. Then we have
+ * the vmalloc regions. The supervisor code lives at 0xfffffff700000000,
+ * with the hypervisor above that.
+ *
+ * Loadable kernel modules are placed immediately after the static
+ * supervisor code, with each being allocated a 256MB region of
+ * address space, so we don't have to worry about the range of "jal"
+ * and other branch instructions.
+ *
+ * For now we keep life simple and just allocate one pmd (4GB) for vmalloc.
+ * Similarly, for now we don't play any struct page mapping games.
+ */
+
+#if CHIP_PA_WIDTH() + 2 > CHIP_VA_WIDTH()
+# error Too much PA to map with the VA available!
+#endif
+#define HALF_VA_SPACE (_AC(1, UL) << (CHIP_VA_WIDTH() - 1))
+
+#define MEM_LOW_END (HALF_VA_SPACE - 1) /* low half */
+#define MEM_HIGH_START (-HALF_VA_SPACE) /* high half */
+#define PAGE_OFFSET MEM_HIGH_START
+#define _VMALLOC_START _AC(0xfffffff500000000, UL) /* 4 GB */
+#define HUGE_VMAP_BASE _AC(0xfffffff600000000, UL) /* 4 GB */
+#define MEM_SV_START _AC(0xfffffff700000000, UL) /* 256 MB */
+#define MEM_SV_INTRPT MEM_SV_START
+#define MEM_MODULE_START _AC(0xfffffff710000000, UL) /* 256 MB */
+#define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024))
+#define MEM_HV_START _AC(0xfffffff800000000, UL) /* 32 GB */
+
+/* Highest DTLB address we will use */
+#define KERNEL_HIGH_VADDR MEM_SV_START
+
+/* Since we don't currently provide any fixmaps, we use an impossible VA. */
+#define FIXADDR_TOP MEM_HV_START
+
+#else /* !__tilegx__ */
+
+/*
+ * A PAGE_OFFSET of 0xC0000000 means that the kernel has
+ * a virtual address space of one gigabyte, which limits the
+ * amount of physical memory you can use to about 768MB.
+ * If you want more physical memory than this then see the CONFIG_HIGHMEM
+ * option in the kernel configuration.
+ *
+ * The top 16MB chunk in the table below is unavailable to Linux. Since
+ * the kernel interrupt vectors must live at either 0xfe000000 or 0xfd000000
+ * (depending on whether the kernel is at PL2 or PL1), we map all of the
+ * bottom of RAM at this address with a huge page table entry to minimize
+ * its ITLB footprint (as well as at PAGE_OFFSET). The last architected
+ * requirement is that user interrupt vectors live at 0xfc000000, so we
+ * make that range of memory available to user processes. The remaining
+ * regions are sized as shown; the first four addresses use the PL 1
+ * values, and after that, we show "typical" values, since the actual
+ * addresses depend on kernel #defines.
+ *
+ * MEM_HV_INTRPT 0xfe000000
+ * MEM_SV_INTRPT (kernel code) 0xfd000000
+ * MEM_USER_INTRPT (user vector) 0xfc000000
+ * FIX_KMAP_xxx 0xf8000000 (via NR_CPUS * KM_TYPE_NR)
+ * PKMAP_BASE 0xf7000000 (via LAST_PKMAP)
+ * HUGE_VMAP 0xf3000000 (via CONFIG_NR_HUGE_VMAPS)
+ * VMALLOC_START 0xf0000000 (via __VMALLOC_RESERVE)
+ * mapped LOWMEM 0xc0000000
+ */
+
+#define MEM_USER_INTRPT _AC(0xfc000000, UL)
+#if CONFIG_KERNEL_PL == 1
+#define MEM_SV_INTRPT _AC(0xfd000000, UL)
+#define MEM_HV_INTRPT _AC(0xfe000000, UL)
+#else
+#define MEM_GUEST_INTRPT _AC(0xfd000000, UL)
+#define MEM_SV_INTRPT _AC(0xfe000000, UL)
+#define MEM_HV_INTRPT _AC(0xff000000, UL)
+#endif
+
+#define INTRPT_SIZE 0x4000
+
+/* Tolerate page size larger than the architecture interrupt region size. */
+#if PAGE_SIZE > INTRPT_SIZE
+#undef INTRPT_SIZE
+#define INTRPT_SIZE PAGE_SIZE
+#endif
+
+#define KERNEL_HIGH_VADDR MEM_USER_INTRPT
+#define FIXADDR_TOP (KERNEL_HIGH_VADDR - PAGE_SIZE)
+
+#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
+
+/* On 32-bit architectures we mix kernel modules in with other vmaps. */
+#define MEM_MODULE_START VMALLOC_START
+#define MEM_MODULE_END VMALLOC_END
+
+#endif /* __tilegx__ */
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_HIGHMEM
+
+/* Map kernel virtual addresses to page frames, in HPAGE_SIZE chunks. */
+extern unsigned long pbase_map[];
+extern void *vbase_map[];
+
+/*
+ * Translate a kernel virtual address to a page frame number: look up
+ * the base PFN of its HPAGE_SIZE chunk in pbase_map[] and add the index
+ * of the small page within that chunk.
+ */
+static inline unsigned long kaddr_to_pfn(const volatile void *_kaddr)
+{
+	unsigned long va = (unsigned long)_kaddr;
+	unsigned long chunk = va >> HPAGE_SHIFT;
+	unsigned long page_in_chunk = (va & (HPAGE_SIZE - 1)) >> PAGE_SHIFT;
+
+	return pbase_map[chunk] + page_in_chunk;
+}
+
+static inline void *pfn_to_kaddr(unsigned long pfn)
+{
+ return vbase_map[__pfn_to_highbits(pfn)] + (pfn << PAGE_SHIFT);
+}
+
+/*
+ * Translate a kernel virtual address to a physical address: the page
+ * frame comes from kaddr_to_pfn(), the offset within the page from the
+ * low bits of kaddr.
+ */
+static inline phys_addr_t virt_to_phys(const volatile void *kaddr)
+{
+	phys_addr_t page_base = (phys_addr_t)kaddr_to_pfn(kaddr) << PAGE_SHIFT;
+	unsigned long page_off = (unsigned long)kaddr & (PAGE_SIZE-1);
+
+	return page_base + page_off;
+}
+
+static inline void *phys_to_virt(phys_addr_t paddr)
+{
+ return pfn_to_kaddr(paddr >> PAGE_SHIFT) + (paddr & (PAGE_SIZE-1));
+}
+
+/* With HIGHMEM, we pack PAGE_OFFSET through high_memory with all valid VAs. */
+static inline int virt_addr_valid(const volatile void *kaddr)
+{
+ extern void *high_memory; /* copied from <linux/mm.h> */
+ return ((unsigned long)kaddr >= PAGE_OFFSET && kaddr < high_memory);
+}
+
+#else /* !CONFIG_HIGHMEM */
+
+static inline unsigned long kaddr_to_pfn(const volatile void *kaddr)
+{
+ return ((unsigned long)kaddr - PAGE_OFFSET) >> PAGE_SHIFT;
+}
+
+static inline void *pfn_to_kaddr(unsigned long pfn)
+{
+ return (void *)((pfn << PAGE_SHIFT) + PAGE_OFFSET);
+}
+
+static inline phys_addr_t virt_to_phys(const volatile void *kaddr)
+{
+ return (phys_addr_t)((unsigned long)kaddr - PAGE_OFFSET);
+}
+
+static inline void *phys_to_virt(phys_addr_t paddr)
+{
+ return (void *)((unsigned long)paddr + PAGE_OFFSET);
+}
+
+/* Check that the given address is within some mapped range of PAs. */
+#define virt_addr_valid(kaddr) pfn_valid(kaddr_to_pfn(kaddr))
+
+#endif /* !CONFIG_HIGHMEM */
+
+/* Callers are not consistent in the argument types they pass, hence the casts. */
+#define __pa(kaddr) virt_to_phys((void *)(unsigned long)(kaddr))
+#define __va(paddr) phys_to_virt((phys_addr_t)(paddr))
+
+extern int devmem_is_allowed(unsigned long pagenr);
+
+#ifdef CONFIG_FLATMEM
+static inline int pfn_valid(unsigned long pfn)
+{
+ return pfn < max_mapnr;
+}
+#endif
+
+/* Provide as macros since these require some other headers included. */
+#define page_to_pa(page) ((phys_addr_t)(page_to_pfn(page)) << PAGE_SHIFT)
+#define virt_to_page(kaddr) pfn_to_page(kaddr_to_pfn((void *)(kaddr)))
+#define page_to_virt(page) pfn_to_kaddr(page_to_pfn(page))
+
+struct mm_struct;
+extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
+
+#endif /* !__ASSEMBLY__ */
+
+#define VM_DATA_DEFAULT_FLAGS \
+ (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/memory_model.h>
+
+#endif /* _ASM_TILE_PAGE_H */
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
new file mode 100644
index 00000000..32e6cbe8
--- /dev/null
+++ b/arch/tile/include/asm/pci.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_PCI_H
+#define _ASM_TILE_PCI_H
+
+#include <linux/pci.h>
+#include <asm-generic/pci_iomap.h>
+
+/*
+ * Structure of a PCI controller (host bridge)
+ */
+struct pci_controller {
+ int index; /* PCI domain number */
+ struct pci_bus *root_bus;
+
+ int first_busno;
+ int last_busno;
+
+ int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */
+ int hv_mem_fd; /* fd to Hypervisor for MMIO operations */
+
+ struct pci_ops *ops;
+
+ int irq_base; /* Base IRQ from the Hypervisor */
+ int plx_gen1; /* flag for PLX Gen 1 configuration */
+
+ /* Address ranges that are routed to this controller/bridge. */
+ struct resource mem_resources[3];
+};
+
+/*
+ * The hypervisor maps the entirety of CPA-space as bus addresses, so
+ * bus addresses are physical addresses. The networking and block
+ * device layers use this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS 1
+
+int __init tile_pci_init(void);
+int __init pcibios_init(void);
+
+static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
+
+void __devinit pcibios_fixup_bus(struct pci_bus *bus);
+
+#define TILE_NUM_PCIE 2
+
+#define pci_domain_nr(bus) (((struct pci_controller *)(bus)->sysdata)->index)
+
+/*
+ * This decides whether to display the domain number in /proc.
+ */
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+ return 1;
+}
+
+/*
+ * pcibios_assign_all_busses() tells whether or not the bus numbers
+ * should be reassigned, in case the BIOS didn't do it correctly, or
+ * in case we don't have a BIOS and we want to let Linux do it.
+ */
+static inline int pcibios_assign_all_busses(void)
+{
+ return 1;
+}
+
+#define PCIBIOS_MIN_MEM 0
+#define PCIBIOS_MIN_IO 0
+
+/*
+ * This flag tells if the platform is TILEmpower that needs
+ * special configuration for the PLX switch chip.
+ */
+extern int tile_plx_gen1;
+
+/* Use any cpu for PCI. */
+#define cpumask_of_pcibus(bus) cpu_online_mask
+
+/* implement the pci_ DMA API in terms of the generic device dma_ one */
+#include <asm-generic/pci-dma-compat.h>
+
+/* generic pci stuff */
+#include <asm-generic/pci.h>
+
+#endif /* _ASM_TILE_PCI_H */
diff --git a/arch/tile/include/asm/percpu.h b/arch/tile/include/asm/percpu.h
new file mode 100644
index 00000000..63294f5a
--- /dev/null
+++ b/arch/tile/include/asm/percpu.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_PERCPU_H
+#define _ASM_TILE_PERCPU_H
+
+register unsigned long __my_cpu_offset __asm__("tp");
+#define __my_cpu_offset __my_cpu_offset
+#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp))
+
+#include <asm-generic/percpu.h>
+
+#endif /* _ASM_TILE_PERCPU_H */
diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h
new file mode 100644
index 00000000..e919c0bd
--- /dev/null
+++ b/arch/tile/include/asm/pgalloc.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_PGALLOC_H
+#define _ASM_TILE_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <asm/fixmap.h>
+#include <hv/hypervisor.h>
+
+/* Bits for the size of the second-level page table. */
+#define L2_KERNEL_PGTABLE_SHIFT \
+ (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL + HV_LOG2_PTE_SIZE)
+
+/* We currently allocate user L2 page tables by page (unlike kernel L2s). */
+#if L2_KERNEL_PGTABLE_SHIFT < HV_LOG2_PAGE_SIZE_SMALL
+#define L2_USER_PGTABLE_SHIFT HV_LOG2_PAGE_SIZE_SMALL
+#else
+#define L2_USER_PGTABLE_SHIFT L2_KERNEL_PGTABLE_SHIFT
+#endif
+
+/* How many pages do we need, as an "order", for a user L2 page table? */
+#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - HV_LOG2_PAGE_SIZE_SMALL)
+
+/* How big is a kernel L2 page table? */
+#define L2_KERNEL_PGTABLE_SIZE (1 << L2_KERNEL_PGTABLE_SHIFT)
+
+/*
+ * Store pmd into *pmdp via set_pte(). With CONFIG_64BIT the pmd is
+ * handed to set_pte() directly; otherwise the pgd member nested inside
+ * it (pud.pgd) is what gets passed.
+ */
+static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+ set_pte(pmdp, pmd);
+#else
+ set_pte(&pmdp->pud.pgd, pmd.pud.pgd);
+#endif
+}
+
+static inline void pmd_populate_kernel(struct mm_struct *mm,
+ pmd_t *pmd, pte_t *ptep)
+{
+ set_pmd(pmd, ptfn_pmd(__pa(ptep) >> HV_LOG2_PAGE_TABLE_ALIGN,
+ __pgprot(_PAGE_PRESENT)));
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+ pgtable_t page)
+{
+ set_pmd(pmd, ptfn_pmd(HV_PFN_TO_PTFN(page_to_pfn(page)),
+ __pgprot(_PAGE_PRESENT)));
+}
+
+/*
+ * Allocate and free page tables.
+ */
+
+extern pgd_t *pgd_alloc(struct mm_struct *mm);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+
+extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
+extern void pte_free(struct mm_struct *mm, struct page *pte);
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+/*
+ * Allocate a page table with pte_alloc_one() and return its kernel
+ * virtual address, or NULL if no page was returned.
+ *
+ * pte_alloc_one() is expected to return NULL when the allocation fails;
+ * without the check below, that NULL would be run through page_to_pfn()
+ * and pfn_to_kaddr() and come out as a bogus non-NULL address that
+ * callers could not recognize as a failure.
+ */
+static inline pte_t *
+pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+	pgtable_t page = pte_alloc_one(mm, address);
+
+	if (!page)
+		return NULL;
+	return pfn_to_kaddr(page_to_pfn(page));
+}
+
+/*
+ * Free a kernel page table given its virtual address. The address must
+ * be page-aligned (BUG otherwise); it is converted to its struct page
+ * and handed to pte_free().
+ */
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
+ pte_free(mm, virt_to_page(pte));
+}
+
+extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
+ unsigned long address);
+
+#define check_pgt_cache() do { } while (0)
+
+/*
+ * Get the small-page pte_t lowmem entry for a given pfn.
+ * This may or may not be in use, depending on whether the initial
+ * huge-page entry for the page has already been shattered.
+ */
+pte_t *get_prealloc_pte(unsigned long pfn);
+
+/* During init, we can shatter kernel huge pages if needed. */
+void shatter_pmd(pmd_t *pmd);
+
+/* After init, a more complex technique is required. */
+void shatter_huge_page(unsigned long addr);
+
+#ifdef __tilegx__
+/* We share a single page allocator for both L1 and L2 page tables. */
+#if HV_L1_SIZE != HV_L2_SIZE
+# error Rework assumption that L1 and L2 page tables are same size.
+#endif
+#define L1_USER_PGTABLE_ORDER L2_USER_PGTABLE_ORDER
+#define pud_populate(mm, pud, pmd) \
+ pmd_populate_kernel((mm), (pmd_t *)(pud), (pte_t *)(pmd))
+/*
+ * Allocate a pmd page via pte_alloc_one().  Evaluates to NULL, rather
+ * than to the bogus address page_to_virt(NULL) would give, when
+ * pte_alloc_one() returns no page.
+ */
+#define pmd_alloc_one(mm, addr) \
+	({ \
+		struct page *__pmd_page = pte_alloc_one((mm), (addr)); \
+		__pmd_page ? (pmd_t *)page_to_virt(__pmd_page) : NULL; \
+	})
+#define pmd_free(mm, pmdp) \
+ pte_free((mm), virt_to_page(pmdp))
+#define __pmd_free_tlb(tlb, pmdp, address) \
+ __pte_free_tlb((tlb), virt_to_page(pmdp), (address))
+#endif
+
+#endif /* _ASM_TILE_PGALLOC_H */
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
new file mode 100644
index 00000000..67490910
--- /dev/null
+++ b/arch/tile/include/asm/pgtable.h
@@ -0,0 +1,465 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * This file contains the functions and defines necessary to modify and use
+ * the TILE page table tree.
+ */
+
+#ifndef _ASM_TILE_PGTABLE_H
+#define _ASM_TILE_PGTABLE_H
+
+#include <hv/hypervisor.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bitops.h>
+#include <linux/threads.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+
+struct mm_struct;
+struct vm_area_struct;
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas, etc.
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+extern pgd_t swapper_pg_dir[];
+extern pgprot_t swapper_pgprot;
+extern struct kmem_cache *pgd_cache;
+extern spinlock_t pgd_lock;
+extern struct list_head pgd_list;
+
+/*
+ * The very last slots in the pgd_t are for addresses unusable by Linux
+ * (pgd_addr_invalid() returns true). So we use them for the list structure.
+ * The x86 code we are modelled on uses the page->private/index fields
+ * (older 2.6 kernels) or the lru list (newer 2.6 kernels), but since
+ * our pgds are so much smaller than a page, it seems a waste to
+ * spend a whole page on each pgd.
+ */
+/* Byte offset, from the start of a pgd, of the trailing struct list_head. */
+#define PGD_LIST_OFFSET \
+ ((PTRS_PER_PGD * sizeof(pgd_t)) - sizeof(struct list_head))
+/* Given a pgd pointer, return the list_head stored at its tail. */
+#define pgd_to_list(pgd) \
+ ((struct list_head *)((char *)(pgd) + PGD_LIST_OFFSET))
+/* Inverse of pgd_to_list(): recover the pgd from its list_head. */
+#define list_to_pgd(list) \
+ ((pgd_t *)((char *)(list) - PGD_LIST_OFFSET))
+
+extern void pgtable_cache_init(void);
+extern void paging_init(void);
+extern void set_page_homes(void);
+
+#define FIRST_USER_ADDRESS 0
+
+#define _PAGE_PRESENT HV_PTE_PRESENT
+#define _PAGE_HUGE_PAGE HV_PTE_PAGE
+#define _PAGE_READABLE HV_PTE_READABLE
+#define _PAGE_WRITABLE HV_PTE_WRITABLE
+#define _PAGE_EXECUTABLE HV_PTE_EXECUTABLE
+#define _PAGE_ACCESSED HV_PTE_ACCESSED
+#define _PAGE_DIRTY HV_PTE_DIRTY
+#define _PAGE_GLOBAL HV_PTE_GLOBAL
+#define _PAGE_USER HV_PTE_USER
+
+/*
+ * All the "standard" bits. Cache-control bits are managed elsewhere.
+ * This is used to test for valid level-2 page table pointers by checking
+ * all the bits, and to mask away the cache control bits for mprotect.
+ */
+#define _PAGE_ALL (\
+ _PAGE_PRESENT | \
+ _PAGE_HUGE_PAGE | \
+ _PAGE_READABLE | \
+ _PAGE_WRITABLE | \
+ _PAGE_EXECUTABLE | \
+ _PAGE_ACCESSED | \
+ _PAGE_DIRTY | \
+ _PAGE_GLOBAL | \
+ _PAGE_USER \
+)
+
+#define PAGE_NONE \
+ __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED)
+#define PAGE_SHARED \
+ __pgprot(_PAGE_PRESENT | _PAGE_READABLE | _PAGE_WRITABLE | \
+ _PAGE_USER | _PAGE_ACCESSED)
+
+#define PAGE_SHARED_EXEC \
+ __pgprot(_PAGE_PRESENT | _PAGE_READABLE | _PAGE_WRITABLE | \
+ _PAGE_EXECUTABLE | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY_NOEXEC \
+ __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_READABLE)
+#define PAGE_COPY_EXEC \
+ __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | \
+ _PAGE_READABLE | _PAGE_EXECUTABLE)
+#define PAGE_COPY \
+ PAGE_COPY_NOEXEC
+#define PAGE_READONLY \
+ __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_READABLE)
+#define PAGE_READONLY_EXEC \
+ __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | \
+ _PAGE_READABLE | _PAGE_EXECUTABLE)
+
+#define _PAGE_KERNEL_RO \
+ (_PAGE_PRESENT | _PAGE_GLOBAL | _PAGE_READABLE | _PAGE_ACCESSED)
+#define _PAGE_KERNEL \
+ (_PAGE_KERNEL_RO | _PAGE_WRITABLE | _PAGE_DIRTY)
+#define _PAGE_KERNEL_EXEC (_PAGE_KERNEL_RO | _PAGE_EXECUTABLE)
+
+#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC)
+
+#define page_to_kpgprot(p) PAGE_KERNEL
+
+/*
+ * We could tighten these up, but for now writable or executable
+ * implies readable.
+ */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY /* this is write-only, which we won't support */
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_EXEC
+#define __P101 PAGE_READONLY_EXEC
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_EXEC
+#define __S101 PAGE_READONLY_EXEC
+#define __S110 PAGE_SHARED_EXEC
+#define __S111 PAGE_SHARED_EXEC
+
+/*
+ * All the normal _PAGE_ALL bits are ignored for PMDs, except PAGE_PRESENT
+ * and PAGE_HUGE_PAGE, which must be one and zero, respectively.
+ * We set the ignored bits to zero.
+ */
+#define _PAGE_TABLE _PAGE_PRESENT
+
+/* Inherit the caching flags from the old protection bits. */
+#define pgprot_modify(oldprot, newprot) \
+ (pgprot_t) { ((oldprot).val & ~_PAGE_ALL) | (newprot).val }
+
+/* Just setting the PFN to zero suffices. */
+#define pte_pgprot(x) hv_pte_set_pfn((x), 0)
+
+/*
+ * For PTEs and PDEs, we must clear the Present bit first when
+ * clearing a page table entry, so clear the bottom half first and
+ * enforce ordering with a barrier.
+ */
+static inline void __pte_clear(pte_t *ptep)
+{
+#ifdef __tilegx__
+ /* tilegx: the whole entry is zeroed with one assignment. */
+ ptep->val = 0;
+#else
+ /* View the entry as two 32-bit words so each half is stored separately. */
+ u32 *tmp = (u32 *)ptep;
+ /* Bottom half first; per the note above, this drops the Present bit. */
+ tmp[0] = 0;
+ /* Compiler barrier: keep the two stores in this order. */
+ barrier();
+ tmp[1] = 0;
+#endif
+}
+#define pte_clear(mm, addr, ptep) __pte_clear(ptep)
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not.
+ */
+#define pte_present hv_pte_get_present
+#define pte_user hv_pte_get_user
+#define pte_read hv_pte_get_readable
+#define pte_dirty hv_pte_get_dirty
+#define pte_young hv_pte_get_accessed
+#define pte_write hv_pte_get_writable
+#define pte_exec hv_pte_get_executable
+#define pte_huge hv_pte_get_page
+#define pte_rdprotect hv_pte_clear_readable
+#define pte_exprotect hv_pte_clear_executable
+#define pte_mkclean hv_pte_clear_dirty
+#define pte_mkold hv_pte_clear_accessed
+#define pte_wrprotect hv_pte_clear_writable
+#define pte_mksmall hv_pte_clear_page
+#define pte_mkread hv_pte_set_readable
+#define pte_mkexec hv_pte_set_executable
+#define pte_mkdirty hv_pte_set_dirty
+#define pte_mkyoung hv_pte_set_accessed
+#define pte_mkwrite hv_pte_set_writable
+#define pte_mkhuge hv_pte_set_page
+
+#define pte_special(pte) 0
+#define pte_mkspecial(pte) (pte)
+
+/*
+ * Use some spare bits in the PTE for user-caching tags.
+ */
+#define pte_set_forcecache hv_pte_set_client0
+#define pte_get_forcecache hv_pte_get_client0
+#define pte_clear_forcecache hv_pte_clear_client0
+#define pte_set_anyhome hv_pte_set_client1
+#define pte_get_anyhome hv_pte_get_client1
+#define pte_clear_anyhome hv_pte_clear_client1
+
+/*
+ * A migrating PTE has PAGE_PRESENT clear but all the other bits preserved.
+ */
+#define pte_migrating hv_pte_get_migrating
+#define pte_mkmigrate(x) hv_pte_set_migrating(hv_pte_clear_present(x))
+#define pte_donemigrate(x) hv_pte_set_present(hv_pte_clear_migrating(x))
+
+#define pte_ERROR(e) \
+ pr_err("%s:%d: bad pte 0x%016llx.\n", __FILE__, __LINE__, pte_val(e))
+#define pgd_ERROR(e) \
+ pr_err("%s:%d: bad pgd 0x%016llx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/* Return PA and protection info for a given kernel VA. */
+int va_to_cpa_and_pte(void *va, phys_addr_t *cpa, pte_t *pte);
+
+/*
+ * __set_pte() ensures we write the 64-bit PTE with 32-bit words in
+ * the right order on 32-bit platforms and also allows us to write
+ * hooks to check valid PTEs, etc., if we want.
+ */
+void __set_pte(pte_t *ptep, pte_t pte);
+
+/*
+ * set_pte() sets the given PTE and also sanity-checks the
+ * requested PTE against the page homecaching. Unspecified parts
+ * of the PTE are filled in when it is written to memory, i.e. all
+ * caching attributes if "!forcecache", or the home cpu if "anyhome".
+ */
+extern void set_pte(pte_t *ptep, pte_t pte);
+#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
+#define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval)
+
+#define pte_page(x) pfn_to_page(pte_pfn(x))
+
+/* A pte is "none" when every bit of its value is zero. */
+static inline int pte_none(pte_t pte)
+{
+ return pte.val == 0;
+}
+
+/* Extract the page frame number held in a pte. */
+static inline unsigned long pte_pfn(pte_t pte)
+{
+ unsigned long pfn = hv_pte_get_pfn(pte);
+
+ return pfn;
+}
+
+/* Set or get the remote cache cpu in a pgprot with remote caching. */
+extern pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu);
+extern int get_remote_cache_cpu(pgprot_t prot);
+
+/* Build a pte by storing a page frame number into a set of protection bits. */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
+{
+ pte_t entry = hv_pte_set_pfn(prot, pfn);
+
+ return entry;
+}
+
+/* Support for priority mappings. */
+extern void start_mm_caching(struct mm_struct *mm);
+extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next);
+
+/*
+ * Support non-linear file mappings (see sys_remap_file_pages).
+ * This is defined by CLIENT1 set but CLIENT0 and _PAGE_PRESENT clear, and the
+ * file offset in the 32 high bits.
+ */
+/* Marker bit for a non-linear file pte. */
+#define _PAGE_FILE HV_PTE_CLIENT1
+/* Number of file-offset bits a file pte can carry. */
+#define PTE_FILE_MAX_BITS 32
+/* True when CLIENT1 is set and CLIENT0 is clear (Present is not tested here). */
+#define pte_file(pte) (hv_pte_get_client1(pte) && !hv_pte_get_client0(pte))
+/* The file offset lives in the bits above bit 31. */
+#define pte_to_pgoff(pte) ((pte).val >> 32)
+/* Encode a file offset into the high bits and tag it with _PAGE_FILE. */
+#define pgoff_to_pte(off) ((pte_t) { (((long long)(off)) << 32) | _PAGE_FILE })
+
+/*
+ * Encode and de-code a swap entry (see <linux/swapops.h>).
+ * We put the swap file type+offset in the 32 high bits;
+ * I believe we can just leave the low bits clear.
+ */
+/* Swap type: the low five bits of the swap entry value. */
+#define __swp_type(swp) ((swp).val & 0x1f)
+/* Swap offset: everything above the five type bits. */
+#define __swp_offset(swp) ((swp).val >> 5)
+/* Pack a type and offset into a swap entry (inverse of the two above). */
+#define __swp_entry(type, off) ((swp_entry_t) { (type) | ((off) << 5) })
+/* A swap pte keeps the swap entry in the bits above bit 31. */
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).val >> 32 })
+/* Build a swap pte: entry in the high bits, low 32 bits left clear. */
+#define __swp_entry_to_pte(swp) ((pte_t) { (((long long) ((swp).val)) << 32) })
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+
+/*
+ * mprotect() support: accept the new vma->vm_page_prot value as-is and
+ * carry over only the PFN of the old PTE to form the new PTE.
+ */
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ unsigned long pfn = hv_pte_get_pfn(pte);
+
+ return pfn_pte(pfn, newprot);
+}
+
+/*
+ * The pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
+ *
+ * This macro returns the index of the entry in the pgd page which would
+ * control the given virtual address.
+ */
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+
+/*
+ * pgd_offset() returns a (pgd_t *)
+ * pgd_index() is used to get the offset into the pgd page's array of pgd_t's.
+ */
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+
+/*
+ * A shortcut which implies the use of the kernel's pgd, instead
+ * of a process's.
+ */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+#if defined(CONFIG_HIGHPTE)
+extern pte_t *pte_offset_map(pmd_t *, unsigned long address);
+#define pte_unmap(pte) kunmap_atomic(pte)
+#else
+#define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
+#define pte_unmap(pte) do { } while (0)
+#endif
+
+/* Clear a non-executable kernel PTE and flush it from the TLB. */
+#define kpte_clear_flush(ptep, vaddr) \
+do { \
+ pte_clear(&init_mm, (vaddr), (ptep)); \
+ local_flush_tlb_page(FLUSH_NONEXEC, (vaddr), PAGE_SIZE); \
+} while (0)
+
+/*
+ * The kernel page tables contain what we need, and we flush when we
+ * change specific page table entries.
+ */
+#define update_mmu_cache(vma, address, pte) do { } while (0)
+
+#ifdef CONFIG_FLATMEM
+#define kern_addr_valid(addr) (1)
+#endif /* CONFIG_FLATMEM */
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+ remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+extern void vmalloc_sync_all(void);
+
+#endif /* !__ASSEMBLY__ */
+
+#ifdef __tilegx__
+#include <asm/pgtable_64.h>
+#else
+#include <asm/pgtable_32.h>
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* Test whether a pmd is empty. */
+static inline int pmd_none(pmd_t pmd)
+{
+ /*
+  * Narrow to "unsigned long" before testing: on 32-bit platforms
+  * only the low word is examined, since it might be out of sync
+  * with the upper half.
+  */
+ unsigned long low = (unsigned long)pmd_val(pmd);
+
+ return !low;
+}
+
+/* Test whether the pmd has _PAGE_PRESENT set; returns 0 or 1. */
+static inline int pmd_present(pmd_t pmd)
+{
+ /*
+  * Compare explicitly rather than returning the masked value, so the
+  * result cannot be lost when the wider pmd value is narrowed to int.
+  */
+ return (pmd_val(pmd) & _PAGE_PRESENT) != 0;
+}
+
+/* A pmd is bad unless its _PAGE_ALL bits are exactly _PAGE_TABLE. */
+static inline int pmd_bad(pmd_t pmd)
+{
+ int is_table = (pmd_val(pmd) & _PAGE_ALL) == _PAGE_TABLE;
+
+ return !is_table;
+}
+
+/* Convert a page count to megabytes, rounding down. */
+static inline unsigned long pages_to_mb(unsigned long npg)
+{
+ /* 2^20 bytes per megabyte, 2^PAGE_SHIFT bytes per page. */
+ const int pages_per_mb_shift = 20 - PAGE_SHIFT;
+
+ return npg >> pages_per_mb_shift;
+}
+
+/*
+ * The pmd can be thought of as an array like this: pmd_t[PTRS_PER_PMD]
+ *
+ * Return the index of the entry in that array which would control
+ * the given virtual address.
+ */
+static inline unsigned long pmd_index(unsigned long address)
+{
+ unsigned long slot = address >> PMD_SHIFT;
+
+ return slot & (PTRS_PER_PMD - 1);
+}
+
+/*
+ * Return the kernel virtual address that a kernel pmd_t refers to
+ * (either a kernel huge page or a kernel pte_t table). Kernel pte_t
+ * tables can be aligned at sub-page granularity, so despite the name
+ * the result is not necessarily page-aligned.
+ */
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+ phys_addr_t table_pa = (phys_addr_t)pmd_ptfn(pmd);
+
+ /* Scale the page-table frame number up to a physical address. */
+ table_pa <<= HV_LOG2_PAGE_TABLE_ALIGN;
+ return (unsigned long)__va(table_pa);
+}
+
+/*
+ * A pmd_t points to the base of a huge page or to a pte_t array.
+ * If a pte_t array, since we can have multiple per page, we don't
+ * have a one-to-one mapping of pmd_t's to pages. However, this is
+ * OK for pte_lockptr(), since we just end up with potentially one
+ * lock being used for several pte_t arrays.
+ */
+#define pmd_page(pmd) pfn_to_page(HV_PTFN_TO_PFN(pmd_ptfn(pmd)))
+
+/*
+ * The pte page can be thought of as an array like this: pte_t[PTRS_PER_PTE]
+ *
+ * Return the index of the entry in that array which would control
+ * the given virtual address.
+ */
+static inline unsigned long pte_index(unsigned long address)
+{
+ unsigned long slot = address >> PAGE_SHIFT;
+
+ return slot & (PTRS_PER_PTE - 1);
+}
+
+/* Locate the pte_t for "address" inside the pte table that "pmd" points at. */
+static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
+{
+ pte_t *table = (pte_t *)pmd_page_vaddr(*pmd);
+
+ return &table[pte_index(address)];
+}
+
+/* Test whether the pmd has _PAGE_HUGE_PAGE set; returns 0 or 1. */
+static inline int pmd_huge_page(pmd_t pmd)
+{
+ /*
+  * Compare explicitly rather than returning the masked value, so the
+  * result cannot be lost when the wider pmd value is narrowed to int.
+  */
+ return (pmd_val(pmd) & _PAGE_HUGE_PAGE) != 0;
+}
+
+#include <asm-generic/pgtable.h>
+
+/* Support /proc/NN/pgtable API. */
+struct seq_file;
+int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm,
+ unsigned long vaddr, pte_t *ptep, void **datap);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_PGTABLE_H */
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
new file mode 100644
index 00000000..9f985297
--- /dev/null
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _ASM_TILE_PGTABLE_32_H
+#define _ASM_TILE_PGTABLE_32_H
+
+/*
+ * The level-1 index is defined by the huge page size. A PGD is composed
+ * of PTRS_PER_PGD pgd_t's and is the top level of the page table.
+ */
+#define PGDIR_SHIFT HV_LOG2_PAGE_SIZE_LARGE
+#define PGDIR_SIZE HV_PAGE_SIZE_LARGE
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT))
+#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t))
+
+/*
+ * The level-2 index is defined by the difference between the huge
+ * page size and the normal page size. A PTE is composed of
+ * PTRS_PER_PTE pte_t's and is the bottom level of the page table.
+ * Note that the hypervisor docs use PTE for what we call pte_t, so
+ * this nomenclature is somewhat confusing.
+ */
+#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
+#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t))
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily, subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ *
+ * HOWEVER, if we are using an allocation scheme with slop after the
+ * end of the page table (e.g. where our L2 page tables are 2KB but
+ * our pages are 64KB and we are allocating via the page allocator)
+ * we can't extend it easily.
+ */
+#define LAST_PKMAP PTRS_PER_PTE
+
+#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK)
+
+#ifdef CONFIG_HIGHMEM
+# define __VMAPPING_END (PKMAP_BASE & ~(HPAGE_SIZE-1))
+#else
+# define __VMAPPING_END (FIXADDR_START & ~(HPAGE_SIZE-1))
+#endif
+
+#ifdef CONFIG_HUGEVMAP
+#define HUGE_VMAP_END __VMAPPING_END
+#define HUGE_VMAP_BASE (HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE)
+#define _VMALLOC_END HUGE_VMAP_BASE
+#else
+#define _VMALLOC_END __VMAPPING_END
+#endif
+
+/*
+ * Align the vmalloc area to an L2 page table, and leave a guard page
+ * at the beginning and end. The vmalloc code also puts in an internal
+ * guard page between each allocation.
+ */
+#define VMALLOC_END (_VMALLOC_END - PAGE_SIZE)
+extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */;
+#define _VMALLOC_START (_VMALLOC_END - VMALLOC_RESERVE)
+#define VMALLOC_START (_VMALLOC_START + PAGE_SIZE)
+
+/* This is the maximum possible amount of lowmem. */
+#define MAXMEM (_VMALLOC_START - PAGE_OFFSET)
+
+/* We have no pmd or pud since we are strictly a two-level page table */
+#include <asm-generic/pgtable-nopmd.h>
+
+/* No pgds are defined for addresses at or above MEM_HV_INTRPT. */
+static inline int pgd_addr_invalid(unsigned long addr)
+{
+ if (addr < MEM_HV_INTRPT)
+  return 0;
+ return 1;
+}
+
+/*
+ * Provide versions of these routines that can be used safely when
+ * the hypervisor may be asynchronously modifying dirty/accessed bits.
+ * ptep_get_and_clear() matches the generic one but we provide it to
+ * be parallel with the 64-bit code.
+ */
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+
+extern int ptep_test_and_clear_young(struct vm_area_struct *,
+ unsigned long addr, pte_t *);
+extern void ptep_set_wrprotect(struct mm_struct *,
+ unsigned long addr, pte_t *);
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+/*
+ * Read the current pte value, clear the entry with pte_clear(), and
+ * return the value that was read.
+ */
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t pte = *ptep;
+ /* Pass the real "mm" parameter; "_mm" named nothing in scope. */
+ pte_clear(mm, addr, ptep);
+ return pte;
+}
+
+/* Install a pmd value by passing its innermost pgd member to set_pte(). */
+static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+ pgd_t *slot = &pmdp->pud.pgd;
+
+ set_pte(slot, pmdval.pud.pgd);
+}
+
+/* Create a pmd from a PTFN and a set of protection bits. */
+static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
+{
+ pmd_t pmd = { { hv_pte_set_ptfn(prot, ptfn) } };
+
+ return pmd;
+}
+
+/* Return the page-table frame number (ptfn) that a pmd_t points at. */
+#define pmd_ptfn(pmd) hv_pte_get_ptfn((pmd).pud.pgd)
+
+/* Clear a pmd by clearing its innermost pgd member with __pte_clear(). */
+static inline void pmd_clear(pmd_t *pmdp)
+{
+ pgd_t *entry = &pmdp->pud.pgd;
+
+ __pte_clear(entry);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_PGTABLE_32_H */
diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h
new file mode 100644
index 00000000..fd803285
--- /dev/null
+++ b/arch/tile/include/asm/pgtable_64.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _ASM_TILE_PGTABLE_64_H
+#define _ASM_TILE_PGTABLE_64_H
+
+/* The level-0 page table breaks the address space into 32-bit chunks. */
+#define PGDIR_SHIFT HV_LOG2_L1_SPAN
+#define PGDIR_SIZE HV_L1_SPAN
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD HV_L0_ENTRIES
+#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t))
+
+/*
+ * The level-1 index is defined by the huge page size. A PMD is composed
+ * of PTRS_PER_PMD pmd_t's and is the middle level of the page table.
+ */
+#define PMD_SHIFT HV_LOG2_PAGE_SIZE_LARGE
+#define PMD_SIZE HV_PAGE_SIZE_LARGE
+#define PMD_MASK (~(PMD_SIZE-1))
+#define PTRS_PER_PMD (1 << (PGDIR_SHIFT - PMD_SHIFT))
+#define SIZEOF_PMD (PTRS_PER_PMD * sizeof(pmd_t))
+
+/*
+ * The level-2 index is defined by the difference between the huge
+ * page size and the normal page size. A PTE is composed of
+ * PTRS_PER_PTE pte_t's and is the bottom level of the page table.
+ * Note that the hypervisor docs use PTE for what we call pte_t, so
+ * this nomenclature is somewhat confusing.
+ */
+#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
+#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t))
+
+/*
+ * Align the vmalloc area to an L2 page table, and leave a guard page
+ * at the beginning and end. The vmalloc code also puts in an internal
+ * guard page between each allocation.
+ */
+#define _VMALLOC_END HUGE_VMAP_BASE
+#define VMALLOC_END (_VMALLOC_END - PAGE_SIZE)
+#define VMALLOC_START (_VMALLOC_START + PAGE_SIZE)
+
+#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE)
+
+#ifndef __ASSEMBLY__
+
+/* We have no pud since we are a three-level page table. */
+#include <asm-generic/pgtable-nopud.h>
+
+/* A pud is "none" when its value is zero. */
+static inline int pud_none(pud_t pud)
+{
+ return !pud_val(pud);
+}
+
+/* Test whether the pud has _PAGE_PRESENT set; returns 0 or 1. */
+static inline int pud_present(pud_t pud)
+{
+ /*
+  * Compare explicitly rather than returning the masked value, so the
+  * result cannot be lost when the wider pud value is narrowed to int.
+  */
+ return (pud_val(pud) & _PAGE_PRESENT) != 0;
+}
+
+#define pmd_ERROR(e) \
+ pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e))
+
+/* Clear a pud by clearing its pgd member with __pte_clear(). */
+static inline void pud_clear(pud_t *pudp)
+{
+ pgd_t *entry = &pudp->pgd;
+
+ __pte_clear(entry);
+}
+
+/* A pud is bad unless its _PAGE_ALL bits are exactly _PAGE_TABLE. */
+static inline int pud_bad(pud_t pud)
+{
+ int is_table = (pud_val(pud) & _PAGE_ALL) == _PAGE_TABLE;
+
+ return !is_table;
+}
+
+/* Return the page-table frame number (ptfn) that a pud_t points at. */
+#define pud_ptfn(pud) hv_pte_get_ptfn((pud).pgd)
+
+/*
+ * A given kernel pud_t maps to a kernel pmd_t table at a specific
+ * virtual address. Since kernel pmd_t tables can be aligned at
+ * sub-page granularity, this macro can return non-page-aligned
+ * pointers, despite its name.
+ */
+#define pud_page_vaddr(pud) \
+ (__va((phys_addr_t)pud_ptfn(pud) << HV_LOG2_PAGE_TABLE_ALIGN))
+
+/*
+ * A pud_t points to a pmd_t array. Since we can have multiple per
+ * page, we don't have a one-to-one mapping of pud_t's to pages.
+ */
+#define pud_page(pud) pfn_to_page(HV_PTFN_TO_PFN(pud_ptfn(pud)))
+
+/* Index of the pud entry which would control the given virtual address. */
+static inline unsigned long pud_index(unsigned long address)
+{
+ unsigned long slot = address >> PUD_SHIFT;
+
+ return slot & (PTRS_PER_PUD - 1);
+}
+
+#define pmd_offset(pud, address) \
+ ((pmd_t *)pud_page_vaddr(*(pud)) + pmd_index(address))
+
+/* Install a pmd value; the pmd is written directly through set_pte(). */
+static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+ pmd_t *slot = pmdp;
+
+ set_pte(slot, pmdval);
+}
+
+/* Build a pmd by storing a PTFN into a set of protection bits. */
+static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
+{
+ pmd_t pmd = hv_pte_set_ptfn(prot, ptfn);
+
+ return pmd;
+}
+
+/* Extract the page-table frame number (ptfn) that a pmd_t points at. */
+static inline unsigned long pmd_ptfn(pmd_t pmd)
+{
+ unsigned long ptfn = hv_pte_get_ptfn(pmd);
+
+ return ptfn;
+}
+
+/* Clear a pmd; the pmd is cleared directly through __pte_clear(). */
+static inline void pmd_clear(pmd_t *pmdp)
+{
+ pmd_t *entry = pmdp;
+
+ __pte_clear(entry);
+}
+
+/*
+ * Normalize an address to having the correct high bits set: the bits
+ * above the CHIP_VA_WIDTH() low bits are replaced with copies of the
+ * topmost of those low bits.
+ */
+#define pgd_addr_normalize pgd_addr_normalize
+static inline unsigned long pgd_addr_normalize(unsigned long addr)
+{
+ /* Number of bits above the virtual-address width. */
+ const int unused_bits = CHIP_WORD_SIZE() - CHIP_VA_WIDTH();
+
+ /*
+  * Shift left while still unsigned (shifting a signed value into the
+  * sign bit is undefined), then shift right as signed so the top VA
+  * bit is replicated. Like the original, this relies on the compiler
+  * implementing signed right shift arithmetically.
+  */
+ return (long)(addr << unused_bits) >> unused_bits;
+}
+
+/*
+ * No pgds are defined at or above MEM_HV_START, nor in the gap strictly
+ * between MEM_LOW_END and MEM_HIGH_START.
+ */
+static inline int pgd_addr_invalid(unsigned long addr)
+{
+ if (addr >= MEM_HV_START)
+  return 1;
+ return addr > MEM_LOW_END && addr < MEM_HIGH_START;
+}
+
+/*
+ * Use atomic instructions to provide atomicity against the hypervisor.
+ */
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+/*
+ * Clear HV_PTE_ACCESSED in *ptep and return bit HV_PTE_INDEX_ACCESSED of
+ * the value that __insn_fetchand() yields (presumably the entry's value
+ * before the AND -- confirm against the intrinsic's documentation).
+ * "vma" and "addr" are unused.
+ */
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ return (__insn_fetchand(&ptep->val, ~HV_PTE_ACCESSED) >>
+ HV_PTE_INDEX_ACCESSED) & 0x1;
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+/*
+ * Clear HV_PTE_WRITABLE in *ptep with a single __insn_fetchand(); the
+ * value it yields is discarded. "mm" and "addr" are unused.
+ */
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ __insn_fetchand(&ptep->val, ~HV_PTE_WRITABLE);
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+/*
+ * Exchange zero into *ptep with __insn_exch() and return the value it
+ * yields, wrapped as a pte (presumably the previous entry -- confirm
+ * against the intrinsic's documentation). "mm" and "addr" are unused.
+ */
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ return hv_pte(__insn_exch(&ptep->val, 0UL));
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_PGTABLE_64_H */
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
new file mode 100644
index 00000000..34c1e01f
--- /dev/null
+++ b/arch/tile/include/asm/processor.h
@@ -0,0 +1,357 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_PROCESSOR_H
+#define _ASM_TILE_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+/*
+ * NOTE: we don't include <linux/ptrace.h> or <linux/percpu.h> as one
+ * normally would, due to #include dependencies.
+ */
+#include <linux/types.h>
+#include <asm/ptrace.h>
+#include <asm/percpu.h>
+
+#include <arch/chip.h>
+#include <arch/spr_def.h>
+
+struct task_struct;
+struct thread_struct;
+
+typedef struct {
+ unsigned long seg;
+} mm_segment_t;
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+void *current_text_addr(void);
+
+#if CHIP_HAS_TILE_DMA()
+/* Capture the state of a suspended DMA. */
+struct tile_dma_state {
+ int enabled;
+ unsigned long src;
+ unsigned long dest;
+ unsigned long strides;
+ unsigned long chunk_size;
+ unsigned long src_chunk;
+ unsigned long dest_chunk;
+ unsigned long byte;
+ unsigned long status;
+};
+
+/*
+ * A mask of the DMA status register for selecting only the 'running'
+ * and 'done' bits.
+ */
+#define DMA_STATUS_MASK \
+ (SPR_DMA_STATUS__RUNNING_MASK | SPR_DMA_STATUS__DONE_MASK)
+#endif
+
+/*
+ * Track asynchronous TLB events (faults and access violations)
+ * that occur while we are in kernel mode from DMA or the SN processor.
+ */
+struct async_tlb {
+ short fault_num; /* original fault number; 0 if none */
+ char is_fault; /* was it a fault (vs an access violation) */
+ char is_write; /* for fault: was it caused by a write? */
+ unsigned long address; /* what address faulted? */
+};
+
+#ifdef CONFIG_HARDWALL
+struct hardwall_info;
+#endif
+
+/*
+ * Architecture-specific per-thread state. Members inside CHIP_HAS_*() or
+ * CONFIG_* conditionals exist only when that feature is present/enabled.
+ */
+struct thread_struct {
+ /* kernel stack pointer */
+ unsigned long ksp;
+ /* kernel PC */
+ unsigned long pc;
+ /* starting user stack pointer (for page migration) */
+ unsigned long usp0;
+ /* pid of process that created this one */
+ pid_t creator_pid;
+#if CHIP_HAS_TILE_DMA()
+ /* DMA info for suspended threads (byte == 0 means no DMA state) */
+ struct tile_dma_state tile_dma_state;
+#endif
+ /* User EX_CONTEXT registers */
+ unsigned long ex_context[2];
+ /* User SYSTEM_SAVE registers */
+ unsigned long system_save[4];
+ /* User interrupt mask */
+ unsigned long long interrupt_mask;
+ /* User interrupt-control 0 state */
+ unsigned long intctrl_0;
+#if CHIP_HAS_PROC_STATUS_SPR()
+ /* Any other miscellaneous processor state bits */
+ unsigned long proc_status;
+#endif
+#if !CHIP_HAS_FIXED_INTVEC_BASE()
+ /* Interrupt base for PL0 interrupts */
+ unsigned long interrupt_vector_base;
+#endif
+#if CHIP_HAS_TILE_RTF_HWM()
+ /* Tile cache retry fifo high-water mark */
+ unsigned long tile_rtf_hwm;
+#endif
+#if CHIP_HAS_DSTREAM_PF()
+ /* Data stream prefetch control */
+ unsigned long dstream_pf;
+#endif
+#ifdef CONFIG_HARDWALL
+ /* Is this task tied to an activated hardwall? */
+ struct hardwall_info *hardwall;
+ /* Chains this task into the list at hardwall->list. */
+ struct list_head hardwall_list;
+#endif
+#if CHIP_HAS_TILE_DMA()
+ /* Async DMA TLB fault information */
+ struct async_tlb dma_async_tlb;
+#endif
+#if CHIP_HAS_SN_PROC()
+ /* Was static network processor running when we were switched out? */
+ int sn_proc_running;
+ /* Async SNI TLB fault information */
+ struct async_tlb sn_async_tlb;
+#endif
+};
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Start with "sp" this many bytes below the top of the kernel stack.
+ * This preserves the invariant that a called function may write to *sp.
+ */
+#define STACK_TOP_DELTA 8
+
+/*
+ * When entering the kernel via a fault, start with the top of the
+ * pt_regs structure this many bytes below the top of the page.
+ * This aligns the pt_regs structure optimally for cache-line access.
+ */
+#ifdef __tilegx__
+#define KSTK_PTREGS_GAP 48
+#else
+#define KSTK_PTREGS_GAP 56
+#endif
+
+#ifndef __ASSEMBLY__
+
+#ifdef __tilegx__
+#define TASK_SIZE_MAX (MEM_LOW_END + 1)
+#else
+#define TASK_SIZE_MAX PAGE_OFFSET
+#endif
+
+/* TASK_SIZE and related variables are always checked in "current" context. */
+#ifdef CONFIG_COMPAT
+#define COMPAT_TASK_SIZE (1UL << 31)
+#define TASK_SIZE ((current_thread_info()->status & TS_COMPAT) ?\
+ COMPAT_TASK_SIZE : TASK_SIZE_MAX)
+#else
+#define TASK_SIZE TASK_SIZE_MAX
+#endif
+
+/* We provide a minimal "vdso" a la x86; just the sigreturn code for now. */
+#define VDSO_BASE (TASK_SIZE - PAGE_SIZE)
+
+#define STACK_TOP VDSO_BASE
+
+/* STACK_TOP_MAX is used temporarily in execve and should not check COMPAT. */
+#define STACK_TOP_MAX TASK_SIZE_MAX
+
+/*
+ * This decides where the kernel will search for a free chunk of vm
+ * space during mmap's, if it is using bottom-up mapping.
+ */
+#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
+
+#define HAVE_ARCH_PICK_MMAP_LAYOUT
+
+#define INIT_THREAD { \
+ .ksp = (unsigned long)init_stack + THREAD_SIZE - STACK_TOP_DELTA, \
+ .interrupt_mask = -1ULL \
+}
+
+/* Kernel stack top for the task that first boots on this cpu. */
+DECLARE_PER_CPU(unsigned long, boot_sp);
+
+/* PC to boot from on this cpu. */
+DECLARE_PER_CPU(unsigned long, boot_pc);
+
+/* Set the saved user sp and pc for a newly executed thread. */
+static inline void start_thread(struct pt_regs *regs,
+ unsigned long pc, unsigned long usp)
+{
+ regs->sp = usp;
+ regs->pc = pc;
+}
+
+/* Free all resources held by a thread. */
+static inline void release_thread(struct task_struct *dead_task)
+{
+ /* Nothing for now */
+}
+
+/* Prepare to copy thread state - unlazy all lazy status. */
+#define prepare_to_copy(tsk) do { } while (0)
+
+extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+extern int do_work_pending(struct pt_regs *regs, u32 flags);
+
+
+/*
+ * Return saved (kernel) PC of a blocked thread.
+ * Only used in a printk() in kernel/sched.c, so don't work too hard.
+ */
+#define thread_saved_pc(t) ((t)->thread.pc)
+
+unsigned long get_wchan(struct task_struct *p);
+
+/* Return initial ksp value for given task. */
+#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE)
+
+/* Return some info about the user process TASK. */
+#define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA)
+#define task_pt_regs(task) \
+ ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
+#define task_sp(task) (task_pt_regs(task)->sp)
+#define task_pc(task) (task_pt_regs(task)->pc)
+/* Aliases for pc and sp (used in fs/proc/array.c) */
+#define KSTK_EIP(task) task_pc(task)
+#define KSTK_ESP(task) task_sp(task)
+
+/* Standard format for printing registers and other word-size data. */
+#ifdef __tilegx__
+# define REGFMT "0x%016lx"
+#else
+# define REGFMT "0x%08lx"
+#endif
+
+/*
+ * Do some slow action (e.g. read a slow SPR).
+ * Note that this must also have compiler-barrier semantics since
+ * it may be used in a busy loop reading memory.
+ */
+static inline void cpu_relax(void)
+{
+ /* Read SPR_PASS and discard the result; the read itself is the delay. */
+ __insn_mfspr(SPR_PASS);
+ /* Compiler barrier, for callers that busy-loop reading memory. */
+ barrier();
+}
+
+/* Info on this processor (see fs/proc/cpuinfo.c) */
+struct seq_operations;
+extern const struct seq_operations cpuinfo_op;
+
+/* Provide information about the chip model. */
+extern char chip_model[64];
+
+/* Data on which physical memory controller corresponds to which NUMA node. */
+extern int node_controller[];
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+/* Does the heap allocator return hash-for-home pages by default? */
+extern int hash_default;
+
+/* Should kernel stack pages be hash-for-home? */
+extern int kstack_hash;
+
+/* Does MAP_ANONYMOUS return hash-for-home pages by default? */
+#define uheap_hash hash_default
+
+#else
+#define hash_default 0
+#define kstack_hash 0
+#define uheap_hash 0
+#endif
+
+/* Are we using huge pages in the TLB for kernel data? */
+extern int kdata_huge;
+
+/* Support standard Linux prefetching. */
+#define ARCH_HAS_PREFETCH
+#define prefetch(x) __builtin_prefetch(x)
+#define PREFETCH_STRIDE CHIP_L2_LINE_SIZE()
+
+/* Bring a value into the L1D, faulting the TLB if necessary. */
+#ifdef __tilegx__
+#define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x))
+#else
+#define prefetch_L1(x) __insn_prefetch_L1((void *)(x))
+#endif
+
+#else /* __ASSEMBLY__ */
+
+/* Do some slow action (e.g. read a slow SPR). */
+#define CPU_RELAX mfspr zero, SPR_PASS
+
+#endif /* !__ASSEMBLY__ */
+
+/* Assembly code assumes that the PL is in the low bits. */
+#if SPR_EX_CONTEXT_1_1__PL_SHIFT != 0
+# error Fix assembly assumptions about PL
+#endif
+
+/* We sometimes use these macros for EX_CONTEXT_0_1 as well. */
+#if SPR_EX_CONTEXT_1_1__PL_SHIFT != SPR_EX_CONTEXT_0_1__PL_SHIFT || \
+ SPR_EX_CONTEXT_1_1__PL_RMASK != SPR_EX_CONTEXT_0_1__PL_RMASK || \
+ SPR_EX_CONTEXT_1_1__ICS_SHIFT != SPR_EX_CONTEXT_0_1__ICS_SHIFT || \
+ SPR_EX_CONTEXT_1_1__ICS_RMASK != SPR_EX_CONTEXT_0_1__ICS_RMASK
+# error Fix assumptions that EX1 macros work for both PL0 and PL1
+#endif
+
+/* Allow pulling apart and recombining the PL and ICS bits in EX_CONTEXT. */
+#define EX1_PL(ex1) \
+ (((ex1) >> SPR_EX_CONTEXT_1_1__PL_SHIFT) & SPR_EX_CONTEXT_1_1__PL_RMASK)
+#define EX1_ICS(ex1) \
+ (((ex1) >> SPR_EX_CONTEXT_1_1__ICS_SHIFT) & SPR_EX_CONTEXT_1_1__ICS_RMASK)
+#define PL_ICS_EX1(pl, ics) \
+ (((pl) << SPR_EX_CONTEXT_1_1__PL_SHIFT) | \
+ ((ics) << SPR_EX_CONTEXT_1_1__ICS_SHIFT))
+
+/*
+ * Provide symbolic constants for PLs.
+ * Note that assembly code assumes that USER_PL is zero.
+ */
+#define USER_PL 0
+#if CONFIG_KERNEL_PL == 2
+#define GUEST_PL 1
+#endif
+#define KERNEL_PL CONFIG_KERNEL_PL
+
+/* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */
+/* Number of low bits of SYSTEM_SAVE_K_0 that hold the cpu number. */
+#define CPU_LOG_MASK_VALUE 12
+/* Mask selecting those low cpu-number bits. */
+#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1)
+#if CONFIG_NR_CPUS > CPU_MASK_VALUE
+# error Too many cpus!
+#endif
+/* Current cpu number: the low bits of SYSTEM_SAVE_K_0. */
+#define raw_smp_processor_id() \
+ ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE)
+/* Current ksp0: SYSTEM_SAVE_K_0 with the cpu-number bits masked off. */
+#define get_current_ksp0() \
+ (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE)
+/*
+ * Compute the combined value for "task": its ksp0 ORed with the current
+ * cpu number. BUGs if ksp0 has any of the cpu-number bits set, since the
+ * two fields would then overlap.
+ */
+#define next_current_ksp0(task) ({ \
+ unsigned long __ksp0 = task_ksp0(task); \
+ int __cpu = raw_smp_processor_id(); \
+ BUG_ON(__ksp0 & CPU_MASK_VALUE); \
+ __ksp0 | __cpu; \
+})
+
+#endif /* _ASM_TILE_PROCESSOR_H */
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
new file mode 100644
index 00000000..c6cddd7e
--- /dev/null
+++ b/arch/tile/include/asm/ptrace.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_PTRACE_H
+#define _ASM_TILE_PTRACE_H
+
+#include <arch/chip.h>
+#include <arch/abi.h>
+
+/* Byte offsets into struct pt_regs for code that cannot use the C definition; these must match struct pt_regs, below. */
+#if CHIP_WORD_SIZE() == 32
+#define PTREGS_OFFSET_REG(n) ((n)*4) /* slot n, 4 bytes per slot */
+#else
+#define PTREGS_OFFSET_REG(n) ((n)*8) /* slot n, 8 bytes per slot */
+#endif /* CHIP_WORD_SIZE() == 32 */
+#define PTREGS_OFFSET_BASE 0
+#define PTREGS_OFFSET_TP PTREGS_OFFSET_REG(53)
+#define PTREGS_OFFSET_SP PTREGS_OFFSET_REG(54)
+#define PTREGS_OFFSET_LR PTREGS_OFFSET_REG(55)
+#define PTREGS_NR_GPRS 56 /* regs[53] plus tp, sp and lr */
+#define PTREGS_OFFSET_PC PTREGS_OFFSET_REG(56)
+#define PTREGS_OFFSET_EX1 PTREGS_OFFSET_REG(57)
+#define PTREGS_OFFSET_FAULTNUM PTREGS_OFFSET_REG(58)
+#define PTREGS_OFFSET_ORIG_R0 PTREGS_OFFSET_REG(59)
+#define PTREGS_OFFSET_FLAGS PTREGS_OFFSET_REG(60)
+#if CHIP_HAS_CMPEXCH()
+#define PTREGS_OFFSET_CMPEXCH PTREGS_OFFSET_REG(61)
+#endif /* CHIP_HAS_CMPEXCH() */
+#define PTREGS_SIZE PTREGS_OFFSET_REG(64) /* 56 GPR slots + 5 special slots + 3 slots of cmpexch/padding */
+
+#ifndef __ASSEMBLY__
+
+#ifdef __KERNEL__
+/* Benefit from consistent use of "long" on all chips. */
+typedef unsigned long pt_reg_t;
+#else
+/* Provide appropriate length type to userspace regardless of -m32/-m64. */
+typedef uint_reg_t pt_reg_t;
+#endif /* __KERNEL__ */
+
+/*
+ * This struct defines the way the registers are stored on the stack during a
+ * system call or exception. "struct sigcontext" has the same shape; the PTREGS_* offsets above must be kept in step with it.
+ */
+struct pt_regs {
+ /* Saved main processor registers occupy slots 0..55 (regs[], tp, sp, lr); slots 56..63 are special. */
+ /* tp, sp, and lr must immediately follow regs[] for aliasing. */
+ pt_reg_t regs[53];
+ pt_reg_t tp; /* aliases regs[TREG_TP] */
+ pt_reg_t sp; /* aliases regs[TREG_SP] */
+ pt_reg_t lr; /* aliases regs[TREG_LR] */
+
+ /* Saved special registers. */
+ pt_reg_t pc; /* stored in EX_CONTEXT_K_0 */
+ pt_reg_t ex1; /* stored in EX_CONTEXT_K_1 (PL and ICS bit) */
+ pt_reg_t faultnum; /* fault number (INT_SWINT_1 for syscall) */
+ pt_reg_t orig_r0; /* r0 at syscall entry, else zero */
+ pt_reg_t flags; /* flags (see below) */
+#if !CHIP_HAS_CMPEXCH()
+ pt_reg_t pad[3]; /* pads the struct out to 64 slots (PTREGS_SIZE) */
+#else
+ pt_reg_t cmpexch; /* value of CMPEXCH_VALUE SPR at interrupt */
+ pt_reg_t pad[2]; /* pads the struct out to 64 slots (PTREGS_SIZE) */
+#endif
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#define PTRACE_GETREGS 12
+#define PTRACE_SETREGS 13
+#define PTRACE_GETFPREGS 14
+#define PTRACE_SETFPREGS 15
+
+/* Support TILE-specific ptrace options, with events starting at 16. */
+#define PTRACE_O_TRACEMIGRATE 0x00010000 /* == 1 << PTRACE_EVENT_MIGRATE */
+#define PTRACE_EVENT_MIGRATE 16
+#ifdef __KERNEL__
+#define PTRACE_O_MASK_TILE (PTRACE_O_TRACEMIGRATE) /* every TILE-specific PTRACE_O_* bit */
+#define PT_TRACE_MIGRATE 0x00080000
+#define PT_TRACE_MASK_TILE (PT_TRACE_MIGRATE) /* every TILE-specific PT_TRACE_* bit */
+#endif /* __KERNEL__ */
+
+#ifdef __KERNEL__
+
+/* Flag bits in pt_regs.flags */
+#define PT_FLAGS_DISABLE_IRQ 1 /* on return to kernel, disable irqs */
+#define PT_FLAGS_CALLER_SAVES 2 /* caller-save registers are valid */
+#define PT_FLAGS_RESTORE_REGS 4 /* restore callee-save regs on return */
+
+#ifndef __ASSEMBLY__
+
+#define instruction_pointer(regs) ((regs)->pc) /* the saved PC */
+#define profile_pc(regs) instruction_pointer(regs) /* profiling uses the saved PC unchanged */
+
+/* True when the saved context's PL is USER_PL, i.e. the time is accounted as user rather than system time. */
+#define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL)
+
+/* Fill in a struct pt_regs with the current kernel registers. */
+struct pt_regs *get_pt_regs(struct pt_regs *);
+
+/* Trace the current syscall. */
+extern void do_syscall_trace(void);
+
+#define arch_has_single_step() (1) /* unconditionally true here */
+
+/*
+ * A structure for all single-stepper state.
+ *
+ * If the layout changes, also update the SINGLESTEP_STATE_* defines that follow the !__ASSEMBLY__ section of this header.
+ */
+struct single_step_state {
+ /* the page to which we will write hacked-up bundles */
+ void __user *buffer;
+
+ union {
+ int flags; /* the bitfields below viewed as a single word */
+ struct {
+ unsigned long is_enabled:1, update:1, update_reg:6; /* presumably bit 0, bit 1, bits 2..7 per SINGLESTEP_STATE_* - confirm */
+ };
+ };
+
+ unsigned long orig_pc; /* the original PC */
+ unsigned long next_pc; /* return PC if no branch (PC + 1) */
+ unsigned long branch_next_pc; /* return PC if we did branch/jump */
+ unsigned long update_value; /* value to restore to the update target (presumably the register in update_reg - confirm) */
+};
+
+/* Single-step the instruction at regs->pc */
+extern void single_step_once(struct pt_regs *regs);
+
+/* Clean up after execve(). */
+extern void single_step_execve(void);
+
+struct task_struct; /* forward declaration; only a pointer is needed below */
+
+extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
+ int error_code);
+
+#ifdef __tilegx__
+/* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */
+#define __ARCH_WANT_COMPAT_SYS_PTRACE
+#endif /* __tilegx__ */
+
+#endif /* !__ASSEMBLY__ */
+
+#define SINGLESTEP_STATE_MASK_IS_ENABLED 0x1 /* bit 0 of single_step_state.flags */
+#define SINGLESTEP_STATE_MASK_UPDATE 0x2 /* bit 1 of single_step_state.flags */
+#define SINGLESTEP_STATE_TARGET_LB 2 /* lowest bit of the 6-bit target field */
+#define SINGLESTEP_STATE_TARGET_UB 7 /* highest bit of the 6-bit target field */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_TILE_PTRACE_H */
diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h
new file mode 100644
index 00000000..d062d463
--- /dev/null
+++ b/arch/tile/include/asm/sections.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SECTIONS_H
+#define _ASM_TILE_SECTIONS_H
+
+#define arch_is_kernel_data arch_is_kernel_data /* self-define so #ifdef tests see the inline below; presumably checked by <asm-generic/sections.h> - confirm */
+
+#include <asm-generic/sections.h>
+
+/* Text and data are at different areas in the kernel VA space. */
+extern char _sinitdata[], _einitdata[];
+
+/* Write-once data is writable only till the end of initialization. */
+extern char __w1data_begin[], __w1data_end[];
+
+
+/* Not exactly sections, but PC comparison points in the code. */
+extern char __rt_sigreturn[], __rt_sigreturn_end[];
+#ifndef __tilegx__
+extern char sys_cmpxchg[], __sys_cmpxchg_end[];
+extern char __sys_cmpxchg_grab_lock[];
+extern char __start_atomic_asm_code[], __end_atomic_asm_code[];
+#endif /* !__tilegx__ */
+
+/* Handle the discontiguity between _sdata and _stext: return nonzero iff addr lies in the half-open range [_sdata, _end). */
+static inline int arch_is_kernel_data(unsigned long addr)
+{
+ return addr >= (unsigned long)_sdata &&
+ addr < (unsigned long)_end;
+}
+
+#endif /* _ASM_TILE_SECTIONS_H */
diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h
new file mode 100644
index 00000000..e58613e0
--- /dev/null
+++ b/arch/tile/include/asm/setup.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SETUP_H
+#define _ASM_TILE_SETUP_H
+
+#define COMMAND_LINE_SIZE 2048
+
+#ifdef __KERNEL__
+
+#include <linux/pfn.h>
+#include <linux/init.h>
+
+/*
+ * MAXMEM (which leaves reserved space for vmalloc and iomap) is defined in asm/page.h; MAXMEM_PFN is PFN_DOWN() of it.
+ */
+#define MAXMEM_PFN PFN_DOWN(MAXMEM)
+
+void early_panic(const char *fmt, ...); /* takes a format string plus variadic arguments */
+void warn_early_printk(void);
+void __init disable_early_printk(void);
+
+/* Init-time routine to do tile-specific per-cpu setup. */
+void setup_cpu(int boot);
+
+/* User-level DMA management functions */
+void grant_dma_mpls(void);
+void restrict_dma_mpls(void);
+
+#ifdef CONFIG_HARDWALL
+/* User-level network management functions */
+void reset_network_state(void);
+void grant_network_mpls(void);
+void restrict_network_mpls(void);
+struct task_struct; /* forward declaration; only a pointer is needed below */
+int hardwall_deactivate(struct task_struct *task);
+
+/* Hook hardwall code into changes in affinity: if task p has a hardwall and new_mask differs from its current cpus_allowed, deactivate the hardwall. Arguments are parenthesised so any expression may be passed; p is evaluated more than once. */
+#define arch_set_cpus_allowed(p, new_mask) do { \
+ if ((p)->thread.hardwall && !cpumask_equal(&(p)->cpus_allowed, (new_mask))) \
+ hardwall_deactivate(p); \
+} while (0)
+#endif /* CONFIG_HARDWALL */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_TILE_SETUP_H */
diff --git a/arch/tile/include/asm/sigcontext.h b/arch/tile/include/asm/sigcontext.h
new file mode 100644
index 00000000..6348e59d
--- /dev/null
+++ b/arch/tile/include/asm/sigcontext.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SIGCONTEXT_H
+#define _ASM_TILE_SIGCONTEXT_H
+
+/* Don't pollute the namespace since <signal.h> includes this file. */
+#define __need_int_reg_t
+#include <arch/abi.h>
+
+/*
+ * struct sigcontext has the same shape as struct pt_regs (64 register-sized slots),
+ * but is simplified since we know the fault is from userspace.
+ */
+struct sigcontext {
+ __uint_reg_t gregs[53]; /* General-purpose registers. */
+ __uint_reg_t tp; /* Aliases gregs[TREG_TP]. */
+ __uint_reg_t sp; /* Aliases gregs[TREG_SP]. */
+ __uint_reg_t lr; /* Aliases gregs[TREG_LR]. */
+ __uint_reg_t pc; /* Program counter. */
+ __uint_reg_t ics; /* In Interrupt Critical Section? (occupies the slot pt_regs uses for ex1) */
+ __uint_reg_t faultnum; /* Fault number. */
+ __uint_reg_t pad[5]; /* Fills slots 59..63, where pt_regs keeps orig_r0, flags and its own cmpexch/padding. */
+};
+
+#endif /* _ASM_TILE_SIGCONTEXT_H */
diff --git a/arch/tile/include/asm/sigframe.h b/arch/tile/include/asm/sigframe.h
new file mode 100644
index 00000000..994d3d30
--- /dev/null
+++ b/arch/tile/include/asm/sigframe.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SIGFRAME_H
+#define _ASM_TILE_SIGFRAME_H
+
+/* Indicate that syscall return should not examine r0 */
+#define INT_SWINT_1_SIGRETURN (~0) /* all bits set; presumably stored as pt_regs.faultnum - confirm */
+
+#ifndef __ASSEMBLY__
+
+#include <arch/abi.h>
+
+struct rt_sigframe {
+ unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */
+ struct siginfo info; /* signal information record */
+ struct ucontext uc; /* user context record */
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_SIGFRAME_H */
diff --git a/arch/tile/include/asm/siginfo.h b/arch/tile/include/asm/siginfo.h
new file mode 100644
index 00000000..56d661bb
--- /dev/null
+++ b/arch/tile/include/asm/siginfo.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SIGINFO_H
+#define _ASM_TILE_SIGINFO_H
+
+#define __ARCH_SI_TRAPNO
+
+#ifdef __LP64__
+# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
+#endif /* __LP64__ */
+
+#include <asm-generic/siginfo.h>
+
+/*
+ * Additional Tile-specific SIGILL si_codes
+ */
+#define ILL_DBLFLT (__SI_FAULT|9) /* double fault */
+#define ILL_HARDWALL (__SI_FAULT|10) /* user networks hardwall violation */
+#undef NSIGILL /* replace the count that <asm-generic/siginfo.h> supplied */
+#define NSIGILL 10 /* covers the Tile-specific codes above, the highest being 10 */
+
+#endif /* _ASM_TILE_SIGINFO_H */
diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h
new file mode 100644
index 00000000..1e5e49aa
--- /dev/null
+++ b/arch/tile/include/asm/signal.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SIGNAL_H
+#define _ASM_TILE_SIGNAL_H
+
+/* Do not notify a ptracer when this signal is handled. */
+#define SA_NOPTRACE 0x02000000u
+
+/* Used in earlier Tilera releases, so keeping for binary compatibility. */
+#define SA_RESTORER 0x04000000u
+
+#include <asm-generic/signal.h>
+
+#if defined(__KERNEL__)
+#if !defined(__ASSEMBLY__)
+struct pt_regs; /* forward declaration; only pointers are used below */
+int restore_sigcontext(struct pt_regs *, struct sigcontext __user *);
+int setup_sigcontext(struct sigcontext __user *, struct pt_regs *);
+void do_signal(struct pt_regs *regs);
+void signal_fault(const char *type, struct pt_regs *,
+ void __user *frame, int sig);
+void trace_unhandled_signal(const char *type, struct pt_regs *regs,
+ unsigned long address, int signo);
+#endif /* !__ASSEMBLY__ */
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_TILE_SIGNAL_H */
diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
new file mode 100644
index 00000000..1aa759ae
--- /dev/null
+++ b/arch/tile/include/asm/smp.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SMP_H
+#define _ASM_TILE_SMP_H
+
+#ifdef CONFIG_SMP
+
+#include <asm/processor.h>
+#include <linux/cpumask.h>
+#include <linux/irqreturn.h>
+#include <hv/hypervisor.h>
+
+/* Set up this tile to support receiving hypervisor messages */
+void init_messaging(void);
+
+/* Set up this tile to support receiving device interrupts and IPIs. */
+void init_per_tile_IRQs(void);
+
+/* Send a message to processors specified in mask */
+void send_IPI_many(const struct cpumask *mask, int tag);
+
+/* Send a message to all but the sending processor */
+void send_IPI_allbutself(int tag);
+
+/* Send a message to a specific processor */
+void send_IPI_single(int dest, int tag);
+
+/* Process an IPI message */
+void evaluate_message(int tag);
+
+/* Boot a secondary cpu */
+void online_secondary(void);
+
+/* Topology of the supervisor tile grid, and coordinates of boot processor */
+extern HV_Topology smp_topology;
+
+/* Accessors for grid size */
+#define smp_height (smp_topology.height)
+#define smp_width (smp_topology.width)
+
+/* Convenience functions for converting cpu <-> coords; cpus are numbered row by row across a grid smp_width wide. */
+static inline int cpu_x(int cpu)
+{
+ return cpu % smp_width; /* column within the row */
+}
+static inline int cpu_y(int cpu)
+{
+ return cpu / smp_width; /* row index */
+}
+static inline int xy_to_cpu(int x, int y)
+{
+ return y * smp_width + x; /* inverse of cpu_x()/cpu_y(); no range checking */
+}
+
+/* Hypervisor message tags sent via the tile send_IPI*() routines. */
+#define MSG_TAG_START_CPU 1
+#define MSG_TAG_STOP_CPU 2
+#define MSG_TAG_CALL_FUNCTION_MANY 3
+#define MSG_TAG_CALL_FUNCTION_SINGLE 4
+
+/* Hook for the generic smp_call_function_many() routine: sends MSG_TAG_CALL_FUNCTION_MANY to every cpu in mask. */
+static inline void arch_send_call_function_ipi_mask(struct cpumask *mask)
+{
+ send_IPI_many(mask, MSG_TAG_CALL_FUNCTION_MANY);
+}
+
+/* Hook for the generic smp_call_function_single() routine: sends MSG_TAG_CALL_FUNCTION_SINGLE to cpu. */
+static inline void arch_send_call_function_single_ipi(int cpu)
+{
+ send_IPI_single(cpu, MSG_TAG_CALL_FUNCTION_SINGLE);
+}
+
+/* Print out the boot string describing which cpus were disabled. */
+void print_disabled_cpus(void);
+
+#else /* !CONFIG_SMP */
+
+#define smp_master_cpu 0 /* !CONFIG_SMP: the only cpu is cpu 0 */
+#define smp_height 1 /* !CONFIG_SMP: the grid is fixed at 1 x 1 */
+#define smp_width 1
+#define cpu_x(cpu) 0 /* argument is not evaluated */
+#define cpu_y(cpu) 0 /* argument is not evaluated */
+#define xy_to_cpu(x, y) 0 /* arguments are not evaluated */
+
+#endif /* !CONFIG_SMP */
+
+
+/* Which cpus may be used as the lotar in a page table entry. */
+extern struct cpumask cpu_lotar_map;
+#define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map)
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+/* Which processors are used for hash-for-home mapping */
+extern struct cpumask hash_for_home_map;
+#endif /* CHIP_HAS_CBOX_HOME_MAP() */
+
+/* Which cpus can have their cache flushed by hv_flush_remote(). */
+extern struct cpumask cpu_cacheable_map;
+#define cpu_cacheable(cpu) cpumask_test_cpu((cpu), &cpu_cacheable_map)
+
+/* Convert an HV_LOTAR value into a cpu, using the same x + y * smp_width numbering as xy_to_cpu(). */
+static inline int hv_lotar_to_cpu(HV_LOTAR lotar)
+{
+ return HV_LOTAR_X(lotar) + (HV_LOTAR_Y(lotar) * smp_width);
+}
+
+/*
+ * Extension of <linux/cpumask.h> functionality when you just want
+ * to express a mask or suppression or inclusion region without
+ * being too concerned about exactly which cpus are valid in that region.
+ */
+int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits);
+
+#define cpulist_parse_crop(buf, dst) \
+ __cpulist_parse_crop((buf), (dst), NR_CPUS) /* parse buf into cpumask dst, sized for NR_CPUS bits */
+static inline int __cpulist_parse_crop(const char *buf, struct cpumask *dstp,
+ int nbits)
+{
+ return bitmap_parselist_crop(buf, cpumask_bits(dstp), nbits); /* thin wrapper: hand the cpumask's bitmap to the bitmap parser */
+}
+
+/* Initialize the IPI subsystem. */
+void ipi_init(void);
+
+/* Function for start-cpu message to cause us to jump to. */
+extern unsigned long start_cpu_function_addr;
+
+#endif /* _ASM_TILE_SMP_H */
diff --git a/arch/tile/include/asm/spinlock.h b/arch/tile/include/asm/spinlock.h
new file mode 100644
index 00000000..1a8bd474
--- /dev/null
+++ b/arch/tile/include/asm/spinlock.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SPINLOCK_H
+#define _ASM_TILE_SPINLOCK_H
+
+#ifdef __tilegx__
+#include <asm/spinlock_64.h>
+#else
+#include <asm/spinlock_32.h>
+#endif
+
+#endif /* _ASM_TILE_SPINLOCK_H */
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
new file mode 100644
index 00000000..c0a77b38
--- /dev/null
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * 32-bit SMP spinlocks.
+ */
+
+#ifndef _ASM_TILE_SPINLOCK_32_H
+#define _ASM_TILE_SPINLOCK_32_H
+
+#include <linux/atomic.h>
+#include <asm/page.h>
+#include <linux/compiler.h>
+
+/*
+ * We only use even ticket numbers so the '1' inserted by a tns is
+ * an unambiguous "ticket is busy" flag.
+ */
+#define TICKET_QUANTUM 2 /* step between consecutive ticket numbers */
+
+
+/*
+ * SMP ticket spinlocks, allowing only a single CPU anywhere
+ *
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+ /*
+ * Note that even if a new ticket is in the process of being
+ * acquired, so lock->next_ticket is 1, it's still reasonable
+ * to claim the lock is held, since it will be momentarily
+ * if not already. There's no need to wait for a "valid"
+ * lock->next_ticket to become available.
+ */
+ return lock->next_ticket != lock->current_ticket; /* nonzero whenever the next ticket to hand out differs from the owning ticket */
+}
+
+void arch_spin_lock(arch_spinlock_t *lock);
+
+/* We cannot take an interrupt after getting a ticket, so don't enable them. */
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+int arch_spin_trylock(arch_spinlock_t *lock);
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+ /* For efficiency, overlap fetching the old ticket with the wmb(). */
+ int old_ticket = lock->current_ticket;
+ wmb(); /* guarantee anything modified under the lock is visible */
+ lock->current_ticket = old_ticket + TICKET_QUANTUM; /* advance ownership to the next ticket with a plain store */
+}
+
+void arch_spin_unlock_wait(arch_spinlock_t *lock);
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * We use a "tns/store-back" technique on a single word to manage
+ * the lock state, looping around to retry if the tns returns 1.
+ */
+
+/* Internal layout of the word; do not use. */
+#define _WR_NEXT_SHIFT 8
+#define _WR_CURR_SHIFT 16
+#define _WR_WIDTH 8
+#define _RD_COUNT_SHIFT 24
+#define _RD_COUNT_WIDTH 8
+
+/**
+ * arch_read_can_lock() - would read_trylock() succeed?
+ */
+static inline int arch_read_can_lock(arch_rwlock_t *rwlock)
+{
+ return (rwlock->lock << _RD_COUNT_WIDTH) == 0; /* the shift discards the top 8 bits (the reader count); true when every other bit is clear */
+}
+
+/**
+ * arch_write_can_lock() - would write_trylock() succeed?
+ */
+static inline int arch_write_can_lock(arch_rwlock_t *rwlock)
+{
+ return rwlock->lock == 0; /* true only when the whole word, reader count included, is zero */
+}
+
+/**
+ * arch_read_lock() - acquire a read lock.
+ */
+void arch_read_lock(arch_rwlock_t *rwlock);
+
+/**
+ * arch_write_lock() - acquire a write lock.
+ */
+void arch_write_lock(arch_rwlock_t *rwlock);
+
+/**
+ * arch_read_trylock() - try to acquire a read lock.
+ */
+int arch_read_trylock(arch_rwlock_t *rwlock);
+
+/**
+ * arch_write_trylock() - try to acquire a write lock.
+ */
+int arch_write_trylock(arch_rwlock_t *rwlock);
+
+/**
+ * arch_read_unlock() - release a read lock.
+ */
+void arch_read_unlock(arch_rwlock_t *rwlock);
+
+/**
+ * arch_write_unlock() - release a write lock.
+ */
+void arch_write_unlock(arch_rwlock_t *rwlock);
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#endif /* _ASM_TILE_SPINLOCK_32_H */
diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
new file mode 100644
index 00000000..5f8b6a09
--- /dev/null
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * 64-bit SMP ticket spinlocks, allowing only a single CPU anywhere
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+
+#ifndef _ASM_TILE_SPINLOCK_64_H
+#define _ASM_TILE_SPINLOCK_64_H
+
+/* Shifts and masks for the various fields in "lock". */
+#define __ARCH_SPIN_CURRENT_SHIFT 17 /* "current" occupies bits 17 and up */
+#define __ARCH_SPIN_NEXT_MASK 0x7fff /* "next" occupies bits 0..14 */
+#define __ARCH_SPIN_NEXT_OVERFLOW 0x8000 /* bit 15: set when "next" carries out of its 15 bits */
+
+/*
+ * Return the "current" portion of a ticket lock value,
+ * i.e. the number that currently owns the lock.
+ */
+static inline int arch_spin_current(u32 val)
+{
+ return val >> __ARCH_SPIN_CURRENT_SHIFT; /* at most 15 bits remain after shifting a u32 right by 17 */
+}
+
+/*
+ * Return the "next" portion of a ticket lock value,
+ * i.e. the number that the next task to try to acquire the lock will get.
+ */
+static inline int arch_spin_next(u32 val)
+{
+ return val & __ARCH_SPIN_NEXT_MASK; /* the overflow bit is deliberately excluded */
+}
+
+/* The lock is locked if a task would have to wait to get it. */
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+ u32 val = lock->lock; /* read the word once so both fields come from the same value */
+ return arch_spin_current(val) != arch_spin_next(val);
+}
+
+/* Bump the current ticket so the next task owns the lock. */
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+ wmb(); /* guarantee anything modified under the lock is visible */
+ __insn_fetchadd4(&lock->lock, 1U << __ARCH_SPIN_CURRENT_SHIFT); /* adds 1 to the "current" field; the result is ignored */
+}
+
+void arch_spin_unlock_wait(arch_spinlock_t *lock);
+
+void arch_spin_lock_slow(arch_spinlock_t *lock, u32 val);
+
+/* Grab the "next" ticket number and bump it atomically.
+ * If the current ticket is not ours, go to the slow path.
+ * We also take the slow path if the "next" value overflows.
+ */
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+ u32 val = __insn_fetchadd4(&lock->lock, 1); /* presumably returns the word as it was before the add - confirm */
+ u32 ticket = val & (__ARCH_SPIN_NEXT_MASK | __ARCH_SPIN_NEXT_OVERFLOW); /* keep the overflow bit: a ticket with it set can never equal the 15-bit "current" */
+ if (unlikely(arch_spin_current(val) != ticket))
+ arch_spin_lock_slow(lock, ticket); /* contended or overflowed: hand our ticket to the out-of-line path */
+}
+
+/* Try to get the lock, and return whether we succeeded. */
+int arch_spin_trylock(arch_spinlock_t *lock);
+
+/* We cannot take an interrupt after getting a ticket, so don't enable them. */
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * We use fetchadd() for readers, and fetchor() with the sign bit
+ * for writers.
+ */
+
+#define __WRITE_LOCK_BIT (1 << 31) /* sign bit of the 32-bit lock word. NOTE(review): shifting 1 into the sign bit of int is undefined in ISO C and relies on the compiler */
+
+static inline int arch_write_val_locked(int val)
+{
+ return val < 0; /* Optimize "val & __WRITE_LOCK_BIT". */
+}
+
+/**
+ * arch_read_can_lock - would read_trylock() succeed?
+ * @rw: the rwlock in question.
+ */
+static inline int arch_read_can_lock(arch_rwlock_t *rw)
+{
+ return !arch_write_val_locked(rw->lock); /* readers only need the writer bit to be clear */
+}
+
+/**
+ * arch_write_can_lock - would write_trylock() succeed?
+ * @rw: the rwlock in question.
+ */
+static inline int arch_write_can_lock(arch_rwlock_t *rw)
+{
+ return rw->lock == 0; /* writers need no writer bit and a zero reader count */
+}
+
+extern void __read_lock_failed(arch_rwlock_t *rw); /* out-of-line path taken when a writer holds the lock */
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+ u32 val = __insn_fetchaddgez4(&rw->lock, 1); /* presumably adds only if the result stays >= 0 and returns the old word - confirm */
+ if (unlikely(arch_write_val_locked(val))) /* writer bit was set in the value we got back */
+ __read_lock_failed(rw);
+}
+
+extern void __write_lock_failed(arch_rwlock_t *rw, u32 val); /* out-of-line path; val is the word seen by the failed attempt */
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+ u32 val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT); /* set the writer bit and fetch the word */
+ if (unlikely(val != 0)) /* nonzero: readers and/or another writer were present */
+ __write_lock_failed(rw, val);
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+ __insn_mf(); /* presumably a memory fence ordering the critical section before the release - confirm */
+ __insn_fetchadd4(&rw->lock, -1); /* drop this reader from the count */
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+ __insn_mf(); /* presumably a memory fence ordering the critical section before the release - confirm */
+ __insn_exch4(&rw->lock, 0); /* Avoid waiting in the write buffer. */
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+ return !arch_write_val_locked(__insn_fetchaddgez4(&rw->lock, 1)); /* one attempt: succeed iff the fetched word has no writer bit */
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+ u32 val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT); /* set the writer bit and fetch the word */
+ if (likely(val == 0)) /* word was entirely free: we now own it */
+ return 1;
+ if (!arch_write_val_locked(val)) /* only readers were present, so the writer bit now set is ours to undo */
+ __insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT);
+ return 0; /* if another writer already held the bit it is left untouched */
+}
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#endif /* _ASM_TILE_SPINLOCK_64_H */
diff --git a/arch/tile/include/asm/spinlock_types.h b/arch/tile/include/asm/spinlock_types.h
new file mode 100644
index 00000000..a71f59b4
--- /dev/null
+++ b/arch/tile/include/asm/spinlock_types.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SPINLOCK_TYPES_H
+#define _ASM_TILE_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+#ifdef __tilegx__
+
+/* Low 15 bits are "next"; high 15 bits are "current" (see the __ARCH_SPIN_* shifts and masks in asm/spinlock_64.h). */
+typedef struct arch_spinlock {
+ unsigned int lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } /* "next" == "current" == 0 */
+
+/* High bit is "writer owns"; low 31 bits are a count of readers. */
+typedef struct arch_rwlock {
+ unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED { 0 } /* no writer, zero readers */
+
+#else /* !__tilegx__ */
+
+typedef struct arch_spinlock {
+ /* Next ticket number to hand out. */
+ int next_ticket;
+ /* The ticket number that currently owns this lock. */
+ int current_ticket;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { 0, 0 } /* next_ticket == current_ticket == 0 */
+
+/*
+ * Byte 0 for tns (only the low bit is used), byte 1 for ticket-lock "next",
+ * byte 2 for ticket-lock "current", byte 3 for reader count.
+ */
+typedef struct arch_rwlock {
+ unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED { 0 } /* all four bytes zero */
+
+#endif /* __tilegx__ */
+#endif /* _ASM_TILE_SPINLOCK_TYPES_H */
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
new file mode 100644
index 00000000..0e9d382a
--- /dev/null
+++ b/arch/tile/include/asm/stack.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_STACK_H
+#define _ASM_TILE_STACK_H
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <asm/backtrace.h>
+#include <asm/page.h>
+#include <hv/hypervisor.h>
+
+/* Everything we need to keep track of a backtrace iteration */
+struct KBacktraceIterator {
+ BacktraceIterator it;
+ struct task_struct *task; /* task we are backtracing */
+ int end; /* iteration complete. */
+ int new_context; /* new context is starting */
+ int profile; /* profiling, so stop on async intrpt */
+ int verbose; /* printk extra info (don't want to
+ * do this for profiling) */
+ int is_current; /* backtracing current task */
+};
+
+/* Iteration methods for kernel backtraces */
+
+/*
+ * Initialize a KBacktraceIterator from a task_struct, and optionally from
+ * a set of registers. If the registers are omitted, the process is
+ * assumed to be descheduled, and registers are read from the process's
+ * thread_struct and stack. "verbose" means to printk some additional
+ * information about fault handlers as we pass them on the stack.
+ */
+extern void KBacktraceIterator_init(struct KBacktraceIterator *kbt,
+ struct task_struct *, struct pt_regs *);
+
+/* Initialize iterator based on current stack. */
+extern void KBacktraceIterator_init_current(struct KBacktraceIterator *kbt);
+
+/* Helper method for above. */
+extern void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt,
+ ulong pc, ulong lr, ulong sp, ulong r52);
+
+/* No more frames? */
+extern int KBacktraceIterator_end(struct KBacktraceIterator *kbt);
+
+/* Advance to the next frame. */
+extern void KBacktraceIterator_next(struct KBacktraceIterator *kbt);
+
+/*
+ * Dump stack given complete register info. Use only from the
+ * architecture-specific code; show_stack()
+ * and dump_stack() (in entry.S) are architecture-independent entry points.
+ */
+extern void tile_show_stack(struct KBacktraceIterator *, int headers);
+
+/* Dump stack of current process, with registers to seed the backtrace. */
+extern void dump_stack_regs(struct pt_regs *);
+
+/* Helper method for assembly dump_stack(). */
+extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+
+#endif /* _ASM_TILE_STACK_H */
diff --git a/arch/tile/include/asm/stat.h b/arch/tile/include/asm/stat.h
new file mode 100644
index 00000000..c0db34d5
--- /dev/null
+++ b/arch/tile/include/asm/stat.h
@@ -0,0 +1,4 @@
+#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
+#define __ARCH_WANT_STAT64 /* Used for compat_sys_stat64() etc. */
+#endif
+#include <asm-generic/stat.h>
diff --git a/arch/tile/include/asm/string.h b/arch/tile/include/asm/string.h
new file mode 100644
index 00000000..7535cf1a
--- /dev/null
+++ b/arch/tile/include/asm/string.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_STRING_H
+#define _ASM_TILE_STRING_H
+
+#define __HAVE_ARCH_MEMCHR
+#define __HAVE_ARCH_MEMSET
+#define __HAVE_ARCH_MEMCPY
+#define __HAVE_ARCH_MEMMOVE
+#define __HAVE_ARCH_STRCHR
+#define __HAVE_ARCH_STRLEN
+
+extern __kernel_size_t strlen(const char *);
+extern char *strchr(const char *s, int c);
+extern void *memchr(const void *s, int c, size_t n);
+extern void *memset(void *, int, __kernel_size_t);
+extern void *memcpy(void *, const void *, __kernel_size_t);
+extern void *memmove(void *, const void *, __kernel_size_t);
+
+#endif /* _ASM_TILE_STRING_H */
diff --git a/arch/tile/include/asm/swab.h b/arch/tile/include/asm/swab.h
new file mode 100644
index 00000000..7c37b38f
--- /dev/null
+++ b/arch/tile/include/asm/swab.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SWAB_H
+#define _ASM_TILE_SWAB_H
+
+/* Tile gcc is always >= 4.3.0, so we use __builtin_bswap. */
+#define __arch_swab32(x) __builtin_bswap32(x)
+#define __arch_swab64(x) __builtin_bswap64(x)
+#define __arch_swab16(x) (__builtin_bswap32(x) >> 16)
+
+#endif /* _ASM_TILE_SWAB_H */
diff --git a/arch/tile/include/asm/switch_to.h b/arch/tile/include/asm/switch_to.h
new file mode 100644
index 00000000..1d48c5fe
--- /dev/null
+++ b/arch/tile/include/asm/switch_to.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SWITCH_TO_H
+#define _ASM_TILE_SWITCH_TO_H
+
+#include <arch/sim_def.h>
+
+/*
+ * switch_to(prev, next, last) switches from task "prev" to task "next"
+ * and stores the task pointer returned by _switch_to() in "last".
+ * The number of callee-saved registers saved on the kernel stack
+ * is defined here for use in copy_thread() and must agree with __switch_to().
+ */
+#define CALLEE_SAVED_FIRST_REG 30
+#define CALLEE_SAVED_REGS_COUNT 24 /* r30 to r52, plus an empty to align */
+
+#ifndef __ASSEMBLY__
+
+struct task_struct;
+
+/*
+ * Pause the DMA engine and static network before task switching.
+ */
+#define prepare_arch_switch(next) _prepare_arch_switch(next)
+void _prepare_arch_switch(struct task_struct *next);
+
+/* Perform the context switch; "last" receives _switch_to()'s return value. */
+#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next)))
+extern struct task_struct *_switch_to(struct task_struct *prev,
+ struct task_struct *next);
+
+/* Helper function for _switch_to(). */
+extern struct task_struct *__switch_to(struct task_struct *prev,
+ struct task_struct *next,
+ unsigned long new_system_save_k_0);
+
+/* Address that switched-away from tasks are at. */
+extern unsigned long get_switch_to_pc(void);
+
+/*
+ * Write exit/switch notifications to SPR_SIM_CONTROL.  Kernel threads then
+ * check whether they need to migrate their stack on return from a context
+ * switch; for user threads, we defer until they return to user-space.
+ */
+#define finish_arch_switch(prev) do { \
+ if (unlikely((prev)->state == TASK_DEAD)) \
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \
+ ((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \
+ (current->pid << _SIM_CONTROL_OPERATOR_BITS)); \
+ if (current->mm == NULL && !kstack_hash && \
+ current_thread_info()->homecache_cpu != smp_processor_id()) \
+ homecache_migrate_kthread(); \
+} while (0)
+
+/* Support function for forking a new task. */
+void ret_from_fork(void);
+
+/* Called from ret_from_fork() when a new process starts up. */
+struct task_struct *sim_notify_fork(struct task_struct *prev);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_SWITCH_TO_H */
diff --git a/arch/tile/include/asm/syscall.h b/arch/tile/include/asm/syscall.h
new file mode 100644
index 00000000..d35e0dcb
--- /dev/null
+++ b/arch/tile/include/asm/syscall.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_TILE_SYSCALL_H
+#define _ASM_TILE_SYSCALL_H
+
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <arch/abi.h>
+
+/*
+ * Only the low 32 bits of the syscall-number register are meaningful, so
+ * we return int.  This importantly ignores the high bits on 64-bit, so
+ * comparisons sign-extend the low 32 bits.
+ */
+static inline int syscall_get_nr(struct task_struct *t, struct pt_regs *regs)
+{
+ return regs->regs[TREG_SYSCALL_NR];
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ regs->regs[0] = regs->orig_r0;
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ unsigned long error = regs->regs[0];
+ return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return regs->regs[0];
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+ struct pt_regs *regs,
+ int error, long val)
+{
+ regs->regs[0] = (long) error ?: val;
+}
+
+/* Copy n syscall arguments, starting at argument i, from regs into args. */
+static inline void syscall_get_arguments(struct task_struct *task,
+ struct pt_regs *regs, unsigned int i,
+ unsigned int n, unsigned long *args)
+{
+ BUG_ON(i + n > 6);
+ memcpy(args, &regs->regs[i], n * sizeof(args[0]));
+}
+
+/* Store n syscall arguments from args into regs, starting at argument i. */
+static inline void syscall_set_arguments(struct task_struct *task,
+ struct pt_regs *regs, unsigned int i,
+ unsigned int n, const unsigned long *args)
+{
+ BUG_ON(i + n > 6);
+ memcpy(&regs->regs[i], args, n * sizeof(args[0]));
+}
+
+#endif /* _ASM_TILE_SYSCALL_H */
diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h
new file mode 100644
index 00000000..3b5507c3
--- /dev/null
+++ b/arch/tile/include/asm/syscalls.h
@@ -0,0 +1,76 @@
+/*
+ * syscalls.h - Linux syscall interfaces (arch-specific)
+ *
+ * Copyright (c) 2008 Jaswinder Singh Rajput
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SYSCALLS_H
+#define _ASM_TILE_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/signal.h>
+#include <linux/types.h>
+#include <linux/compat.h>
+
+/* The array of function pointers for syscalls. */
+extern void *sys_call_table[];
+#ifdef CONFIG_COMPAT
+extern void *compat_sys_call_table[];
+#endif
+
+/*
+ * Note that by convention, any syscall which requires the current
+ * register set takes an additional "struct pt_regs *" pointer; a
+ * _sys_xxx() trampoline in intvec*.S just sets up the pointer and
+ * jumps to sys_xxx().
+ */
+
+/* kernel/sys.c */
+ssize_t sys32_readahead(int fd, u32 offset_lo, u32 offset_hi, u32 count);
+long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi,
+ u32 len, int advice);
+int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi,
+ u32 len_lo, u32 len_hi, int advice);
+long sys_flush_cache(void);
+#ifndef __tilegx__ /* No mmap() in the 32-bit kernel. */
+#define sys_mmap sys_mmap
+#endif
+
+#ifndef __tilegx__
+/* mm/fault.c */
+long sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *);
+long _sys_cmpxchg_badaddr(unsigned long address);
+#endif
+
+#ifdef CONFIG_COMPAT
+/* These four are not defined for 64-bit, but serve as "compat" syscalls. */
+long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg);
+long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf);
+long sys_truncate64(const char __user *path, loff_t length);
+long sys_ftruncate64(unsigned int fd, loff_t length);
+#endif
+
+/* These are the intvec*.S trampolines. */
+long _sys_sigaltstack(const stack_t __user *, stack_t __user *);
+long _sys_rt_sigreturn(void);
+long _sys_clone(unsigned long clone_flags, unsigned long newsp,
+ void __user *parent_tid, void __user *child_tid);
+long _sys_execve(const char __user *filename,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp);
+
+#include <asm-generic/syscalls.h>
+
+#endif /* _ASM_TILE_SYSCALLS_H */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
new file mode 100644
index 00000000..7594764d
--- /dev/null
+++ b/arch/tile/include/asm/thread_info.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2002 David Howells (dhowells@redhat.com)
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_THREAD_INFO_H
+#define _ASM_TILE_THREAD_INFO_H
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#ifndef __ASSEMBLY__
+
+/*
+ * Low level task data that assembly code needs immediate access to.
+ * The structure is placed at the bottom of the supervisor stack.
+ */
+struct thread_info {
+ struct task_struct *task; /* main task structure */
+ struct exec_domain *exec_domain; /* execution domain */
+ unsigned long flags; /* low level flags */
+ unsigned long status; /* thread-synchronous flags */
+ __u32 homecache_cpu; /* CPU we are homecached on */
+ __u32 cpu; /* current CPU */
+ int preempt_count; /* 0 => preemptable,
+ <0 => BUG */
+
+ mm_segment_t addr_limit; /* thread address space
+ (KERNEL_DS or USER_DS) */
+ struct restart_block restart_block;
+ struct single_step_state *step_state; /* single step state
+ (if non-zero) */
+};
+
+/*
+ * macros/functions for gaining access to the thread information structure.
+ */
+#define INIT_THREAD_INFO(tsk) \
+{ \
+ .task = &tsk, \
+ .exec_domain = &default_exec_domain, \
+ .flags = 0, \
+ .cpu = 0, \
+ .preempt_count = INIT_PREEMPT_COUNT, \
+ .addr_limit = KERNEL_DS, \
+ .restart_block = { \
+ .fn = do_no_restart_syscall, \
+ }, \
+ .step_state = NULL, \
+}
+
+#define init_thread_info (init_thread_union.thread_info)
+#define init_stack (init_thread_union.stack)
+
+#endif /* !__ASSEMBLY__ */
+
+#if PAGE_SIZE < 8192
+#define THREAD_SIZE_ORDER (13 - PAGE_SHIFT)
+#else
+#define THREAD_SIZE_ORDER (0)
+#endif
+#define THREAD_SIZE_PAGES (1 << THREAD_SIZE_ORDER)
+
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define LOG2_THREAD_SIZE (PAGE_SHIFT + THREAD_SIZE_ORDER)
+
+#define STACK_WARN (THREAD_SIZE/8)
+
+#ifndef __ASSEMBLY__
+
+/* How to get the thread information struct from C. */
+register unsigned long stack_pointer __asm__("sp");
+
+#define current_thread_info() \
+ ((struct thread_info *)(stack_pointer & -THREAD_SIZE))
+
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+extern struct thread_info *alloc_thread_info_node(struct task_struct *task, int node);
+extern void free_thread_info(struct thread_info *info);
+
+/* Sit on a nap instruction until interrupted. */
+extern void smp_nap(void);
+
+/* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */
+extern void _cpu_idle(void);
+
+/* Switch boot idle thread to a freshly-allocated stack and free old stack. */
+extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
+ unsigned long new_sp,
+ unsigned long new_ss10);
+
+#else /* __ASSEMBLY__ */
+
+/*
+ * How to get the thread information struct from assembly.
+ * Note that we use different macros since different architectures
+ * have different semantics in their "mm" instruction and we would
+ * like to guarantee that the macro expands to exactly one instruction.
+ */
+#ifdef __tilegx__
+#define EXTRACT_THREAD_INFO(reg) mm reg, zero, LOG2_THREAD_SIZE, 63
+#else
+#define GET_THREAD_INFO(reg) mm reg, sp, zero, LOG2_THREAD_SIZE, 31
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#define PREEMPT_ACTIVE 0x10000000
+
+/*
+ * Thread information flags that various assembly files may need to access.
+ * Keep flags accessed frequently in low bits, particularly since it makes
+ * it easier to build constants in assembly.
+ */
+#define TIF_SIGPENDING 0 /* signal pending */
+#define TIF_NEED_RESCHED 1 /* rescheduling necessary */
+#define TIF_SINGLESTEP 2 /* restore singlestep on return to
+ user mode */
+#define TIF_ASYNC_TLB 3 /* got an async TLB fault in kernel */
+#define TIF_SYSCALL_TRACE 4 /* syscall trace active */
+#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
+#define TIF_SECCOMP 6 /* secure computing */
+#define TIF_MEMDIE 7 /* OOM killer at work */
+#define TIF_NOTIFY_RESUME 8 /* callback before returning to user */
+
+#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
+#define _TIF_ASYNC_TLB (1<<TIF_ASYNC_TLB)
+#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP (1<<TIF_SECCOMP)
+#define _TIF_MEMDIE (1<<TIF_MEMDIE)
+#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
+
+/* Work to do on any return to user space. */
+#define _TIF_ALLWORK_MASK \
+ (_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SINGLESTEP|\
+ _TIF_ASYNC_TLB|_TIF_NOTIFY_RESUME)
+
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#ifdef __tilegx__
+#define TS_COMPAT 0x0001 /* 32-bit compatibility mode */
+#endif
+#define TS_POLLING 0x0004 /* in idle loop but not sleeping */
+#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal */
+
+#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
+
+#ifndef __ASSEMBLY__
+#define HAVE_SET_RESTORE_SIGMASK 1
+static inline void set_restore_sigmask(void)
+{
+ struct thread_info *ti = current_thread_info();
+ ti->status |= TS_RESTORE_SIGMASK;
+ set_bit(TIF_SIGPENDING, &ti->flags);
+}
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_THREAD_INFO_H */
diff --git a/arch/tile/include/asm/tile-desc.h b/arch/tile/include/asm/tile-desc.h
new file mode 100644
index 00000000..43849bf7
--- /dev/null
+++ b/arch/tile/include/asm/tile-desc.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __tilegx__
+#include <asm/tile-desc_32.h>
+#else
+#include <asm/tile-desc_64.h>
+#endif
diff --git a/arch/tile/include/asm/tile-desc_32.h b/arch/tile/include/asm/tile-desc_32.h
new file mode 100644
index 00000000..f09c5c43
--- /dev/null
+++ b/arch/tile/include/asm/tile-desc_32.h
@@ -0,0 +1,553 @@
+/* TILEPro opcode information.
+ *
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ *
+ *
+ *
+ *
+ */
+
+#ifndef opcode_tilepro_h
+#define opcode_tilepro_h
+
+#include <arch/opcode.h>
+
+
+enum
+{
+ TILEPRO_MAX_OPERANDS = 5 /* mm */
+};
+
+typedef enum
+{
+ TILEPRO_OPC_BPT,
+ TILEPRO_OPC_INFO,
+ TILEPRO_OPC_INFOL,
+ TILEPRO_OPC_J,
+ TILEPRO_OPC_JAL,
+ TILEPRO_OPC_MOVE,
+ TILEPRO_OPC_MOVE_SN,
+ TILEPRO_OPC_MOVEI,
+ TILEPRO_OPC_MOVEI_SN,
+ TILEPRO_OPC_MOVELI,
+ TILEPRO_OPC_MOVELI_SN,
+ TILEPRO_OPC_MOVELIS,
+ TILEPRO_OPC_PREFETCH,
+ TILEPRO_OPC_RAISE,
+ TILEPRO_OPC_ADD,
+ TILEPRO_OPC_ADD_SN,
+ TILEPRO_OPC_ADDB,
+ TILEPRO_OPC_ADDB_SN,
+ TILEPRO_OPC_ADDBS_U,
+ TILEPRO_OPC_ADDBS_U_SN,
+ TILEPRO_OPC_ADDH,
+ TILEPRO_OPC_ADDH_SN,
+ TILEPRO_OPC_ADDHS,
+ TILEPRO_OPC_ADDHS_SN,
+ TILEPRO_OPC_ADDI,
+ TILEPRO_OPC_ADDI_SN,
+ TILEPRO_OPC_ADDIB,
+ TILEPRO_OPC_ADDIB_SN,
+ TILEPRO_OPC_ADDIH,
+ TILEPRO_OPC_ADDIH_SN,
+ TILEPRO_OPC_ADDLI,
+ TILEPRO_OPC_ADDLI_SN,
+ TILEPRO_OPC_ADDLIS,
+ TILEPRO_OPC_ADDS,
+ TILEPRO_OPC_ADDS_SN,
+ TILEPRO_OPC_ADIFFB_U,
+ TILEPRO_OPC_ADIFFB_U_SN,
+ TILEPRO_OPC_ADIFFH,
+ TILEPRO_OPC_ADIFFH_SN,
+ TILEPRO_OPC_AND,
+ TILEPRO_OPC_AND_SN,
+ TILEPRO_OPC_ANDI,
+ TILEPRO_OPC_ANDI_SN,
+ TILEPRO_OPC_AULI,
+ TILEPRO_OPC_AVGB_U,
+ TILEPRO_OPC_AVGB_U_SN,
+ TILEPRO_OPC_AVGH,
+ TILEPRO_OPC_AVGH_SN,
+ TILEPRO_OPC_BBNS,
+ TILEPRO_OPC_BBNS_SN,
+ TILEPRO_OPC_BBNST,
+ TILEPRO_OPC_BBNST_SN,
+ TILEPRO_OPC_BBS,
+ TILEPRO_OPC_BBS_SN,
+ TILEPRO_OPC_BBST,
+ TILEPRO_OPC_BBST_SN,
+ TILEPRO_OPC_BGEZ,
+ TILEPRO_OPC_BGEZ_SN,
+ TILEPRO_OPC_BGEZT,
+ TILEPRO_OPC_BGEZT_SN,
+ TILEPRO_OPC_BGZ,
+ TILEPRO_OPC_BGZ_SN,
+ TILEPRO_OPC_BGZT,
+ TILEPRO_OPC_BGZT_SN,
+ TILEPRO_OPC_BITX,
+ TILEPRO_OPC_BITX_SN,
+ TILEPRO_OPC_BLEZ,
+ TILEPRO_OPC_BLEZ_SN,
+ TILEPRO_OPC_BLEZT,
+ TILEPRO_OPC_BLEZT_SN,
+ TILEPRO_OPC_BLZ,
+ TILEPRO_OPC_BLZ_SN,
+ TILEPRO_OPC_BLZT,
+ TILEPRO_OPC_BLZT_SN,
+ TILEPRO_OPC_BNZ,
+ TILEPRO_OPC_BNZ_SN,
+ TILEPRO_OPC_BNZT,
+ TILEPRO_OPC_BNZT_SN,
+ TILEPRO_OPC_BYTEX,
+ TILEPRO_OPC_BYTEX_SN,
+ TILEPRO_OPC_BZ,
+ TILEPRO_OPC_BZ_SN,
+ TILEPRO_OPC_BZT,
+ TILEPRO_OPC_BZT_SN,
+ TILEPRO_OPC_CLZ,
+ TILEPRO_OPC_CLZ_SN,
+ TILEPRO_OPC_CRC32_32,
+ TILEPRO_OPC_CRC32_32_SN,
+ TILEPRO_OPC_CRC32_8,
+ TILEPRO_OPC_CRC32_8_SN,
+ TILEPRO_OPC_CTZ,
+ TILEPRO_OPC_CTZ_SN,
+ TILEPRO_OPC_DRAIN,
+ TILEPRO_OPC_DTLBPR,
+ TILEPRO_OPC_DWORD_ALIGN,
+ TILEPRO_OPC_DWORD_ALIGN_SN,
+ TILEPRO_OPC_FINV,
+ TILEPRO_OPC_FLUSH,
+ TILEPRO_OPC_FNOP,
+ TILEPRO_OPC_ICOH,
+ TILEPRO_OPC_ILL,
+ TILEPRO_OPC_INTHB,
+ TILEPRO_OPC_INTHB_SN,
+ TILEPRO_OPC_INTHH,
+ TILEPRO_OPC_INTHH_SN,
+ TILEPRO_OPC_INTLB,
+ TILEPRO_OPC_INTLB_SN,
+ TILEPRO_OPC_INTLH,
+ TILEPRO_OPC_INTLH_SN,
+ TILEPRO_OPC_INV,
+ TILEPRO_OPC_IRET,
+ TILEPRO_OPC_JALB,
+ TILEPRO_OPC_JALF,
+ TILEPRO_OPC_JALR,
+ TILEPRO_OPC_JALRP,
+ TILEPRO_OPC_JB,
+ TILEPRO_OPC_JF,
+ TILEPRO_OPC_JR,
+ TILEPRO_OPC_JRP,
+ TILEPRO_OPC_LB,
+ TILEPRO_OPC_LB_SN,
+ TILEPRO_OPC_LB_U,
+ TILEPRO_OPC_LB_U_SN,
+ TILEPRO_OPC_LBADD,
+ TILEPRO_OPC_LBADD_SN,
+ TILEPRO_OPC_LBADD_U,
+ TILEPRO_OPC_LBADD_U_SN,
+ TILEPRO_OPC_LH,
+ TILEPRO_OPC_LH_SN,
+ TILEPRO_OPC_LH_U,
+ TILEPRO_OPC_LH_U_SN,
+ TILEPRO_OPC_LHADD,
+ TILEPRO_OPC_LHADD_SN,
+ TILEPRO_OPC_LHADD_U,
+ TILEPRO_OPC_LHADD_U_SN,
+ TILEPRO_OPC_LNK,
+ TILEPRO_OPC_LNK_SN,
+ TILEPRO_OPC_LW,
+ TILEPRO_OPC_LW_SN,
+ TILEPRO_OPC_LW_NA,
+ TILEPRO_OPC_LW_NA_SN,
+ TILEPRO_OPC_LWADD,
+ TILEPRO_OPC_LWADD_SN,
+ TILEPRO_OPC_LWADD_NA,
+ TILEPRO_OPC_LWADD_NA_SN,
+ TILEPRO_OPC_MAXB_U,
+ TILEPRO_OPC_MAXB_U_SN,
+ TILEPRO_OPC_MAXH,
+ TILEPRO_OPC_MAXH_SN,
+ TILEPRO_OPC_MAXIB_U,
+ TILEPRO_OPC_MAXIB_U_SN,
+ TILEPRO_OPC_MAXIH,
+ TILEPRO_OPC_MAXIH_SN,
+ TILEPRO_OPC_MF,
+ TILEPRO_OPC_MFSPR,
+ TILEPRO_OPC_MINB_U,
+ TILEPRO_OPC_MINB_U_SN,
+ TILEPRO_OPC_MINH,
+ TILEPRO_OPC_MINH_SN,
+ TILEPRO_OPC_MINIB_U,
+ TILEPRO_OPC_MINIB_U_SN,
+ TILEPRO_OPC_MINIH,
+ TILEPRO_OPC_MINIH_SN,
+ TILEPRO_OPC_MM,
+ TILEPRO_OPC_MNZ,
+ TILEPRO_OPC_MNZ_SN,
+ TILEPRO_OPC_MNZB,
+ TILEPRO_OPC_MNZB_SN,
+ TILEPRO_OPC_MNZH,
+ TILEPRO_OPC_MNZH_SN,
+ TILEPRO_OPC_MTSPR,
+ TILEPRO_OPC_MULHH_SS,
+ TILEPRO_OPC_MULHH_SS_SN,
+ TILEPRO_OPC_MULHH_SU,
+ TILEPRO_OPC_MULHH_SU_SN,
+ TILEPRO_OPC_MULHH_UU,
+ TILEPRO_OPC_MULHH_UU_SN,
+ TILEPRO_OPC_MULHHA_SS,
+ TILEPRO_OPC_MULHHA_SS_SN,
+ TILEPRO_OPC_MULHHA_SU,
+ TILEPRO_OPC_MULHHA_SU_SN,
+ TILEPRO_OPC_MULHHA_UU,
+ TILEPRO_OPC_MULHHA_UU_SN,
+ TILEPRO_OPC_MULHHSA_UU,
+ TILEPRO_OPC_MULHHSA_UU_SN,
+ TILEPRO_OPC_MULHL_SS,
+ TILEPRO_OPC_MULHL_SS_SN,
+ TILEPRO_OPC_MULHL_SU,
+ TILEPRO_OPC_MULHL_SU_SN,
+ TILEPRO_OPC_MULHL_US,
+ TILEPRO_OPC_MULHL_US_SN,
+ TILEPRO_OPC_MULHL_UU,
+ TILEPRO_OPC_MULHL_UU_SN,
+ TILEPRO_OPC_MULHLA_SS,
+ TILEPRO_OPC_MULHLA_SS_SN,
+ TILEPRO_OPC_MULHLA_SU,
+ TILEPRO_OPC_MULHLA_SU_SN,
+ TILEPRO_OPC_MULHLA_US,
+ TILEPRO_OPC_MULHLA_US_SN,
+ TILEPRO_OPC_MULHLA_UU,
+ TILEPRO_OPC_MULHLA_UU_SN,
+ TILEPRO_OPC_MULHLSA_UU,
+ TILEPRO_OPC_MULHLSA_UU_SN,
+ TILEPRO_OPC_MULLL_SS,
+ TILEPRO_OPC_MULLL_SS_SN,
+ TILEPRO_OPC_MULLL_SU,
+ TILEPRO_OPC_MULLL_SU_SN,
+ TILEPRO_OPC_MULLL_UU,
+ TILEPRO_OPC_MULLL_UU_SN,
+ TILEPRO_OPC_MULLLA_SS,
+ TILEPRO_OPC_MULLLA_SS_SN,
+ TILEPRO_OPC_MULLLA_SU,
+ TILEPRO_OPC_MULLLA_SU_SN,
+ TILEPRO_OPC_MULLLA_UU,
+ TILEPRO_OPC_MULLLA_UU_SN,
+ TILEPRO_OPC_MULLLSA_UU,
+ TILEPRO_OPC_MULLLSA_UU_SN,
+ TILEPRO_OPC_MVNZ,
+ TILEPRO_OPC_MVNZ_SN,
+ TILEPRO_OPC_MVZ,
+ TILEPRO_OPC_MVZ_SN,
+ TILEPRO_OPC_MZ,
+ TILEPRO_OPC_MZ_SN,
+ TILEPRO_OPC_MZB,
+ TILEPRO_OPC_MZB_SN,
+ TILEPRO_OPC_MZH,
+ TILEPRO_OPC_MZH_SN,
+ TILEPRO_OPC_NAP,
+ TILEPRO_OPC_NOP,
+ TILEPRO_OPC_NOR,
+ TILEPRO_OPC_NOR_SN,
+ TILEPRO_OPC_OR,
+ TILEPRO_OPC_OR_SN,
+ TILEPRO_OPC_ORI,
+ TILEPRO_OPC_ORI_SN,
+ TILEPRO_OPC_PACKBS_U,
+ TILEPRO_OPC_PACKBS_U_SN,
+ TILEPRO_OPC_PACKHB,
+ TILEPRO_OPC_PACKHB_SN,
+ TILEPRO_OPC_PACKHS,
+ TILEPRO_OPC_PACKHS_SN,
+ TILEPRO_OPC_PACKLB,
+ TILEPRO_OPC_PACKLB_SN,
+ TILEPRO_OPC_PCNT,
+ TILEPRO_OPC_PCNT_SN,
+ TILEPRO_OPC_RL,
+ TILEPRO_OPC_RL_SN,
+ TILEPRO_OPC_RLI,
+ TILEPRO_OPC_RLI_SN,
+ TILEPRO_OPC_S1A,
+ TILEPRO_OPC_S1A_SN,
+ TILEPRO_OPC_S2A,
+ TILEPRO_OPC_S2A_SN,
+ TILEPRO_OPC_S3A,
+ TILEPRO_OPC_S3A_SN,
+ TILEPRO_OPC_SADAB_U,
+ TILEPRO_OPC_SADAB_U_SN,
+ TILEPRO_OPC_SADAH,
+ TILEPRO_OPC_SADAH_SN,
+ TILEPRO_OPC_SADAH_U,
+ TILEPRO_OPC_SADAH_U_SN,
+ TILEPRO_OPC_SADB_U,
+ TILEPRO_OPC_SADB_U_SN,
+ TILEPRO_OPC_SADH,
+ TILEPRO_OPC_SADH_SN,
+ TILEPRO_OPC_SADH_U,
+ TILEPRO_OPC_SADH_U_SN,
+ TILEPRO_OPC_SB,
+ TILEPRO_OPC_SBADD,
+ TILEPRO_OPC_SEQ,
+ TILEPRO_OPC_SEQ_SN,
+ TILEPRO_OPC_SEQB,
+ TILEPRO_OPC_SEQB_SN,
+ TILEPRO_OPC_SEQH,
+ TILEPRO_OPC_SEQH_SN,
+ TILEPRO_OPC_SEQI,
+ TILEPRO_OPC_SEQI_SN,
+ TILEPRO_OPC_SEQIB,
+ TILEPRO_OPC_SEQIB_SN,
+ TILEPRO_OPC_SEQIH,
+ TILEPRO_OPC_SEQIH_SN,
+ TILEPRO_OPC_SH,
+ TILEPRO_OPC_SHADD,
+ TILEPRO_OPC_SHL,
+ TILEPRO_OPC_SHL_SN,
+ TILEPRO_OPC_SHLB,
+ TILEPRO_OPC_SHLB_SN,
+ TILEPRO_OPC_SHLH,
+ TILEPRO_OPC_SHLH_SN,
+ TILEPRO_OPC_SHLI,
+ TILEPRO_OPC_SHLI_SN,
+ TILEPRO_OPC_SHLIB,
+ TILEPRO_OPC_SHLIB_SN,
+ TILEPRO_OPC_SHLIH,
+ TILEPRO_OPC_SHLIH_SN,
+ TILEPRO_OPC_SHR,
+ TILEPRO_OPC_SHR_SN,
+ TILEPRO_OPC_SHRB,
+ TILEPRO_OPC_SHRB_SN,
+ TILEPRO_OPC_SHRH,
+ TILEPRO_OPC_SHRH_SN,
+ TILEPRO_OPC_SHRI,
+ TILEPRO_OPC_SHRI_SN,
+ TILEPRO_OPC_SHRIB,
+ TILEPRO_OPC_SHRIB_SN,
+ TILEPRO_OPC_SHRIH,
+ TILEPRO_OPC_SHRIH_SN,
+ TILEPRO_OPC_SLT,
+ TILEPRO_OPC_SLT_SN,
+ TILEPRO_OPC_SLT_U,
+ TILEPRO_OPC_SLT_U_SN,
+ TILEPRO_OPC_SLTB,
+ TILEPRO_OPC_SLTB_SN,
+ TILEPRO_OPC_SLTB_U,
+ TILEPRO_OPC_SLTB_U_SN,
+ TILEPRO_OPC_SLTE,
+ TILEPRO_OPC_SLTE_SN,
+ TILEPRO_OPC_SLTE_U,
+ TILEPRO_OPC_SLTE_U_SN,
+ TILEPRO_OPC_SLTEB,
+ TILEPRO_OPC_SLTEB_SN,
+ TILEPRO_OPC_SLTEB_U,
+ TILEPRO_OPC_SLTEB_U_SN,
+ TILEPRO_OPC_SLTEH,
+ TILEPRO_OPC_SLTEH_SN,
+ TILEPRO_OPC_SLTEH_U,
+ TILEPRO_OPC_SLTEH_U_SN,
+ TILEPRO_OPC_SLTH,
+ TILEPRO_OPC_SLTH_SN,
+ TILEPRO_OPC_SLTH_U,
+ TILEPRO_OPC_SLTH_U_SN,
+ TILEPRO_OPC_SLTI,
+ TILEPRO_OPC_SLTI_SN,
+ TILEPRO_OPC_SLTI_U,
+ TILEPRO_OPC_SLTI_U_SN,
+ TILEPRO_OPC_SLTIB,
+ TILEPRO_OPC_SLTIB_SN,
+ TILEPRO_OPC_SLTIB_U,
+ TILEPRO_OPC_SLTIB_U_SN,
+ TILEPRO_OPC_SLTIH,
+ TILEPRO_OPC_SLTIH_SN,
+ TILEPRO_OPC_SLTIH_U,
+ TILEPRO_OPC_SLTIH_U_SN,
+ TILEPRO_OPC_SNE,
+ TILEPRO_OPC_SNE_SN,
+ TILEPRO_OPC_SNEB,
+ TILEPRO_OPC_SNEB_SN,
+ TILEPRO_OPC_SNEH,
+ TILEPRO_OPC_SNEH_SN,
+ TILEPRO_OPC_SRA,
+ TILEPRO_OPC_SRA_SN,
+ TILEPRO_OPC_SRAB,
+ TILEPRO_OPC_SRAB_SN,
+ TILEPRO_OPC_SRAH,
+ TILEPRO_OPC_SRAH_SN,
+ TILEPRO_OPC_SRAI,
+ TILEPRO_OPC_SRAI_SN,
+ TILEPRO_OPC_SRAIB,
+ TILEPRO_OPC_SRAIB_SN,
+ TILEPRO_OPC_SRAIH,
+ TILEPRO_OPC_SRAIH_SN,
+ TILEPRO_OPC_SUB,
+ TILEPRO_OPC_SUB_SN,
+ TILEPRO_OPC_SUBB,
+ TILEPRO_OPC_SUBB_SN,
+ TILEPRO_OPC_SUBBS_U,
+ TILEPRO_OPC_SUBBS_U_SN,
+ TILEPRO_OPC_SUBH,
+ TILEPRO_OPC_SUBH_SN,
+ TILEPRO_OPC_SUBHS,
+ TILEPRO_OPC_SUBHS_SN,
+ TILEPRO_OPC_SUBS,
+ TILEPRO_OPC_SUBS_SN,
+ TILEPRO_OPC_SW,
+ TILEPRO_OPC_SWADD,
+ TILEPRO_OPC_SWINT0,
+ TILEPRO_OPC_SWINT1,
+ TILEPRO_OPC_SWINT2,
+ TILEPRO_OPC_SWINT3,
+ TILEPRO_OPC_TBLIDXB0,
+ TILEPRO_OPC_TBLIDXB0_SN,
+ TILEPRO_OPC_TBLIDXB1,
+ TILEPRO_OPC_TBLIDXB1_SN,
+ TILEPRO_OPC_TBLIDXB2,
+ TILEPRO_OPC_TBLIDXB2_SN,
+ TILEPRO_OPC_TBLIDXB3,
+ TILEPRO_OPC_TBLIDXB3_SN,
+ TILEPRO_OPC_TNS,
+ TILEPRO_OPC_TNS_SN,
+ TILEPRO_OPC_WH64,
+ TILEPRO_OPC_XOR,
+ TILEPRO_OPC_XOR_SN,
+ TILEPRO_OPC_XORI,
+ TILEPRO_OPC_XORI_SN,
+ TILEPRO_OPC_NONE
+} tilepro_mnemonic;
+
+
+
+
+typedef enum
+{
+ TILEPRO_PIPELINE_X0,
+ TILEPRO_PIPELINE_X1,
+ TILEPRO_PIPELINE_Y0,
+ TILEPRO_PIPELINE_Y1,
+ TILEPRO_PIPELINE_Y2,
+} tilepro_pipeline;
+
+#define tilepro_is_x_pipeline(p) ((int)(p) <= (int)TILEPRO_PIPELINE_X1)
+
+typedef enum
+{
+ TILEPRO_OP_TYPE_REGISTER,
+ TILEPRO_OP_TYPE_IMMEDIATE,
+ TILEPRO_OP_TYPE_ADDRESS,
+ TILEPRO_OP_TYPE_SPR
+} tilepro_operand_type;
+
+struct tilepro_operand
+{
+ /* Is this operand a register, immediate, address or SPR? */
+ tilepro_operand_type type;
+
+ /* The default relocation type for this operand. */
+ signed int default_reloc : 16;
+
+ /* How many bits is this value? (used for range checking) */
+ unsigned int num_bits : 5;
+
+ /* Is the value signed? (used for range checking) */
+ unsigned int is_signed : 1;
+
+ /* Is this operand a source register? */
+ unsigned int is_src_reg : 1;
+
+ /* Is this operand written? (i.e. is it a destination register) */
+ unsigned int is_dest_reg : 1;
+
+ /* Is this operand PC-relative? */
+ unsigned int is_pc_relative : 1;
+
+ /* By how many bits do we right shift the value before inserting? */
+ unsigned int rightshift : 2;
+
+ /* Return the bits for this operand to be ORed into an existing bundle. */
+ tilepro_bundle_bits (*insert) (int op);
+
+ /* Extract this operand and return it. */
+ unsigned int (*extract) (tilepro_bundle_bits bundle);
+};
+
+
+extern const struct tilepro_operand tilepro_operands[];
+
+/* One finite-state machine per pipe for rapid instruction decoding. */
+extern const unsigned short * const
+tilepro_bundle_decoder_fsms[TILEPRO_NUM_PIPELINE_ENCODINGS];
+
+
+struct tilepro_opcode
+{
+ /* The opcode mnemonic, e.g. "add" */
+ const char *name;
+
+ /* The enum value for this mnemonic. */
+ tilepro_mnemonic mnemonic;
+
+ /* A bit mask of which of the five pipes this instruction
+ is compatible with:
+ X0 0x01
+ X1 0x02
+ Y0 0x04
+ Y1 0x08
+ Y2 0x10 */
+ unsigned char pipes;
+
+ /* How many operands are there? */
+ unsigned char num_operands;
+
+ /* Which register does this write implicitly, or TREG_ZERO if none? */
+ unsigned char implicitly_written_register;
+
+ /* Can this be bundled with other instructions (almost always true). */
+ unsigned char can_bundle;
+
+ /* The description of the operands. Each of these is an
+ * index into the tilepro_operands[] table. */
+ unsigned char operands[TILEPRO_NUM_PIPELINE_ENCODINGS][TILEPRO_MAX_OPERANDS];
+
+};
+
+extern const struct tilepro_opcode tilepro_opcodes[];
+
+
+/* Used for non-textual disassembly into structs. */
+struct tilepro_decoded_instruction
+{
+ const struct tilepro_opcode *opcode;
+ const struct tilepro_operand *operands[TILEPRO_MAX_OPERANDS];
+ int operand_values[TILEPRO_MAX_OPERANDS];
+};
+
+
+/* Disassemble a bundle into a struct for machine processing. */
+extern int parse_insn_tilepro(tilepro_bundle_bits bits,
+ unsigned int pc,
+ struct tilepro_decoded_instruction
+ decoded[TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE]);
+
+
+/* Given a set of bundle bits and a specific pipe, returns which
+ * instruction the bundle contains in that pipe.
+ */
+extern const struct tilepro_opcode *
+find_opcode(tilepro_bundle_bits bits, tilepro_pipeline pipe);
+
+
+
+#endif /* opcode_tilepro_h */
diff --git a/arch/tile/include/asm/tile-desc_64.h b/arch/tile/include/asm/tile-desc_64.h
new file mode 100644
index 00000000..1819efcb
--- /dev/null
+++ b/arch/tile/include/asm/tile-desc_64.h
@@ -0,0 +1,483 @@
+/* TILE-Gx opcode information.
+ *
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ *
+ *
+ *
+ *
+ */
+
+#ifndef opcode_tile_h
+#define opcode_tile_h
+
+#include <arch/opcode.h>
+
+
+enum
+{
+ TILEGX_MAX_OPERANDS = 4 /* bfexts */
+};
+
+typedef enum
+{
+ TILEGX_OPC_BPT,
+ TILEGX_OPC_INFO,
+ TILEGX_OPC_INFOL,
+ TILEGX_OPC_MOVE,
+ TILEGX_OPC_MOVEI,
+ TILEGX_OPC_MOVELI,
+ TILEGX_OPC_PREFETCH,
+ TILEGX_OPC_PREFETCH_ADD_L1,
+ TILEGX_OPC_PREFETCH_ADD_L1_FAULT,
+ TILEGX_OPC_PREFETCH_ADD_L2,
+ TILEGX_OPC_PREFETCH_ADD_L2_FAULT,
+ TILEGX_OPC_PREFETCH_ADD_L3,
+ TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+ TILEGX_OPC_PREFETCH_L1,
+ TILEGX_OPC_PREFETCH_L1_FAULT,
+ TILEGX_OPC_PREFETCH_L2,
+ TILEGX_OPC_PREFETCH_L2_FAULT,
+ TILEGX_OPC_PREFETCH_L3,
+ TILEGX_OPC_PREFETCH_L3_FAULT,
+ TILEGX_OPC_RAISE,
+ TILEGX_OPC_ADD,
+ TILEGX_OPC_ADDI,
+ TILEGX_OPC_ADDLI,
+ TILEGX_OPC_ADDX,
+ TILEGX_OPC_ADDXI,
+ TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXSC,
+ TILEGX_OPC_AND,
+ TILEGX_OPC_ANDI,
+ TILEGX_OPC_BEQZ,
+ TILEGX_OPC_BEQZT,
+ TILEGX_OPC_BFEXTS,
+ TILEGX_OPC_BFEXTU,
+ TILEGX_OPC_BFINS,
+ TILEGX_OPC_BGEZ,
+ TILEGX_OPC_BGEZT,
+ TILEGX_OPC_BGTZ,
+ TILEGX_OPC_BGTZT,
+ TILEGX_OPC_BLBC,
+ TILEGX_OPC_BLBCT,
+ TILEGX_OPC_BLBS,
+ TILEGX_OPC_BLBST,
+ TILEGX_OPC_BLEZ,
+ TILEGX_OPC_BLEZT,
+ TILEGX_OPC_BLTZ,
+ TILEGX_OPC_BLTZT,
+ TILEGX_OPC_BNEZ,
+ TILEGX_OPC_BNEZT,
+ TILEGX_OPC_CLZ,
+ TILEGX_OPC_CMOVEQZ,
+ TILEGX_OPC_CMOVNEZ,
+ TILEGX_OPC_CMPEQ,
+ TILEGX_OPC_CMPEQI,
+ TILEGX_OPC_CMPEXCH,
+ TILEGX_OPC_CMPEXCH4,
+ TILEGX_OPC_CMPLES,
+ TILEGX_OPC_CMPLEU,
+ TILEGX_OPC_CMPLTS,
+ TILEGX_OPC_CMPLTSI,
+ TILEGX_OPC_CMPLTU,
+ TILEGX_OPC_CMPLTUI,
+ TILEGX_OPC_CMPNE,
+ TILEGX_OPC_CMUL,
+ TILEGX_OPC_CMULA,
+ TILEGX_OPC_CMULAF,
+ TILEGX_OPC_CMULF,
+ TILEGX_OPC_CMULFR,
+ TILEGX_OPC_CMULH,
+ TILEGX_OPC_CMULHR,
+ TILEGX_OPC_CRC32_32,
+ TILEGX_OPC_CRC32_8,
+ TILEGX_OPC_CTZ,
+ TILEGX_OPC_DBLALIGN,
+ TILEGX_OPC_DBLALIGN2,
+ TILEGX_OPC_DBLALIGN4,
+ TILEGX_OPC_DBLALIGN6,
+ TILEGX_OPC_DRAIN,
+ TILEGX_OPC_DTLBPR,
+ TILEGX_OPC_EXCH,
+ TILEGX_OPC_EXCH4,
+ TILEGX_OPC_FDOUBLE_ADD_FLAGS,
+ TILEGX_OPC_FDOUBLE_ADDSUB,
+ TILEGX_OPC_FDOUBLE_MUL_FLAGS,
+ TILEGX_OPC_FDOUBLE_PACK1,
+ TILEGX_OPC_FDOUBLE_PACK2,
+ TILEGX_OPC_FDOUBLE_SUB_FLAGS,
+ TILEGX_OPC_FDOUBLE_UNPACK_MAX,
+ TILEGX_OPC_FDOUBLE_UNPACK_MIN,
+ TILEGX_OPC_FETCHADD,
+ TILEGX_OPC_FETCHADD4,
+ TILEGX_OPC_FETCHADDGEZ,
+ TILEGX_OPC_FETCHADDGEZ4,
+ TILEGX_OPC_FETCHAND,
+ TILEGX_OPC_FETCHAND4,
+ TILEGX_OPC_FETCHOR,
+ TILEGX_OPC_FETCHOR4,
+ TILEGX_OPC_FINV,
+ TILEGX_OPC_FLUSH,
+ TILEGX_OPC_FLUSHWB,
+ TILEGX_OPC_FNOP,
+ TILEGX_OPC_FSINGLE_ADD1,
+ TILEGX_OPC_FSINGLE_ADDSUB2,
+ TILEGX_OPC_FSINGLE_MUL1,
+ TILEGX_OPC_FSINGLE_MUL2,
+ TILEGX_OPC_FSINGLE_PACK1,
+ TILEGX_OPC_FSINGLE_PACK2,
+ TILEGX_OPC_FSINGLE_SUB1,
+ TILEGX_OPC_ICOH,
+ TILEGX_OPC_ILL,
+ TILEGX_OPC_INV,
+ TILEGX_OPC_IRET,
+ TILEGX_OPC_J,
+ TILEGX_OPC_JAL,
+ TILEGX_OPC_JALR,
+ TILEGX_OPC_JALRP,
+ TILEGX_OPC_JR,
+ TILEGX_OPC_JRP,
+ TILEGX_OPC_LD,
+ TILEGX_OPC_LD1S,
+ TILEGX_OPC_LD1S_ADD,
+ TILEGX_OPC_LD1U,
+ TILEGX_OPC_LD1U_ADD,
+ TILEGX_OPC_LD2S,
+ TILEGX_OPC_LD2S_ADD,
+ TILEGX_OPC_LD2U,
+ TILEGX_OPC_LD2U_ADD,
+ TILEGX_OPC_LD4S,
+ TILEGX_OPC_LD4S_ADD,
+ TILEGX_OPC_LD4U,
+ TILEGX_OPC_LD4U_ADD,
+ TILEGX_OPC_LD_ADD,
+ TILEGX_OPC_LDNA,
+ TILEGX_OPC_LDNA_ADD,
+ TILEGX_OPC_LDNT,
+ TILEGX_OPC_LDNT1S,
+ TILEGX_OPC_LDNT1S_ADD,
+ TILEGX_OPC_LDNT1U,
+ TILEGX_OPC_LDNT1U_ADD,
+ TILEGX_OPC_LDNT2S,
+ TILEGX_OPC_LDNT2S_ADD,
+ TILEGX_OPC_LDNT2U,
+ TILEGX_OPC_LDNT2U_ADD,
+ TILEGX_OPC_LDNT4S,
+ TILEGX_OPC_LDNT4S_ADD,
+ TILEGX_OPC_LDNT4U,
+ TILEGX_OPC_LDNT4U_ADD,
+ TILEGX_OPC_LDNT_ADD,
+ TILEGX_OPC_LNK,
+ TILEGX_OPC_MF,
+ TILEGX_OPC_MFSPR,
+ TILEGX_OPC_MM,
+ TILEGX_OPC_MNZ,
+ TILEGX_OPC_MTSPR,
+ TILEGX_OPC_MUL_HS_HS,
+ TILEGX_OPC_MUL_HS_HU,
+ TILEGX_OPC_MUL_HS_LS,
+ TILEGX_OPC_MUL_HS_LU,
+ TILEGX_OPC_MUL_HU_HU,
+ TILEGX_OPC_MUL_HU_LS,
+ TILEGX_OPC_MUL_HU_LU,
+ TILEGX_OPC_MUL_LS_LS,
+ TILEGX_OPC_MUL_LS_LU,
+ TILEGX_OPC_MUL_LU_LU,
+ TILEGX_OPC_MULA_HS_HS,
+ TILEGX_OPC_MULA_HS_HU,
+ TILEGX_OPC_MULA_HS_LS,
+ TILEGX_OPC_MULA_HS_LU,
+ TILEGX_OPC_MULA_HU_HU,
+ TILEGX_OPC_MULA_HU_LS,
+ TILEGX_OPC_MULA_HU_LU,
+ TILEGX_OPC_MULA_LS_LS,
+ TILEGX_OPC_MULA_LS_LU,
+ TILEGX_OPC_MULA_LU_LU,
+ TILEGX_OPC_MULAX,
+ TILEGX_OPC_MULX,
+ TILEGX_OPC_MZ,
+ TILEGX_OPC_NAP,
+ TILEGX_OPC_NOP,
+ TILEGX_OPC_NOR,
+ TILEGX_OPC_OR,
+ TILEGX_OPC_ORI,
+ TILEGX_OPC_PCNT,
+ TILEGX_OPC_REVBITS,
+ TILEGX_OPC_REVBYTES,
+ TILEGX_OPC_ROTL,
+ TILEGX_OPC_ROTLI,
+ TILEGX_OPC_SHL,
+ TILEGX_OPC_SHL16INSLI,
+ TILEGX_OPC_SHL1ADD,
+ TILEGX_OPC_SHL1ADDX,
+ TILEGX_OPC_SHL2ADD,
+ TILEGX_OPC_SHL2ADDX,
+ TILEGX_OPC_SHL3ADD,
+ TILEGX_OPC_SHL3ADDX,
+ TILEGX_OPC_SHLI,
+ TILEGX_OPC_SHLX,
+ TILEGX_OPC_SHLXI,
+ TILEGX_OPC_SHRS,
+ TILEGX_OPC_SHRSI,
+ TILEGX_OPC_SHRU,
+ TILEGX_OPC_SHRUI,
+ TILEGX_OPC_SHRUX,
+ TILEGX_OPC_SHRUXI,
+ TILEGX_OPC_SHUFFLEBYTES,
+ TILEGX_OPC_ST,
+ TILEGX_OPC_ST1,
+ TILEGX_OPC_ST1_ADD,
+ TILEGX_OPC_ST2,
+ TILEGX_OPC_ST2_ADD,
+ TILEGX_OPC_ST4,
+ TILEGX_OPC_ST4_ADD,
+ TILEGX_OPC_ST_ADD,
+ TILEGX_OPC_STNT,
+ TILEGX_OPC_STNT1,
+ TILEGX_OPC_STNT1_ADD,
+ TILEGX_OPC_STNT2,
+ TILEGX_OPC_STNT2_ADD,
+ TILEGX_OPC_STNT4,
+ TILEGX_OPC_STNT4_ADD,
+ TILEGX_OPC_STNT_ADD,
+ TILEGX_OPC_SUB,
+ TILEGX_OPC_SUBX,
+ TILEGX_OPC_SUBXSC,
+ TILEGX_OPC_SWINT0,
+ TILEGX_OPC_SWINT1,
+ TILEGX_OPC_SWINT2,
+ TILEGX_OPC_SWINT3,
+ TILEGX_OPC_TBLIDXB0,
+ TILEGX_OPC_TBLIDXB1,
+ TILEGX_OPC_TBLIDXB2,
+ TILEGX_OPC_TBLIDXB3,
+ TILEGX_OPC_V1ADD,
+ TILEGX_OPC_V1ADDI,
+ TILEGX_OPC_V1ADDUC,
+ TILEGX_OPC_V1ADIFFU,
+ TILEGX_OPC_V1AVGU,
+ TILEGX_OPC_V1CMPEQ,
+ TILEGX_OPC_V1CMPEQI,
+ TILEGX_OPC_V1CMPLES,
+ TILEGX_OPC_V1CMPLEU,
+ TILEGX_OPC_V1CMPLTS,
+ TILEGX_OPC_V1CMPLTSI,
+ TILEGX_OPC_V1CMPLTU,
+ TILEGX_OPC_V1CMPLTUI,
+ TILEGX_OPC_V1CMPNE,
+ TILEGX_OPC_V1DDOTPU,
+ TILEGX_OPC_V1DDOTPUA,
+ TILEGX_OPC_V1DDOTPUS,
+ TILEGX_OPC_V1DDOTPUSA,
+ TILEGX_OPC_V1DOTP,
+ TILEGX_OPC_V1DOTPA,
+ TILEGX_OPC_V1DOTPU,
+ TILEGX_OPC_V1DOTPUA,
+ TILEGX_OPC_V1DOTPUS,
+ TILEGX_OPC_V1DOTPUSA,
+ TILEGX_OPC_V1INT_H,
+ TILEGX_OPC_V1INT_L,
+ TILEGX_OPC_V1MAXU,
+ TILEGX_OPC_V1MAXUI,
+ TILEGX_OPC_V1MINU,
+ TILEGX_OPC_V1MINUI,
+ TILEGX_OPC_V1MNZ,
+ TILEGX_OPC_V1MULTU,
+ TILEGX_OPC_V1MULU,
+ TILEGX_OPC_V1MULUS,
+ TILEGX_OPC_V1MZ,
+ TILEGX_OPC_V1SADAU,
+ TILEGX_OPC_V1SADU,
+ TILEGX_OPC_V1SHL,
+ TILEGX_OPC_V1SHLI,
+ TILEGX_OPC_V1SHRS,
+ TILEGX_OPC_V1SHRSI,
+ TILEGX_OPC_V1SHRU,
+ TILEGX_OPC_V1SHRUI,
+ TILEGX_OPC_V1SUB,
+ TILEGX_OPC_V1SUBUC,
+ TILEGX_OPC_V2ADD,
+ TILEGX_OPC_V2ADDI,
+ TILEGX_OPC_V2ADDSC,
+ TILEGX_OPC_V2ADIFFS,
+ TILEGX_OPC_V2AVGS,
+ TILEGX_OPC_V2CMPEQ,
+ TILEGX_OPC_V2CMPEQI,
+ TILEGX_OPC_V2CMPLES,
+ TILEGX_OPC_V2CMPLEU,
+ TILEGX_OPC_V2CMPLTS,
+ TILEGX_OPC_V2CMPLTSI,
+ TILEGX_OPC_V2CMPLTU,
+ TILEGX_OPC_V2CMPLTUI,
+ TILEGX_OPC_V2CMPNE,
+ TILEGX_OPC_V2DOTP,
+ TILEGX_OPC_V2DOTPA,
+ TILEGX_OPC_V2INT_H,
+ TILEGX_OPC_V2INT_L,
+ TILEGX_OPC_V2MAXS,
+ TILEGX_OPC_V2MAXSI,
+ TILEGX_OPC_V2MINS,
+ TILEGX_OPC_V2MINSI,
+ TILEGX_OPC_V2MNZ,
+ TILEGX_OPC_V2MULFSC,
+ TILEGX_OPC_V2MULS,
+ TILEGX_OPC_V2MULTS,
+ TILEGX_OPC_V2MZ,
+ TILEGX_OPC_V2PACKH,
+ TILEGX_OPC_V2PACKL,
+ TILEGX_OPC_V2PACKUC,
+ TILEGX_OPC_V2SADAS,
+ TILEGX_OPC_V2SADAU,
+ TILEGX_OPC_V2SADS,
+ TILEGX_OPC_V2SADU,
+ TILEGX_OPC_V2SHL,
+ TILEGX_OPC_V2SHLI,
+ TILEGX_OPC_V2SHLSC,
+ TILEGX_OPC_V2SHRS,
+ TILEGX_OPC_V2SHRSI,
+ TILEGX_OPC_V2SHRU,
+ TILEGX_OPC_V2SHRUI,
+ TILEGX_OPC_V2SUB,
+ TILEGX_OPC_V2SUBSC,
+ TILEGX_OPC_V4ADD,
+ TILEGX_OPC_V4ADDSC,
+ TILEGX_OPC_V4INT_H,
+ TILEGX_OPC_V4INT_L,
+ TILEGX_OPC_V4PACKSC,
+ TILEGX_OPC_V4SHL,
+ TILEGX_OPC_V4SHLSC,
+ TILEGX_OPC_V4SHRS,
+ TILEGX_OPC_V4SHRU,
+ TILEGX_OPC_V4SUB,
+ TILEGX_OPC_V4SUBSC,
+ TILEGX_OPC_WH64,
+ TILEGX_OPC_XOR,
+ TILEGX_OPC_XORI,
+ TILEGX_OPC_NONE
+} tilegx_mnemonic;
+
+
+
+typedef enum
+{
+ TILEGX_PIPELINE_X0,
+ TILEGX_PIPELINE_X1,
+ TILEGX_PIPELINE_Y0,
+ TILEGX_PIPELINE_Y1,
+ TILEGX_PIPELINE_Y2,
+} tilegx_pipeline;
+
+#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1)
+
+typedef enum
+{
+ TILEGX_OP_TYPE_REGISTER,
+ TILEGX_OP_TYPE_IMMEDIATE,
+ TILEGX_OP_TYPE_ADDRESS,
+ TILEGX_OP_TYPE_SPR
+} tilegx_operand_type;
+
+struct tilegx_operand
+{
+ /* Is this operand a register, immediate, address or SPR? */
+ tilegx_operand_type type;
+
+ /* The default relocation type for this operand. */
+ signed int default_reloc : 16;
+
+ /* How many bits is this value? (used for range checking) */
+ unsigned int num_bits : 5;
+
+ /* Is the value signed? (used for range checking) */
+ unsigned int is_signed : 1;
+
+ /* Is this operand a source register? */
+ unsigned int is_src_reg : 1;
+
+ /* Is this operand written? (i.e. is it a destination register) */
+ unsigned int is_dest_reg : 1;
+
+ /* Is this operand PC-relative? */
+ unsigned int is_pc_relative : 1;
+
+ /* By how many bits do we right shift the value before inserting? */
+ unsigned int rightshift : 2;
+
+ /* Return the bits for this operand to be ORed into an existing bundle. */
+ tilegx_bundle_bits (*insert) (int op);
+
+ /* Extract this operand and return it. */
+ unsigned int (*extract) (tilegx_bundle_bits bundle);
+};
+
+
+extern const struct tilegx_operand tilegx_operands[];
+
+/* One finite-state machine per pipe for rapid instruction decoding. */
+extern const unsigned short * const
+tilegx_bundle_decoder_fsms[TILEGX_NUM_PIPELINE_ENCODINGS];
+
+
+struct tilegx_opcode
+{
+ /* The opcode mnemonic, e.g. "add" */
+ const char *name;
+
+ /* The enum value for this mnemonic. */
+ tilegx_mnemonic mnemonic;
+
+ /* A bit mask of which of the five pipes this instruction
+ is compatible with:
+ X0 0x01
+ X1 0x02
+ Y0 0x04
+ Y1 0x08
+ Y2 0x10 */
+ unsigned char pipes;
+
+ /* How many operands are there? */
+ unsigned char num_operands;
+
+ /* Which register does this write implicitly, or TREG_ZERO if none? */
+ unsigned char implicitly_written_register;
+
+ /* Can this be bundled with other instructions (almost always true). */
+ unsigned char can_bundle;
+
+ /* The description of the operands. Each of these is an
+ * index into the tilegx_operands[] table. */
+ unsigned char operands[TILEGX_NUM_PIPELINE_ENCODINGS][TILEGX_MAX_OPERANDS];
+
+};
+
+extern const struct tilegx_opcode tilegx_opcodes[];
+
+/* Used for non-textual disassembly into structs. */
+struct tilegx_decoded_instruction
+{
+ const struct tilegx_opcode *opcode;
+ const struct tilegx_operand *operands[TILEGX_MAX_OPERANDS];
+ long long operand_values[TILEGX_MAX_OPERANDS];
+};
+
+
+/* Disassemble a bundle into a struct for machine processing. */
+extern int parse_insn_tilegx(tilegx_bundle_bits bits,
+ unsigned long long pc,
+ struct tilegx_decoded_instruction
+ decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]);
+
+
+
+#endif /* opcode_tile_h */
diff --git a/arch/tile/include/asm/timex.h b/arch/tile/include/asm/timex.h
new file mode 100644
index 00000000..dc987d53
--- /dev/null
+++ b/arch/tile/include/asm/timex.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_TIMEX_H
+#define _ASM_TILE_TIMEX_H
+
+/*
+ * This rate should be a multiple of the possible HZ values (100, 250, 1000)
+ * and a fraction of the possible hardware timer frequencies. Our timer
+ * frequency is highly tunable but also quite precise, so for the primary use
+ * of this value (setting ACT_HZ from HZ) we just pick a value that causes
+ * ACT_HZ to be set to HZ. We make the value somewhat large just to be
+ * more robust in case someone tries out a new value of HZ.
+ */
+#define CLOCK_TICK_RATE 1000000
+
+typedef unsigned long long cycles_t;
+
+#if CHIP_HAS_SPLIT_CYCLE()
+cycles_t get_cycles(void);
+#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW)
+#else
+static inline cycles_t get_cycles(void)
+{
+ return __insn_mfspr(SPR_CYCLE);
+}
+#define get_cycles_low() __insn_mfspr(SPR_CYCLE) /* just get all 64 bits */
+#endif
+
+cycles_t get_clock_rate(void);
+
+/* Convert nanoseconds to core clock cycles. */
+cycles_t ns2cycles(unsigned long nsecs);
+
+/* Called at cpu initialization to set some low-level constants. */
+void setup_clock(void);
+
+/* Called at cpu initialization to start the tile-timer clock device. */
+void setup_tile_timer(void);
+
+#endif /* _ASM_TILE_TIMEX_H */
diff --git a/arch/tile/include/asm/tlb.h b/arch/tile/include/asm/tlb.h
new file mode 100644
index 00000000..4a891a1a
--- /dev/null
+++ b/arch/tile/include/asm/tlb.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_TLB_H
+#define _ASM_TILE_TLB_H
+
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma) do { } while (0)
+#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+
+#include <asm-generic/tlb.h>
+
+#endif /* _ASM_TILE_TLB_H */
diff --git a/arch/tile/include/asm/tlbflush.h b/arch/tile/include/asm/tlbflush.h
new file mode 100644
index 00000000..96199d21
--- /dev/null
+++ b/arch/tile/include/asm/tlbflush.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_TLBFLUSH_H
+#define _ASM_TILE_TLBFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+#include <hv/hypervisor.h>
+
+/*
+ * Rather than associating each mm with its own ASID, we just use
+ * ASIDs to allow us to lazily flush the TLB when we switch mms.
+ * This way we only have to do an actual TLB flush on mm switch
+ * every time we wrap ASIDs, not every single time we switch.
+ *
+ * FIXME: We might improve performance by keeping ASIDs around
+ * properly, though since the hypervisor direct-maps VAs to TSB
+ * entries, we're likely to have lost at least the executable page
+ * mappings by the time we switch back to the original mm.
+ */
+DECLARE_PER_CPU(int, current_asid);
+
+/* The hypervisor tells us what ASIDs are available to us. */
+extern int min_asid, max_asid;
+
+static inline unsigned long hv_page_size(const struct vm_area_struct *vma)
+{
+ return (vma->vm_flags & VM_HUGETLB) ? HPAGE_SIZE : PAGE_SIZE;
+}
+
+/* Pass as vma pointer for non-executable mapping, if no vma available. */
+#define FLUSH_NONEXEC ((const struct vm_area_struct *)-1UL)
+
+/* Flush one user page on this cpu; icache too unless vma is non-executable. */
+static inline void local_flush_tlb_page(const struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long page_size)
+{
+ int rc = hv_flush_page(addr, page_size);
+ if (rc < 0)
+ panic("hv_flush_page(%#lx,%#lx) failed: %d",
+ addr, page_size, rc);
+ if (!vma || (vma != FLUSH_NONEXEC && (vma->vm_flags & VM_EXEC)))
+ __flush_icache();
+}
+
+/* Flush a range of user pages on this cpu; icache too unless vma is non-exec. */
+static inline void local_flush_tlb_pages(const struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long page_size,
+ unsigned long len)
+{
+ int rc = hv_flush_pages(addr, page_size, len);
+ if (rc < 0)
+ panic("hv_flush_pages(%#lx,%#lx,%#lx) failed: %d",
+ addr, page_size, len, rc);
+ if (!vma || (vma != FLUSH_NONEXEC && (vma->vm_flags & VM_EXEC)))
+ __flush_icache();
+}
+
+/* Flush all user pages on this cpu. */
+static inline void local_flush_tlb(void)
+{
+ int rc = hv_flush_all(1); /* preserve global mappings */
+ if (rc < 0)
+ panic("hv_flush_all(1) failed: %d", rc);
+ __flush_icache();
+}
+
+/*
+ * Global pages have to be flushed a bit differently. Not a real
+ * performance problem because this does not happen often.
+ */
+static inline void local_flush_tlb_all(void)
+{
+ int i;
+ for (i = 0; ; ++i) {
+ HV_VirtAddrRange r = hv_inquire_virtual(i);
+ if (r.size == 0)
+ break;
+ local_flush_tlb_pages(NULL, r.start, PAGE_SIZE, r.size);
+ local_flush_tlb_pages(NULL, r.start, HPAGE_SIZE, r.size);
+ }
+}
+
+/*
+ * TLB flushing:
+ *
+ * - flush_tlb() flushes the current mm struct's TLBs
+ * - flush_tlb_all() flushes all processes' TLBs
+ * - flush_tlb_mm(mm) flushes the specified mm context's TLBs
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ * - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus
+ *
+ * Here (as in vm_area_struct), "end" means the first byte after
+ * our end address.
+ */
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_page(const struct vm_area_struct *, unsigned long);
+extern void flush_tlb_page_mm(const struct vm_area_struct *,
+ struct mm_struct *, unsigned long);
+extern void flush_tlb_range(const struct vm_area_struct *,
+ unsigned long start, unsigned long end);
+
+#define flush_tlb() flush_tlb_current_task()
+
+#endif /* _ASM_TILE_TLBFLUSH_H */
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h
new file mode 100644
index 00000000..6fdd0c86
--- /dev/null
+++ b/arch/tile/include/asm/topology.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_TOPOLOGY_H
+#define _ASM_TILE_TOPOLOGY_H
+
+#ifdef CONFIG_NUMA
+
+#include <linux/cpumask.h>
+
+/* Mappings between logical cpu number and node number. */
+extern struct cpumask node_2_cpu_mask[];
+extern char cpu_2_node[];
+
+/* Returns the number of the node containing CPU 'cpu'. */
+static inline int cpu_to_node(int cpu)
+{
+ return cpu_2_node[cpu];
+}
+
+/*
+ * Returns the number of the node containing Node 'node'.
+ * This architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
+/* Returns a bitmask of CPUs on Node 'node'. */
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+ return &node_2_cpu_mask[node];
+}
+
+/* For now, use numa node -1 for global allocation. */
+#define pcibus_to_node(bus) ((void)(bus), -1)
+
+/*
+ * TILE architecture has many cores integrated in one processor, so we need
+ * to set up a bigger balance_interval for both CPU/NODE scheduling domains to
+ * reduce process scheduling costs.
+ */
+
+/* sched_domains SD_CPU_INIT for TILE architecture */
+#define SD_CPU_INIT (struct sched_domain) { \
+ .min_interval = 4, \
+ .max_interval = 128, \
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+ .newidle_idx = 0, \
+ .wake_idx = 0, \
+ .forkexec_idx = 0, \
+ \
+ .flags = 1*SD_LOAD_BALANCE \
+ | 1*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_EXEC \
+ | 1*SD_BALANCE_FORK \
+ | 0*SD_BALANCE_WAKE \
+ | 0*SD_WAKE_AFFINE \
+ | 0*SD_PREFER_LOCAL \
+ | 0*SD_SHARE_CPUPOWER \
+ | 0*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 32, \
+}
+
+/* sched_domains SD_NODE_INIT for TILE architecture */
+#define SD_NODE_INIT (struct sched_domain) { \
+ .min_interval = 16, \
+ .max_interval = 512, \
+ .busy_factor = 32, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+ .busy_idx = 3, \
+ .idle_idx = 1, \
+ .newidle_idx = 2, \
+ .wake_idx = 1, \
+ .flags = 1*SD_LOAD_BALANCE \
+ | 1*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_EXEC \
+ | 1*SD_BALANCE_FORK \
+ | 0*SD_BALANCE_WAKE \
+ | 0*SD_WAKE_AFFINE \
+ | 0*SD_PREFER_LOCAL \
+ | 0*SD_SHARE_CPUPOWER \
+ | 0*SD_SHARE_PKG_RESOURCES \
+ | 1*SD_SERIALIZE \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 128, \
+}
+
+/* By definition, we create nodes based on online memory. */
+#define node_has_online_mem(nid) 1
+
+#endif /* CONFIG_NUMA */
+
+#include <asm-generic/topology.h>
+
+#ifdef CONFIG_SMP
+#define topology_physical_package_id(cpu) ((void)(cpu), 0)
+#define topology_core_id(cpu) (cpu)
+#define topology_core_cpumask(cpu) ((void)(cpu), cpu_online_mask)
+#define topology_thread_cpumask(cpu) cpumask_of(cpu)
+
+/* indicates that pointers to the topology struct cpumask maps are valid */
+#define arch_provides_topology_pointers yes
+#endif
+
+#endif /* _ASM_TILE_TOPOLOGY_H */
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
new file mode 100644
index 00000000..e28c3df4
--- /dev/null
+++ b/arch/tile/include/asm/traps.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_TRAPS_H
+#define _ASM_TILE_TRAPS_H
+
+#include <arch/chip.h>
+
+/* mm/fault.c */
+void do_page_fault(struct pt_regs *, int fault_num,
+ unsigned long address, unsigned long write);
+#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+void do_async_page_fault(struct pt_regs *);
+#endif
+
+#ifndef __tilegx__
+/*
+ * We return this structure in registers to avoid having to write
+ * additional save/restore code in the intvec.S caller.
+ */
+struct intvec_state {
+ void *handler;
+ unsigned long vecnum;
+ unsigned long fault_num;
+ unsigned long info;
+ unsigned long retval;
+};
+struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
+ unsigned long address,
+ unsigned long info);
+#endif
+
+/* kernel/traps.c */
+void do_trap(struct pt_regs *, int fault_num, unsigned long reason);
+void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+
+/* kernel/time.c */
+void do_timer_interrupt(struct pt_regs *, int fault_num);
+
+/* kernel/messaging.c */
+void hv_message_intr(struct pt_regs *, int intnum);
+
+/* kernel/irq.c */
+void tile_dev_intr(struct pt_regs *, int intnum);
+
+#ifdef CONFIG_HARDWALL
+/* kernel/hardwall.c */
+void do_hardwall_trap(struct pt_regs *, int fault_num);
+#endif
+
+/* kernel/ptrace.c */
+void do_breakpoint(struct pt_regs *, int fault_num);
+
+
+#ifdef __tilegx__
+/* kernel/single_step.c */
+void gx_singlestep_handle(struct pt_regs *, int fault_num);
+
+/* kernel/intvec_64.S */
+void fill_ra_stack(void);
+#endif
+
+#endif /* _ASM_TILE_TRAPS_H */
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
new file mode 100644
index 00000000..ef34d2ca
--- /dev/null
+++ b/arch/tile/include/asm/uaccess.h
@@ -0,0 +1,580 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_UACCESS_H
+#define _ASM_TILE_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm-generic/uaccess-unaligned.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not. If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+#define MAKE_MM_SEG(a) ((mm_segment_t) { (a) })
+
+#define KERNEL_DS MAKE_MM_SEG(-1UL)
+#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds() (KERNEL_DS)
+#define get_fs() (current_thread_info()->addr_limit)
+#define set_fs(x) (current_thread_info()->addr_limit = (x))
+
+#define segment_eq(a, b) ((a).seg == (b).seg)
+
+#ifndef __tilegx__
+/*
+ * We could allow mapping all 16 MB at 0xfc000000, but we set up a
+ * special hack in arch_setup_additional_pages() to auto-create a mapping
+ * for the first 16 KB, and it would seem strange to have different
+ * user-accessible semantics for memory at 0xfc000000 and above 0xfc004000.
+ */
+static inline int is_arch_mappable_range(unsigned long addr,
+ unsigned long size)
+{
+ return (addr >= MEM_USER_INTRPT &&
+ addr < (MEM_USER_INTRPT + INTRPT_SIZE) &&
+ size <= (MEM_USER_INTRPT + INTRPT_SIZE) - addr);
+}
+#define is_arch_mappable_range is_arch_mappable_range
+#else
+#define is_arch_mappable_range(addr, size) 0
+#endif
+
+/*
+ * Test whether a block of memory is a valid user space address.
+ * Returns 0 if the range is valid, nonzero otherwise.
+ */
+int __range_ok(unsigned long addr, unsigned long size);
+
+/**
+ * access_ok: - Checks if a user space pointer is valid
+ * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
+ * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
+ * to write to a block, it is always safe to read from it.
+ * @addr: User space pointer to start of block to check
+ * @size: Size of block to check
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Checks if a pointer to a block of memory in user space is valid.
+ *
+ * Returns true (nonzero) if the memory block may be valid, false (zero)
+ * if it is definitely invalid.
+ *
+ * Note that, depending on architecture, this function probably just
+ * checks that the pointer is in the user space range - after calling
+ * this function, memory access functions may still return -EFAULT.
+ */
+#define access_ok(type, addr, size) ({ \
+ __chk_user_ptr(addr); \
+ likely(__range_ok((unsigned long)(addr), (size)) == 0); \
+})
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue. No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry {
+ unsigned long insn, fixup;
+};
+
+extern int fixup_exception(struct pt_regs *regs);
+
+/*
+ * We return the __get_user_N function results in a structure,
+ * thus in r0 and r1. If "err" is zero, "val" is the result
+ * of the read; otherwise, "err" is -EFAULT.
+ *
+ * We rarely need 8-byte values on a 32-bit architecture, but
+ * we size the structure to accommodate. In practice, for the
+ * smaller reads, we can zero the high word for free, and
+ * the caller will ignore it by virtue of casting anyway.
+ */
+struct __get_user {
+ unsigned long long val;
+ int err;
+};
+
+/*
+ * FIXME: we should express these as inline extended assembler, since
+ * they're fundamentally just a variable dereference and some
+ * supporting exception_table gunk. Note that (a la i386) we can
+ * extend the copy_to_user and copy_from_user routines to call into
+ * such extended assembler routines, though we will have to use a
+ * different return code in that case (1, 2, or 4, rather than -EFAULT).
+ */
+extern struct __get_user __get_user_1(const void __user *);
+extern struct __get_user __get_user_2(const void __user *);
+extern struct __get_user __get_user_4(const void __user *);
+extern struct __get_user __get_user_8(const void __user *);
+extern int __put_user_1(long, void __user *);
+extern int __put_user_2(long, void __user *);
+extern int __put_user_4(long, void __user *);
+extern int __put_user_8(long long, void __user *);
+
+/* Unimplemented routines to cause linker failures */
+extern struct __get_user __get_user_bad(void);
+extern int __put_user_bad(void);
+
+/*
+ * Careful: we have to cast the result to the type of the pointer
+ * for sign reasons.
+ */
+/**
+ * __get_user: - Get a simple variable from user space, with less checking.
+ * @x: Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ */
+#define __get_user(x, ptr) \
+({ struct __get_user __ret; \
+ __typeof__(*(ptr)) const __user *__gu_addr = (ptr); \
+ __chk_user_ptr(__gu_addr); \
+ switch (sizeof(*(__gu_addr))) { \
+ case 1: \
+ __ret = __get_user_1(__gu_addr); \
+ break; \
+ case 2: \
+ __ret = __get_user_2(__gu_addr); \
+ break; \
+ case 4: \
+ __ret = __get_user_4(__gu_addr); \
+ break; \
+ case 8: \
+ __ret = __get_user_8(__gu_addr); \
+ break; \
+ default: \
+ __ret = __get_user_bad(); \
+ break; \
+ } \
+ (x) = (__typeof__(*__gu_addr)) (__typeof__(*__gu_addr - *__gu_addr)) \
+ __ret.val; \
+ __ret.err; \
+})
+
+/**
+ * __put_user: - Write a simple value into user space, with less checking.
+ * @x: Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ *
+ * Implementation note: The "case 8" logic of casting to the type of
+ * the result of subtracting the value from itself is basically a way
+ * of keeping all integer types the same, but casting any pointers to
+ * ptrdiff_t, i.e. also an integer type. This way there are no
+ * questionable casts seen by the compiler on an ILP32 platform.
+ */
+#define __put_user(x, ptr) \
+({ \
+ int __pu_err = 0; \
+ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
+ typeof(*__pu_addr) __pu_val = (x); \
+ __chk_user_ptr(__pu_addr); \
+ switch (sizeof(__pu_val)) { \
+ case 1: \
+ __pu_err = __put_user_1((long)__pu_val, __pu_addr); \
+ break; \
+ case 2: \
+ __pu_err = __put_user_2((long)__pu_val, __pu_addr); \
+ break; \
+ case 4: \
+ __pu_err = __put_user_4((long)__pu_val, __pu_addr); \
+ break; \
+ case 8: \
+ __pu_err = \
+ __put_user_8((__typeof__(__pu_val - __pu_val))__pu_val,\
+ __pu_addr); \
+ break; \
+ default: \
+ __pu_err = __put_user_bad(); \
+ break; \
+ } \
+ __pu_err; \
+})
+
+/*
+ * The versions of get_user and put_user without initial underscores
+ * check the address of their arguments to make sure they are not
+ * in kernel space.
+ */
+#define put_user(x, ptr) \
+({ \
+ __typeof__(*(ptr)) __user *__Pu_addr = (ptr); \
+ access_ok(VERIFY_WRITE, (__Pu_addr), sizeof(*(__Pu_addr))) ? \
+ __put_user((x), (__Pu_addr)) : \
+ -EFAULT; \
+})
+
+#define get_user(x, ptr) \
+({ \
+ __typeof__(*(ptr)) const __user *__Gu_addr = (ptr); \
+ access_ok(VERIFY_READ, (__Gu_addr), sizeof(*(__Gu_addr))) ? \
+ __get_user((x), (__Gu_addr)) : \
+ ((x) = 0, -EFAULT); \
+})
+
+/**
+ * __copy_to_user() - copy data into user space, with less checking.
+ * @to: Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from kernel space to user space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * An alternate version - __copy_to_user_inatomic() - is designed
+ * to be called from atomic context, typically bracketed by calls
+ * to pagefault_disable() and pagefault_enable().
+ */
+extern unsigned long __must_check __copy_to_user_inatomic(
+ void __user *to, const void *from, unsigned long n);
+
+static inline unsigned long __must_check
+__copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ might_fault();
+ return __copy_to_user_inatomic(to, from, n);
+}
+
+static inline unsigned long __must_check
+copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ if (!access_ok(VERIFY_WRITE, to, n))
+ return n; /* rejected range: every byte is reported as uncopied */
+ return __copy_to_user(to, from, n);
+}
+
+/**
+ * __copy_from_user() - copy data from user space, with less checking.
+ * @to: Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from user space to kernel space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ *
+ * An alternate version - __copy_from_user_inatomic() - is designed
+ * to be called from atomic context, typically bracketed by calls
+ * to pagefault_disable() and pagefault_enable(). This version
+ * does *NOT* pad with zeros.
+ */
+extern unsigned long __must_check __copy_from_user_inatomic(
+ void *to, const void __user *from, unsigned long n);
+extern unsigned long __must_check __copy_from_user_zeroing(
+ void *to, const void __user *from, unsigned long n);
+
+static inline unsigned long __must_check
+__copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ might_fault();
+ return __copy_from_user_zeroing(to, from, n);
+}
+
+static inline unsigned long __must_check
+_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ if (!access_ok(VERIFY_READ, from, n)) {
+ memset(to, 0, n); /* rejected range: hand back a zeroed buffer */
+ return n;
+ }
+ return __copy_from_user(to, from, n);
+}
+
+#ifdef CONFIG_DEBUG_COPY_FROM_USER
+extern void copy_from_user_overflow(void)
+ __compiletime_warning("copy_from_user() size is not provably correct");
+
+static inline unsigned long __must_check copy_from_user(void *to,
+ const void __user *from,
+ unsigned long n)
+{
+ int sz = __compiletime_object_size(to);
+
+ if (!likely(sz == -1 || sz >= n)) {
+ copy_from_user_overflow(); /* destination known to be too small */
+ return n;
+ }
+
+ return _copy_from_user(to, from, n);
+}
+#else
+#define copy_from_user _copy_from_user
+#endif
+
+#ifdef __tilegx__
+/**
+ * __copy_in_user() - copy data within user space, with less checking.
+ * @to: Destination address, in user space.
+ * @from: Source address, in user space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from user space to user space. Caller must check
+ * the specified blocks with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+extern unsigned long __copy_in_user_inatomic(
+ void __user *to, const void __user *from, unsigned long n);
+
+static inline unsigned long __must_check
+__copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+ might_fault(); /* same annotation as the other sleeping copy helpers */
+ return __copy_in_user_inatomic(to, from, n);
+}
+
+static inline unsigned long __must_check
+copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+ if (!access_ok(VERIFY_WRITE, to, n) || !access_ok(VERIFY_READ, from, n))
+ return n; /* either range rejected: nothing is copied */
+ return __copy_in_user(to, from, n);
+}
+#endif
+
+
+/**
+ * strlen_user: - Get the size of a string in user space.
+ * @str: The string to measure.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * On exception, returns 0.
+ *
+ * If there is a limit on the length of a valid string, you may wish to
+ * consider using strnlen_user() instead.
+ */
+extern long strnlen_user_asm(const char __user *str, long n);
+static inline long __must_check strnlen_user(const char __user *str, long n)
+{
+ might_fault();
+ return strnlen_user_asm(str, n);
+}
+#define strlen_user(str) strnlen_user(str, LONG_MAX)
+
+/**
+ * strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
+ * @dst: Destination address, in kernel space. This buffer must be at
+ * least @count bytes long.
+ * @src: Source address, in user space.
+ * @count: Maximum number of bytes to copy, including the trailing NUL.
+ *
+ * Copies a NUL-terminated string from userspace to kernel space.
+ * Caller must check the specified block with access_ok() before calling
+ * this function.
+ *
+ * On success, returns the length of the string (not including the trailing
+ * NUL).
+ *
+ * If access to userspace fails, returns -EFAULT (some data may have been
+ * copied).
+ *
+ * If @count is smaller than the length of the string, copies @count bytes
+ * and returns @count.
+ */
+extern long strncpy_from_user_asm(char *dst, const char __user *src, long);
+static inline long __must_check __strncpy_from_user(
+ char *dst, const char __user *src, long count)
+{
+ might_fault();
+ return strncpy_from_user_asm(dst, src, count);
+}
+static inline long __must_check strncpy_from_user(
+ char *dst, const char __user *src, long count)
+{
+ if (!access_ok(VERIFY_READ, src, 1)) /* only the first byte is checked */
+ return -EFAULT;
+ return __strncpy_from_user(dst, src, count);
+}
+
+/**
+ * clear_user: - Zero a block of memory in user space.
+ * @mem: Destination address, in user space.
+ * @len: Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+extern unsigned long clear_user_asm(void __user *mem, unsigned long len);
+static inline unsigned long __must_check __clear_user(
+ void __user *mem, unsigned long len)
+{
+ might_fault();
+ return clear_user_asm(mem, len);
+}
+static inline unsigned long __must_check clear_user(
+ void __user *mem, unsigned long len)
+{
+ /* A range that fails the access check is reported as wholly uncleared. */
+ return access_ok(VERIFY_WRITE, mem, len) ?
+ __clear_user(mem, len) : len;
+}
+
+/**
+ * flush_user: - Flush a block of memory in user space from cache.
+ * @mem: Destination address, in user space.
+ * @len: Number of bytes to flush.
+ *
+ * Returns number of bytes that could not be flushed.
+ * On success, this will be zero.
+ */
+extern unsigned long flush_user_asm(void __user *mem, unsigned long len);
+static inline unsigned long __must_check __flush_user(
+ void __user *mem, unsigned long len)
+{
+ unsigned long retval; /* not int: holds an unsigned long byte count */
+
+ might_fault();
+ retval = flush_user_asm(mem, len);
+ mb_incoherent();
+ return retval;
+}
+
+static inline unsigned long __must_check flush_user(
+ void __user *mem, unsigned long len)
+{
+ if (!access_ok(VERIFY_WRITE, mem, len))
+ return len; /* rejected range: nothing was flushed */
+ return __flush_user(mem, len);
+}
+
+/**
+ * inv_user: - Invalidate a block of memory in user space from cache.
+ * @mem: Destination address, in user space.
+ * @len: Number of bytes to invalidate.
+ *
+ * Returns number of bytes that could not be invalidated.
+ * On success, this will be zero.
+ *
+ * Note that on Tile64, the "inv" operation is in fact a
+ * "flush and invalidate", so cache write-backs will occur prior
+ * to the cache being marked invalid.
+ */
+extern unsigned long inv_user_asm(void __user *mem, unsigned long len);
+static inline unsigned long __must_check __inv_user(
+ void __user *mem, unsigned long len)
+{
+ unsigned long retval; /* not int: holds an unsigned long byte count */
+
+ might_fault();
+ retval = inv_user_asm(mem, len);
+ mb_incoherent();
+ return retval;
+}
+static inline unsigned long __must_check inv_user(
+ void __user *mem, unsigned long len)
+{
+ if (!access_ok(VERIFY_WRITE, mem, len))
+ return len; /* rejected range: nothing was invalidated */
+ return __inv_user(mem, len);
+}
+
+/**
+ * finv_user: - Flush-inval a block of memory in user space from cache.
+ * @mem: Destination address, in user space.
+ * @len: Number of bytes to invalidate.
+ *
+ * Returns number of bytes that could not be flush-invalidated.
+ * On success, this will be zero.
+ */
+extern unsigned long finv_user_asm(void __user *mem, unsigned long len);
+static inline unsigned long __must_check __finv_user(
+ void __user *mem, unsigned long len)
+{
+ unsigned long retval; /* not int: holds an unsigned long byte count */
+
+ might_fault();
+ retval = finv_user_asm(mem, len);
+ mb_incoherent();
+ return retval;
+}
+static inline unsigned long __must_check finv_user(
+ void __user *mem, unsigned long len)
+{
+ if (!access_ok(VERIFY_WRITE, mem, len))
+ return len; /* rejected range: nothing was flush-invalidated */
+ return __finv_user(mem, len);
+}
+
+#endif /* _ASM_TILE_UACCESS_H */
diff --git a/arch/tile/include/asm/unaligned.h b/arch/tile/include/asm/unaligned.h
new file mode 100644
index 00000000..37dfbe59
--- /dev/null
+++ b/arch/tile/include/asm/unaligned.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_UNALIGNED_H
+#define _ASM_TILE_UNALIGNED_H
+
+#include <linux/unaligned/le_struct.h>
+#include <linux/unaligned/be_byteshift.h>
+#include <linux/unaligned/generic.h>
+#define get_unaligned __get_unaligned_le
+#define put_unaligned __put_unaligned_le
+
+/*
+ * Is the kernel doing fixups of unaligned accesses? If <0, no kernel
+ * intervention occurs and SIGBUS is delivered with no data address
+ * info. If 0, the kernel single-steps the instruction to discover
+ * the data address to provide with the SIGBUS. If 1, the kernel does
+ * a fixup.
+ */
+extern int unaligned_fixup;
+
+/* Is the kernel printing on each unaligned fixup? */
+extern int unaligned_printk;
+
+/* Number of unaligned fixups performed */
+extern unsigned int unaligned_fixup_count;
+
+#endif /* _ASM_TILE_UNALIGNED_H */
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
new file mode 100644
index 00000000..f70bf1c5
--- /dev/null
+++ b/arch/tile/include/asm/unistd.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#if !defined(_ASM_TILE_UNISTD_H) || defined(__SYSCALL)
+#define _ASM_TILE_UNISTD_H
+
+#if !defined(__LP64__) || defined(__SYSCALL_COMPAT)
+/* Use the flavor of this syscall that matches the 32-bit API better. */
+#define __ARCH_WANT_SYNC_FILE_RANGE2
+#endif
+
+/* Use the standard ABI for syscalls. */
+#include <asm-generic/unistd.h>
+
+/* Additional Tilera-specific syscalls. */
+#define __NR_flush_cache (__NR_arch_specific_syscall + 1)
+__SYSCALL(__NR_flush_cache, sys_flush_cache)
+
+#ifndef __tilegx__
+/* "Fast" syscalls provide atomic support for 32-bit chips. */
+#define __NR_FAST_cmpxchg -1
+#define __NR_FAST_atomic_update -2
+#define __NR_FAST_cmpxchg64 -3
+#define __NR_cmpxchg_badaddr (__NR_arch_specific_syscall + 0)
+__SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr)
+#endif
+
+#ifdef __KERNEL__
+/* In compat mode, we use sys_llseek() for compat_sys_llseek(). */
+#ifdef CONFIG_COMPAT
+#define __ARCH_WANT_SYS_LLSEEK
+#endif
+#define __ARCH_WANT_SYS_NEWFSTATAT
+#endif
+
+#endif /* _ASM_TILE_UNISTD_H */
diff --git a/arch/tile/include/asm/user.h b/arch/tile/include/asm/user.h
new file mode 100644
index 00000000..cbc8b4d5
--- /dev/null
+++ b/arch/tile/include/asm/user.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _ASM_TILE_USER_H
+#define _ASM_TILE_USER_H
+
+/* This header is for a.out file formats, which TILE does not support. */
+
+#endif /* _ASM_TILE_USER_H */
diff --git a/arch/tile/include/asm/vga.h b/arch/tile/include/asm/vga.h
new file mode 100644
index 00000000..7b46e754
--- /dev/null
+++ b/arch/tile/include/asm/vga.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Access to VGA videoram.
+ */
+
+#ifndef _ASM_TILE_VGA_H
+#define _ASM_TILE_VGA_H
+
+#include <asm/io.h>
+
+#define VT_BUF_HAVE_RW
+
+static inline void scr_writew(u16 val, volatile u16 *addr)
+{
+ __raw_writew(val, (volatile u16 __iomem *) addr);
+}
+
+static inline u16 scr_readw(volatile const u16 *addr)
+{
+ return __raw_readw((volatile const u16 __iomem *) addr);
+}
+
+#define vga_readb(a) readb((u8 __iomem *)(a))
+#define vga_writeb(v,a) writeb(v, (u8 __iomem *)(a))
+
+#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap(x, s))
+
+#endif
diff --git a/arch/tile/include/hv/drv_mshim_intf.h b/arch/tile/include/hv/drv_mshim_intf.h
new file mode 100644
index 00000000..c6ef3bdc
--- /dev/null
+++ b/arch/tile/include/hv/drv_mshim_intf.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file drv_mshim_intf.h
+ * Interface definitions for the Linux EDAC memory controller driver.
+ */
+
+#ifndef _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H
+#define _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H
+
+/** Number of memory controllers in the public API. */
+#define TILE_MAX_MSHIMS 4
+
+/** Memory info under each memory controller. */
+struct mshim_mem_info
+{
+ uint64_t mem_size; /**< Total memory size in bytes. */
+ uint8_t mem_type; /**< Memory type, DDR2 or DDR3. */
+ uint8_t mem_ecc; /**< Memory supports ECC. */
+};
+
+/**
+ * DIMM error structure.
+ * For now, only correctable errors are counted and the mshim doesn't record
+ * the error PA. HV takes panic upon uncorrectable errors.
+ */
+struct mshim_mem_error
+{
+ uint32_t sbe_count; /**< Number of single-bit errors. */
+};
+
+/** Read this offset to get the memory info per mshim. */
+#define MSHIM_MEM_INFO_OFF 0x100
+
+/** Read this offset to check DIMM error. */
+#define MSHIM_MEM_ERROR_OFF 0x200
+
+#endif /* _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H */
diff --git a/arch/tile/include/hv/drv_pcie_rc_intf.h b/arch/tile/include/hv/drv_pcie_rc_intf.h
new file mode 100644
index 00000000..9bd2243b
--- /dev/null
+++ b/arch/tile/include/hv/drv_pcie_rc_intf.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file drv_pcie_rc_intf.h
+ * Interface definitions for the PCIE Root Complex.
+ */
+
+#ifndef _SYS_HV_DRV_PCIE_RC_INTF_H
+#define _SYS_HV_DRV_PCIE_RC_INTF_H
+
+/** File offset for reading the interrupt base number used for PCIE legacy
+ interrupts and PLX Gen 1 requirement flag */
+#define PCIE_RC_CONFIG_MASK_OFF 0
+
+
+/**
+ * Structure used for obtaining PCIe config information, read from the PCIE
+ * subsystem /ctl file at initialization
+ */
+typedef struct pcie_rc_config
+{
+ int intr; /**< interrupt number used for downcall */
+ int plx_gen1; /**< flag for PLX Gen 1 configuration */
+} pcie_rc_config_t;
+
+#endif /* _SYS_HV_DRV_PCIE_RC_INTF_H */
diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h
new file mode 100644
index 00000000..6395faa6
--- /dev/null
+++ b/arch/tile/include/hv/drv_srom_intf.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file drv_srom_intf.h
+ * Interface definitions for the SPI Flash ROM driver.
+ */
+
+#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H
+#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H
+
+/** Read this offset to get the total device size. */
+#define SROM_TOTAL_SIZE_OFF 0xF0000000
+
+/** Read this offset to get the device sector size. */
+#define SROM_SECTOR_SIZE_OFF 0xF0000004
+
+/** Read this offset to get the device page size. */
+#define SROM_PAGE_SIZE_OFF 0xF0000008
+
+/** Write this offset to flush any pending writes. */
+#define SROM_FLUSH_OFF 0xF1000000
+
+/** Write this offset, plus the byte offset of the start of a sector, to
+ * erase a sector. Any write data is ignored, but there must be at least
+ * one byte of write data. Only applies when the driver is in MTD mode.
+ */
+#define SROM_ERASE_OFF 0xF2000000
+
+#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */
diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h
new file mode 100644
index 00000000..3a73b2b4
--- /dev/null
+++ b/arch/tile/include/hv/drv_xgbe_impl.h
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file drv_xgbe_impl.h
+ * Implementation details for the NetIO library.
+ */
+
+#ifndef __DRV_XGBE_IMPL_H__
+#define __DRV_XGBE_IMPL_H__
+
+#include <hv/netio_errors.h>
+#include <hv/netio_intf.h>
+#include <hv/drv_xgbe_intf.h>
+
+
+/** How many groups we have (log2). */
+#define LOG2_NUM_GROUPS (12)
+/** How many groups we have. */
+#define NUM_GROUPS (1 << LOG2_NUM_GROUPS)
+
+/** Number of output requests we'll buffer per tile. */
+#define EPP_REQS_PER_TILE (32)
+
+/** Words used in an eDMA command without checksum acceleration. */
+#define EDMA_WDS_NO_CSUM 8
+/** Words used in an eDMA command with checksum acceleration. */
+#define EDMA_WDS_CSUM 10
+/** Total available words in the eDMA command FIFO. */
+#define EDMA_WDS_TOTAL 128
+
+
+/*
+ * FIXME: These definitions are internal and should have underscores!
+ * NOTE: The actual numeric values here are intentional and allow us to
+ * optimize the concept "if small ... else if large ... else ...", by
+ * checking for the low bit being set, and then for non-zero.
+ * These are used as array indices, so they must have the values (0, 1, 2)
+ * in some order.
+ */
+#define SIZE_SMALL (1) /**< Small packet queue. */
+#define SIZE_LARGE (2) /**< Large packet queue. */
+#define SIZE_JUMBO (0) /**< Jumbo packet queue. */
+
+/** The number of "SIZE_xxx" values. */
+#define NETIO_NUM_SIZES 3
+
+
+/*
+ * Default numbers of packets for IPP drivers. These values are chosen
+ * such that CIPP1 will not overflow its L2 cache.
+ */
+
+/** The default number of small packets. */
+#define NETIO_DEFAULT_SMALL_PACKETS 2750
+/** The default number of large packets. */
+#define NETIO_DEFAULT_LARGE_PACKETS 2500
+/** The default number of jumbo packets. */
+#define NETIO_DEFAULT_JUMBO_PACKETS 250
+
+
+/** Log2 of the size of a memory arena. */
+#define NETIO_ARENA_SHIFT 24 /* 16 MB */
+/** Size of a memory arena. */
+#define NETIO_ARENA_SIZE (1 << NETIO_ARENA_SHIFT)
+
+
+/** A queue of packets.
+ *
+ * This structure partially defines a queue of packets waiting to be
+ * processed. The queue as a whole is written to by an interrupt handler and
+ * read by non-interrupt code; this data structure is what's touched by the
+ * interrupt handler. The other part of the queue state, the read offset, is
+ * kept in user space, not in hypervisor space, so it is in a separate data
+ * structure.
+ *
+ * The read offset (__packet_receive_read in the user part of the queue
+ * structure) points to the next packet to be read. When the read offset is
+ * equal to the write offset, the queue is empty; therefore the queue must
+ * contain one more slot than the required maximum queue size.
+ *
+ * Here's an example of all 3 state variables and what they mean. All
+ * pointers move left to right.
+ *
+ * @code
+ * I I V V V V I I I I
+ * 0 1 2 3 4 5 6 7 8 9 10
+ * ^ ^ ^ ^
+ * | | |
+ * | | __last_packet_plus_one
+ * | __packet_write
+ * __packet_receive_read
+ * @endcode
+ *
+ * This queue has 10 slots, and thus can hold 9 packets (__last_packet_plus_one
+ * = 10). The read pointer is at 2, and the write pointer is at 6; thus,
+ * there are valid, unread packets in slots 2, 3, 4, and 5. The remaining
+ * slots are invalid (do not contain a packet).
+ */
+typedef struct {
+ /** Byte offset of the next notify packet to be written: zero for the first
+ * packet on the queue, sizeof (netio_pkt_t) for the second packet on the
+ * queue, etc. */
+ volatile uint32_t __packet_write;
+
+ /** Offset of the packet after the last valid packet (i.e., when any
+ * pointer is incremented to this value, it wraps back to zero). */
+ uint32_t __last_packet_plus_one;
+}
+__netio_packet_queue_t;
+
+
+/** A queue of buffers.
+ *
+ * This structure partially defines a queue of empty buffers which have been
+ * obtained via requests to the IPP. (The elements of the queue are packet
+ * handles, which are transformed into a full netio_pkt_t when the buffer is
+ * retrieved.) The queue as a whole is written to by an interrupt handler and
+ * read by non-interrupt code; this data structure is what's touched by the
+ * interrupt handler. The other parts of the queue state, the read offset and
+ * requested write offset, are kept in user space, not in hypervisor space, so
+ * they are in a separate data structure.
+ *
+ * The read offset (__buffer_read in the user part of the queue structure)
+ * points to the next buffer to be read. When the read offset is equal to the
+ * write offset, the queue is empty; therefore the queue must contain one more
+ * slot than the required maximum queue size.
+ *
+ * The requested write offset (__buffer_requested_write in the user part of
+ * the queue structure) points to the slot which will hold the next buffer we
+ * request from the IPP, once we get around to sending such a request. When
+ * the requested write offset is equal to the write offset, no requests for
+ * new buffers are outstanding; when the requested write offset is one greater
+ * than the read offset, no more requests may be sent.
+ *
+ * Note that, unlike the packet_queue, the buffer_queue places incoming
+ * buffers at decreasing addresses. This makes the check for "is it time to
+ * wrap the buffer pointer" cheaper in the assembly code which receives new
+ * buffers, and means that the value which defines the queue size,
+ * __last_buffer, is different than in the packet queue. Also, the offset
+ * used in the packet_queue is already scaled by the size of a packet; here we
+ * use unscaled slot indices for the offsets. (These differences are
+ * historical, and in the future it's possible that the packet_queue will look
+ * more like this queue.)
+ *
+ * @code
+ * Here's an example of all 4 state variables and what they mean. Remember:
+ * all pointers move right to left.
+ *
+ * V V V I I R R V V V
+ * 0 1 2 3 4 5 6 7 8 9
+ * ^ ^ ^ ^
+ * | | | |
+ * | | | __last_buffer
+ * | | __buffer_write
+ * | __buffer_requested_write
+ * __buffer_read
+ * @endcode
+ *
+ * This queue has 10 slots, and thus can hold 9 buffers (__last_buffer = 9).
+ * The read pointer is at 2, and the write pointer is at 6; thus, there are
+ * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7. The requested write
+ * pointer is at 4; thus, requests have been made to the IPP for buffers which
+ * will be placed in slots 6 and 5 when they arrive. Finally, the remaining
+ * slots are invalid (do not contain a buffer).
+ */
+typedef struct
+{
+ /** Ordinal number of the next buffer to be written: 0 for the first slot in
+ * the queue, 1 for the second slot in the queue, etc. */
+ volatile uint32_t __buffer_write;
+
+ /** Ordinal number of the last buffer (i.e., when any pointer is decremented
+ * below zero, it is reloaded with this value). */
+ uint32_t __last_buffer;
+}
+__netio_buffer_queue_t;
+
+
+/**
+ * An object for providing Ethernet packets to a process.
+ */
+typedef struct __netio_queue_impl_t
+{
+ /** The queue of packets waiting to be received. */
+ __netio_packet_queue_t __packet_receive_queue;
+ /** The intr bit mask that IDs this device. */
+ unsigned int __intr_id;
+ /** Offset to queues of empty buffers, one per size. */
+ uint32_t __buffer_queue[NETIO_NUM_SIZES];
+ /** The address of the first EPP tile, or -1 if no EPP. */
+ /* ISSUE: Actually this is always "0" or "~0". */
+ uint32_t __epp_location;
+ /** The queue ID that this queue represents. */
+ unsigned int __queue_id;
+ /** Number of acknowledgements received. */
+ volatile uint32_t __acks_received;
+ /** Last completion number received for packet_sendv. */
+ volatile uint32_t __last_completion_rcv;
+ /** Number of packets allowed to be outstanding. */
+ uint32_t __max_outstanding;
+ /** First VA available for packets. */
+ void* __va_0;
+ /** First VA in second range available for packets. */
+ void* __va_1;
+ /** Padding to align the "__packets" field to the size of a netio_pkt_t. */
+ uint32_t __padding[3];
+ /** The packets themselves. */
+ netio_pkt_t __packets[0];
+}
+netio_queue_impl_t;
+
+
+/**
+ * An object for managing the user end of a NetIO queue.
+ */
+typedef struct __netio_queue_user_impl_t
+{
+ /** The next incoming packet to be read. */
+ uint32_t __packet_receive_read;
+ /** The next empty buffers to be read, one index per size. */
+ uint8_t __buffer_read[NETIO_NUM_SIZES];
+ /** Where the empty buffer we next request from the IPP will go, one index
+ * per size. */
+ uint8_t __buffer_requested_write[NETIO_NUM_SIZES];
+ /** PCIe interface flag. */
+ uint8_t __pcie;
+ /** Number of packets left to be received before we send a credit update. */
+ uint32_t __receive_credit_remaining;
+ /** Value placed in __receive_credit_remaining when it reaches zero. */
+ uint32_t __receive_credit_interval;
+ /** First fast I/O routine index. */
+ uint32_t __fastio_index;
+ /** Number of acknowledgements expected. */
+ uint32_t __acks_outstanding;
+ /** Last completion number requested. */
+ uint32_t __last_completion_req;
+ /** File descriptor for driver. */
+ int __fd;
+}
+netio_queue_user_impl_t;
+
+
+#define NETIO_GROUP_CHUNK_SIZE 64 /**< Max # groups in one IPP request */
+#define NETIO_BUCKET_CHUNK_SIZE 64 /**< Max # buckets in one IPP request */
+
+
+/** Internal structure used to convey packet send information to the
+ * hypervisor. FIXME: Actually, it's not used for that anymore, but
+ * netio_packet_send() still uses it internally.
+ */
+typedef struct
+{
+ uint16_t flags; /**< Packet flags (__NETIO_SEND_FLG_xxx) */
+ uint16_t transfer_size; /**< Size of packet */
+ uint32_t va; /**< VA of start of packet */
+ __netio_pkt_handle_t handle; /**< Packet handle */
+ uint32_t csum0; /**< First checksum word */
+ uint32_t csum1; /**< Second checksum word */
+}
+__netio_send_cmd_t;
+
+
+/** Flags used in two contexts:
+ * - As the "flags" member in the __netio_send_cmd_t, above; used only
+ * for netio_pkt_send_{prepare,commit}.
+ * - As part of the flags passed to the various send packet fast I/O calls.
+ */
+
+/** Need acknowledgement on this packet. Note that some code in the
+ * normal send_pkt fast I/O handler assumes that this is equal to 1. */
+#define __NETIO_SEND_FLG_ACK 0x1
+
+/** Do checksum on this packet. (Only used with the __netio_send_cmd_t;
+ * normal packet sends use a special fast I/O index to denote checksumming,
+ * and multi-segment sends test the checksum descriptor.) */
+#define __NETIO_SEND_FLG_CSUM 0x2
+
+/** Get a completion on this packet. Only used with multi-segment sends. */
+#define __NETIO_SEND_FLG_COMPLETION 0x4
+
+/** Position of the number-of-extra-segments value in the flags word.
+ Only used with multi-segment sends. */
+#define __NETIO_SEND_FLG_XSEG_SHIFT 3
+
+/** Width of the number-of-extra-segments value in the flags word. */
+#define __NETIO_SEND_FLG_XSEG_WIDTH 2
+
+#endif /* __DRV_XGBE_IMPL_H__ */
diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h
new file mode 100644
index 00000000..f13188ac
--- /dev/null
+++ b/arch/tile/include/hv/drv_xgbe_intf.h
@@ -0,0 +1,615 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file drv_xgbe_intf.h
+ * Interface to the hypervisor XGBE driver.
+ */
+
+#ifndef __DRV_XGBE_INTF_H__
+#define __DRV_XGBE_INTF_H__
+
+/**
+ * An object for forwarding VAs and PAs to the hypervisor.
+ * @ingroup types
+ *
+ * This allows the supervisor to specify a number of areas of memory to
+ * store packet buffers.
+ */
+typedef struct
+{
+ /** The physical address of the memory. */
+ HV_PhysAddr pa;
+ /** Page table entry for the memory. This is only used to derive the
+ * memory's caching mode; the PA bits are ignored. */
+ HV_PTE pte;
+ /** The virtual address of the memory. */
+ HV_VirtAddr va;
+ /** Size (in bytes) of the memory area. */
+ int size;
+
+}
+netio_ipp_address_t;
+
+/** The various pread/pwrite offsets into the hypervisor-level driver.
+ * @ingroup types
+ */
+typedef enum
+{
+ /** Inform the Linux driver of the address of the NetIO arena memory.
+ * This offset is actually only used to convey information from netio
+ * to the Linux driver; it never makes it from there to the hypervisor.
+ * Write-only; takes a uint32_t specifying the VA address. */
+ NETIO_FIXED_ADDR = 0x5000000000000000ULL,
+
+ /** Inform the Linux driver of the size of the NetIO arena memory.
+ * This offset is actually only used to convey information from netio
+ * to the Linux driver; it never makes it from there to the hypervisor.
+ * Write-only; takes a uint32_t specifying the VA size. */
+ NETIO_FIXED_SIZE = 0x5100000000000000ULL,
+
+ /** Register current tile with IPP. Write then read: write, takes a
+ * netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */
+ NETIO_IPP_INPUT_REGISTER_OFF = 0x6000000000000000ULL,
+
+ /** Unregister current tile from IPP. Write-only, takes a dummy argument. */
+ NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL,
+
+ /** Start packets flowing. Write-only, takes a dummy argument. */
+ NETIO_IPP_INPUT_INIT_OFF = 0x6200000000000000ULL,
+
+ /** Stop packets flowing. Write-only, takes a dummy argument. */
+ NETIO_IPP_INPUT_UNINIT_OFF = 0x6300000000000000ULL,
+
+ /** Configure group (typically we group on VLAN). Write-only: takes an
+ * array of netio_group_t's, low 24 bits of the offset is the base group
+ * number times the size of a netio_group_t. */
+ NETIO_IPP_INPUT_GROUP_CFG_OFF = 0x6400000000000000ULL,
+
+ /** Configure bucket. Write-only: takes an array of netio_bucket_t's, low
+ * 24 bits of the offset is the base bucket number times the size of a
+ * netio_bucket_t. */
+ NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL,
+
+ /** Get/set a parameter. Read or write: read or write data is the parameter
+ * value, low 32 bits of the offset is a __netio_getset_offset_t. */
+ NETIO_IPP_PARAM_OFF = 0x6600000000000000ULL,
+
+ /** Get fast I/O index. Read-only; returns a 4-byte base index value. */
+ NETIO_IPP_GET_FASTIO_OFF = 0x6700000000000000ULL,
+
+ /** Configure hijack IP address. Packets with this IPv4 dest address
+ * go to bucket NETIO_NUM_BUCKETS - 1. Write-only: takes an IP address
+ * in some standard form. FIXME: Define the form! */
+ NETIO_IPP_INPUT_HIJACK_CFG_OFF = 0x6800000000000000ULL,
+
+ /**
+ * Offsets beyond this point are reserved for the supervisor (although that
+ * enforcement must be done by the supervisor driver itself).
+ */
+ NETIO_IPP_USER_MAX_OFF = 0x6FFFFFFFFFFFFFFFULL,
+
+ /** Register I/O memory. Write-only, takes a netio_ipp_address_t. */
+ NETIO_IPP_IOMEM_REGISTER_OFF = 0x7000000000000000ULL,
+
+ /** Unregister I/O memory. Write-only, takes a netio_ipp_address_t. */
+ NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL,
+
+ /* Offsets greater than 0x7FFFFFFFFFFFFFFF can't be used directly from
+ * Linux userspace code due to limitations in the pread/pwrite syscalls. */
+
+ /** Drain LIPP buffers. */
+ NETIO_IPP_DRAIN_OFF = 0xFA00000000000000ULL,
+
+ /** Supply a netio_ipp_address_t to be used as shared memory for the
+ * LEPP command queue. */
+ NETIO_EPP_SHM_OFF = 0xFB00000000000000ULL,
+
+ /* 0xFC... is currently unused. */
+
+ /** Stop IPP/EPP tiles. Write-only, takes a dummy argument. */
+ NETIO_IPP_STOP_SHIM_OFF = 0xFD00000000000000ULL,
+
+ /** Start IPP/EPP tiles. Write-only, takes a dummy argument. */
+ NETIO_IPP_START_SHIM_OFF = 0xFE00000000000000ULL,
+
+ /** Supply packet arena. Write-only, takes an array of
+ * netio_ipp_address_t values. */
+ NETIO_IPP_ADDRESS_OFF = 0xFF00000000000000ULL,
+} netio_hv_offset_t;
+
+/** Extract the base offset from an offset */
+#define NETIO_BASE_OFFSET(off) ((off) & 0xFF00000000000000ULL)
+/** Extract the local offset from an offset */
+#define NETIO_LOCAL_OFFSET(off) ((off) & 0x00FFFFFFFFFFFFFFULL)
+
+
+/**
+ * Get/set offset.
+ */
+typedef union
+{
+ struct
+ {
+ uint64_t addr:48; /**< Class-specific address */
+ unsigned int class:8; /**< Class (e.g., NETIO_PARAM) */
+ unsigned int opcode:8; /**< High 8 bits of NETIO_IPP_PARAM_OFF */
+ }
+ bits; /**< Bitfields */
+ uint64_t word; /**< Aggregated value to use as the offset */
+}
+__netio_getset_offset_t;
+
+/**
+ * Fast I/O index offsets (must be contiguous).
+ */
+typedef enum
+{
+ NETIO_FASTIO_ALLOCATE = 0, /**< Get empty packet buffer */
+ NETIO_FASTIO_FREE_BUFFER = 1, /**< Give buffer back to IPP */
+ NETIO_FASTIO_RETURN_CREDITS = 2, /**< Give credits to IPP */
+ NETIO_FASTIO_SEND_PKT_NOCK = 3, /**< Send a packet, no checksum */
+ NETIO_FASTIO_SEND_PKT_CK = 4, /**< Send a packet, with checksum */
+ NETIO_FASTIO_SEND_PKT_VEC = 5, /**< Send a vector of packets */
+ NETIO_FASTIO_SENDV_PKT = 6, /**< Sendv one packet */
+ NETIO_FASTIO_NUM_INDEX = 7, /**< Total number of fast I/O indices */
+} netio_fastio_index_t;
+
+/** 3-word return type for Fast I/O call. */
+typedef struct
+{
+ int err; /**< Error code. */
+ uint32_t val0; /**< Value. Meaning depends upon the specific call. */
+ uint32_t val1; /**< Value. Meaning depends upon the specific call. */
+} netio_fastio_rv3_t;
+
+/** 0-argument fast I/O call */
+int __netio_fastio0(uint32_t fastio_index);
+/** 1-argument fast I/O call */
+int __netio_fastio1(uint32_t fastio_index, uint32_t arg0);
+/** 3-argument fast I/O call, 3-word return value (netio_fastio_rv3_t) */
+netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0,
+ uint32_t arg1, uint32_t arg2);
+/** 4-argument fast I/O call */
+int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
+ uint32_t arg2, uint32_t arg3);
+/** 6-argument fast I/O call */
+int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
+ uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5);
+/** 9-argument fast I/O call */
+int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
+ uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5,
+ uint32_t arg6, uint32_t arg7, uint32_t arg8);
+
+/** Allocate an empty packet.
+ * @param fastio_index Fast I/O index.
+ * @param size Size of the packet to allocate.
+ */
+#define __netio_fastio_allocate(fastio_index, size) \
+ __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size)
+
+/** Free a buffer.
+ * @param fastio_index Fast I/O index.
+ * @param handle Handle for the packet to free.
+ */
+#define __netio_fastio_free_buffer(fastio_index, handle) \
+ __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle)
+
+/** Increment our receive credits.
+ * @param fastio_index Fast I/O index.
+ * @param credits Number of credits to add.
+ */
+#define __netio_fastio_return_credits(fastio_index, credits) \
+ __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits)
+
+/** Send packet, no checksum.
+ * @param fastio_index Fast I/O index.
+ * @param ackflag Nonzero if we want an ack.
+ * @param size Size of the packet.
+ * @param va Virtual address of start of packet.
+ * @param handle Packet handle.
+ */
+#define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \
+ __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \
+ size, va, handle)
+
+/** Send packet, calculate checksum.
+ * @param fastio_index Fast I/O index.
+ * @param ackflag Nonzero if we want an ack.
+ * @param size Size of the packet.
+ * @param va Virtual address of start of packet.
+ * @param handle Packet handle.
+ * @param csum0 Shim checksum header.
+ * @param csum1 Checksum seed.
+ */
+#define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \
+ csum0, csum1) \
+ __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \
+ size, va, handle, csum0, csum1)
+
+
+/** Format for the "csum0" argument to the __netio_fastio_send routines
+ * and LEPP. Note that this is currently exactly identical to the
+ * ShimProtocolOffloadHeader.
+ */
+typedef union
+{
+ struct
+ {
+ unsigned int start_byte:7; /**< The first byte to be checksummed */
+ unsigned int count:14; /**< Number of bytes to be checksummed. */
+ unsigned int destination_byte:7; /**< The byte to write the checksum to. */
+ unsigned int reserved:4; /**< Reserved. */
+ } bits; /**< Decomposed method of access. */
+ unsigned int word; /**< To send out the IDN. */
+} __netio_checksum_header_t;
+
+
+/** Sendv packet with 1 or 2 segments.
+ * @param fastio_index Fast I/O index.
+ * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
+ * 1 in next 2 bits; expected checksum in high 16 bits.
+ * @param confno Confirmation number to request, if notify flag set.
+ * @param csum0 Checksum descriptor; if zero, no checksum.
+ * @param va_F Virtual address of first segment.
+ * @param va_L Virtual address of last segment, if 2 segments.
+ * @param len_F_L Length of first segment in low 16 bits; length of last
+ * segment, if 2 segments, in high 16 bits.
+ */
+#define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \
+ va_F, va_L, len_F_L) \
+ __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \
+ csum0, va_F, va_L, len_F_L)
+
+/** Send packet on PCIe interface.
+ * @param fastio_index Fast I/O index.
+ * @param flags Ack/csum/notify flags in low 3 bits.
+ * @param confno Confirmation number to request, if notify flag set.
+ * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe.
+ * @param va_F Virtual address of the packet buffer.
+ * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0.
+ * @param len_F_L Length of the packet buffer in low 16 bits.
+ *
+ * NOTE(review): PCIE_FASTIO_SENDV_PKT is not defined anywhere in this header
+ * (it is not one of the netio_fastio_index_t values); confirm that another
+ * header provides it before this macro is used.
+ */
+#define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \
+ va_F, va_L, len_F_L) \
+ __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \
+ csum0, va_F, va_L, len_F_L)
+
+/** Sendv packet with 3 or 4 segments.
+ * @param fastio_index Fast I/O index.
+ * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
+ * 1 in next 2 bits; expected checksum in high 16 bits.
+ * @param confno Confirmation number to request, if notify flag set.
+ * @param csum0 Checksum descriptor; if zero, no checksum.
+ * @param va_F Virtual address of first segment.
+ * @param va_L Virtual address of last segment (third segment if 3 segments,
+ * fourth segment if 4 segments).
+ * @param len_F_L Length of first segment in low 16 bits; length of last
+ * segment in high 16 bits.
+ * @param va_M0 Virtual address of "middle 0" segment; this segment is sent
+ * second when there are three segments, and third if there are four.
+ * @param va_M1 Virtual address of "middle 1" segment; this segment is sent
+ * second when there are four segments.
+ * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle
+ * 1 segment, if 4 segments, in high 16 bits.
+ */
+#define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \
+ va_L, len_F_L, va_M0, va_M1, len_M0_M1) \
+ __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \
+ csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1)
+
+/** Send vector of packets.
+ * @param fastio_index Fast I/O index.
+ * @param seqno Number of packets transmitted so far on this interface;
+ * used to decide which packets should be acknowledged.
+ * @param nentries Number of entries in vector.
+ * @param va Virtual address of start of vector entry array.
+ * @return 3-word netio_fastio_rv3_t structure. The structure's err member
+ * is an error code, or zero if no error. The val0 member is the
+ * updated value of seqno; it has been incremented by 1 for each
+ * packet sent. That increment may be less than nentries if an
+ * error occurred, or if some of the entries in the vector contain
+ * handles equal to NETIO_PKT_HANDLE_NONE. The val1 member is the
+ * updated value of nentries; it has been decremented by 1 for each
+ * vector entry processed. Again, that decrement may be less than
+ * nentries (leaving the returned value positive) if an error
+ * occurred.
+ */
+#define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \
+ __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \
+ nentries, va)
+
+
+/** An egress DMA command for LEPP. */
+typedef struct
+{
+ /** Is this a TSO transfer?
+ *
+ * NOTE: This field is always 0, to distinguish it from
+ * lepp_tso_cmd_t. It must come first!
+ */
+ uint8_t tso : 1;
+
+ /** Unused padding bits. */
+ uint8_t _unused : 3;
+
+ /** Should this packet be sent directly from caches instead of DRAM,
+ * using hash-for-home to locate the packet data?
+ */
+ uint8_t hash_for_home : 1;
+
+ /** Should we compute a checksum? */
+ uint8_t compute_checksum : 1;
+
+ /** Is this the final buffer for this packet?
+ *
+ * A single packet can be split over several input buffers (a "gather"
+ * operation). This flag indicates that this is the last buffer
+ * in a packet.
+ */
+ uint8_t end_of_packet : 1;
+
+ /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */
+ uint8_t send_completion : 1;
+
+ /** High bits of Client Physical Address of the start of the buffer
+ * to be egressed.
+ *
+ * NOTE: Only 6 bits are actually needed here, as CPAs are
+ * currently 38 bits. So two bits could be scavenged from this.
+ */
+ uint8_t cpa_hi;
+
+ /** The number of bytes to be egressed. */
+ uint16_t length;
+
+ /** Low 32 bits of Client Physical Address of the start of the buffer
+ * to be egressed.
+ */
+ uint32_t cpa_lo;
+
+ /** Checksum information (only used if 'compute_checksum'). */
+ __netio_checksum_header_t checksum_data;
+
+} lepp_cmd_t;
+
+
+/** A chunk of physical memory for a TSO egress. */
+typedef struct
+{
+ /** The low bits of the CPA. */
+ uint32_t cpa_lo;
+ /** The high bits of the CPA. */
+ uint16_t cpa_hi : 15;
+ /** Should this packet be sent directly from caches instead of DRAM,
+ * using hash-for-home to locate the packet data?
+ */
+ uint16_t hash_for_home : 1;
+ /** The length in bytes. */
+ uint16_t length;
+} lepp_frag_t;
+
+
+/** An LEPP command that handles TSO. */
+typedef struct
+{
+ /** Is this a TSO transfer?
+ *
+ * NOTE: This field is always 1, to distinguish it from
+ * lepp_cmd_t. It must come first!
+ */
+ uint8_t tso : 1;
+
+ /** Unused padding bits. */
+ uint8_t _unused : 7;
+
+ /** Size of the header[] array in bytes. It must be in the range
+ * [40, 127], which are the smallest header for a TCP packet over
+ * Ethernet and the maximum possible prepend size supported by
+ * hardware, respectively. Note that the array storage must be
+ * padded out to a multiple of four bytes so that the following
+ * LEPP command is aligned properly.
+ */
+ uint8_t header_size;
+
+ /** Byte offset of the IP header in header[]. */
+ uint8_t ip_offset;
+
+ /** Byte offset of the TCP header in header[]. */
+ uint8_t tcp_offset;
+
+ /** The number of bytes to use for the payload of each packet,
+ * except of course the last one, which may not have enough bytes.
+ * This means that each Ethernet packet except the last will have a
+ * size of header_size + payload_size.
+ */
+ uint16_t payload_size;
+
+ /** The length of the 'frags' array that follows this struct. */
+ uint16_t num_frags;
+
+ /** The actual frags. */
+ lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */];
+
+ /*
+ * The packet header template logically follows frags[],
+ * but you can't declare that in C.
+ *
+ * uint32_t header[header_size_in_words_rounded_up];
+ */
+
+} lepp_tso_cmd_t;
+
+
+/** An LEPP completion ring entry. */
+typedef void* lepp_comp_t;
+
+
+/** Maximum number of frags for one TSO command. This is adapted from
+ * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for
+ * our page size of exactly 65536. We add one for a "body" fragment.
+ */
+#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1)
+
+/** Total number of bytes needed for an lepp_tso_cmd_t. */
+#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \
+ (sizeof(lepp_tso_cmd_t) + \
+ (num_frags) * sizeof(lepp_frag_t) + \
+ (((header_size) + 3) & -4))
+
+/** The size of the lepp "cmd" queue. */
+#define LEPP_CMD_QUEUE_BYTES \
+ (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \
+ (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t))
+
+/** The largest possible command that can go in lepp_queue_t::cmds[]. */
+#define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128)
+
+/** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive).
+ */
+#define LEPP_CMD_LIMIT \
+ (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE)
+
+/** The maximum number of completions in an LEPP queue. */
+#define LEPP_COMP_QUEUE_SIZE \
+ ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t))
+
+/** Increment an index modulo the queue size (LEPP_COMP_QUEUE_SIZE).
+ *
+ * @param var Modifiable lvalue holding the index; it is updated in place.
+ * The argument is expanded three times, so it must not have side effects.
+ *
+ * NOTE(review): presumably __insn_mnz(a, b) yields b when a is nonzero and
+ * zero otherwise, which makes the index wrap to 0 after
+ * LEPP_COMP_QUEUE_SIZE - 1 -- confirm against the TILE intrinsics reference.
+ */
+#define LEPP_QINC(var) \
+ ((var) = __insn_mnz((var) - (LEPP_COMP_QUEUE_SIZE - 1), (var) + 1))
+
+/** A queue used to convey egress commands from the client to LEPP. */
+typedef struct
+{
+ /** Index of first completion not yet processed by user code.
+ * If this is equal to comp_busy, there are no such completions.
+ *
+ * NOTE: This is only read/written by the user.
+ */
+ unsigned int comp_head;
+
+ /** Index of first completion record not yet completed.
+ * If this is equal to comp_tail, there are no such completions.
+ * This index gets advanced (modulo LEPP_COMP_QUEUE_SIZE) whenever
+ * a command with the 'send_completion' bit set is finished.
+ *
+ * NOTE: This is only written by LEPP, only read by the user.
+ */
+ volatile unsigned int comp_busy;
+
+ /** Index of the first empty slot in the completion ring.
+ * Entries from this up to but not including comp_head (in ring order)
+ * can be filled in with completion data.
+ *
+ * NOTE: This is only read/written by the user.
+ */
+ unsigned int comp_tail;
+
+ /** Byte index of first command enqueued for LEPP but not yet processed.
+ *
+ * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
+ *
+ * NOTE: LEPP advances this counter as soon as it no longer needs
+ * the cmds[] storage for this entry, but the transfer is not actually
+ * complete (i.e. the buffer pointed to by the command is no longer
+ * needed) until comp_busy advances.
+ *
+ * If this is equal to cmd_tail, the ring is empty.
+ *
+ * NOTE: This is only written by LEPP, only read by the user.
+ */
+ volatile unsigned int cmd_head;
+
+ /** Byte index of first empty slot in the command ring. This field can
+ * be incremented up to but not equal to cmd_head (because that would
+ * mean the ring is empty).
+ *
+ * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
+ *
+ * NOTE: This is read/written by the user, only read by LEPP.
+ */
+ volatile unsigned int cmd_tail;
+
+ /** A ring of variable-sized egress DMA commands.
+ *
+ * NOTE: Only written by the user, only read by LEPP.
+ */
+ char cmds[LEPP_CMD_QUEUE_BYTES]
+ __attribute__((aligned(CHIP_L2_LINE_SIZE())));
+
+ /** A ring of user completion data.
+ * NOTE: Only read/written by the user.
+ */
+ lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE]
+ __attribute__((aligned(CHIP_L2_LINE_SIZE())));
+} lepp_queue_t;
+
+
+/** Internal helper: compute how many more entries a ring buffer can
+ * accept, given its head and tail indices. One slot always stays unused
+ * as a sentinel.
+ */
+static inline unsigned int
+_lepp_num_free_slots(unsigned int head, unsigned int tail)
+{
+ /*
+ * The sentinel slot lets "empty" be told apart from "full", so the
+ * answer is (head - tail - 1) modulo LEPP_COMP_QUEUE_SIZE. The wrap
+ * is applied with a compare-and-add rather than a slow % operation.
+ */
+ unsigned int free_slots = head - tail - 1;
+
+ if (head <= tail)
+   free_slots += LEPP_COMP_QUEUE_SIZE;
+ return free_slots;
+}
+
+
+/** Report how many additional completion entries can be enqueued on @p q,
+ * based on its comp_head and comp_tail indices.
+ */
+static inline unsigned int
+lepp_num_free_comp_slots(const lepp_queue_t* q)
+{
+ const unsigned int head = q->comp_head;
+ const unsigned int tail = q->comp_tail;
+
+ return _lepp_num_free_slots(head, tail);
+}
+
+/** Difference of two indices, wrapped by LEPP_COMP_QUEUE_SIZE.
+ *
+ * @param v1 Index to subtract from (presumably a completion-ring index).
+ * @param v2 Index to subtract.
+ * @return v1 - v2 when that difference is non-negative; otherwise the
+ * difference plus LEPP_COMP_QUEUE_SIZE.
+ *
+ * NOTE(review): assumes a 32-bit int and an arithmetic (sign-propagating)
+ * right shift of negative values, which C leaves implementation-defined --
+ * confirm for every compiler this header is built with.
+ */
+static inline int
+lepp_qsub(int v1, int v2)
+{
+ int delta = v1 - v2;
+ /* (delta >> 31) acts as a mask: all ones if delta < 0, else zero. */
+ return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE);
+}
+
+
+/** FIXME: Check this from linux, via a new "pwrite()" call. */
+#define LIPP_VERSION 1
+
+
+/** We use exactly two bytes of alignment padding. */
+#define LIPP_PACKET_PADDING 2
+
+/** The minimum size of a "small" buffer (including the padding). */
+#define LIPP_SMALL_PACKET_SIZE 128
+
+/*
+ * NOTE: The following two values should total to less than around
+ * 13582, to keep the total size used for "lipp_state_t" below 64K.
+ */
+
+/** The maximum number of "small" buffers.
+ * This is enough for 53 network cpus with 128 credits. Note that
+ * if these are exhausted, we will fall back to using large buffers.
+ */
+#define LIPP_SMALL_BUFFERS 6785
+
+/** The maximum number of "large" buffers.
+ * This is enough for 53 network cpus with 128 credits.
+ */
+#define LIPP_LARGE_BUFFERS 6785
+
+#endif /* __DRV_XGBE_INTF_H__ */
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
new file mode 100644
index 00000000..72ec1e97
--- /dev/null
+++ b/arch/tile/include/hv/hypervisor.h
@@ -0,0 +1,2427 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file hypervisor.h
+ * The hypervisor's public API.
+ */
+
+#ifndef _TILE_HV_H
+#define _TILE_HV_H
+
+#include <arch/chip.h>
+
+/* Linux builds want unsigned long constants, but assembler wants numbers */
+#ifdef __ASSEMBLER__
+/** One, for assembler */
+#define __HV_SIZE_ONE 1
+#elif !defined(__tile__) && CHIP_VA_WIDTH() > 32
+/** One, for 64-bit on host */
+#define __HV_SIZE_ONE 1ULL
+#else
+/** One, for Linux */
+#define __HV_SIZE_ONE 1UL
+#endif
+
+/** The log2 of the span of a level-1 page table, in bytes.
+ */
+#define HV_LOG2_L1_SPAN 32
+
+/** The span of a level-1 page table, in bytes.
+ */
+#define HV_L1_SPAN (__HV_SIZE_ONE << HV_LOG2_L1_SPAN)
+
+/** The log2 of the size of small pages, in bytes. This value should
+ * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL).
+ */
+#define HV_LOG2_PAGE_SIZE_SMALL 16
+
+/** The size of small pages, in bytes. This value should be verified
+ * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL).
+ */
+#define HV_PAGE_SIZE_SMALL (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_SMALL)
+
+/** The log2 of the size of large pages, in bytes. This value should be
+ * verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE).
+ */
+#define HV_LOG2_PAGE_SIZE_LARGE 24
+
+/** The size of large pages, in bytes. This value should be verified
+ * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE).
+ */
+#define HV_PAGE_SIZE_LARGE (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_LARGE)
+
+/** The log2 of the granularity at which page tables must be aligned;
+ * in other words, the CPA for a page table must have this many zero
+ * bits at the bottom of the address.
+ */
+#define HV_LOG2_PAGE_TABLE_ALIGN 11
+
+/** The granularity at which page tables must be aligned.
+ */
+#define HV_PAGE_TABLE_ALIGN (__HV_SIZE_ONE << HV_LOG2_PAGE_TABLE_ALIGN)
+
+/** Normal start of hypervisor glue in client physical memory. */
+#define HV_GLUE_START_CPA 0x10000
+
+/** This much space is reserved at HV_GLUE_START_CPA
+ * for the hypervisor glue. The client program must start at
+ * some address higher than this, and in particular the address of
+ * its text section should be equal to zero modulo HV_PAGE_SIZE_LARGE
+ * so that relative offsets to the HV glue are correct.
+ */
+#define HV_GLUE_RESERVED_SIZE 0x10000
+
+/** Each entry in the hv dispatch array takes this many bytes. */
+#define HV_DISPATCH_ENTRY_SIZE 32
+
+/** Version of the hypervisor interface defined by this file */
+#define _HV_VERSION 11
+
+/* Index into hypervisor interface dispatch code blocks.
+ *
+ * Hypervisor calls are invoked from user space by calling code
+ * at an address HV_BASE_ADDRESS + (index) * HV_DISPATCH_ENTRY_SIZE,
+ * where index is one of these enum values.
+ *
+ * Normally a supervisor is expected to produce a set of symbols
+ * starting at HV_BASE_ADDRESS that obey this convention, but a user
+ * program could call directly through function pointers if desired.
+ *
+ * These numbers are part of the binary API and will not be changed
+ * without updating HV_VERSION, which should be a rare event.
+ */
+
+/** reserved. */
+#define _HV_DISPATCH_RESERVED 0
+
+/** hv_init */
+#define HV_DISPATCH_INIT 1
+
+/** hv_install_context */
+#define HV_DISPATCH_INSTALL_CONTEXT 2
+
+/** hv_sysconf */
+#define HV_DISPATCH_SYSCONF 3
+
+/** hv_get_rtc */
+#define HV_DISPATCH_GET_RTC 4
+
+/** hv_set_rtc */
+#define HV_DISPATCH_SET_RTC 5
+
+/** hv_flush_asid */
+#define HV_DISPATCH_FLUSH_ASID 6
+
+/** hv_flush_page */
+#define HV_DISPATCH_FLUSH_PAGE 7
+
+/** hv_flush_pages */
+#define HV_DISPATCH_FLUSH_PAGES 8
+
+/** hv_restart */
+#define HV_DISPATCH_RESTART 9
+
+/** hv_halt */
+#define HV_DISPATCH_HALT 10
+
+/** hv_power_off */
+#define HV_DISPATCH_POWER_OFF 11
+
+/** hv_inquire_physical */
+#define HV_DISPATCH_INQUIRE_PHYSICAL 12
+
+/** hv_inquire_memory_controller */
+#define HV_DISPATCH_INQUIRE_MEMORY_CONTROLLER 13
+
+/** hv_inquire_virtual */
+#define HV_DISPATCH_INQUIRE_VIRTUAL 14
+
+/** hv_inquire_asid */
+#define HV_DISPATCH_INQUIRE_ASID 15
+
+/** hv_nanosleep */
+#define HV_DISPATCH_NANOSLEEP 16
+
+/** hv_console_read_if_ready */
+#define HV_DISPATCH_CONSOLE_READ_IF_READY 17
+
+/** hv_console_write */
+#define HV_DISPATCH_CONSOLE_WRITE 18
+
+/** hv_downcall_dispatch */
+#define HV_DISPATCH_DOWNCALL_DISPATCH 19
+
+/** hv_inquire_topology */
+#define HV_DISPATCH_INQUIRE_TOPOLOGY 20
+
+/** hv_fs_findfile */
+#define HV_DISPATCH_FS_FINDFILE 21
+
+/** hv_fs_fstat */
+#define HV_DISPATCH_FS_FSTAT 22
+
+/** hv_fs_pread */
+#define HV_DISPATCH_FS_PREAD 23
+
+/** hv_physaddr_read64 */
+#define HV_DISPATCH_PHYSADDR_READ64 24
+
+/** hv_physaddr_write64 */
+#define HV_DISPATCH_PHYSADDR_WRITE64 25
+
+/** hv_get_command_line */
+#define HV_DISPATCH_GET_COMMAND_LINE 26
+
+/** hv_set_caching */
+#define HV_DISPATCH_SET_CACHING 27
+
+/** hv_bzero_page */
+#define HV_DISPATCH_BZERO_PAGE 28
+
+/** hv_register_message_state */
+#define HV_DISPATCH_REGISTER_MESSAGE_STATE 29
+
+/** hv_send_message */
+#define HV_DISPATCH_SEND_MESSAGE 30
+
+/** hv_receive_message */
+#define HV_DISPATCH_RECEIVE_MESSAGE 31
+
+/** hv_inquire_context */
+#define HV_DISPATCH_INQUIRE_CONTEXT 32
+
+/** hv_start_all_tiles */
+#define HV_DISPATCH_START_ALL_TILES 33
+
+/** hv_dev_open */
+#define HV_DISPATCH_DEV_OPEN 34
+
+/** hv_dev_close */
+#define HV_DISPATCH_DEV_CLOSE 35
+
+/** hv_dev_pread */
+#define HV_DISPATCH_DEV_PREAD 36
+
+/** hv_dev_pwrite */
+#define HV_DISPATCH_DEV_PWRITE 37
+
+/** hv_dev_poll */
+#define HV_DISPATCH_DEV_POLL 38
+
+/** hv_dev_poll_cancel */
+#define HV_DISPATCH_DEV_POLL_CANCEL 39
+
+/** hv_dev_preada */
+#define HV_DISPATCH_DEV_PREADA 40
+
+/** hv_dev_pwritea */
+#define HV_DISPATCH_DEV_PWRITEA 41
+
+/** hv_flush_remote */
+#define HV_DISPATCH_FLUSH_REMOTE 42
+
+/** hv_console_putc */
+#define HV_DISPATCH_CONSOLE_PUTC 43
+
+/** hv_inquire_tiles */
+#define HV_DISPATCH_INQUIRE_TILES 44
+
+/** hv_confstr */
+#define HV_DISPATCH_CONFSTR 45
+
+/** hv_reexec */
+#define HV_DISPATCH_REEXEC 46
+
+/** hv_set_command_line */
+#define HV_DISPATCH_SET_COMMAND_LINE 47
+
+#if !CHIP_HAS_IPI()
+
+/** hv_clear_intr */
+#define HV_DISPATCH_CLEAR_INTR 48
+
+/** hv_enable_intr */
+#define HV_DISPATCH_ENABLE_INTR 49
+
+/** hv_disable_intr */
+#define HV_DISPATCH_DISABLE_INTR 50
+
+/** hv_raise_intr */
+#define HV_DISPATCH_RAISE_INTR 51
+
+/** hv_trigger_ipi */
+#define HV_DISPATCH_TRIGGER_IPI 52
+
+#endif /* !CHIP_HAS_IPI() */
+
+/** hv_store_mapping */
+#define HV_DISPATCH_STORE_MAPPING 53
+
+/** hv_inquire_realpa */
+#define HV_DISPATCH_INQUIRE_REALPA 54
+
+/** hv_flush_all */
+#define HV_DISPATCH_FLUSH_ALL 55
+
+#if CHIP_HAS_IPI()
+/** hv_get_ipi_pte */
+#define HV_DISPATCH_GET_IPI_PTE 56
+#endif
+
+/** One more than the largest dispatch value */
+#define _HV_DISPATCH_END 57
+
+
+#ifndef __ASSEMBLER__
+
+#ifdef __KERNEL__
+#include <asm/types.h>
+typedef u32 __hv32; /**< 32-bit value */
+typedef u64 __hv64; /**< 64-bit value */
+#else
+#include <stdint.h>
+typedef uint32_t __hv32; /**< 32-bit value */
+typedef uint64_t __hv64; /**< 64-bit value */
+#endif
+
+
+/** Hypervisor physical address. */
+typedef __hv64 HV_PhysAddr;
+
+#if CHIP_VA_WIDTH() > 32
+/** Hypervisor virtual address. */
+typedef __hv64 HV_VirtAddr;
+#else
+/** Hypervisor virtual address. */
+typedef __hv32 HV_VirtAddr;
+#endif /* CHIP_VA_WIDTH() > 32 */
+
+/** Hypervisor ASID. */
+typedef unsigned int HV_ASID;
+
+/** Hypervisor tile location for a memory access
+ * ("location overridden target").
+ */
+typedef unsigned int HV_LOTAR;
+
+/** Hypervisor size of a page. */
+typedef unsigned long HV_PageSize;
+
+/** A page table entry.
+ */
+typedef struct
+{
+ __hv64 val; /**< Value of PTE */
+} HV_PTE;
+
+/** Hypervisor error code. */
+typedef int HV_Errno;
+
+#endif /* !__ASSEMBLER__ */
+
+#define HV_OK 0 /**< No error */
+#define HV_EINVAL -801 /**< Invalid argument */
+#define HV_ENODEV -802 /**< No such device */
+#define HV_ENOENT -803 /**< No such file or directory */
+#define HV_EBADF -804 /**< Bad file number */
+#define HV_EFAULT -805 /**< Bad address */
+#define HV_ERECIP -806 /**< Bad recipients */
+#define HV_E2BIG -807 /**< Message too big */
+#define HV_ENOTSUP -808 /**< Service not supported */
+#define HV_EBUSY -809 /**< Device busy */
+#define HV_ENOSYS -810 /**< Invalid syscall */
+#define HV_EPERM -811 /**< No permission */
+#define HV_ENOTREADY -812 /**< Device not ready */
+#define HV_EIO -813 /**< I/O error */
+#define HV_ENOMEM -814 /**< Out of memory */
+#define HV_EAGAIN -815 /**< Try again */
+
+#define HV_ERR_MAX -801 /**< Largest HV error code */
+#define HV_ERR_MIN -815 /**< Smallest HV error code */
+
+#ifndef __ASSEMBLER__
+
+/** Pass HV_VERSION to hv_init to request this version of the interface. */
+typedef enum { HV_VERSION = _HV_VERSION } HV_VersionNumber;
+
+/** Initializes the hypervisor.
+ *
+ * @param interface_version_number The version of the hypervisor interface
+ * that this program expects, typically HV_VERSION.
+ * @param chip_num Architecture number of the chip the client was built for.
+ * @param chip_rev_num Revision number of the chip the client was built for.
+ */
+void hv_init(HV_VersionNumber interface_version_number,
+ int chip_num, int chip_rev_num);
+
+
+/** Queries we can make for hv_sysconf().
+ *
+ * These numbers are part of the binary API and guaranteed not to change.
+ */
+typedef enum {
+ /** An invalid value; do not use. */
+ _HV_SYSCONF_RESERVED = 0,
+
+ /** The length of the glue section containing the hv_ procs, in bytes. */
+ HV_SYSCONF_GLUE_SIZE = 1,
+
+ /** The size of small pages, in bytes. */
+ HV_SYSCONF_PAGE_SIZE_SMALL = 2,
+
+ /** The size of large pages, in bytes. */
+ HV_SYSCONF_PAGE_SIZE_LARGE = 3,
+
+ /** Processor clock speed, in hertz. */
+ HV_SYSCONF_CPU_SPEED = 4,
+
+ /** Processor temperature, in degrees Kelvin. The value
+ * HV_SYSCONF_TEMP_KTOC may be subtracted from this to get degrees
+ * Celsius. If that Celsius value is HV_SYSCONF_OVERTEMP, this indicates
+ * that the temperature has hit an upper limit and is no longer being
+ * accurately tracked.
+ */
+ HV_SYSCONF_CPU_TEMP = 5,
+
+ /** Board temperature, in degrees Kelvin. The value
+ * HV_SYSCONF_TEMP_KTOC may be subtracted from this to get degrees
+ * Celsius. If that Celsius value is HV_SYSCONF_OVERTEMP, this indicates
+ * that the temperature has hit an upper limit and is no longer being
+ * accurately tracked.
+ */
+ HV_SYSCONF_BOARD_TEMP = 6
+
+} HV_SysconfQuery;
+
+/** Offset to subtract from returned Kelvin temperature to get degrees
+ Celsius. */
+#define HV_SYSCONF_TEMP_KTOC 273
+
+/** Pseudo-temperature value indicating that the temperature has
+ * pegged at its upper limit and is no longer accurate; note that this is
+ * the value after subtracting HV_SYSCONF_TEMP_KTOC. */
+#define HV_SYSCONF_OVERTEMP 999
+
+/** Query a configuration value from the hypervisor.
+ * @param query Which value is requested (HV_SYSCONF_xxx).
+ * @return The requested value, or -1 if the requested value is illegal or
+ * unavailable.
+ */
+long hv_sysconf(HV_SysconfQuery query);
+
+
+/** Queries we can make for hv_confstr().
+ *
+ * These numbers are part of the binary API and guaranteed not to change.
+ */
+typedef enum {
+ /** An invalid value; do not use. */
+ _HV_CONFSTR_RESERVED = 0,
+
+ /** Board part number. */
+ HV_CONFSTR_BOARD_PART_NUM = 1,
+
+ /** Board serial number. */
+ HV_CONFSTR_BOARD_SERIAL_NUM = 2,
+
+ /** Chip serial number. */
+ HV_CONFSTR_CHIP_SERIAL_NUM = 3,
+
+ /** Board revision level. */
+ HV_CONFSTR_BOARD_REV = 4,
+
+ /** Hypervisor software version. */
+ HV_CONFSTR_HV_SW_VER = 5,
+
+ /** The name for this chip model. */
+ HV_CONFSTR_CHIP_MODEL = 6,
+
+ /** Human-readable board description. */
+ HV_CONFSTR_BOARD_DESC = 7,
+
+ /** Human-readable description of the hypervisor configuration. */
+ HV_CONFSTR_HV_CONFIG = 8,
+
+ /** Human-readable version string for the boot image (for instance,
+ * who built it and when, what configuration file was used). */
+ HV_CONFSTR_HV_CONFIG_VER = 9,
+
+ /** Mezzanine part number. */
+ HV_CONFSTR_MEZZ_PART_NUM = 10,
+
+ /** Mezzanine serial number. */
+ HV_CONFSTR_MEZZ_SERIAL_NUM = 11,
+
+ /** Mezzanine revision level. */
+ HV_CONFSTR_MEZZ_REV = 12,
+
+ /** Human-readable mezzanine description. */
+ HV_CONFSTR_MEZZ_DESC = 13,
+
+ /** Control path for the onboard network switch. */
+ HV_CONFSTR_SWITCH_CONTROL = 14,
+
+ /** Chip revision level. */
+ HV_CONFSTR_CHIP_REV = 15
+
+} HV_ConfstrQuery;
+
+/** Query a configuration string from the hypervisor.
+ *
+ * @param query Identifier for the specific string to be retrieved
+ * (HV_CONFSTR_xxx).
+ * @param buf Buffer in which to place the string.
+ * @param len Length of the buffer.
+ * @return If query is valid, then the length of the corresponding string,
+ * including the trailing null; if this is greater than len, the string
+ * was truncated. If query is invalid, HV_EINVAL. If the specified
+ * buffer is not writable by the client, HV_EFAULT.
+ */
+int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len);
+
+/** Tile coordinate */
+typedef struct
+{
+ /** X coordinate, relative to supervisor's top-left coordinate */
+ int x;
+
+ /** Y coordinate, relative to supervisor's top-left coordinate */
+ int y;
+} HV_Coord;
+
+
+#if CHIP_HAS_IPI()
+
+/** Get the PTE for sending an IPI to a particular tile.
+ *
+ * @param tile Tile which will receive the IPI.
+ * @param pl Indicates which IPI registers: 0 = IPI_0, 1 = IPI_1.
+ * @param pte Filled with resulting PTE.
+ * @result Zero if no error, non-zero for invalid parameters.
+ */
+int hv_get_ipi_pte(HV_Coord tile, int pl, HV_PTE* pte);
+
+#else /* !CHIP_HAS_IPI() */
+
+/** A set of interrupts. */
+typedef __hv32 HV_IntrMask;
+
+/** The low interrupt numbers are reserved for use by the client in
+ * delivering IPIs. Any interrupt numbers higher than this value are
+ * reserved for use by HV device drivers. */
+#define HV_MAX_IPI_INTERRUPT 7
+
+/** Enable a set of device interrupts.
+ *
+ * @param enab_mask Bitmap of interrupts to enable.
+ */
+void hv_enable_intr(HV_IntrMask enab_mask);
+
+/** Disable a set of device interrupts.
+ *
+ * @param disab_mask Bitmap of interrupts to disable.
+ */
+void hv_disable_intr(HV_IntrMask disab_mask);
+
+/** Clear a set of device interrupts.
+ *
+ * @param clear_mask Bitmap of interrupts to clear.
+ */
+void hv_clear_intr(HV_IntrMask clear_mask);
+
+/** Raise a set of device interrupts.
+ *
+ * @param raise_mask Bitmap of interrupts to raise.
+ */
+void hv_raise_intr(HV_IntrMask raise_mask);
+
+/** Trigger a one-shot interrupt on some tile
+ *
+ * @param tile Which tile to interrupt.
+ * @param interrupt Interrupt number to trigger; must be between 0 and
+ * HV_MAX_IPI_INTERRUPT.
+ * @return HV_OK on success, or a hypervisor error code.
+ */
+HV_Errno hv_trigger_ipi(HV_Coord tile, int interrupt);
+
+#endif /* !CHIP_HAS_IPI() */
+
+/** Store a memory mapping in debug memory so that an external debugger can
+ * read it. A maximum of 16 entries can be stored.
+ *
+ * @param va VA of memory that is mapped.
+ * @param len Length of mapped memory.
+ * @param pa PA of memory that is mapped.
+ * @return 0 on success, -1 if the maximum number of mappings is exceeded.
+ */
+int hv_store_mapping(HV_VirtAddr va, unsigned int len, HV_PhysAddr pa);
+
+/** Given a client PA and a length, return its real (HV) PA.
+ *
+ * @param cpa Client physical address.
+ * @param len Length of mapped memory.
+ * @return physical address, or -1 if cpa or len is not valid.
+ */
+HV_PhysAddr hv_inquire_realpa(HV_PhysAddr cpa, unsigned int len);
+
+/** RTC return flag for no RTC chip present.
+ */
+#define HV_RTC_NO_CHIP 0x1
+
+/** RTC return flag for low-voltage condition, indicating that battery had
+ * died and time read is unreliable.
+ */
+#define HV_RTC_LOW_VOLTAGE 0x2
+
+/** Date/Time of day */
+typedef struct {
+#if CHIP_WORD_SIZE() > 32
+ __hv64 tm_sec; /**< Seconds, 0-59 */
+ __hv64 tm_min; /**< Minutes, 0-59 */
+ __hv64 tm_hour; /**< Hours, 0-23 */
+ __hv64 tm_mday; /**< Day of month, 0-30 */
+ __hv64 tm_mon; /**< Month, 0-11 */
+ __hv64 tm_year; /**< Years since 1900, 0-199 */
+ __hv64 flags; /**< Return flags, 0 if no error */
+#else
+ __hv32 tm_sec; /**< Seconds, 0-59 */
+ __hv32 tm_min; /**< Minutes, 0-59 */
+ __hv32 tm_hour; /**< Hours, 0-23 */
+ __hv32 tm_mday; /**< Day of month, 0-30 */
+ __hv32 tm_mon; /**< Month, 0-11 */
+ __hv32 tm_year; /**< Years since 1900, 0-199 */
+ __hv32 flags; /**< Return flags, 0 if no error */
+#endif
+} HV_RTCTime;
+
+/** Read the current time-of-day clock.
+ * @return HV_RTCTime of current time (GMT).
+ */
+HV_RTCTime hv_get_rtc(void);
+
+
+/** Set the current time-of-day clock.
+ * @param time time to reset time-of-day to (GMT).
+ */
+void hv_set_rtc(HV_RTCTime time);
+
+/** Installs a context, comprising a page table and other attributes.
+ *
+ * Once this service completes, page_table will be used to translate
+ * subsequent virtual address references to physical memory.
+ *
+ * Installing a context does not cause an implicit TLB flush. Before
+ * reusing an ASID value for a different address space, the client is
+ * expected to flush old references from the TLB with hv_flush_asid().
+ * (Alternately, hv_flush_all() may be used to flush many ASIDs at once.)
+ * After invalidating a page table entry, changing its attributes, or
+ * changing its target CPA, the client is expected to flush old references
+ * from the TLB with hv_flush_page() or hv_flush_pages(). Making a
+ * previously invalid page valid does not require a flush.
+ *
+ * Specifying an invalid ASID, or an invalid CPA (client physical address)
+ * (either as page_table_pointer, or within the referenced table),
+ * or another page table data item documented above as illegal may
+ * lead to client termination; since the validation of the table is
+ * done as needed, this may happen before the service returns, or at
+ * some later time, or never, depending upon the client's pattern of
+ * memory references. Page table entries which supply translations for
+ * invalid virtual addresses may result in client termination, or may
+ * be silently ignored. "Invalid" in this context means a value which
+ * was not provided to the client via the appropriate hv_inquire_* routine.
+ *
+ * To support changing the instruction VAs at the same time as
+ * installing the new page table, this call explicitly supports
+ * setting the "lr" register to a different address and then jumping
+ * directly to the hv_install_context() routine. In this case, the
+ * new page table does not need to contain any mapping for the
+ * hv_install_context address itself.
+ *
+ * @param page_table Root of the page table.
+ * @param access PTE providing info on how to read the page table. This
+ * value must be consistent between multiple tiles sharing a page table,
+ * and must also be consistent with any virtual mappings the client
+ * may be using to access the page table.
+ * @param asid HV_ASID the page table is to be used for.
+ * @param flags Context flags, denoting attributes or privileges of the
+ * current context (HV_CTX_xxx).
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid,
+ __hv32 flags);
+
+#endif /* !__ASSEMBLER__ */
+
+#define HV_CTX_DIRECTIO 0x1 /**< Direct I/O requests are accepted from
+ PL0. */
+
+#ifndef __ASSEMBLER__
+
+/** Value returned from hv_inquire_context(). */
+typedef struct
+{
+ /** Physical address of page table */
+ HV_PhysAddr page_table;
+
+ /** PTE which defines access method for top of page table */
+ HV_PTE access;
+
+ /** ASID associated with this page table */
+ HV_ASID asid;
+
+ /** Context flags */
+ __hv32 flags;
+} HV_Context;
+
+/** Retrieve information about the currently installed context.
+ * @return The data passed to the last successful hv_install_context call.
+ */
+HV_Context hv_inquire_context(void);
+
+
+/** Flushes all translations associated with the named address space
+ * identifier from the TLB and any other hypervisor data structures.
+ * Translations installed with the "global" bit are not flushed.
+ *
+ * Specifying an invalid ASID may lead to client termination. "Invalid"
+ * in this context means a value which was not provided to the client
+ * via <tt>hv_inquire_asid()</tt>.
+ *
+ * @param asid HV_ASID whose entries are to be flushed.
+ * @return Zero on success, or a hypervisor error code on failure.
+*/
+int hv_flush_asid(HV_ASID asid);
+
+
+/** Flushes all translations associated with the named virtual address
+ * and page size from the TLB and other hypervisor data structures. Only
+ * pages visible to the current ASID are affected; note that this includes
+ * global pages in addition to pages specific to the current ASID.
+ *
+ * The supplied VA need not be aligned; it may be anywhere in the
+ * subject page.
+ *
+ * Specifying an invalid virtual address may lead to client termination,
+ * or may silently succeed. "Invalid" in this context means a value
+ * which was not provided to the client via hv_inquire_virtual.
+ *
+ * @param address Address of the page to flush.
+ * @param page_size Size of pages to assume.
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_flush_page(HV_VirtAddr address, HV_PageSize page_size);
+
+
+/** Flushes all translations associated with the named virtual address range
+ * and page size from the TLB and other hypervisor data structures. Only
+ * pages visible to the current ASID are affected; note that this includes
+ * global pages in addition to pages specific to the current ASID.
+ *
+ * The supplied VA need not be aligned; it may be anywhere in the
+ * subject page.
+ *
+ * Specifying an invalid virtual address may lead to client termination,
+ * or may silently succeed. "Invalid" in this context means a value
+ * which was not provided to the client via hv_inquire_virtual.
+ *
+ * @param start Address to flush.
+ * @param page_size Size of pages to assume.
+ * @param size The number of bytes to flush. Any page in the range
+ * [start, start + size) will be flushed from the TLB.
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_flush_pages(HV_VirtAddr start, HV_PageSize page_size,
+ unsigned long size);
+
+
+/** Flushes all non-global translations (if preserve_global is true),
+ * or absolutely all translations (if preserve_global is false).
+ *
+ * @param preserve_global Non-zero if we want to preserve "global" mappings.
+ * @return Zero on success, or a hypervisor error code on failure.
+*/
+int hv_flush_all(int preserve_global);
+
+
+/** Restart machine with optional restart command and optional args.
+ * @param cmd Const pointer to command to restart with, or NULL
+ * @param args Const pointer to argument string to restart with, or NULL
+ */
+void hv_restart(HV_VirtAddr cmd, HV_VirtAddr args);
+
+
+/** Halt machine. */
+void hv_halt(void);
+
+
+/** Power off machine. */
+void hv_power_off(void);
+
+
+/** Re-enter virtual-is-physical memory translation mode and restart
+ * execution at a given address.
+ * @param entry Client physical address at which to begin execution.
+ * @return A hypervisor error code on failure; if the operation is
+ * successful the call does not return.
+ */
+int hv_reexec(HV_PhysAddr entry);
+
+
+/** Chip topology */
+typedef struct
+{
+ /** Relative coordinates of the querying tile */
+ HV_Coord coord;
+
+ /** Width of the querying supervisor's tile rectangle. */
+ int width;
+
+ /** Height of the querying supervisor's tile rectangle. */
+ int height;
+
+} HV_Topology;
+
+/** Returns information about the tile coordinate system.
+ *
+ * Each supervisor is given a rectangle of tiles it potentially controls.
+ * These tiles are labeled using a relative coordinate system with (0,0) as
+ * the upper left tile regardless of their physical location on the chip.
+ *
+ * This call returns both the size of that rectangle and the position
+ * within that rectangle of the querying tile.
+ *
+ * Not all tiles within that rectangle may be available to the supervisor;
+ * to get the precise set of available tiles, you must also call
+ * hv_inquire_tiles(HV_INQ_TILES_AVAIL, ...).
+ **/
+HV_Topology hv_inquire_topology(void);
+
+/** Sets of tiles we can retrieve with hv_inquire_tiles().
+ *
+ * These numbers are part of the binary API and guaranteed not to change.
+ */
+typedef enum {
+ /** An invalid value; do not use. */
+ _HV_INQ_TILES_RESERVED = 0,
+
+ /** All available tiles within the supervisor's tile rectangle. */
+ HV_INQ_TILES_AVAIL = 1,
+
+ /** The set of tiles used for hash-for-home caching. */
+ HV_INQ_TILES_HFH_CACHE = 2,
+
+ /** The set of tiles that can be legally used as a LOTAR for a PTE. */
+ HV_INQ_TILES_LOTAR = 3
+} HV_InqTileSet;
+
+/** Returns specific information about various sets of tiles within the
+ * supervisor's tile rectangle.
+ *
+ * @param set Which set of tiles to retrieve.
+ * @param cpumask Pointer to a returned bitmask (in row-major order,
+ * supervisor-relative) of tiles. The low bit of the first word
+ * corresponds to the tile at the upper left-hand corner of the
+ * supervisor's rectangle. In order for the supervisor to know the
+ * buffer length to supply, it should first call hv_inquire_topology.
+ * @param length Number of bytes available for the returned bitmask.
+ **/
+HV_Errno hv_inquire_tiles(HV_InqTileSet set, HV_VirtAddr cpumask, int length);
+
+
+/** An identifier for a memory controller. Multiple memory controllers
+ * may be connected to one chip, and this uniquely identifies each one.
+ */
+typedef int HV_MemoryController;
+
+/** A range of physical memory. */
+typedef struct
+{
+ HV_PhysAddr start; /**< Starting address. */
+ __hv64 size; /**< Size in bytes. */
+ HV_MemoryController controller; /**< Which memory controller owns this. */
+} HV_PhysAddrRange;
+
+/** Returns information about a range of physical memory.
+ *
+ * hv_inquire_physical() returns one of the ranges of client
+ * physical addresses which are available to this client.
+ *
+ * The first range is retrieved by specifying an idx of 0, and
+ * successive ranges are returned with subsequent idx values. Ranges
+ * are ordered by increasing start address (i.e., as idx increases,
+ * so does start), do not overlap, and do not touch (i.e., the
+ * available memory is described with the fewest possible ranges).
+ *
+ * If an out-of-range idx value is specified, the returned size will be zero.
+ * A client can count the number of ranges by increasing idx until the
+ * returned size is zero. There will always be at least one valid range.
+ *
+ * Some clients might not be prepared to deal with more than one
+ * physical address range; they still ought to call this routine and
+ * issue a warning message if they're given more than one range, on the
+ * theory that whoever configured the hypervisor to provide that memory
+ * should know that it's being wasted.
+ */
+HV_PhysAddrRange hv_inquire_physical(int idx);
+
+/** Possible DIMM types. */
+typedef enum
+{
+ NO_DIMM = 0, /**< No DIMM */
+ DDR2 = 1, /**< DDR2 */
+ DDR3 = 2 /**< DDR3 */
+} HV_DIMM_Type;
+
+#ifdef __tilegx__
+
+/** Log2 of minimum DIMM bytes supported by the memory controller. */
+#define HV_MSH_MIN_DIMM_SIZE_SHIFT 29
+
+/** Max number of DIMMs contained by one memory controller. */
+#define HV_MSH_MAX_DIMMS 8
+
+#else
+
+/** Log2 of minimum DIMM bytes supported by the memory controller. */
+#define HV_MSH_MIN_DIMM_SIZE_SHIFT 26
+
+/** Max number of DIMMs contained by one memory controller. */
+#define HV_MSH_MAX_DIMMS 2
+
+#endif
+
+/** Number of bits to right-shift to get the DIMM type. */
+#define HV_DIMM_TYPE_SHIFT 0
+
+/** Bits to mask to get the DIMM type. */
+#define HV_DIMM_TYPE_MASK 0xf
+
+/** Number of bits to right-shift to get the DIMM size. */
+#define HV_DIMM_SIZE_SHIFT 4
+
+/** Bits to mask to get the DIMM size. */
+#define HV_DIMM_SIZE_MASK 0xf
+
+/** Memory controller information. */
+typedef struct
+{
+ HV_Coord coord; /**< Relative tile coordinates of the port used by a
+ specified tile to communicate with this controller. */
+ __hv64 speed; /**< Speed of this controller in bytes per second. */
+} HV_MemoryControllerInfo;
+
+/** Returns information about a particular memory controller.
+ *
+ * hv_inquire_memory_controller(coord,controller) returns information about a
+ * particular controller. Two pieces of information are returned:
+ * - The relative coordinates of the port on the controller that the specified
+ * tile would use to contact it. The relative coordinates may lie
+ * outside the supervisor's rectangle, i.e. the controller may not
+ * be attached to a node managed by the querying node's supervisor.
+ * In particular note that x or y may be negative.
+ * - The speed of the memory controller. (This is a not-to-exceed value
+ * based on the raw hardware data rate, and may not be achievable in
+ * practice; it is provided to give clients information on the relative
+ * performance of the available controllers.)
+ *
+ * Clients should avoid calling this interface with invalid values.
+ * A client who does may be terminated.
+ * @param coord Tile for which to calculate the relative port position.
+ * @param controller Index of the controller; identical to value returned
+ * from other routines like hv_inquire_physical.
+ * @return Information about the controller.
+ */
+HV_MemoryControllerInfo hv_inquire_memory_controller(HV_Coord coord,
+ int controller);
+
+
+/** A range of virtual memory. */
+typedef struct
+{
+ HV_VirtAddr start; /**< Starting address. */
+ __hv64 size; /**< Size in bytes. */
+} HV_VirtAddrRange;
+
+/** Returns information about a range of virtual memory.
+ *
+ * hv_inquire_virtual() returns one of the ranges of client
+ * virtual addresses which are available to this client.
+ *
+ * The first range is retrieved by specifying an idx of 0, and
+ * successive ranges are returned with subsequent idx values. Ranges
+ * are ordered by increasing start address (i.e., as idx increases,
+ * so does start), do not overlap, and do not touch (i.e., the
+ * available memory is described with the fewest possible ranges).
+ *
+ * If an out-of-range idx value is specified, the returned size will be zero.
+ * A client can count the number of ranges by increasing idx until the
+ * returned size is zero. There will always be at least one valid range.
+ *
+ * Some clients may well have various virtual addresses hardwired
+ * into themselves; for instance, their instruction stream may
+ * have been compiled expecting to live at a particular address.
+ * Such clients should use this interface to verify they've been
+ * given the virtual address space they expect, and issue a (potentially
+ * fatal) warning message otherwise.
+ *
+ * Note that the returned size is a __hv64, not a __hv32, so it is
+ * possible to express a single range spanning the entire 32-bit
+ * address space.
+ */
+HV_VirtAddrRange hv_inquire_virtual(int idx);
+
+
+/** A range of ASID values. */
+typedef struct
+{
+ HV_ASID start; /**< First ASID in the range. */
+ unsigned int size; /**< Number of ASIDs. Zero for an invalid range. */
+} HV_ASIDRange;
+
+/** Returns information about a range of ASIDs.
+ *
+ * hv_inquire_asid() returns one of the ranges of address
+ * space identifiers which are available to this client.
+ *
+ * The first range is retrieved by specifying an idx of 0, and
+ * successive ranges are returned with subsequent idx values. Ranges
+ * are ordered by increasing start value (i.e., as idx increases,
+ * so does start), do not overlap, and do not touch (i.e., the
+ * available ASIDs are described with the fewest possible ranges).
+ *
+ * If an out-of-range idx value is specified, the returned size will be zero.
+ * A client can count the number of ranges by increasing idx until the
+ * returned size is zero. There will always be at least one valid range.
+ */
+HV_ASIDRange hv_inquire_asid(int idx);
+
+
+/** Waits for at least the specified number of nanoseconds then returns.
+ *
+ * NOTE: this deprecated function currently assumes a 750 MHz clock,
+ * and is thus not generally suitable for use. New code should call
+ * hv_sysconf(HV_SYSCONF_CPU_SPEED), compute a cycle count to wait for,
+ * and delay by looping while checking the cycle counter SPR.
+ *
+ * @param nanosecs The number of nanoseconds to sleep.
+ */
+void hv_nanosleep(int nanosecs);
+
+
+/** Reads a character from the console without blocking.
+ *
+ * @return A value from 0-255 indicates the value successfully read.
+ * A negative value means no value was ready.
+ */
+int hv_console_read_if_ready(void);
+
+
+/** Writes a character to the console, blocking if the console is busy.
+ *
+ * This call cannot fail. If the console is broken for some reason,
+ * output will simply vanish.
+ * @param byte Character to write.
+ */
+void hv_console_putc(int byte);
+
+
+/** Writes a string to the console, blocking if the console is busy.
+ * @param bytes Pointer to characters to write.
+ * @param len Number of characters to write.
+ * @return Number of characters written, or HV_EFAULT if the buffer is invalid.
+ */
+int hv_console_write(HV_VirtAddr bytes, int len);
+
+
+/** Dispatch the next interrupt from the client downcall mechanism.
+ *
+ * The hypervisor uses downcalls to notify the client of asynchronous
+ * events. Some of these events are hypervisor-created (like incoming
+ * messages). Some are regular interrupts which initially occur in
+ * the hypervisor, and are normally handled directly by the client;
+ * when these occur in a client's interrupt critical section, they must
+ * be delivered through the downcall mechanism.
+ *
+ * A downcall is initially delivered to the client as an INTCTRL_CL
+ * interrupt, where CL is the client's PL. Upon entry to the INTCTRL_CL
+ * vector, the client must immediately invoke the hv_downcall_dispatch
+ * service. This service will not return; instead it will cause one of
+ * the client's actual downcall-handling interrupt vectors to be entered.
+ * The EX_CONTEXT registers in the client will be set so that when the
+ * client irets, it will return to the code which was interrupted by the
+ * INTCTRL_CL interrupt.
+ *
+ * Under some circumstances, the firing of INTCTRL_CL can race with
+ * the lowering of a device interrupt. In such a case, the
+ * hv_downcall_dispatch service may issue an iret instruction instead
+ * of entering one of the client's actual downcall-handling interrupt
+ * vectors. This will return execution to the location that was
+ * interrupted by INTCTRL_CL.
+ *
+ * Any saving of registers should be done by the actual handling
+ * vectors; no registers should be changed by the INTCTRL_CL handler.
+ * In particular, the client should not use a jal instruction to invoke
+ * the hv_downcall_dispatch service, as that would overwrite the client's
+ * lr register. Note that the hv_downcall_dispatch service may overwrite
+ * one or more of the client's system save registers.
+ *
+ * The client must not modify the INTCTRL_CL_STATUS SPR. The hypervisor
+ * will set this register to cause a downcall to happen, and will clear
+ * it when no further downcalls are pending.
+ *
+ * When a downcall vector is entered, the INTCTRL_CL interrupt will be
+ * masked. When the client is done processing a downcall, and is ready
+ * to accept another, it must unmask this interrupt; if more downcalls
+ * are pending, this will cause the INTCTRL_CL vector to be reentered.
+ * Currently the following interrupt vectors can be entered through a
+ * downcall:
+ *
+ * INT_MESSAGE_RCV_DWNCL (hypervisor message available)
+ * INT_DEV_INTR_DWNCL (device interrupt)
+ * INT_DMATLB_MISS_DWNCL (DMA TLB miss)
+ * INT_SNITLB_MISS_DWNCL (SNI TLB miss)
+ * INT_DMATLB_ACCESS_DWNCL (DMA TLB access violation)
+ */
+void hv_downcall_dispatch(void);
+
+#endif /* !__ASSEMBLER__ */
+
+/** We use actual interrupt vectors which never occur (they're only there
+ * to allow setting MPLs for related SPRs) for our downcall vectors.
+ */
+/** Message receive downcall interrupt vector */
+#define INT_MESSAGE_RCV_DWNCL INT_BOOT_ACCESS
+/** DMA TLB miss downcall interrupt vector */
+#define INT_DMATLB_MISS_DWNCL INT_DMA_ASID
+/** Static network processor instruction TLB miss downcall interrupt vector */
+#define INT_SNITLB_MISS_DWNCL INT_SNI_ASID
+/** DMA TLB access violation downcall interrupt vector */
+#define INT_DMATLB_ACCESS_DWNCL INT_DMA_CPL
+/** Device interrupt downcall interrupt vector */
+#define INT_DEV_INTR_DWNCL INT_WORLD_ACCESS
+
+#ifndef __ASSEMBLER__
+
+/** Requests the inode for a specific full pathname.
+ *
+ * Performs a lookup in the hypervisor filesystem for a given filename.
+ * Multiple calls with the same filename will always return the same inode.
+ * If there is no such filename, HV_ENOENT is returned.
+ * A bad filename pointer may result in HV_EFAULT instead.
+ *
+ * @param filename Constant pointer to name of requested file
+ * @return Inode of requested file
+ */
+int hv_fs_findfile(HV_VirtAddr filename);
+
+
+/** Data returned from an fstat request.
+ * Note that this structure should be no more than 40 bytes in size so
+ * that it can always be returned completely in registers.
+ */
+typedef struct
+{
+ int size; /**< Size of file (or HV_Errno on error) */
+ unsigned int flags; /**< Flags (see HV_FS_FSTAT_FLAGS) */
+} HV_FS_StatInfo;
+
+/** Bitmask flags for fstat request */
+typedef enum
+{
+ HV_FS_ISDIR = 0x0001 /**< Is the entry a directory? */
+} HV_FS_FSTAT_FLAGS;
+
+/** Get stat information on a given file inode.
+ *
+ * Return information on the file with the given inode.
+ *
+ * If the HV_FS_ISDIR bit is set, the "file" is a directory. Reading
+ * it will return NUL-separated filenames (no directory part) relative
+ * to the path to the inode of the directory "file". These can be
+ * appended to the path to the directory "file" after a forward slash
+ * to create additional filenames. Note that it is not required
+ * that all valid paths be decomposable into valid parent directories;
+ * a filesystem may validly have just a few files, none of which have
+ * HV_FS_ISDIR set. However, if clients may wish to enumerate the
+ * files in the filesystem, it is recommended to include all the
+ * appropriate parent directory "files" to give a consistent view.
+ *
+ * An invalid file inode will cause an HV_EBADF error to be returned.
+ *
+ * @param inode The inode number of the query
+ * @return An HV_FS_StatInfo structure
+ */
+HV_FS_StatInfo hv_fs_fstat(int inode);
+
+
+/** Read data from a specific hypervisor file.
+ * On error, may return HV_EBADF for a bad inode or HV_EFAULT for a bad buf.
+ * Reads near the end of the file will return fewer bytes than requested.
+ * Reads at or beyond the end of a file will return zero.
+ *
+ * @param inode the hypervisor file to read
+ * @param buf the buffer to read data into
+ * @param length the number of bytes of data to read
+ * @param offset the offset into the file to read the data from
+ * @return number of bytes successfully read, or an HV_Errno code
+ */
+int hv_fs_pread(int inode, HV_VirtAddr buf, int length, int offset);
+
+
+/** Read a 64-bit word from the specified physical address.
+ * The address must be 8-byte aligned.
+ * Specifying an invalid physical address will lead to client termination.
+ * @param addr The physical address to read
+ * @param access The PTE describing how to read the memory
+ * @return The 64-bit value read from the given address
+ */
+unsigned long long hv_physaddr_read64(HV_PhysAddr addr, HV_PTE access);
+
+
+/** Write a 64-bit word to the specified physical address.
+ * The address must be 8-byte aligned.
+ * Specifying an invalid physical address will lead to client termination.
+ * @param addr The physical address to write
+ * @param access The PTE that says how to write the memory
+ * @param val The 64-bit value to write to the given address
+ */
+void hv_physaddr_write64(HV_PhysAddr addr, HV_PTE access,
+ unsigned long long val);
+
+
+/** Get the value of the command-line for the supervisor, if any.
+ * This will not include the filename of the booted supervisor, but may
+ * include configured-in boot arguments or the hv_restart() arguments.
+ * If the buffer is not long enough the hypervisor will NUL the first
+ * character of the buffer but not write any other data.
+ * @param buf The virtual address to write the command-line string to.
+ * @param length The length of buf, in characters.
+ * @return The actual length of the command line, including the trailing NUL
+ * (may be larger than "length").
+ */
+int hv_get_command_line(HV_VirtAddr buf, int length);
+
+
+/** Set a new value for the command-line for the supervisor, which will
+ * be returned from subsequent invocations of hv_get_command_line() on
+ * this tile.
+ * @param buf The virtual address to read the command-line string from.
+ * @param length The length of buf, in characters; must be no more than
+ * HV_COMMAND_LINE_LEN.
+ * @return Zero if successful, or a hypervisor error code.
+ */
+HV_Errno hv_set_command_line(HV_VirtAddr buf, int length);
+
+/** Maximum size of a command line passed to hv_set_command_line(); note
+ * that a line returned from hv_get_command_line() could be larger than
+ * this.*/
+#define HV_COMMAND_LINE_LEN 256
+
+/** Tell the hypervisor how to cache non-priority pages
+ * (its own as well as pages explicitly represented in page tables).
+ * Normally these will be represented as red/black pages, but
+ * when the supervisor starts to allocate "priority" pages in the PTE
+ * the hypervisor will need to start marking those pages as (e.g.) "red"
+ * and non-priority pages as either "black" (if they cache-alias
+ * with the existing priority pages) or "red/black" (if they don't).
+ * The bitmask provides information on which parts of the cache
+ * have been used for pinned pages so far on this tile; if (1 << N)
+ * appears in the bitmask, that indicates that a page has been marked
+ * "priority" whose PFN equals N, mod 8.
+ * @param bitmask A bitmap of priority page set values
+ */
+void hv_set_caching(unsigned int bitmask);
+
+
+/** Zero out a specified number of pages.
+ * The va and size must both be multiples of 4096.
+ * Caches are bypassed and memory is directly set to zero.
+ * This API is implemented only in the magic hypervisor and is intended
+ * to provide a performance boost to the minimal supervisor by
+ * giving it a fast way to zero memory pages when allocating them.
+ * @param va Virtual address where the page has been mapped
+ * @param size Number of bytes (must be a page size multiple)
+ */
+void hv_bzero_page(HV_VirtAddr va, unsigned int size);
+
+
+/** State object for the hypervisor messaging subsystem. */
+typedef struct
+{
+#if CHIP_VA_WIDTH() > 32
+ __hv64 opaque[2]; /**< No user-serviceable parts inside */
+#else
+ __hv32 opaque[2]; /**< No user-serviceable parts inside */
+#endif
+}
+HV_MsgState;
+
+/** Register to receive incoming messages.
+ *
+ * This routine configures the current tile so that it can receive
+ * incoming messages. It must be called before the client can receive
+ * messages with the hv_receive_message routine, and must be called on
+ * each tile which will receive messages.
+ *
+ * msgstate is the virtual address of a state object of type HV_MsgState.
+ * Once the state is registered, the client must not read or write the
+ * state object; doing so will cause undefined results.
+ *
+ * If this routine is called with msgstate set to 0, the client's message
+ * state will be freed and it will no longer be able to receive messages.
+ * Note that this may cause the loss of any as-yet-undelivered messages
+ * for the client.
+ *
+ * If another client attempts to send a message to a client which has
+ * not yet called hv_register_message_state, or which has freed its
+ * message state, the message will not be delivered, as if the client
+ * had insufficient buffering.
+ *
+ * This routine returns HV_OK if the registration was successful, and
+ * HV_EINVAL if the supplied state object is unsuitable. Note that some
+ * errors may not be detected during this routine, but might be detected
+ * during a subsequent message delivery.
+ * @param msgstate State object.
+ **/
+HV_Errno hv_register_message_state(HV_MsgState* msgstate);
+
+/** Possible message recipient states. */
+typedef enum
+{
+ HV_TO_BE_SENT, /**< Not sent (not attempted, or recipient not ready) */
+ HV_SENT, /**< Successfully sent */
+ HV_BAD_RECIP /**< Bad recipient coordinates (permanent error) */
+} HV_Recip_State;
+
+/** Message recipient. */
+typedef struct
+{
+ /** X coordinate, relative to supervisor's top-left coordinate */
+ unsigned int x:11;
+
+ /** Y coordinate, relative to supervisor's top-left coordinate */
+ unsigned int y:11;
+
+ /** Status of this recipient */
+ HV_Recip_State state:10;
+} HV_Recipient;
+
+/** Send a message to a set of recipients.
+ *
+ * This routine sends a message to a set of recipients.
+ *
+ * recips is an array of HV_Recipient structures. Each specifies a tile,
+ * and a message state; initially, it is expected that the state will
+ * be set to HV_TO_BE_SENT. nrecip specifies the number of recipients
+ * in the recips array.
+ *
+ * For each recipient whose state is HV_TO_BE_SENT, the hypervisor attempts
+ * to send that tile the specified message. In order to successfully
+ * receive the message, the receiver must be a valid tile to which the
+ * sender has access, must not be the sending tile itself, and must have
+ * sufficient free buffer space. (The hypervisor guarantees that each
+ * tile which has called hv_register_message_state() will be able to
+ * buffer one message from every other tile which can legally send to it;
+ * more space may be provided but is not guaranteed.) If an invalid tile
+ * is specified, the recipient's state is set to HV_BAD_RECIP; this is a
+ * permanent delivery error. If the message is successfully delivered
+ * to the recipient's buffer, the recipient's state is set to HV_SENT.
+ * Otherwise, the recipient's state is unchanged. Message delivery is
+ * synchronous; all attempts to send messages are completed before this
+ * routine returns.
+ *
+ * If no permanent delivery errors were encountered, the routine returns
+ * the number of messages successfully sent: that is, the number of
+ * recipients whose states changed from HV_TO_BE_SENT to HV_SENT during
+ * this operation. If any permanent delivery errors were encountered,
+ * the routine returns HV_ERECIP. In the event of permanent delivery
+ * errors, it may be the case that delivery was not attempted to all
+ * recipients; if any messages were successfully delivered, however,
+ * recipients' state values will be updated appropriately.
+ *
+ * It is explicitly legal to specify a recipient structure whose state
+ * is not HV_TO_BE_SENT; such a recipient is ignored. One suggested way
+ * of using hv_send_message to send a message to multiple tiles is to set
+ * up a list of recipients, and then call the routine repeatedly with the
+ * same list, each time accumulating the number of messages successfully
+ * sent, until all messages are sent, a permanent error is encountered,
+ * or the desired number of attempts have been made. When used in this
+ * way, the routine will deliver each message no more than once to each
+ * recipient.
+ *
+ * Note that a message being successfully delivered to the recipient's
+ * buffer space does not guarantee that it is received by the recipient,
+ * either immediately or at any time in the future; the recipient might
+ * never call hv_receive_message, or could register a different state
+ * buffer, losing the message.
+ *
+ * Specifying the same recipient more than once in the recipient list
+ * is an error, which will not result in an error return but which may
+ * or may not result in more than one message being delivered to the
+ * recipient tile.
+ *
+ * buf and buflen specify the message to be sent. buf is a virtual address
+ * which must be currently mapped in the client's page table; if not, the
+ * routine returns HV_EFAULT. buflen must be greater than zero and less
+ * than or equal to HV_MAX_MESSAGE_SIZE, and nrecip must be less than the
+ * number of tiles to which the sender has access; if not, the routine
+ * returns HV_EINVAL.
+ * @param recips List of recipients.
+ * @param nrecip Number of recipients.
+ * @param buf Address of message data.
+ * @param buflen Length of message data.
+ **/
+int hv_send_message(HV_Recipient *recips, int nrecip,
+ HV_VirtAddr buf, int buflen);
+
+/** Maximum hypervisor message size, in bytes */
+#define HV_MAX_MESSAGE_SIZE 28
+
+
+/** Return value from hv_receive_message() */
+typedef struct
+{
+ int msglen; /**< Message length in bytes, or an error code */
+ __hv32 source; /**< Code identifying message sender (HV_MSG_xxx) */
+} HV_RcvMsgInfo;
+
+#define HV_MSG_TILE 0x0 /**< Message source is another tile */
+#define HV_MSG_INTR 0x1 /**< Message source is a driver interrupt */
+
+/** Receive a message.
+ *
+ * This routine retrieves a message from the client's incoming message
+ * buffer.
+ *
+ * Multiple messages sent from a particular sending tile to a particular
+ * receiving tile are received in the order that they were sent; however,
+ * no ordering is guaranteed between messages sent by different tiles.
+ *
+ * Whenever a client's message buffer is empty, the first message
+ * subsequently received will cause the client's MESSAGE_RCV_DWNCL
+ * interrupt vector to be invoked through the interrupt downcall mechanism
+ * (see the description of the hv_downcall_dispatch() routine for details
+ * on downcalls).
+ *
+ * Another message-available downcall will not occur until a call to
+ * this routine is made when the message buffer is empty, and a message
+ * subsequently arrives. Note that such a downcall could occur while
+ * this routine is executing. If the calling code does not wish this
+ * to happen, it is recommended that this routine be called with the
+ * INTCTRL_1 interrupt masked, or inside an interrupt critical section.
+ *
+ * msgstate is the value previously passed to hv_register_message_state().
+ * buf is the virtual address of the buffer into which the message will
+ * be written; buflen is the length of the buffer.
+ *
+ * This routine returns an HV_RcvMsgInfo structure. The msglen member
+ * of that structure is the length of the message received, zero if no
+ * message is available, or HV_E2BIG if the message is too large for the
+ * specified buffer. If the message is too large, it is not consumed,
+ * and may be retrieved by a subsequent call to this routine specifying
+ * a sufficiently large buffer. A buffer which is HV_MAX_MESSAGE_SIZE
+ * bytes long is guaranteed to be able to receive any possible message.
+ *
+ * The source member of the HV_RcvMsgInfo structure describes the sender
+ * of the message. For messages sent by another client tile via an
+ * hv_send_message() call, this value is HV_MSG_TILE; for messages sent
+ * as a result of a device interrupt, this value is HV_MSG_INTR.
+ */
+
+HV_RcvMsgInfo hv_receive_message(HV_MsgState msgstate, HV_VirtAddr buf,
+ int buflen);
+
+
+/** Start remaining tiles owned by this supervisor. Initially, only one tile
+ * executes the client program; after it calls this service, the other tiles
+ * are started. This allows the initial tile to do one-time configuration
+ * of shared data structures without having to lock them against simultaneous
+ * access.
+ */
+void hv_start_all_tiles(void);
+
+
+/** Open a hypervisor device.
+ *
+ * This service initializes an I/O device and its hypervisor driver software,
+ * and makes it available for use. The open operation is per-device per-chip;
+ * once it has been performed, the device handle returned may be used in other
+ * device services calls made by any tile.
+ *
+ * @param name Name of the device. A base device name is just a text string
+ * (say, "pcie"). If there is more than one instance of a device, the
+ * base name is followed by a slash and a device number (say, "pcie/0").
+ * Some devices may support further structure beneath those components;
+ * most notably, devices which require control operations do so by
+ * supporting reads and/or writes to a control device whose name
+ * includes a trailing "/ctl" (say, "pcie/0/ctl").
+ * @param flags Flags (HV_DEV_xxx).
+ * @return A positive integer device handle, or a negative error code.
+ */
+int hv_dev_open(HV_VirtAddr name, __hv32 flags);
+
+
+/** Close a hypervisor device.
+ *
+ * This service uninitializes an I/O device and its hypervisor driver
+ * software, and makes it unavailable for use. The close operation is
+ * per-device per-chip; once it has been performed, the device is no longer
+ * available. Normally there is no need to ever call the close service.
+ *
+ * @param devhdl Device handle of the device to be closed.
+ * @return Zero if the close is successful, otherwise, a negative error code.
+ */
+int hv_dev_close(int devhdl);
+
+
+/** Read data from a hypervisor device synchronously.
+ *
+ * This service transfers data from a hypervisor device to a memory buffer.
+ * When the service returns, the data has been written to the memory buffer,
+ * and the buffer will not be further modified by the driver.
+ *
+ * No ordering is guaranteed between requests issued from different tiles.
+ *
+ * Devices may choose to support both the synchronous and asynchronous read
+ * operations, only one of them, or neither of them.
+ *
+ * @param devhdl Device handle of the device to be read from.
+ * @param flags Flags (HV_DEV_xxx).
+ * @param va Virtual address of the target data buffer. This buffer must
+ * be mapped in the currently installed page table; if not, HV_EFAULT
+ * may be returned.
+ * @param len Number of bytes to be transferred.
+ * @param offset Driver-dependent offset. For a random-access device, this is
+ * often a byte offset from the beginning of the device; in other cases,
+ * like on a control device, it may have a different meaning.
+ * @return A non-negative value if the read was at least partially successful;
+ * otherwise, a negative error code. The precise interpretation of
+ * the return value is driver-dependent, but many drivers will return
+ * the number of bytes successfully transferred.
+ */
+int hv_dev_pread(int devhdl, __hv32 flags, HV_VirtAddr va, __hv32 len,
+ __hv64 offset);
+
+#define HV_DEV_NB_EMPTY 0x1 /**< Don't block when no bytes of data can
+ be transferred. */
+#define HV_DEV_NB_PARTIAL 0x2 /**< Don't block when some bytes, but not all
+ of the requested bytes, can be
+ transferred. */
+#define HV_DEV_NOCACHE 0x4 /**< The caller warrants that none of the
+ cache lines which might contain data
+ from the requested buffer are valid.
+ Useful with asynchronous operations
+ only. */
+
+#define HV_DEV_ALLFLAGS (HV_DEV_NB_EMPTY | HV_DEV_NB_PARTIAL | \
+ HV_DEV_NOCACHE) /**< All HV_DEV_xxx flags */
+
+/** Write data to a hypervisor device synchronously.
+ *
+ * This service transfers data from a memory buffer to a hypervisor device.
+ * When the service returns, the data has been read from the memory buffer,
+ * and the buffer may be overwritten by the client; the data may not
+ * necessarily have been conveyed to the actual hardware I/O interface.
+ *
+ * No ordering is guaranteed between requests issued from different tiles.
+ *
+ * Devices may choose to support both the synchronous and asynchronous write
+ * operations, only one of them, or neither of them.
+ *
+ * @param devhdl Device handle of the device to be written to.
+ * @param flags Flags (HV_DEV_xxx).
+ * @param va Virtual address of the source data buffer. This buffer must
+ * be mapped in the currently installed page table; if not, HV_EFAULT
+ * may be returned.
+ * @param len Number of bytes to be transferred.
+ * @param offset Driver-dependent offset. For a random-access device, this is
+ * often a byte offset from the beginning of the device; in other cases,
+ * like on a control device, it may have a different meaning.
+ * @return A non-negative value if the write was at least partially successful;
+ * otherwise, a negative error code. The precise interpretation of
+ * the return value is driver-dependent, but many drivers will return
+ * the number of bytes successfully transferred.
+ */
+int hv_dev_pwrite(int devhdl, __hv32 flags, HV_VirtAddr va, __hv32 len,
+ __hv64 offset);
+
+
+/** Interrupt arguments, used in the asynchronous I/O interfaces. */
+#if CHIP_VA_WIDTH() > 32
+typedef __hv64 HV_IntArg;
+#else
+typedef __hv32 HV_IntArg;
+#endif
+
+/** Interrupt messages are delivered via the same mechanism as normal
+ * messages, but have a message source of HV_MSG_INTR. The message is
+ * formatted as an HV_IntrMsg structure.
+ */
+
+typedef struct
+{
+ HV_IntArg intarg; /**< Interrupt argument, passed to the poll/preada/pwritea
+ services */
+ HV_IntArg intdata; /**< Interrupt-specific interrupt data */
+} HV_IntrMsg;
+
+/** Request an interrupt message when a device condition is satisfied.
+ *
+ * This service requests that an interrupt message be delivered to the
+ * requesting tile when a device becomes readable or writable, or when any
+ * data queued to the device via previous write operations from this tile
+ * has been actually sent out on the hardware I/O interface. Devices may
+ * choose to support any, all, or none of the available conditions.
+ *
+ * If multiple conditions are specified, only one message will be
+ * delivered. If the event mask delivered to that interrupt handler
+ * indicates that some of the conditions have not yet occurred, the
+ * client must issue another poll() call if it wishes to wait for those
+ * conditions.
+ *
+ * Only one poll may be outstanding per device handle per tile. If more than
+ * one tile is polling on the same device and condition, they will all be
+ * notified when it happens. Because of this, clients may not assume that
+ * the condition signaled is necessarily still true when they request a
+ * subsequent service; for instance, the readable data which caused the
+ * poll call to interrupt may have been read by another tile in the interim.
+ *
+ * The notification interrupt message could come directly, or via the
+ * downcall (intctrl1) method, depending on what the tile is doing
+ * when the condition is satisfied. Note that it is possible for the
+ * requested interrupt to be delivered after this service is called but
+ * before it returns.
+ *
+ * @param devhdl Device handle of the device to be polled.
+ * @param events Flags denoting the events which will cause the interrupt to
+ * be delivered (HV_DEVPOLL_xxx).
+ * @param intarg Value which will be delivered as the intarg member of the
+ * eventual interrupt message; the intdata member will be set to a
+ * mask of HV_DEVPOLL_xxx values indicating which conditions have been
+ * satisfied.
+ * @return Zero if the interrupt was successfully scheduled; otherwise, a
+ * negative error code.
+ */
+int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg);
+
+#define HV_DEVPOLL_READ 0x1 /**< Test device for readability */
+#define HV_DEVPOLL_WRITE 0x2 /**< Test device for writability */
+#define HV_DEVPOLL_FLUSH 0x4 /**< Test device for output drained */
+
+
+/** Cancel a request for an interrupt when a device event occurs.
+ *
+ * This service requests that no interrupt be delivered when the events
+ * noted in the last-issued poll() call happen. Once this service returns,
+ * the interrupt has been canceled; however, it is possible for the interrupt
+ * to be delivered after this service is called but before it returns.
+ *
+ * @param devhdl Device handle of the device on which to cancel polling.
+ * @return Zero if the poll was successfully canceled; otherwise, a negative
+ * error code.
+ */
+int hv_dev_poll_cancel(int devhdl);
+
+
+/** Scatter-gather list for preada/pwritea calls. */
+typedef struct
+#if CHIP_VA_WIDTH() <= 32
+__attribute__ ((packed, aligned(4)))
+#endif
+{
+ HV_PhysAddr pa; /**< Client physical address of the buffer segment. */
+ HV_PTE pte; /**< Page table entry describing the caching and location
+ override characteristics of the buffer segment. Some
+ drivers ignore this element and will require that
+ the NOCACHE flag be set on their requests. */
+ __hv32 len; /**< Length of the buffer segment. */
+} HV_SGL;
+
+#define HV_SGL_MAXLEN 16 /**< Maximum number of entries in a scatter-gather
+ list */
+
+/** Read data from a hypervisor device asynchronously.
+ *
+ * This service transfers data from a hypervisor device to a memory buffer.
+ * When the service returns, the read has been scheduled. When the read
+ * completes, an interrupt message will be delivered, and the buffer will
+ * not be further modified by the driver.
+ *
+ * The number of possible outstanding asynchronous requests is defined by
+ * each driver, but it is recommended that it be at least two requests
+ * per tile per device.
+ *
+ * No ordering is guaranteed between synchronous and asynchronous requests,
+ * even those issued on the same tile.
+ *
+ * The completion interrupt message could come directly, or via the downcall
+ * (intctrl1) method, depending on what the tile is doing when the read
+ * completes. Interrupts do not coalesce; one is delivered for each
+ * asynchronous I/O request. Note that it is possible for the requested
+ * interrupt to be delivered after this service is called but before it
+ * returns.
+ *
+ * Devices may choose to support both the synchronous and asynchronous read
+ * operations, only one of them, or neither of them.
+ *
+ * @param devhdl Device handle of the device to be read from.
+ * @param flags Flags (HV_DEV_xxx).
+ * @param sgl_len Number of elements in the scatter-gather list.
+ * @param sgl Scatter-gather list describing the memory to which data will be
+ * written.
+ * @param offset Driver-dependent offset. For a random-access device, this is
+ * often a byte offset from the beginning of the device; in other cases,
+ * like on a control device, it may have a different meaning.
+ * @param intarg Value which will be delivered as the intarg member of the
+ * eventual interrupt message; the intdata member will be set to the
+ * normal return value from the read request.
+ * @return Zero if the read was successfully scheduled; otherwise, a negative
+ * error code. Note that some drivers may choose to pre-validate
+ * their arguments, and may thus detect certain device error
+ * conditions at this time rather than when the completion notification
+ * occurs, but this is not required.
+ */
+int hv_dev_preada(int devhdl, __hv32 flags, __hv32 sgl_len,
+ HV_SGL sgl[/* sgl_len */], __hv64 offset, HV_IntArg intarg);
+
+
+/** Write data to a hypervisor device asynchronously.
+ *
+ * This service transfers data from a memory buffer to a hypervisor
+ * device. When the service returns, the write has been scheduled.
+ * When the write completes, an interrupt message will be delivered,
+ * and the buffer may be overwritten by the client; the data may not
+ * necessarily have been conveyed to the actual hardware I/O interface.
+ *
+ * The number of possible outstanding asynchronous requests is defined by
+ * each driver, but it is recommended that it be at least two requests
+ * per tile per device.
+ *
+ * No ordering is guaranteed between synchronous and asynchronous requests,
+ * even those issued on the same tile.
+ *
+ * The completion interrupt message could come directly, or via the downcall
+ * (intctrl1) method, depending on what the tile is doing when the write
+ * completes. Interrupts do not coalesce; one is delivered for each
+ * asynchronous I/O request. Note that it is possible for the requested
+ * interrupt to be delivered after this service is called but before it
+ * returns.
+ *
+ * Devices may choose to support both the synchronous and asynchronous write
+ * operations, only one of them, or neither of them.
+ *
+ * @param devhdl Device handle of the device to be written to.
+ * @param flags Flags (HV_DEV_xxx).
+ * @param sgl_len Number of elements in the scatter-gather list.
+ * @param sgl Scatter-gather list describing the memory from which data will be
+ * read.
+ * @param offset Driver-dependent offset. For a random-access device, this is
+ * often a byte offset from the beginning of the device; in other cases,
+ * like on a control device, it may have a different meaning.
+ * @param intarg Value which will be delivered as the intarg member of the
+ * eventual interrupt message; the intdata member will be set to the
+ * normal return value from the write request.
+ * @return Zero if the write was successfully scheduled; otherwise, a negative
+ * error code. Note that some drivers may choose to pre-validate
+ * their arguments, and may thus detect certain device error
+ * conditions at this time rather than when the completion notification
+ * occurs, but this is not required.
+ */
+int hv_dev_pwritea(int devhdl, __hv32 flags, __hv32 sgl_len,
+ HV_SGL sgl[/* sgl_len */], __hv64 offset, HV_IntArg intarg);
+
+
+/** Define a pair of tile and ASID to identify a user process context. */
+typedef struct
+{
+ /** X coordinate, relative to supervisor's top-left coordinate */
+ unsigned int x:11;
+
+ /** Y coordinate, relative to supervisor's top-left coordinate */
+ unsigned int y:11;
+
+ /** ASID of the process on this x,y tile */
+ HV_ASID asid:10;
+} HV_Remote_ASID;
+
+/** Flush cache and/or TLB state on remote tiles.
+ *
+ * @param cache_pa Client physical address to flush from cache (ignored if
+ * the length encoded in cache_control is zero, or if
+ * HV_FLUSH_EVICT_L2 is set, or if cache_cpumask is NULL).
+ * @param cache_control This argument allows you to specify a length of
+ * physical address space to flush (maximum HV_FLUSH_MAX_CACHE_LEN).
+ * You can "or" in HV_FLUSH_EVICT_L2 to flush the whole L2 cache.
+ * You can "or" in HV_FLUSH_EVICT_L1I to flush the whole L1I cache.
+ * HV_FLUSH_ALL flushes all caches.
+ * @param cache_cpumask Bitmask (in row-major order, supervisor-relative) of
+ * tile indices to perform cache flush on. The low bit of the first
+ * word corresponds to the tile at the upper left-hand corner of the
+ * supervisor's rectangle. If passed as a NULL pointer, equivalent
+ * to an empty bitmask. On chips which support hash-for-home caching,
+ * if passed as -1, equivalent to a mask containing tiles which could
+ * be doing hash-for-home caching.
+ * @param tlb_va Virtual address to flush from TLB (ignored if
+ * tlb_length is zero or tlb_cpumask is NULL).
+ * @param tlb_length Number of bytes of data to flush from the TLB.
+ * @param tlb_pgsize Page size to use for TLB flushes.
+ * tlb_va and tlb_length need not be aligned to this size.
+ * @param tlb_cpumask Bitmask for tlb flush, like cache_cpumask.
+ * If passed as a NULL pointer, equivalent to an empty bitmask.
+ * @param asids Pointer to an HV_Remote_ASID array of tile/ASID pairs to flush.
+ * @param asidcount Number of HV_Remote_ASID entries in asids[].
+ * @return Zero for success, or else HV_EINVAL or HV_EFAULT for errors that
+ * are detected while parsing the arguments.
+ */
+int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
+ unsigned long* cache_cpumask,
+ HV_VirtAddr tlb_va, unsigned long tlb_length,
+ unsigned long tlb_pgsize, unsigned long* tlb_cpumask,
+ HV_Remote_ASID* asids, int asidcount);
+
+/** Include in cache_control to ensure a flush of the entire L2. */
+#define HV_FLUSH_EVICT_L2 (1UL << 31)
+
+/** Include in cache_control to ensure a flush of the entire L1I. */
+#define HV_FLUSH_EVICT_L1I (1UL << 30)
+
+/** Maximum legal size to use for the "length" component of cache_control. */
+#define HV_FLUSH_MAX_CACHE_LEN ((1UL << 30) - 1)
+
+/** Use for cache_control to ensure a flush of all caches. */
+#define HV_FLUSH_ALL -1UL
+
+#else /* __ASSEMBLER__ */
+
+/** Include in cache_control to ensure a flush of the entire L2. */
+#define HV_FLUSH_EVICT_L2 (1 << 31)
+
+/** Include in cache_control to ensure a flush of the entire L1I. */
+#define HV_FLUSH_EVICT_L1I (1 << 30)
+
+/** Maximum legal size to use for the "length" component of cache_control. */
+#define HV_FLUSH_MAX_CACHE_LEN ((1 << 30) - 1)
+
+/** Use for cache_control to ensure a flush of all caches. */
+#define HV_FLUSH_ALL -1
+
+#endif /* __ASSEMBLER__ */
+
+#ifndef __ASSEMBLER__
+
+/** Return a 64-bit value corresponding to the PTE if needed */
+#define hv_pte_val(pte) ((pte).val)
+
+/** Cast a 64-bit value to an HV_PTE */
+#define hv_pte(val) ((HV_PTE) { val })
+
+#endif /* !__ASSEMBLER__ */
+
+
+/** Bits in the size of an HV_PTE */
+#define HV_LOG2_PTE_SIZE 3
+
+/** Size of an HV_PTE */
+#define HV_PTE_SIZE (1 << HV_LOG2_PTE_SIZE)
+
+
+/* Bits in HV_PTE's low word. */
+#define HV_PTE_INDEX_PRESENT 0 /**< PTE is valid */
+#define HV_PTE_INDEX_MIGRATING 1 /**< Page is migrating */
+#define HV_PTE_INDEX_CLIENT0 2 /**< Page client state 0 */
+#define HV_PTE_INDEX_CLIENT1 3 /**< Page client state 1 */
+#define HV_PTE_INDEX_NC 4 /**< L1$/L2$ incoherent with L3$ */
+#define HV_PTE_INDEX_NO_ALLOC_L1 5 /**< Page is uncached in local L1$ */
+#define HV_PTE_INDEX_NO_ALLOC_L2 6 /**< Page is uncached in local L2$ */
+#define HV_PTE_INDEX_CACHED_PRIORITY 7 /**< Page is priority cached */
+#define HV_PTE_INDEX_PAGE 8 /**< PTE describes a page */
+#define HV_PTE_INDEX_GLOBAL 9 /**< Page is global */
+#define HV_PTE_INDEX_USER 10 /**< Page is user-accessible */
+#define HV_PTE_INDEX_ACCESSED 11 /**< Page has been accessed */
+#define HV_PTE_INDEX_DIRTY 12 /**< Page has been written */
+ /* Bits 13-15 are reserved for
+ future use. */
+#define HV_PTE_INDEX_MODE 16 /**< Page mode; see HV_PTE_MODE_xxx */
+#define HV_PTE_MODE_BITS 3 /**< Number of bits in mode */
+ /* Bit 19 is reserved for
+ future use. */
+#define HV_PTE_INDEX_LOTAR 20 /**< Page's LOTAR; must be high bits
+ of word */
+#define HV_PTE_LOTAR_BITS 12 /**< Number of bits in a LOTAR */
+
+/* Bits in HV_PTE's high word. */
+#define HV_PTE_INDEX_READABLE 32 /**< Page is readable */
+#define HV_PTE_INDEX_WRITABLE 33 /**< Page is writable */
+#define HV_PTE_INDEX_EXECUTABLE 34 /**< Page is executable */
+#define HV_PTE_INDEX_PTFN 35 /**< Page's PTFN; must be high bits
+ of word */
+#define HV_PTE_PTFN_BITS 29 /**< Number of bits in a PTFN */
+
+/** Position of the PFN field within the PTE (subset of the PTFN). */
+#define HV_PTE_INDEX_PFN (HV_PTE_INDEX_PTFN + (HV_LOG2_PAGE_SIZE_SMALL - \
+ HV_LOG2_PAGE_TABLE_ALIGN))
+
+/** Length of the PFN field within the PTE (subset of the PTFN). */
+#define HV_PTE_INDEX_PFN_BITS (HV_PTE_PTFN_BITS - \
+ (HV_LOG2_PAGE_SIZE_SMALL - \
+ HV_LOG2_PAGE_TABLE_ALIGN))
+
+/*
+ * Legal values for the PTE's mode field
+ */
+/** Data is not resident in any caches; loads and stores access memory
+ * directly.
+ */
+#define HV_PTE_MODE_UNCACHED 1
+
+/** Data is resident in the tile's local L1 and/or L2 caches; if a load
+ * or store misses there, it goes to memory.
+ *
+ * The copy in the local L1$/L2$ is not invalidated when the copy in
+ * memory is changed.
+ */
+#define HV_PTE_MODE_CACHE_NO_L3 2
+
+/** Data is resident in the tile's local L1 and/or L2 caches. If a load
+ * or store misses there, it goes to an L3 cache in a designated tile;
+ * if it misses there, it goes to memory.
+ *
+ * If the NC bit is not set, the copy in the local L1$/L2$ is invalidated
+ * when the copy in the remote L3$ is changed. Otherwise, such
+ * invalidation will not occur.
+ *
+ * Chips for which CHIP_HAS_COHERENT_LOCAL_CACHE() is 0 do not support
+ * invalidation from an L3$ to another tile's L1$/L2$. If the NC bit is
+ * clear on such a chip, no copy is kept in the local L1$/L2$ in this mode.
+ */
+#define HV_PTE_MODE_CACHE_TILE_L3 3
+
+/** Data is resident in the tile's local L1 and/or L2 caches. If a load
+ * or store misses there, it goes to an L3 cache in one of a set of
+ * designated tiles; if it misses there, it goes to memory. Which tile
+ * is chosen from the set depends upon a hash function applied to the
+ * physical address. This mode is not supported on chips for which
+ * CHIP_HAS_CBOX_HOME_MAP() is 0.
+ *
+ * If the NC bit is not set, the copy in the local L1$/L2$ is invalidated
+ * when the copy in the remote L3$ is changed. Otherwise, such
+ * invalidation will not occur.
+ *
+ * Chips for which CHIP_HAS_COHERENT_LOCAL_CACHE() is 0 do not support
+ * invalidation from an L3$ to another tile's L1$/L2$. If the NC bit is
+ * clear on such a chip, no copy is kept in the local L1$/L2$ in this mode.
+ */
+#define HV_PTE_MODE_CACHE_HASH_L3 4
+
+/** Data is not resident in memory; accesses are instead made to an I/O
+ * device, whose tile coordinates are given by the PTE's LOTAR field.
+ * This mode is only supported on chips for which CHIP_HAS_MMIO() is 1.
+ * The EXECUTABLE bit may not be set in an MMIO PTE.
+ */
+#define HV_PTE_MODE_MMIO 5
+
+
+/* C wants 1ULL so it is typed as __hv64, but the assembler needs just numbers.
+ * The assembler can't handle shifts greater than 31, but treats them
+ * as shifts mod 32, so assembler code must be aware of which word
+ * the bit belongs in when using these macros.
+ */
+#ifdef __ASSEMBLER__
+#define __HV_PTE_ONE 1 /**< One, for assembler */
+#else
+#define __HV_PTE_ONE 1ULL /**< One, for C */
+#endif
+
+/** Is this PTE present?
+ *
+ * If this bit is set, this PTE represents a valid translation or level-2
+ * page table pointer. Otherwise, the page table does not contain a
+ * translation for the subject virtual pages.
+ *
+ * If this bit is not set, the other bits in the PTE are not
+ * interpreted by the hypervisor, and may contain any value.
+ */
+#define HV_PTE_PRESENT (__HV_PTE_ONE << HV_PTE_INDEX_PRESENT)
+
+/** Does this PTE map a page?
+ *
+ * If this bit is set in the level-1 page table, the entry should be
+ * interpreted as a level-2 page table entry mapping a large page.
+ *
+ * This bit should not be modified by the client while PRESENT is set, as
+ * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits.
+ *
+ * In a level-2 page table, this bit is ignored and must be zero.
+ */
+#define HV_PTE_PAGE (__HV_PTE_ONE << HV_PTE_INDEX_PAGE)
+
+/** Is this a global (non-ASID) mapping?
+ *
+ * If this bit is set, the translations established by this PTE will
+ * not be flushed from the TLB by the hv_flush_asid() service; they
+ * will be flushed by the hv_flush_page() or hv_flush_pages() services.
+ *
+ * Setting this bit for translations which are identical in all page
+ * tables (for instance, code and data belonging to a client OS) can
+ * be very beneficial, as it will reduce the number of TLB misses.
+ * Note that, while it is not an error which will be detected by the
+ * hypervisor, it is an extremely bad idea to set this bit for
+ * translations which are _not_ identical in all page tables.
+ *
+ * This bit should not be modified by the client while PRESENT is set, as
+ * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_GLOBAL (__HV_PTE_ONE << HV_PTE_INDEX_GLOBAL)
+
+/** Is this mapping accessible to users?
+ *
+ * If this bit is set, code running at any PL will be permitted to
+ * access the virtual addresses mapped by this PTE. Otherwise, only
+ * code running at PL 1 or above will be allowed to do so.
+ *
+ * This bit should not be modified by the client while PRESENT is set, as
+ * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_USER (__HV_PTE_ONE << HV_PTE_INDEX_USER)
+
+/** Has this mapping been accessed?
+ *
+ * This bit is set by the hypervisor when the memory described by the
+ * translation is accessed for the first time. It is never cleared by
+ * the hypervisor, but may be cleared by the client. After the bit
+ * has been cleared, subsequent references are not guaranteed to set
+ * it again until the translation has been flushed from the TLB.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_ACCESSED (__HV_PTE_ONE << HV_PTE_INDEX_ACCESSED)
+
+/** Is this mapping dirty?
+ *
+ * This bit is set by the hypervisor when the memory described by the
+ * translation is written for the first time. It is never cleared by
+ * the hypervisor, but may be cleared by the client. After the bit
+ * has been cleared, subsequent references are not guaranteed to set
+ * it again until the translation has been flushed from the TLB.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_DIRTY (__HV_PTE_ONE << HV_PTE_INDEX_DIRTY)
+
+/** Migrating bit in PTE.
+ *
+ * This bit is guaranteed not to be inspected or modified by the
+ * hypervisor. The name is indicative of the suggested use by the client
+ * to tag pages whose L3 cache is being migrated from one cpu to another.
+ */
+#define HV_PTE_MIGRATING (__HV_PTE_ONE << HV_PTE_INDEX_MIGRATING)
+
+/** Client-private bit in PTE.
+ *
+ * This bit is guaranteed not to be inspected or modified by the
+ * hypervisor.
+ */
+#define HV_PTE_CLIENT0 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT0)
+
+/** Client-private bit in PTE.
+ *
+ * This bit is guaranteed not to be inspected or modified by the
+ * hypervisor.
+ */
+#define HV_PTE_CLIENT1 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT1)
+
+/** Non-coherent (NC) bit in PTE.
+ *
+ * If this bit is set, the mapping that is set up will be non-coherent
+ * (also known as non-inclusive). This means that changes to the L3
+ * cache will not cause a local copy to be invalidated. It is generally
+ * recommended only for read-only mappings.
+ *
+ * In level-1 PTEs, if the Page bit is clear, this bit determines how the
+ * level-2 page table is accessed.
+ */
+#define HV_PTE_NC (__HV_PTE_ONE << HV_PTE_INDEX_NC)
+
+/** Is this page prevented from filling the L1$?
+ *
+ * If this bit is set, the page described by the PTE will not be cached
+ * in the local cpu's L1 cache.
+ *
+ * If CHIP_HAS_NC_AND_NOALLOC_BITS() is not true in <chip.h> for this chip,
+ * it is illegal to use this attribute, and may cause client termination.
+ *
+ * In level-1 PTEs, if the Page bit is clear, this bit
+ * determines how the level-2 page table is accessed.
+ */
+#define HV_PTE_NO_ALLOC_L1 (__HV_PTE_ONE << HV_PTE_INDEX_NO_ALLOC_L1)
+
+/** Is this page prevented from filling the L2$?
+ *
+ * If this bit is set, the page described by the PTE will not be cached
+ * in the local cpu's L2 cache.
+ *
+ * If CHIP_HAS_NC_AND_NOALLOC_BITS() is not true in <chip.h> for this chip,
+ * it is illegal to use this attribute, and may cause client termination.
+ *
+ * In level-1 PTEs, if the Page bit is clear, this bit determines how the
+ * level-2 page table is accessed.
+ */
+#define HV_PTE_NO_ALLOC_L2 (__HV_PTE_ONE << HV_PTE_INDEX_NO_ALLOC_L2)
+
+/** Is this a priority page?
+ *
+ * If this bit is set, the page described by the PTE will be given
+ * priority in the cache. Normally this translates into allowing the
+ * page to use only the "red" half of the cache. The client may wish to
+ * then use the hv_set_caching service to specify that other pages which
+ * alias this page will use only the "black" half of the cache.
+ *
+ * If the Cached Priority bit is clear, the hypervisor uses the
+ * current hv_set_caching() value to choose how to cache the page.
+ *
+ * It is illegal to set the Cached Priority bit if the Non-Cached bit
+ * is set and the Cached Remotely bit is clear, i.e. if requests to
+ * the page map directly to memory.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_CACHED_PRIORITY (__HV_PTE_ONE << \
+ HV_PTE_INDEX_CACHED_PRIORITY)
+
+/** Is this a readable mapping?
+ *
+ * If this bit is set, code will be permitted to read from (e.g.,
+ * issue load instructions against) the virtual addresses mapped by
+ * this PTE.
+ *
+ * It is illegal for this bit to be clear if the Writable bit is set.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_READABLE (__HV_PTE_ONE << HV_PTE_INDEX_READABLE)
+
+/** Is this a writable mapping?
+ *
+ * If this bit is set, code will be permitted to write to (e.g., issue
+ * store instructions against) the virtual addresses mapped by this
+ * PTE.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_WRITABLE (__HV_PTE_ONE << HV_PTE_INDEX_WRITABLE)
+
+/** Is this an executable mapping?
+ *
+ * If this bit is set, code will be permitted to execute from
+ * (e.g., jump to) the virtual addresses mapped by this PTE.
+ *
+ * This bit applies to any processor on the tile, if there are more
+ * than one.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_EXECUTABLE (__HV_PTE_ONE << HV_PTE_INDEX_EXECUTABLE)
+
+/** The width of a LOTAR's x or y bitfield. */
+#define HV_LOTAR_WIDTH 11
+
+/** Converts an x,y pair to a LOTAR value. */
+#define HV_XY_TO_LOTAR(x, y) ((HV_LOTAR)(((x) << HV_LOTAR_WIDTH) | (y)))
+
+/** Extracts the X component of a lotar. */
+#define HV_LOTAR_X(lotar) ((lotar) >> HV_LOTAR_WIDTH)
+
+/** Extracts the Y component of a lotar. */
+#define HV_LOTAR_Y(lotar) ((lotar) & ((1 << HV_LOTAR_WIDTH) - 1))
+
+#ifndef __ASSEMBLER__
+
+/** Emit the hv_pte_get/set/clear_<name>() inline accessors for one flag. */
+#define _HV_BIT(name, bit)                                      \
+static __inline int                                             \
+hv_pte_get_##name(HV_PTE pte)                                   \
+{                                                               \
+  return (pte.val & HV_PTE_##bit) != 0;                         \
+}                                                               \
+                                                                \
+static __inline HV_PTE                                          \
+hv_pte_set_##name(HV_PTE pte)                                   \
+{                                                               \
+  pte.val |= HV_PTE_##bit;                                      \
+  return pte;                                                   \
+}                                                               \
+                                                                \
+static __inline HV_PTE                                          \
+hv_pte_clear_##name(HV_PTE pte)                                 \
+{                                                               \
+  pte.val &= ~HV_PTE_##bit;                                     \
+  return pte;                                                   \
+}
+
+/* Instantiate the get, set, and clear accessors for each PTE flag bit.
+ */
+_HV_BIT(present, PRESENT)
+_HV_BIT(page, PAGE)
+_HV_BIT(client0, CLIENT0)
+_HV_BIT(client1, CLIENT1)
+_HV_BIT(migrating, MIGRATING)
+_HV_BIT(nc, NC)
+_HV_BIT(readable, READABLE)
+_HV_BIT(writable, WRITABLE)
+_HV_BIT(executable, EXECUTABLE)
+_HV_BIT(accessed, ACCESSED)
+_HV_BIT(dirty, DIRTY)
+_HV_BIT(no_alloc_l1, NO_ALLOC_L1)
+_HV_BIT(no_alloc_l2, NO_ALLOC_L2)
+_HV_BIT(cached_priority, CACHED_PRIORITY)
+_HV_BIT(global, GLOBAL)
+_HV_BIT(user, USER)
+
+#undef _HV_BIT
+
+/** Extract the page mode field from a PTE.
+ *
+ * The mode is one of the HV_PTE_MODE_xxx values and broadly selects
+ * whether, and how, accesses to the page are cached.  That general
+ * policy is further refined by the NC, NO_ALLOC_L1, and NO_ALLOC_L2
+ * bits.  Only the PTE value truncated to __hv32 is examined.
+ */
+static __inline unsigned int
+hv_pte_get_mode(const HV_PTE pte)
+{
+  const __hv32 low = (__hv32) pte.val;
+  return (low >> HV_PTE_INDEX_MODE) & ((1 << HV_PTE_MODE_BITS) - 1);
+}
+
+/** Store a page mode (see hv_pte_get_mode) into a PTE; val is not masked. */
+static __inline HV_PTE
+hv_pte_set_mode(HV_PTE pte, unsigned int val)
+{
+  const __hv64 field = ((1ULL << HV_PTE_MODE_BITS) - 1) << HV_PTE_INDEX_MODE;
+  pte.val = (pte.val & ~field) | (val << HV_PTE_INDEX_MODE);
+  return pte;
+}
+
+/** Read the page frame number out of a PTE.
+ *
+ * The PFN holds the high-order bits of the target page's CPA (client
+ * physical address); appending HV_LOG2_PAGE_SIZE_SMALL zero bits to it
+ * (see HV_PFN_TO_CPA) reconstructs the full CPA.
+ *
+ * In a level-1 page table entry with the Page bit set, that CPA must
+ * be aligned to the large page size.
+ */
+static __inline unsigned int
+hv_pte_get_pfn(const HV_PTE pte)
+{
+  return (unsigned int) (pte.val >> HV_PTE_INDEX_PFN);
+}
+
+
+/** Store a page frame number (see hv_pte_get_pfn) into a PTE. */
+static __inline HV_PTE
+hv_pte_set_pfn(HV_PTE pte, unsigned int val)
+{
+  /*
+   * The mask deliberately covers the whole PTFN field rather than just
+   * the PFN, so that no stale low-order PTFN bits survive the update.
+   */
+  const __hv64 clr = ((1ULL << HV_PTE_PTFN_BITS) - 1) << HV_PTE_INDEX_PTFN;
+  pte.val = (pte.val & ~clr) | ((__hv64) val << HV_PTE_INDEX_PFN);
+  return pte;
+}
+
+/** Read the page table frame number out of a PTE.
+ *
+ * The PTFN holds the high-order bits of the target page table's CPA
+ * (client physical address); appending HV_LOG2_PAGE_TABLE_ALIGN zero
+ * bits to it (see HV_PTFN_TO_CPA) reconstructs the full CPA.
+ *
+ * In a level-1 page table entry with the Page bit clear, that CPA must
+ * be aligned to the stricter of HV_PAGE_TABLE_ALIGN and the level-2
+ * page table size.
+ */
+static __inline unsigned long
+hv_pte_get_ptfn(const HV_PTE pte)
+{
+  return (unsigned long) (pte.val >> HV_PTE_INDEX_PTFN);
+}
+
+
+/** Store a page table frame number (see hv_pte_get_ptfn) into a PTE. */
+static __inline HV_PTE
+hv_pte_set_ptfn(HV_PTE pte, unsigned long val)
+{
+  const __hv64 clr = ((1ULL << HV_PTE_PTFN_BITS) - 1) << HV_PTE_INDEX_PTFN;
+  pte.val = (pte.val & ~clr) | ((__hv64) val << HV_PTE_INDEX_PTFN);
+  return pte;
+}
+
+
+/** Get the remote tile caching this page, as a LOTAR value.
+ *
+ * Identifies the remote tile that provides the L3 cache for this page.
+ *
+ * The field only matters when the page mode is HV_PTE_MODE_CACHE_TILE_L3.
+ *
+ * In level-1 PTEs, if the Page bit is clear, this field determines how the
+ * level-2 page table is accessed.
+ */
+static __inline unsigned int
+hv_pte_get_lotar(const HV_PTE pte)
+{
+  const unsigned int packed = ((__hv32) pte.val) >> HV_PTE_INDEX_LOTAR;
+  const unsigned int x = packed >> (HV_PTE_LOTAR_BITS / 2);
+  const unsigned int y = packed & ((1 << (HV_PTE_LOTAR_BITS / 2)) - 1);
+  return HV_XY_TO_LOTAR(x, y);
+}
+
+
+/** Set the remote tile caching a page into a PTE. See hv_pte_get_lotar.
+ * val is a LOTAR as built by HV_XY_TO_LOTAR; each coordinate is truncated
+ * to HV_PTE_LOTAR_BITS / 2 bits so it cannot clobber adjacent PTE bits. */
+static __inline HV_PTE
+hv_pte_set_lotar(HV_PTE pte, unsigned int val)
+{
+  const unsigned int half = HV_PTE_LOTAR_BITS / 2, lim = (1U << half) - 1;
+  const __hv64 x = HV_LOTAR_X(val) & lim, y = HV_LOTAR_Y(val) & lim;
+  pte.val &= ~(((1ULL << HV_PTE_LOTAR_BITS) - 1) << HV_PTE_INDEX_LOTAR);
+  pte.val |= ((x << half) | y) << HV_PTE_INDEX_LOTAR;
+  return pte;
+}
+
+#endif /* !__ASSEMBLER__ */
+
+/** Converts a client physical address to a pfn. */
+#define HV_CPA_TO_PFN(p) ((p) >> HV_LOG2_PAGE_SIZE_SMALL)
+
+/** Converts a pfn to a client physical address. */
+#define HV_PFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_SIZE_SMALL)
+
+/** Converts a client physical address to a ptfn. */
+#define HV_CPA_TO_PTFN(p) ((p) >> HV_LOG2_PAGE_TABLE_ALIGN)
+
+/** Converts a ptfn to a client physical address. */
+#define HV_PTFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_TABLE_ALIGN)
+
+/** Converts a ptfn to a pfn. */
+#define HV_PTFN_TO_PFN(p) \
+ ((p) >> (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN))
+
+/** Converts a pfn to a ptfn. */
+#define HV_PFN_TO_PTFN(p) \
+ ((p) << (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN))
+
+#if CHIP_VA_WIDTH() > 32
+
+/** Log number of HV_PTE entries in L0 page table */
+#define HV_LOG2_L0_ENTRIES (CHIP_VA_WIDTH() - HV_LOG2_L1_SPAN)
+
+/** Number of HV_PTE entries in L0 page table */
+#define HV_L0_ENTRIES (1 << HV_LOG2_L0_ENTRIES)
+
+/** Log size of L0 page table in bytes */
+#define HV_LOG2_L0_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L0_ENTRIES)
+
+/** Size of L0 page table in bytes */
+#define HV_L0_SIZE (1 << HV_LOG2_L0_SIZE)
+
+#ifdef __ASSEMBLER__
+
+/** Index in L0 for a specific VA */
+#define HV_L0_INDEX(va) \
+ (((va) >> HV_LOG2_L1_SPAN) & (HV_L0_ENTRIES - 1))
+
+#else
+
+/** Index in L0 for a specific VA */
+#define HV_L0_INDEX(va) \
+ (((HV_VirtAddr)(va) >> HV_LOG2_L1_SPAN) & (HV_L0_ENTRIES - 1))
+
+#endif
+
+#endif /* CHIP_VA_WIDTH() > 32 */
+
+/** Log number of HV_PTE entries in L1 page table */
+#define HV_LOG2_L1_ENTRIES (HV_LOG2_L1_SPAN - HV_LOG2_PAGE_SIZE_LARGE)
+
+/** Number of HV_PTE entries in L1 page table */
+#define HV_L1_ENTRIES (1 << HV_LOG2_L1_ENTRIES)
+
+/** Log size of L1 page table in bytes */
+#define HV_LOG2_L1_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L1_ENTRIES)
+
+/** Size of L1 page table in bytes */
+#define HV_L1_SIZE (1 << HV_LOG2_L1_SIZE)
+
+/** Log number of HV_PTE entries in level-2 page table */
+#define HV_LOG2_L2_ENTRIES (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)
+
+/** Number of HV_PTE entries in level-2 page table */
+#define HV_L2_ENTRIES (1 << HV_LOG2_L2_ENTRIES)
+
+/** Log size of level-2 page table in bytes */
+#define HV_LOG2_L2_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L2_ENTRIES)
+
+/** Size of level-2 page table in bytes */
+#define HV_L2_SIZE (1 << HV_LOG2_L2_SIZE)
+
+#ifdef __ASSEMBLER__
+
+#if CHIP_VA_WIDTH() > 32
+
+/** Index in L1 for a specific VA */
+#define HV_L1_INDEX(va) \
+ (((va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1))
+
+#else /* CHIP_VA_WIDTH() > 32 */
+
+/** Index in L1 for a specific VA */
+#define HV_L1_INDEX(va) \
+ (((va) >> HV_LOG2_PAGE_SIZE_LARGE))
+
+#endif /* CHIP_VA_WIDTH() > 32 */
+
+/** Index in level-2 page table for a specific VA */
+#define HV_L2_INDEX(va) \
+ (((va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1))
+
+#else /* __ASSEMBLER__ */
+
+#if CHIP_VA_WIDTH() > 32
+
+/** Index in L1 for a specific VA */
+#define HV_L1_INDEX(va) \
+ (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1))
+
+#else /* CHIP_VA_WIDTH() > 32 */
+
+/** Index in L1 for a specific VA */
+#define HV_L1_INDEX(va) \
+ (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE))
+
+#endif /* CHIP_VA_WIDTH() > 32 */
+
+/** Index in level-2 page table for a specific VA */
+#define HV_L2_INDEX(va) \
+ (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1))
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _TILE_HV_H */
diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h
new file mode 100644
index 00000000..e1591bff
--- /dev/null
+++ b/arch/tile/include/hv/netio_errors.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * Error codes returned from NetIO routines.
+ */
+
+#ifndef __NETIO_ERRORS_H__
+#define __NETIO_ERRORS_H__
+
+/**
+ * @addtogroup error
+ *
+ * @brief The error codes returned by NetIO functions.
+ *
+ * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and
+ * a negative value if an error occurs.
+ *
+ * In cases where a NetIO function failed due to an error reported by
+ * system libraries, the error code will be the negation of the
+ * system errno at the time of failure. The @ref netio_strerror()
+ * function will deliver error strings for both NetIO and system error
+ * codes.
+ *
+ * @{
+ */
+
+/** The set of all NetIO errors. */
+typedef enum
+{
+ /** Operation successfully completed. */
+ NETIO_NO_ERROR = 0,
+
+ /** A packet was successfully retrieved from an input queue. */
+ NETIO_PKT = 0,
+
+ /** Largest NetIO error number. */
+ NETIO_ERR_MAX = -701,
+
+ /** The tile is not registered with the IPP. */
+ NETIO_NOT_REGISTERED = -701,
+
+ /** No packet was available to retrieve from the input queue. */
+ NETIO_NOPKT = -702,
+
+ /** The requested function is not implemented. */
+ NETIO_NOT_IMPLEMENTED = -703,
+
+ /** On a registration operation, the target queue already has the maximum
+ * number of tiles registered for it, and no more may be added. On a
+ * packet send operation, the output queue is full and nothing more can
+ * be queued until some of the queued packets are actually transmitted. */
+ NETIO_QUEUE_FULL = -704,
+
+ /** The calling process or thread is not bound to exactly one CPU. */
+ NETIO_BAD_AFFINITY = -705,
+
+ /** Cannot allocate memory on requested controllers. */
+ NETIO_CANNOT_HOME = -706,
+
+ /** On a registration operation, the IPP specified is not configured
+ * to support the options requested; for instance, the application
+ * wants a specific type of tagged headers which the configured IPP
+ * doesn't support. Or, the supplied configuration information is
+ * not self-consistent, or is out of range; for instance, specifying
+ * both NETIO_RECV and NETIO_NO_RECV, or asking for more than
+ * NETIO_MAX_SEND_BUFFERS to be preallocated. On a VLAN or bucket
+ * configure operation, the number of items, or the base item, was
+ * out of range.
+ */
+ NETIO_BAD_CONFIG = -707,
+
+ /** Too many tiles have registered to transmit packets. */
+ NETIO_TOOMANY_XMIT = -708,
+
+ /** Packet transmission was attempted on a queue which was registered
+ with transmit disabled. */
+ NETIO_UNREG_XMIT = -709,
+
+ /** This tile is already registered with the IPP. */
+ NETIO_ALREADY_REGISTERED = -710,
+
+ /** The Ethernet link is down. The application should try again later. */
+ NETIO_LINK_DOWN = -711,
+
+ /** An invalid memory buffer has been specified. This may be an unmapped
+ * virtual address, or one which does not meet alignment requirements.
+ * For netio_input_register(), this error may be returned when multiple
+ * processes specify different memory regions to be used for NetIO
+ * buffers. That can happen if these processes specify explicit memory
+ * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init()
+ * has not been called by a common ancestor of the processes.
+ */
+ NETIO_FAULT = -712,
+
+ /** Cannot combine user-managed shared memory and cache coherence. */
+ NETIO_BAD_CACHE_CONFIG = -713,
+
+ /** Smallest NetIO error number. */
+ NETIO_ERR_MIN = -713,
+
+#ifndef __DOXYGEN__
+ /** Used internally to mean that no response is needed; never returned to
+ * an application. */
+ NETIO_NO_RESPONSE = 1
+#endif
+} netio_error_t;
+
+/** @} */
+
+#endif /* __NETIO_ERRORS_H__ */
diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h
new file mode 100644
index 00000000..8d20972a
--- /dev/null
+++ b/arch/tile/include/hv/netio_intf.h
@@ -0,0 +1,2975 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * NetIO interface structures and macros.
+ */
+
+#ifndef __NETIO_INTF_H__
+#define __NETIO_INTF_H__
+
+#include <hv/netio_errors.h>
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__)
+#include <assert.h>
+#define netio_assert assert /**< Enable assertions from macros */
+#else
+#define netio_assert(...) ((void)(0)) /**< Disable assertions from macros */
+#endif
+
+/*
+ * If none of these symbols are defined, we're building libnetio in an
+ * environment where we have pthreads, so we'll enable locking.
+ */
+#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \
+ !defined(__NEWLIB__)
+#define _NETIO_PTHREAD /**< Include a mutex in netio_queue_t below */
+
+/*
+ * If NETIO_UNLOCKED is defined, we don't use per-cpu locks on
+ * per-packet NetIO operations. We still do pthread locking on things
+ * like netio_input_register, though. This is used for building
+ * libnetio_unlocked.
+ */
+#ifndef NETIO_UNLOCKED
+
+/* Avoid PLT overhead by using our own inlined per-cpu lock. */
+#include <sched.h>
+typedef int _netio_percpu_mutex_t;
+
+static __inline int
+_netio_percpu_mutex_init(_netio_percpu_mutex_t* lock)
+{
+ *lock = 0;
+ return 0;
+}
+
+static __inline int
+_netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock)
+{
+ while (__builtin_expect(__insn_tns(lock), 0))
+ sched_yield();
+ return 0;
+}
+
+static __inline int
+_netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock)
+{
+ *lock = 0;
+ return 0;
+}
+
+#else /* NETIO_UNLOCKED */
+
+/* Don't do any locking for per-packet NetIO operations. */
+typedef int _netio_percpu_mutex_t;
+#define _netio_percpu_mutex_init(L)
+#define _netio_percpu_mutex_lock(L)
+#define _netio_percpu_mutex_unlock(L)
+
+#endif /* NETIO_UNLOCKED */
+#endif /* !__HV__, !__BOGUX__, !__KERNEL__, !__NEWLIB__ */
+
+/** How many tiles can register for a given queue.
+ * @ingroup setup */
+#define NETIO_MAX_TILES_PER_QUEUE 64
+
+
+/** Largest permissible queue identifier.
+ * @ingroup setup */
+#define NETIO_MAX_QUEUE_ID 255
+
+
+#ifndef __DOXYGEN__
+
+/* Metadata packet checksum/ethertype flags. */
+
+/** The L4 checksum has not been calculated. */
+#define _NETIO_PKT_NO_L4_CSUM_SHIFT 0
+#define _NETIO_PKT_NO_L4_CSUM_RMASK 1
+#define _NETIO_PKT_NO_L4_CSUM_MASK \
+ (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT)
+
+/** The L3 checksum has not been calculated. */
+#define _NETIO_PKT_NO_L3_CSUM_SHIFT 1
+#define _NETIO_PKT_NO_L3_CSUM_RMASK 1
+#define _NETIO_PKT_NO_L3_CSUM_MASK \
+ (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT)
+
+/** The L3 checksum is incorrect (or perhaps has not been calculated). */
+#define _NETIO_PKT_BAD_L3_CSUM_SHIFT 2
+#define _NETIO_PKT_BAD_L3_CSUM_RMASK 1
+#define _NETIO_PKT_BAD_L3_CSUM_MASK \
+ (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT)
+
+/** The Ethernet packet type is unrecognized. */
+#define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT 3
+#define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK 1
+#define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \
+ (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \
+ _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT)
+
+/* Metadata packet type flags. */
+
+/** Where the packet type bits are; this field is the index into
+ * _netio_pkt_info. */
+#define _NETIO_PKT_TYPE_SHIFT 4
+#define _NETIO_PKT_TYPE_RMASK 0x3F
+
+/** How many VLAN tags the packet has, and, if we have two, which one we
+ * actually grouped on. A VLAN within a proprietary (Marvell or Broadcom)
+ * tag is counted here. */
+#define _NETIO_PKT_VLAN_SHIFT 4
+#define _NETIO_PKT_VLAN_RMASK 0x3
+#define _NETIO_PKT_VLAN_MASK \
+ (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT)
+#define _NETIO_PKT_VLAN_NONE 0 /* No VLAN tag. */
+#define _NETIO_PKT_VLAN_ONE 1 /* One VLAN tag. */
+#define _NETIO_PKT_VLAN_TWO_OUTER 2 /* Two VLAN tags, outer one used. */
+#define _NETIO_PKT_VLAN_TWO_INNER 3 /* Two VLAN tags, inner one used. */
+
+/** Which proprietary tags the packet has. */
+#define _NETIO_PKT_TAG_SHIFT 6
+#define _NETIO_PKT_TAG_RMASK 0x3
+#define _NETIO_PKT_TAG_MASK \
+ (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT)
+#define _NETIO_PKT_TAG_NONE 0 /* No proprietary tags. */
+#define _NETIO_PKT_TAG_MRVL 1 /* Marvell HyperG.Stack tags. */
+#define _NETIO_PKT_TAG_MRVL_EXT 2 /* HyperG.Stack extended tags. */
+#define _NETIO_PKT_TAG_BRCM 3 /* Broadcom HiGig tags. */
+
+/** Whether a packet has an LLC + SNAP header. */
+#define _NETIO_PKT_SNAP_SHIFT 8
+#define _NETIO_PKT_SNAP_RMASK 0x1
+#define _NETIO_PKT_SNAP_MASK \
+ (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT)
+
+/* NOTE: Bits 9 and 10 are unused. */
+
+/** Length of any custom data before the L2 header, in words. */
+#define _NETIO_PKT_CUSTOM_LEN_SHIFT 11
+#define _NETIO_PKT_CUSTOM_LEN_RMASK 0x1F
+#define _NETIO_PKT_CUSTOM_LEN_MASK \
+ (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT)
+
+/** The L4 checksum is incorrect (or perhaps has not been calculated). */
+#define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16
+#define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1
+#define _NETIO_PKT_BAD_L4_CSUM_MASK \
+ (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT)
+
+/** Length of the L2 header, in words. */
+#define _NETIO_PKT_L2_LEN_SHIFT 17
+#define _NETIO_PKT_L2_LEN_RMASK 0x1F
+#define _NETIO_PKT_L2_LEN_MASK \
+ (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT)
+
+
+/* Flags in minimal packet metadata. */
+
+/** We need an eDMA checksum on this packet. */
+#define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT 0
+#define _NETIO_PKT_NEED_EDMA_CSUM_RMASK 1
+#define _NETIO_PKT_NEED_EDMA_CSUM_MASK \
+ (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT)
+
+/* Data within the packet information table. */
+
+/* Note that, for efficiency, code which uses these fields assumes that none
+ * of the shift values below are zero. See uses below for an explanation. */
+
+/** Offset within the L2 header of the innermost ethertype (in halfwords). */
+#define _NETIO_PKT_INFO_ETYPE_SHIFT 6
+#define _NETIO_PKT_INFO_ETYPE_RMASK 0x1F
+
+/** Offset within the L2 header of the VLAN tag (in halfwords). */
+#define _NETIO_PKT_INFO_VLAN_SHIFT 11
+#define _NETIO_PKT_INFO_VLAN_RMASK 0x1F
+
+#endif
+
+
+/** The size of a memory buffer representing a small packet.
+ * @ingroup egress */
+#define SMALL_PACKET_SIZE 256
+
+/** The size of a memory buffer representing a large packet.
+ * @ingroup egress */
+#define LARGE_PACKET_SIZE 2048
+
+/** The size of a memory buffer representing a jumbo packet.
+ * @ingroup egress */
+#define JUMBO_PACKET_SIZE (12 * 1024)
+
+
+/* Common ethertypes.
+ * @ingroup ingress */
+/** @{ */
+/** The ethertype of IPv4. */
+#define ETHERTYPE_IPv4 (0x0800)
+/** The ethertype of ARP. */
+#define ETHERTYPE_ARP (0x0806)
+/** The ethertype of VLANs. */
+#define ETHERTYPE_VLAN (0x8100)
+/** The ethertype of a Q-in-Q header. */
+#define ETHERTYPE_Q_IN_Q (0x9100)
+/** The ethertype of IPv6. */
+#define ETHERTYPE_IPv6 (0x86DD)
+/** The ethertype of MPLS. */
+#define ETHERTYPE_MPLS (0x8847)
+/** @} */
+
+
+/** The possible return values of NETIO_PKT_STATUS.
+ * @ingroup ingress
+ */
+typedef enum
+{
+ /** No problems were detected with this packet. */
+ NETIO_PKT_STATUS_OK,
+ /** The packet is undersized; this is expected behavior if the packet's
+ * ethertype is unrecognized, but otherwise the packet is likely corrupt. */
+ NETIO_PKT_STATUS_UNDERSIZE,
+ /** The packet is oversized and some trailing bytes have been discarded.
+ This is expected behavior for short packets, since it's impossible to
+ precisely determine the amount of padding which may have been added to
+ them to make them meet the minimum Ethernet packet size. */
+ NETIO_PKT_STATUS_OVERSIZE,
+ /** The packet was judged to be corrupt by hardware (for instance, it had
+ a bad CRC, or part of it was discarded due to lack of buffer space in
+ the I/O shim) and should be discarded. */
+ NETIO_PKT_STATUS_BAD
+} netio_pkt_status_t;
+
+
+/** Log2 of how many buckets we have. */
+#define NETIO_LOG2_NUM_BUCKETS (10)
+
+/** How many buckets we have.
+ * @ingroup ingress */
+#define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS)
+
+
+/**
+ * @brief A group-to-bucket identifier.
+ *
+ * @ingroup setup
+ *
+ * This tells us what to do with a given group.
+ */
+typedef union {
+ /** The header broken down into bits. */
+ struct {
+ /** Whether we should balance on L4, if available */
+ unsigned int __balance_on_l4:1;
+ /** Whether we should balance on L3, if available */
+ unsigned int __balance_on_l3:1;
+ /** Whether we should balance on L2, if available */
+ unsigned int __balance_on_l2:1;
+ /** Reserved for future use */
+ unsigned int __reserved:1;
+ /** The base bucket to use to send traffic */
+ unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS;
+ /** The mask to apply to the balancing value. This must be one less
+ * than a power of two, e.g. 0x3 or 0xFF.
+ */
+ unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS;
+ /** Pad to 32 bits */
+ unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS);
+ } bits;
+ /** To send out the IDN. */
+ unsigned int word;
+}
+netio_group_t;
+
+
+/**
+ * @brief A VLAN-to-bucket identifier.
+ *
+ * @ingroup setup
+ *
+ * This tells us what to do with a given VLAN.
+ */
+typedef netio_group_t netio_vlan_t;
+
+
+/**
+ * A bucket-to-queue mapping.
+ * @ingroup setup
+ */
+typedef unsigned char netio_bucket_t;
+
+
+/**
+ * A packet size can always fit in a netio_size_t.
+ * @ingroup setup
+ */
+typedef unsigned int netio_size_t;
+
+
+/**
+ * @brief Ethernet standard (ingress) packet metadata.
+ *
+ * @ingroup ingress
+ *
+ * This is additional data associated with each packet.
+ * This structure is opaque and accessed through the @ref ingress.
+ *
+ * Also, the buffer population operation currently assumes that standard
+ * metadata is at least as large as minimal metadata, and will need to be
+ * modified if that is no longer the case.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+ /** This structure is opaque. */
+ unsigned char opaque[24];
+#else
+ /** The overall ordinal of the packet */
+ unsigned int __packet_ordinal;
+ /** The ordinal of the packet within the group */
+ unsigned int __group_ordinal;
+ /** The best flow hash IPP could compute. */
+ unsigned int __flow_hash;
+ /** Flags pertaining to checksum calculation, packet type, etc. */
+ unsigned int __flags;
+ /** The first word of "user data". */
+ unsigned int __user_data_0;
+ /** The second word of "user data". */
+ unsigned int __user_data_1;
+#endif
+}
+netio_pkt_metadata_t;
+
+
+/** To ensure that the L3 header is aligned mod 4, the L2 header should be
+ * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes
+ * long. The standard way to do this is to simply add 2 bytes of padding
+ * before the L2 header.
+ */
+#define NETIO_PACKET_PADDING 2
+
+
+
+/**
+ * @brief Ethernet minimal (egress) packet metadata.
+ *
+ * @ingroup egress
+ *
+ * This structure represents information about packets which have
+ * been processed by @ref netio_populate_buffer() or
+ * @ref netio_populate_prepend_buffer(). This structure is opaque
+ * and accessed through the @ref egress.
+ *
+ * @internal This structure is actually copied into the memory used by
+ * standard metadata, which is assumed to be large enough.
+ *
+ * Outside of Doxygen builds the members are six unsigned shorts and two
+ * unsigned chars; the Doxygen view presents them as 14 opaque bytes, which
+ * is the same total when unsigned short is 2 bytes wide.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+ /** This structure is opaque. */
+ unsigned char opaque[14];
+#else
+ /** The offset of the L2 header from the start of the packet data. */
+ unsigned short l2_offset;
+ /** The offset of the L3 header from the start of the packet data. */
+ unsigned short l3_offset;
+ /** Where to write the checksum. */
+ unsigned char csum_location;
+ /** Where to start checksumming from. */
+ unsigned char csum_start;
+ /** Flags pertaining to checksum calculation etc. */
+ unsigned short flags;
+ /** The L2 length of the packet. */
+ unsigned short l2_length;
+ /** The checksum with which to seed the checksum generator. */
+ unsigned short csum_seed;
+ /** How much to checksum. */
+ unsigned short csum_length;
+#endif
+}
+netio_pkt_minimal_metadata_t;
+
+
+#ifndef __DOXYGEN__
+
+/**
+ * @brief An I/O notification header.
+ *
+ * This is the first word of data received from an I/O shim in a notification
+ * packet. It contains framing and status information.
+ *
+ * The bit-field widths below total 32 bits (7+4+1+1+1+2+2+14), so the
+ * "bits" view overlays the whole "word" member when unsigned int is 32 bits
+ * wide.
+ */
+typedef union
+{
+ unsigned int word; /**< The whole word. */
+ /** The various fields. */
+ struct
+ {
+ unsigned int __channel:7; /**< Resource channel. */
+ unsigned int __type:4; /**< Type. */
+ unsigned int __ack:1; /**< Whether an acknowledgement is needed. */
+ unsigned int __reserved:1; /**< Reserved. */
+ unsigned int __protocol:1; /**< A protocol-specific word is added. */
+ unsigned int __status:2; /**< Status of the transfer. */
+ unsigned int __framing:2; /**< Framing of the transfer. */
+ unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */
+ } bits;
+}
+__netio_pkt_notif_t;
+
+
+/**
+ * Returns the base address of the packet.
+ *
+ * @p p must be an object (not a pointer) that has a "word" member. The
+ * macro clears the low six bits of that word (mask 0xFFFFFFC0) and casts
+ * the result to an unsigned char pointer.
+ */
+#define _NETIO_PKT_HANDLE_BASE(p) \
+ ((unsigned char*)((p).word & 0xFFFFFFC0))
+
+/**
+ * Returns the base address of the packet, given a pointer to a packet
+ * descriptor (any object with a "__packet" member, e.g. a netio_pkt_t*).
+ *
+ * The argument is parenthesized so that pointer expressions such as
+ * "&pkt" or "pkts + i" expand correctly.
+ */
+#define _NETIO_PKT_BASE(p) \
+ _NETIO_PKT_HANDLE_BASE((p)->__packet)
+
+/**
+ * @brief An I/O notification packet (second word)
+ *
+ * This is the second word of data received from an I/O shim in a notification
+ * packet. This is the virtual address of the packet buffer, plus some flag
+ * bits. (The virtual address of the packet is always 256-byte aligned so we
+ * have room for 8 bits' worth of flags in the low 8 bits.)
+ *
+ * @internal
+ * NOTE: The low two bits must contain "__queue", so the "packet size"
+ * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly.
+ *
+ * If __addr or __offset are moved, _NETIO_PKT_HANDLE_BASE and
+ * _NETIO_PKT_BASE (defined right above this) must be changed.
+ *
+ * The bit-field widths below total 32 bits (2+2+1+1+2+24).
+ *
+ * NOTE(review): _NETIO_PKT_HANDLE_BASE masks with 0xFFFFFFC0, i.e. it keeps
+ * the upper 26 bits. Assuming bit-fields are allocated starting from the
+ * least significant bit, that keeps __offset as well as __addr -- confirm
+ * this is the intended "base" for callers.
+ */
+typedef union
+{
+ unsigned int word; /**< The whole word. */
+ /** The various fields. */
+ struct
+ {
+ /** Which queue the packet will be returned to once it is sent back to
+ the IPP. This is one of the SIZE_xxx values. */
+ unsigned int __queue:2;
+
+ /** The IPP handle of the sending IPP. */
+ unsigned int __ipp_handle:2;
+
+ /** Reserved for future use. */
+ unsigned int __reserved:1;
+
+ /** If 1, this packet has minimal (egress) metadata; otherwise, it
+ has standard (ingress) metadata. */
+ unsigned int __minimal:1;
+
+ /** Offset of the metadata within the packet. This value is multiplied
+ * by 64 and added to the base packet address to get the metadata
+ * address. Note that this field is aligned within the word such that
+ * you can easily extract the metadata address with a 26-bit mask. */
+ unsigned int __offset:2;
+
+ /** The top 24 bits of the packet's virtual address. */
+ unsigned int __addr:24;
+ } bits;
+}
+__netio_pkt_handle_t;
+
+#endif /* !__DOXYGEN__ */
+
+
+/**
+ * @brief A handle for an I/O packet's storage.
+ * @ingroup ingress
+ *
+ * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its
+ * packet metadata removed. It is a much smaller type that exists to
+ * facilitate applications where the full ::netio_pkt_t type is too
+ * large, such as those that cache enormous numbers of packets or wish
+ * to transmit packet descriptors over the UDN.
+ *
+ * Because there is no metadata, most ::netio_pkt_t operations cannot be
+ * performed on a netio_pkt_handle_t. It supports only
+ * netio_free_handle() (to free the buffer) and
+ * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents).
+ * The application must acquire any additional metadata it wants from the
+ * original ::netio_pkt_t and record it separately.
+ *
+ * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling
+ * NETIO_PKT_HANDLE(). An invalid handle (analogous to NULL) can be
+ * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can
+ * be tested for validity with NETIO_PKT_HANDLE_IS_VALID().
+ *
+ * The single word is a verbatim copy of the packet's __packet word (that
+ * is what NETIO_PKT_HANDLE() stores), and a word of zero is the invalid
+ * handle.
+ */
+typedef struct
+{
+ unsigned int word; /**< Opaque bits. */
+} netio_pkt_handle_t;
+
+/**
+ * @brief A packet descriptor.
+ *
+ * @ingroup ingress
+ * @ingroup egress
+ *
+ * This data structure represents a packet. The structure is manipulated
+ * through the @ref ingress and the @ref egress.
+ *
+ * While the contents of a netio_pkt_t are opaque, the structure itself is
+ * portable. This means that it may be shared between all tiles which have
+ * done a netio_input_register() call for the interface on which the pkt_t
+ * was initially received (via netio_get_packet()) or retrieved (via
+ * netio_get_buffer()). The contents of a netio_pkt_t can be transmitted to
+ * another tile via shared memory, or via a UDN message, or by other means.
+ * The destination tile may then use the pkt_t as if it had originally been
+ * received locally; it may read or write the packet's data, read its
+ * metadata, free the packet, send the packet, transfer the netio_pkt_t to
+ * yet another tile, and so forth.
+ *
+ * Once a netio_pkt_t has been transferred to a second tile, the first tile
+ * should not reference the original copy; in particular, if more than one
+ * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will
+ * become corrupted. Note also that each tile which reads or modifies
+ * packet data must obey the memory coherency rules outlined in @ref input.
+ *
+ * Outside of Doxygen builds the layout is the notification header word,
+ * the packet handle word, and the standard metadata block; the Doxygen
+ * view presents this as 32 opaque bytes (4 + 4 + 24 when unsigned int is
+ * 4 bytes wide).
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+ /** This structure is opaque. */
+ unsigned char opaque[32];
+#else
+ /** For an ingress packet (one with standard metadata), this is the
+ * notification header we got from the I/O shim. For an egress packet
+ * (one with minimal metadata), this word is zero if the packet has not
+ * been populated, and nonzero if it has. */
+ __netio_pkt_notif_t __notif_header;
+
+ /** Virtual address of the packet buffer, plus state flags. */
+ __netio_pkt_handle_t __packet;
+
+ /** Metadata associated with the packet. For egress packets the minimal
+ * metadata is accessed through a cast of this same storage (see
+ * NETIO_PKT_MINIMAL_METADATA()). */
+ netio_pkt_metadata_t __metadata;
+#endif
+}
+netio_pkt_t;
+
+
+#ifndef __DOXYGEN__ /* Internal field accessors; hidden from Doxygen. */
+
+#define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header)
+#define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle)
+#define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue)
+#define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header) /* mda unused */
+#define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle) /* mda unused */
+#define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal)
+#define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue) /* mda unused */
+#define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags) /* pkt unused */
+
+/* Packet information table, used by the attribute access functions below.
+ * It is indexed by the packet type extracted from the metadata flags word;
+ * the VLAN and ethertype accessors decode from each entry the 16-bit-unit
+ * position of the corresponding field within the L2 header. */
+extern const uint16_t _netio_pkt_info[];
+
+#endif /* !__DOXYGEN__ */
+
+
+#ifndef __DOXYGEN__
+/* These macros are deprecated and will disappear in a future MDE release.
+ * Each one simply forwards to the NETIO_PKT_L4_CSUM_CORRECT variant that
+ * takes the same argument list. */
+#define NETIO_PKT_GOOD_CHECKSUM(pkt) \
+ NETIO_PKT_L4_CSUM_CORRECT(pkt)
+#define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \
+ NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt)
+#endif /* !__DOXYGEN__ */
+
+
+/* Packet attribute access functions. */
+
+/** Return a pointer to the metadata for a packet.
+ * @ingroup ingress
+ *
+ * Calling this function once and passing the result to other retrieval
+ * functions with a "_M" suffix usually improves performance. This
+ * function must be called on an 'ingress' packet (i.e. one retrieved
+ * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or
+ * @ref netio_populate_prepend_buffer() have not been called). Use of this
+ * function on an 'egress' packet will cause an assertion failure.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's standard metadata.
+ */
+static __inline netio_pkt_metadata_t*
+NETIO_PKT_METADATA(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* standard_md = &pkt->__metadata;
+
+  /* Only packets that are not flagged as minimal carry standard metadata. */
+  netio_assert(!pkt->__packet.bits.__minimal);
+
+  return standard_md;
+}
+
+
+/** Return a pointer to the minimal metadata for a packet.
+ * @ingroup egress
+ *
+ * Calling this function once and passing the result to other retrieval
+ * functions with a "_MM" suffix usually improves performance. This
+ * function must be called on an 'egress' packet (i.e. one on which
+ * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer()
+ * have been called, or one retrieved by @ref netio_get_buffer()). Use of
+ * this function on an 'ingress' packet will cause an assertion failure.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's minimal metadata.
+ */
+static __inline netio_pkt_minimal_metadata_t*
+NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+  netio_pkt_metadata_t* storage = &pkt->__metadata;
+
+  /* The packet must be flagged as carrying minimal metadata. */
+  netio_assert(pkt->__packet.bits.__minimal);
+
+  /* Minimal metadata is read through the standard metadata's storage. */
+  return (netio_pkt_minimal_metadata_t*) storage;
+}
+
+
+/** Determine whether a packet has 'minimal' metadata.
+ * @ingroup pktfuncs
+ *
+ * This function will return nonzero if the packet is an 'egress'
+ * packet (i.e. one on which @ref netio_populate_buffer() or
+ * @ref netio_populate_prepend_buffer() have been called, or one
+ * retrieved by @ref netio_get_buffer()), and zero if the packet
+ * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(),
+ * which has not been converted into an 'egress' packet).
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the packet has minimal metadata.
+ */
+static __inline unsigned int
+NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt)
+{
+  /* The one-bit __minimal flag of the packet word is the answer. */
+  unsigned int minimal_flag = pkt->__packet.bits.__minimal;
+
+  return minimal_flag;
+}
+
+
+/** Return a handle for a packet's storage.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A handle for the packet's storage.
+ */
+static __inline netio_pkt_handle_t
+NETIO_PKT_HANDLE(netio_pkt_t* pkt)
+{
+  /* The handle is a verbatim copy of the packet's __packet word. */
+  return (netio_pkt_handle_t) { pkt->__packet.word };
+}
+
+
+/** A special reserved value indicating the absence of a packet handle.
+ *
+ * Expands to a netio_pkt_handle_t compound literal whose word is zero,
+ * which is the value NETIO_PKT_HANDLE_IS_VALID() reports as invalid.
+ *
+ * @ingroup pktfuncs
+ */
+#define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 })
+
+
+/** Test whether a packet handle is valid.
+ *
+ * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE
+ * to indicate no packet at all. This function tests to see if a packet
+ * handle is a real handle, not this special reserved value.
+ *
+ * @ingroup pktfuncs
+ *
+ * @param[in] handle Handle on which to operate.
+ * @return One if the packet handle is valid, else zero.
+ */
+static __inline unsigned int
+NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle)
+{
+  /* A zero word is the reserved "no packet" handle. */
+  if (handle.word == 0)
+    return 0;
+
+  return 1;
+}
+
+
+
+/** Return a pointer to the start of the packet's custom header.
+ * A custom header may or may not be present, depending upon the IPP; its
+ * contents and alignment are also IPP-dependent. Currently, none of the
+ * standard IPPs supplied by Tilera produce a custom header. If present,
+ * the custom header precedes the L2 header in the packet buffer.
+ * @ingroup ingress
+ *
+ * @param[in] handle Handle on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle)
+{
+  unsigned char* buffer_base = _NETIO_PKT_HANDLE_BASE(handle);
+
+  /* Packet contents begin after the alignment padding bytes. */
+  return buffer_base + NETIO_PACKET_PADDING;
+}
+
+
+/** Return the length of the packet's custom header.
+ * A custom header may or may not be present, depending upon the IPP; its
+ * contents and alignment are also IPP-dependent. Currently, none of the
+ * standard IPPs supplied by Tilera produce a custom header. If present,
+ * the custom header precedes the L2 header in the packet buffer.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's custom header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /*
+   * The flags word stores this length in 4-byte words. Rather than
+   * extracting the field and multiplying by 4, shift right by two bits
+   * fewer than the field position and widen the mask by two bits, which
+   * yields the byte count directly.
+   */
+  const unsigned int shifted_flags =
+    mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2);
+
+  return shifted_flags & (_NETIO_PKT_CUSTOM_LEN_RMASK << 2);
+}
+
+
+/** Return the length of the packet, starting with the custom header.
+ * A custom header may or may not be present, depending upon the IPP; its
+ * contents and alignment are also IPP-dependent. Currently, none of the
+ * standard IPPs supplied by Tilera produce a custom header. If present,
+ * the custom header precedes the L2 header in the packet buffer.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Total transfer size recorded in the notification header. */
+  netio_size_t transfer_bytes = pkt->__notif_header.bits.__transfer_size;
+
+  /* The leading alignment padding is not part of the packet length. */
+  return transfer_bytes - NETIO_PACKET_PADDING;
+}
+
+
+/** Return a pointer to the start of the packet's custom header.
+ * A custom header may or may not be present, depending upon the IPP; its
+ * contents and alignment are also IPP-dependent. Currently, none of the
+ * standard IPPs supplied by Tilera produce a custom header. If present,
+ * the custom header precedes the L2 header in the packet buffer.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Go through the packet's storage handle; the metadata is not needed. */
+  netio_pkt_handle_t storage_handle = NETIO_PKT_HANDLE(pkt);
+
+  return NETIO_PKT_CUSTOM_DATA_H(storage_handle);
+}
+
+
+/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's L2 header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /*
+   * The flags word stores this length in 4-byte words. Shifting right by
+   * two bits fewer than the field position, with the mask widened by two
+   * bits, extracts the field already multiplied by 4.
+   */
+  const unsigned int shifted_flags =
+    mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2);
+  const unsigned int whole_word_bytes =
+    shifted_flags & (_NETIO_PKT_L2_LEN_RMASK << 2);
+
+  /* Two more bytes are added on top of the whole words. */
+  return whole_word_bytes + 2;
+}
+
+
+/** Return the length of the packet, starting with the L2 (Ethernet) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  netio_size_t from_custom_header = NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt);
+  netio_size_t custom_header_bytes = NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
+
+  /* Drop the custom header to get the length from the L2 header onward. */
+  return from_custom_header - custom_header_bytes;
+}
+
+
+/** Return a pointer to the start of the packet's L2 (Ethernet) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  unsigned char* custom_start = NETIO_PKT_CUSTOM_DATA_M(mda, pkt);
+
+  /* The L2 header follows the custom header. */
+  return custom_start + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Retrieve the length of the packet, starting with the L3 (generally,
+ * the IP) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Length of the packet's L3 header and data, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  netio_size_t from_l2_header = NETIO_PKT_L2_LENGTH_M(mda, pkt);
+  netio_size_t l2_header_bytes = NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt);
+
+  /* Drop the L2 header to get the length from the L3 header onward. */
+  return from_l2_header - l2_header_bytes;
+}
+
+
+/** Return a pointer to the packet's L3 (generally, the IP) header.
+ * @ingroup ingress
+ *
+ * Note that we guarantee word alignment of the L3 header.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's L3 header.
+ */
+static __inline unsigned char*
+NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  unsigned char* l2_start = NETIO_PKT_L2_DATA_M(mda, pkt);
+
+  /* The L3 header follows the L2 header. */
+  return l2_start + NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Return the ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given an ordinal number when it is delivered by the IPP.
+ * In the medium term, the ordinal is unique and monotonically increasing,
+ * being incremented by 1 for each packet; the ordinal of the first packet
+ * delivered after the IPP starts is zero. (Since the ordinal is of finite
+ * size, given enough input packets, it will eventually wrap around to zero;
+ * in the long term, therefore, ordinals are not unique.) The ordinals
+ * handed out by different IPPs are not disjoint, so two packets from
+ * different IPPs may have identical ordinals. Packets dropped by the
+ * IPP or by the I/O shim are not assigned ordinals.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP packet ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Stored directly in the standard metadata; pkt is not consulted. */
+  unsigned int overall_ordinal = mda->__packet_ordinal;
+
+  return overall_ordinal;
+}
+
+
+/** Return the per-group ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given a per-group ordinal number when it is
+ * delivered by the IPP. By default, the group is the packet's VLAN,
+ * although IPP can be recompiled to use different values. In
+ * the medium term, the ordinal is unique and monotonically
+ * increasing, being incremented by 1 for each packet; the ordinal of
+ * the first packet distributed to a particular group is zero.
+ * (Since the ordinal is of finite size, given enough input packets,
+ * it will eventually wrap around to zero; in the long term,
+ * therefore, ordinals are not unique.) The ordinals handed out by
+ * different IPPs are not disjoint, so two packets from different IPPs
+ * may have identical ordinals; similarly, packets distributed to
+ * different groups may have identical ordinals. Packets dropped by
+ * the IPP or by the I/O shim are not assigned ordinals.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP, per-group ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Stored directly in the standard metadata; pkt is not consulted. */
+  unsigned int ordinal_in_group = mda->__group_ordinal;
+
+  return ordinal_in_group;
+}
+
+
+/** Return the VLAN ID assigned to the packet.
+ * @ingroup ingress
+ *
+ * This value is usually contained within the packet header.
+ *
+ * This value will be zero if the packet does not have a VLAN tag, or if
+ * this value was not extracted from the packet.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's VLAN ID.
+ */
+static __inline unsigned short
+NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  int vlan_class =
+    (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK;
+
+  if (vlan_class != _NETIO_PKT_VLAN_NONE)
+  {
+    /* View the L2 header as 16-bit units; the info table entry for this
+     * packet type says which unit holds the VLAN tag. */
+    unsigned short* l2_units =
+      (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt);
+    int type_index =
+      (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK;
+    unsigned int tag_unit =
+      (_netio_pkt_info[type_index] >> _NETIO_PKT_INFO_VLAN_SHIFT) &
+      _NETIO_PKT_INFO_VLAN_RMASK;
+    unsigned short raw_tag = l2_units[tag_unit];
+
+    /* Both branches shift the swapped word right by 16 and keep the low
+     * 12 bits. */
+#ifdef __TILECC__
+    return (__insn_bytex(raw_tag) >> 16) & 0xFFF;
+#else
+    return (__builtin_bswap32(raw_tag) >> 16) & 0xFFF;
+#endif
+  }
+
+  /* The flags word records no VLAN for this packet. */
+  return 0;
+}
+
+
+/** Return the ethertype of the packet.
+ * @ingroup ingress
+ *
+ * This value is usually contained within the packet header.
+ *
+ * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M()
+ * returns true, and otherwise, may not be well defined.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's ethertype.
+ */
+static __inline unsigned short
+NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* View the L2 header as 16-bit units; the info table entry for this
+   * packet type says which unit holds the ethertype. */
+  unsigned short* l2_units = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt);
+  int type_index =
+    (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK;
+  unsigned int etype_unit =
+    (_netio_pkt_info[type_index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) &
+    _NETIO_PKT_INFO_ETYPE_RMASK;
+  unsigned short raw_etype = l2_units[etype_unit];
+
+  /* A 32-bit byte swap followed by a 16-bit shift swaps the two bytes. */
+  return __builtin_bswap32(raw_etype) >> 16;
+}
+
+
+/** Return the flow hash computed on the packet.
+ * @ingroup ingress
+ *
+ * For TCP and UDP packets, this hash is calculated by hashing together
+ * the "5-tuple" values, specifically the source IP address, destination
+ * IP address, protocol type, source port and destination port.
+ * The hash value is intended to be helpful for millions of distinct
+ * flows.
+ *
+ * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is
+ * derived by hashing together the source and destination IP addresses.
+ *
+ * For MPLS-encapsulated packets, the flow hash is derived by hashing
+ * the first MPLS label.
+ *
+ * For all other packets the flow hash is computed from the source
+ * and destination Ethernet addresses.
+ *
+ * The hash is symmetric, meaning it produces the same value if the
+ * source and destination are swapped. The only exceptions are
+ * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple
+ * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32
+ * (Encap Security Payload), which use only the destination address
+ * since the source address is not meaningful.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's 32-bit flow hash.
+ */
+static __inline unsigned int
+NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Stored directly in the standard metadata; pkt is not consulted. */
+  unsigned int stored_hash = mda->__flow_hash;
+
+  return stored_hash;
+}
+
+
+/** Return the first word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first
+ * word of user data contains the least significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_low()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's first word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Stored directly in the standard metadata; pkt is not consulted. */
+  unsigned int first_word = mda->__user_data_0;
+
+  return first_word;
+}
+
+
+/** Return the second word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second
+ * word of user data contains the most significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_high()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's second word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Stored directly in the standard metadata; pkt is not consulted. */
+  unsigned int second_word = mda->__user_data_1;
+
+  return second_word;
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L4 checksum was calculated.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Calculated means the "no L4 checksum" flag is clear. */
+  return (mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK) == 0;
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to
+ * be correct.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Correct means neither the "bad" nor the "not calculated" flag is set. */
+  return (mda->__flags &
+          (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK)) == 0;
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L3 (IP) checksum was calculated.
+*/
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Calculated means the "no L3 checksum" flag is clear. */
+  return (mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK) == 0;
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated and found to be
+ * correct.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Correct means neither the "bad" nor the "not calculated" flag is set. */
+  return (mda->__flags &
+          (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK)) == 0;
+}
+
+
+/** Determine whether the ethertype was recognized and L3 packet data was
+ * processed.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the ethertype was recognized and L3 packet data was
+ * processed.
+ */
+static __inline unsigned int
+NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Recognized means the "type unrecognized" flag is clear. */
+  return (mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK) == 0;
+}
+
+
+/** Retrieve the status of a packet and any errors that may have occurred
+ * during ingress processing (length mismatches, CRC errors, etc.).
+ * @ingroup ingress
+ *
+ * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
+ * returns zero are always reported as underlength, as there is no a priori
+ * means to determine their length. Normally, applications should use
+ * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this
+ * function.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's status.
+ */
+static __inline netio_pkt_status_t
+NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* The status lives in the notification header, not in the metadata. */
+  const __netio_pkt_notif_t* header = &pkt->__notif_header;
+
+  return (netio_pkt_status_t) header->bits.__status;
+}
+
+
+/** Report whether a packet is bad (i.e., was shorter than expected based on
+ * its headers, or had a bad CRC).
+ * @ingroup ingress
+ *
+ * Note that this function does not verify L3 or L4 checksums.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the packet is bad and should be discarded.
+ */
+static __inline unsigned int
+NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  netio_pkt_status_t status = NETIO_PKT_STATUS_M(mda, pkt);
+
+  /* A status with bit 0 clear is never reported as bad. */
+  if (!(status & 1))
+    return 0;
+
+  /* With bit 0 set, a recognized ethertype is enough to call it bad. */
+  if (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt))
+    return 1;
+
+  /* Otherwise only the explicit BAD status counts. */
+  return status == NETIO_PKT_STATUS_BAD;
+}
+
+
+/** Return the length of the packet, starting with the L2 (Ethernet) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  /* Stored directly in the minimal metadata; pkt is not consulted. */
+  netio_size_t stored_length = mmd->l2_length;
+
+  return stored_length;
+}
+
+
+/** Return the length of the L2 (Ethernet) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's L2 header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
+                              netio_pkt_t* pkt)
+{
+  netio_size_t l3_start = mmd->l3_offset;
+  netio_size_t l2_start = mmd->l2_offset;
+
+  /* The L2 header spans from the L2 offset up to the L3 offset. */
+  return l3_start - l2_start;
+}
+
+
+/** Return the length of the packet, starting with the L3 (IP) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Length of the packet's L3 header and data, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  netio_size_t from_l2_header = NETIO_PKT_L2_LENGTH_MM(mmd, pkt);
+  netio_size_t l2_header_bytes = NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt);
+
+  /* Drop the L2 header to get the length from the L3 header onward. */
+  return from_l2_header - l2_header_bytes;
+}
+
+
+/** Return a pointer to the packet's L3 (generally, the IP) header.
+ * @ingroup egress
+ *
+ * Note that we guarantee word alignment of the L3 header.
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's L3 header.
+ */
+static __inline unsigned char*
+NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  unsigned char* buffer_base = _NETIO_PKT_BASE(pkt);
+
+  /* The minimal metadata records the L3 header's offset from the base. */
+  return buffer_base + mmd->l3_offset;
+}
+
+
+/** Return a pointer to the packet's L2 (Ethernet) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  unsigned char* buffer_base = _NETIO_PKT_BASE(pkt);
+
+  /* The minimal metadata records the L2 header's offset from the base. */
+  return buffer_base + mmd->l2_offset;
+}
+
+
+/** Retrieve the status of a packet and any errors that may have occurred
+ * during ingress processing (length mismatches, CRC errors, etc.).
+ * @ingroup ingress
+ *
+ * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
+ * returns zero are always reported as underlength, as there is no a priori
+ * means to determine their length. Normally, applications should use
+ * @ref NETIO_PKT_BAD() instead of explicitly checking status with this
+ * function.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's status.
+ */
+static __inline netio_pkt_status_t
+NETIO_PKT_STATUS(netio_pkt_t* pkt)
+{
+  const __netio_pkt_notif_t* header = &pkt->__notif_header;
+
+  /* Asserts that the packet is not flagged as minimal. */
+  netio_assert(!pkt->__packet.bits.__minimal);
+
+  return (netio_pkt_status_t) header->bits.__status;
+}
+
+
+/** Report whether a packet is bad (i.e., was shorter than expected based on
+ * its headers, or had a bad CRC).
+ * @ingroup ingress
+ *
+ * Note that this function does not verify L3 or L4 checksums.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the packet is bad and should be discarded.
+ */
+static __inline unsigned int
+NETIO_PKT_BAD(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata, then defer to the "_M" variant. */
+  return NETIO_PKT_BAD_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the length of the packet's custom header.
+ * A custom header may or may not be present, depending upon the IPP; its
+ * contents and alignment are also IPP-dependent. Currently, none of the
+ * standard IPPs supplied by Tilera produce a custom header. If present,
+ * the custom header precedes the L2 header in the packet buffer.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's custom header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the length of the packet, starting with the custom header.
+ * A custom header may or may not be present, depending upon the IPP; its
+ * contents and alignment are also IPP-dependent. Currently, none of the
+ * standard IPPs supplied by Tilera produce a custom header. If present,
+ * the custom header precedes the L2 header in the packet buffer.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_CUSTOM_LENGTH_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return a pointer to the packet's custom header.
+ * A custom header may or may not be present, depending upon the IPP; its
+ * contents and alignment are also IPP-dependent. Currently, none of the
+ * standard IPPs supplied by Tilera produce a custom header. If present,
+ * the custom header precedes the L2 header in the packet buffer.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_CUSTOM_DATA_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's L2 header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt)
+{
+  /* Standard metadata is handled first; minimal metadata falls through. */
+  if (!NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    return NETIO_PKT_L2_HEADER_LENGTH_M(NETIO_PKT_METADATA(pkt), pkt);
+  }
+
+  return NETIO_PKT_L2_HEADER_LENGTH_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt);
+}
+
+
+/** Return the length of the packet, starting with the L2 (Ethernet) header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt)
+{
+  /* Standard metadata is handled first; minimal metadata falls through. */
+  if (!NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    return NETIO_PKT_L2_LENGTH_M(NETIO_PKT_METADATA(pkt), pkt);
+  }
+
+  return NETIO_PKT_L2_LENGTH_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt);
+}
+
+
+/** Return a pointer to the packet's L2 (Ethernet) header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_L2_DATA(netio_pkt_t* pkt)
+{
+  /* Standard metadata is handled first; minimal metadata falls through. */
+  if (!NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    return NETIO_PKT_L2_DATA_M(NETIO_PKT_METADATA(pkt), pkt);
+  }
+
+  return NETIO_PKT_L2_DATA_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt);
+}
+
+
+/** Retrieve the length of the packet, starting with the L3 (generally, the IP)
+ * header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Length of the packet's L3 header and data, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt)
+{
+  /* Standard metadata is handled first; minimal metadata falls through. */
+  if (!NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    return NETIO_PKT_L3_LENGTH_M(NETIO_PKT_METADATA(pkt), pkt);
+  }
+
+  return NETIO_PKT_L3_LENGTH_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt);
+}
+
+
+/** Return a pointer to the packet's L3 (generally, the IP) header.
+ * @ingroup pktfuncs
+ *
+ * Note that we guarantee word alignment of the L3 header.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's L3 header.
+ */
+static __inline unsigned char*
+NETIO_PKT_L3_DATA(netio_pkt_t* pkt)
+{
+  /* Standard metadata is handled first; minimal metadata falls through. */
+  if (!NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    return NETIO_PKT_L3_DATA_M(NETIO_PKT_METADATA(pkt), pkt);
+  }
+
+  return NETIO_PKT_L3_DATA_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt);
+}
+
+
+/** Return the ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given an ordinal number when it is delivered by the IPP.
+ * In the medium term, the ordinal is unique and monotonically increasing,
+ * being incremented by 1 for each packet; the ordinal of the first packet
+ * delivered after the IPP starts is zero. (Since the ordinal is of finite
+ * size, given enough input packets, it will eventually wrap around to zero;
+ * in the long term, therefore, ordinals are not unique.) The ordinals
+ * handed out by different IPPs are not disjoint, so two packets from
+ * different IPPs may have identical ordinals. Packets dropped by the
+ * IPP or by the I/O shim are not assigned ordinals.
+ *
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP packet ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_ORDINAL(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_ORDINAL_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the per-group ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given a per-group ordinal number when it is
+ * delivered by the IPP. By default, the group is the packet's VLAN,
+ * although IPP can be recompiled to use different values. In
+ * the medium term, the ordinal is unique and monotonically
+ * increasing, being incremented by 1 for each packet; the ordinal of
+ * the first packet distributed to a particular group is zero.
+ * (Since the ordinal is of finite size, given enough input packets,
+ * it will eventually wrap around to zero; in the long term,
+ * therefore, ordinals are not unique.) The ordinals handed out by
+ * different IPPs are not disjoint, so two packets from different IPPs
+ * may have identical ordinals; similarly, packets distributed to
+ * different groups may have identical ordinals. Packets dropped by
+ * the IPP or by the I/O shim are not assigned ordinals.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP, per-group ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_GROUP_ORDINAL_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the VLAN ID assigned to the packet.
+ * @ingroup ingress
+ *
+ * This is usually also contained within the packet header. If the packet
+ * does not have a VLAN tag, the VLAN ID returned by this function is zero.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's VLAN ID.
+ */
+static __inline unsigned short
+NETIO_PKT_VLAN_ID(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_VLAN_ID_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the ethertype of the packet.
+ * @ingroup ingress
+ *
+ * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
+ * returns true, and otherwise, may not be well defined.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's ethertype.
+ */
+static __inline unsigned short
+NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_ETHERTYPE_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the flow hash computed on the packet.
+ * @ingroup ingress
+ *
+ * For TCP and UDP packets, this hash is calculated by hashing together
+ * the "5-tuple" values, specifically the source IP address, destination
+ * IP address, protocol type, source port and destination port.
+ * The hash value is intended to be helpful for millions of distinct
+ * flows.
+ *
+ * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is
+ * derived by hashing together the source and destination IP addresses.
+ *
+ * For MPLS-encapsulated packets, the flow hash is derived by hashing
+ * the first MPLS label.
+ *
+ * For all other packets the flow hash is computed from the source
+ * and destination Ethernet addresses.
+ *
+ * The hash is symmetric, meaning it produces the same value if the
+ * source and destination are swapped. The only exceptions are
+ * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple
+ * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32
+ * (Encap Security Payload), which use only the destination address
+ * since the source address is not meaningful.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's 32-bit flow hash.
+ */
+static __inline unsigned int
+NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_FLOW_HASH_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the first word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first
+ * word of user data contains the least significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_low()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's first word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_USER_DATA_0_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the second word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second
+ * word of user data contains the most significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_high()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's second word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_USER_DATA_1_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L4 checksum was calculated.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_L4_CSUM_CALCULATED_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to
+ * be correct.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_L4_CSUM_CORRECT_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L3 (IP) checksum was calculated.
+*/
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_L3_CSUM_CALCULATED_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated and found to be
+ * correct.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_L3_CSUM_CORRECT_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Determine whether the Ethertype was recognized and L3 packet data was
+ * processed.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the Ethertype was recognized and L3 packet data was
+ * processed.
+ */
+static __inline unsigned int
+NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt)
+{
+  return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Set an egress packet's L2 length, using a metadata pointer to speed the
+ * computation.
+ * @ingroup egress
+ *
+ * The length is stored in the @c l2_length field of the minimal metadata;
+ * @p pkt itself is not referenced by this implementation.
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @param[in] len Packet L2 length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt,
+ int len)
+{
+ mmd->l2_length = len;
+}
+
+
+/** Set an egress packet's L2 length.
+ * @ingroup egress
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] len Packet L2 length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len)
+{
+  NETIO_PKT_SET_L2_LENGTH_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt, len);
+}
+
+
+/** Set an egress packet's L2 header length, using a metadata pointer to
+ * speed the computation.
+ * @ingroup egress
+ *
+ * It is not normally necessary to call this routine; only the L2 length,
+ * not the header length, is needed to transmit a packet. It may be useful if
+ * the egress packet will later be processed by code which expects to use
+ * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
+ *
+ * The header length is recorded indirectly: the @c l3_offset field of the
+ * minimal metadata is set to the current @c l2_offset plus @p len, so the
+ * result depends on the value @c l2_offset holds at the time of the call.
+ * @p pkt itself is not referenced by this implementation.
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @param[in] len Packet L2 header length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
+ netio_pkt_t* pkt, int len)
+{
+ mmd->l3_offset = mmd->l2_offset + len;
+}
+
+
+/** Set an egress packet's L2 header length.
+ * @ingroup egress
+ *
+ * It is not normally necessary to call this routine; only the L2 length,
+ * not the header length, is needed to transmit a packet. It may be useful if
+ * the egress packet will later be processed by code which expects to use
+ * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] len Packet L2 header length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len)
+{
+  NETIO_PKT_SET_L2_HEADER_LENGTH_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt, len);
+}
+
+
+/** Set up an egress packet for hardware checksum computation, using a
+ * metadata pointer to speed the operation.
+ * @ingroup egress
+ *
+ * NetIO provides the ability to automatically calculate a standard
+ * 16-bit Internet checksum on transmitted packets. The application
+ * may specify the point in the packet where the checksum starts, the
+ * number of bytes to be checksummed, and the two bytes in the packet
+ * which will be replaced with the completed checksum. (If the range
+ * of bytes to be checksummed includes the bytes to be replaced, the
+ * initial values of those bytes will be included in the checksum.)
+ *
+ * For some protocols, the packet checksum covers data which is not present
+ * in the packet, or is at least not contiguous to the main data payload.
+ * For instance, the TCP checksum includes a "pseudo-header" which includes
+ * the source and destination IP addresses of the packet. To accommodate
+ * this, the checksum engine may be "seeded" with an initial value, which
+ * the application would need to compute based on the specific protocol's
+ * requirements. Note that the seed is given in host byte order (little-
+ * endian), not network byte order (big-endian); code written to compute a
+ * pseudo-header checksum in network byte order will need to byte-swap it
+ * before use as the seed.
+ *
+ * Note that the checksum is computed as part of the transmission process,
+ * so it will not be present in the packet upon completion of this routine.
+ * This routine only records the request in the minimal metadata: it stores
+ * the arguments in the @c csum_start, @c csum_length, @c csum_location and
+ * @c csum_seed fields and ORs @c _NETIO_PKT_NEED_EDMA_CSUM_MASK into
+ * @c flags. @p pkt itself is not referenced by this implementation.
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @param[in] start Offset within L2 packet of the first byte to include in
+ * the checksum.
+ * @param[in] length Number of bytes to include in the checksum.
+ * @param[in] location Offset within L2 packet of the first of the two bytes
+ * to be replaced with the calculated checksum.
+ * @param[in] seed Initial value of the running checksum before any of the
+ * packet data is added.
+ */
+static __inline void
+NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd,
+ netio_pkt_t* pkt, int start, int length,
+ int location, uint16_t seed)
+{
+ mmd->csum_start = start;
+ mmd->csum_length = length;
+ mmd->csum_location = location;
+ mmd->csum_seed = seed;
+ mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK;
+}
+
+
+/** Set up an egress packet for hardware checksum computation.
+ * @ingroup egress
+ *
+ * NetIO provides the ability to automatically calculate a standard
+ * 16-bit Internet checksum on transmitted packets. The application
+ * may specify the point in the packet where the checksum starts, the
+ * number of bytes to be checksummed, and the two bytes in the packet
+ * which will be replaced with the completed checksum. (If the range
+ * of bytes to be checksummed includes the bytes to be replaced, the
+ * initial values of those bytes will be included in the checksum.)
+ *
+ * For some protocols, the packet checksum covers data which is not present
+ * in the packet, or is at least not contiguous to the main data payload.
+ * For instance, the TCP checksum includes a "pseudo-header" which includes
+ * the source and destination IP addresses of the packet. To accommodate
+ * this, the checksum engine may be "seeded" with an initial value, which
+ * the application would need to compute based on the specific protocol's
+ * requirements. Note that the seed is given in host byte order (little-
+ * endian), not network byte order (big-endian); code written to compute a
+ * pseudo-header checksum in network byte order will need to byte-swap it
+ * before use as the seed.
+ *
+ * Note that the checksum is computed as part of the transmission process,
+ * so it will not be present in the packet upon completion of this routine.
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] start Offset within L2 packet of the first byte to include in
+ * the checksum.
+ * @param[in] length Number of bytes to include in the checksum.
+ * @param[in] location Offset within L2 packet of the first of the two bytes
+ * to be replaced with the calculated checksum.
+ * @param[in] seed Initial value of the running checksum before any of the
+ * packet data is added.
+ */
+static __inline void
+NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length,
+                         int location, uint16_t seed)
+{
+  NETIO_PKT_DO_EGRESS_CSUM_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt,
+                              start, length, location, seed);
+}
+
+
+/** Return the number of bytes which could be prepended to a packet, using a
+ * metadata pointer to speed the operation.
+ * See @ref netio_populate_prepend_buffer() to get a full description of
+ * prepending.
+ *
+ * @param[in,out] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's @c __offset field shifted left by 6 (that is,
+ * multiplied by 64), plus the custom header length reported by
+ * NETIO_PKT_CUSTOM_HEADER_LENGTH_M().
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+ return (pkt->__packet.bits.__offset << 6) +
+ NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Return the number of bytes which could be prepended to a packet, using a
+ * metadata pointer to speed the operation.
+ * See @ref netio_populate_prepend_buffer() to get a full description of
+ * prepending.
+ * @ingroup egress
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's @c __offset field shifted left by 6 (that is,
+ * multiplied by 64), plus the @c l2_offset recorded in the minimal metadata.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+ return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset;
+}
+
+
+/** Report how many bytes may be prepended to a packet.
+ * Works for packets carrying either minimal or standard metadata: the
+ * appropriate metadata pointer is looked up and handed to
+ * NETIO_PKT_PREPEND_AVAIL_MM() or NETIO_PKT_PREPEND_AVAIL_M() respectively.
+ * See @ref netio_populate_prepend_buffer() to get a full description of
+ * prepending.
+ * @ingroup egress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The number of bytes which could be prepended to the packet.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt)
+{
+  /* Standard metadata is handled first; minimal metadata falls through. */
+  if (!NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    return NETIO_PKT_PREPEND_AVAIL_M(NETIO_PKT_METADATA(pkt), pkt);
+  }
+
+  return NETIO_PKT_PREPEND_AVAIL_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt);
+}
+
+
+/** Flush a packet's minimal metadata from the cache, using a metadata pointer
+ * to speed the operation.
+ * @ingroup egress
+ *
+ * Note that the body of this routine is empty in this version of the
+ * header, so calling it has no effect; neither argument is referenced.
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+ netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's minimal metadata from the cache, using a metadata
+ * pointer to speed the operation.
+ * @ingroup egress
+ *
+ * Note that the body of this routine is empty in this version of the
+ * header, so calling it has no effect; neither argument is referenced.
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+ netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's minimal metadata from the cache,
+ * using a metadata pointer to speed the operation.
+ * @ingroup egress
+ *
+ * Note that the body of this routine is empty in this version of the
+ * header, so calling it has no effect; neither argument is referenced.
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+ netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush a packet's metadata from the cache, using a metadata pointer
+ * to speed the operation.
+ * @ingroup ingress
+ *
+ * Note that the body of this routine is empty in this version of the
+ * header, so calling it has no effect; neither argument is referenced.
+ *
+ * @param[in] mda Pointer to packet's metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's metadata from the cache, using a metadata
+ * pointer to speed the operation.
+ * @ingroup ingress
+ *
+ * Note that the body of this routine is empty in this version of the
+ * header, so calling it has no effect; neither argument is referenced.
+ *
+ * @param[in] mda Pointer to packet's metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's metadata from the cache,
+ * using a metadata pointer to speed the operation.
+ * @ingroup ingress
+ *
+ * Note that the body of this routine is empty in this version of the
+ * header, so calling it has no effect; neither argument is referenced.
+ *
+ * @param[in] mda Pointer to packet's metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush a packet's minimal metadata from the cache.
+ * @ingroup egress
+ *
+ * Note that the body of this routine is empty in this version of the
+ * header, so calling it has no effect; @p pkt is not referenced.
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's minimal metadata from the cache.
+ * @ingroup egress
+ *
+ * Note that the body of this routine is empty in this version of the
+ * header, so calling it has no effect; @p pkt is not referenced.
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's minimal metadata from the cache.
+ * @ingroup egress
+ *
+ * Note that the body of this routine is empty in this version of the
+ * header, so calling it has no effect; @p pkt is not referenced.
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush a packet's metadata from the cache.
+ * @ingroup ingress
+ *
+ * Note that the body of this routine is empty in this version of the
+ * header, so calling it has no effect; @p pkt is not referenced.
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's metadata from the cache.
+ * @ingroup ingress
+ *
+ * Note that the body of this routine is empty in this version of the
+ * header, so calling it has no effect; @p pkt is not referenced.
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's metadata from the cache.
+ * @ingroup ingress
+ *
+ * Note that the body of this routine is empty in this version of the
+ * header, so calling it has no effect; @p pkt is not referenced.
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt)
+{
+}
+
+/** Number of NUMA nodes we can distribute buffers to.
+ * @ingroup setup */
+#define NETIO_NUM_NODE_WEIGHTS 16
+
+/**
+ * @brief An object for specifying the characteristics of NetIO communication
+ * endpoint.
+ *
+ * @ingroup setup
+ *
+ * The @ref netio_input_register() function uses this structure to define
+ * how an application tile will communicate with an IPP.
+ *
+ *
+ * Future updates to NetIO may add new members to this structure,
+ * which can affect the success of the registration operation. Thus,
+ * if dynamically initializing the structure, applications are urged to
+ * zero it out first, for example:
+ *
+ * @code
+ * netio_input_config_t config;
+ * memset(&config, 0, sizeof (config));
+ * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE;
+ * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS;
+ * config.queue_id = 0;
+ * .
+ * .
+ * .
+ * @endcode
+ *
+ * since that guarantees that any unused structure members, including
+ * members which did not exist when the application was first developed,
+ * will not have unexpected values.
+ *
+ * If statically initializing the structure, we strongly recommend use of
+ * C99-style named initializers, for example:
+ *
+ * @code
+ * netio_input_config_t config = {
+ * .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE,
+ * .num_receive_packets = NETIO_MAX_RECEIVE_PKTS,
+ * .queue_id = 0,
+ * };
+ * @endcode
+ *
+ * instead of the old-style structure initialization:
+ *
+ * @code
+ * // Bad example! Currently equivalent to the above, but don't do this.
+ * netio_input_config_t config = {
+ * NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0
+ * };
+ * @endcode
+ *
+ * since the C99 style requires no changes to the code if elements of the
+ * config structure are rearranged. (It also makes the initialization much
+ * easier to understand.)
+ *
+ * Except for items which address a particular tile's transmit or receive
+ * characteristics, such as the ::NETIO_RECV flag, applications are advised
+ * to specify the same set of configuration data on all registrations.
+ * This prevents differing results if multiple tiles happen to do their
+ * registration operations in a different order on different invocations of
+ * the application. This is particularly important for things like link
+ * management flags, and buffer size and homing specifications.
+ *
+ * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO
+ * buffer pool is automatically created and mapped into the application's
+ * virtual address space at an address chosen by the operating system,
+ * using the common memory (cmem) facility in the Tilera Multicore
+ * Components library. The cmem facility allows multiple processes to gain
+ * access to shared memory which is mapped into each process at an
+ * identical virtual address. In order for this to work, the processes
+ * must have a common ancestor, which must create the common memory using
+ * tmc_cmem_init().
+ *
+ * In programs using the iLib process creation API, or in programs which use
+ * only one process (which include programs using the pthreads library),
+ * tmc_cmem_init() is called automatically. All other applications
+ * must call it explicitly, before any child processes which might call
+ * netio_input_register() are created.
+ */
+typedef struct
+{
+ /** Registration characteristics.
+
+ This value determines several characteristics of the registration;
+ flags for different types of behavior are ORed together to make the
+ final flag value. Generally applications should specify exactly
+ one flag from each of the following categories:
+
+ - Whether the application will be receiving packets on this queue
+ (::NETIO_RECV or ::NETIO_NO_RECV).
+
+ - Whether the application will be transmitting packets on this queue,
+ and if so, whether it will request egress checksum calculation
+ (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT). It is
+ legal to call netio_get_buffer() without one of the XMIT flags,
+ as long as ::NETIO_RECV is specified; in this case, the retrieved
+ buffers must be passed to another tile for transmission.
+
+ - Whether the application expects any vendor-specific tags in
+ its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM,
+ or ::NETIO_TAG_MRVL). This must match the configuration of the
+ target IPP.
+
+ To accommodate applications written to previous versions of the NetIO
+ interface, none of the flags above are currently required; if omitted,
+ NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM |
+ ::NETIO_TAG_NONE were used. However, explicit specification of
+ the relevant flags allows NetIO to do a better job of resource
+ allocation, allows earlier detection of certain configuration errors,
+ and may enable advanced features or higher performance in the future,
+ so their use is strongly recommended.
+
+ Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT
+ is a special case, intended primarily for use by programs which
+ retrieve network statistics or do link management operations.
+ When these flags are both specified, the resulting queue may not
+ be used with NetIO routines other than netio_get(), netio_set(),
+ and netio_input_unregister(). See @ref link for more information
+ on link management.
+
+ Other flags are optional; their use is described below.
+ */
+ int flags;
+
+ /** Interface name. This is a string which identifies the specific
+ Ethernet controller hardware to be used. The format of the string
+ is a device type and a device index, separated by a slash; so,
+ the first 10 Gigabit Ethernet controller is named "xgbe/0", while
+ the second 10/100/1000 Megabit Ethernet controller is named "gbe/1".
+ */
+ const char* interface;
+
+ /** Receive packet queue size. This specifies the maximum number
+ of ingress packets that can be received on this queue without
+ being retrieved by @ref netio_get_packet(). If the IPP's distribution
+ algorithm calls for a packet to be sent to this queue, and this
+ number of packets are already pending there, the new packet
+ will either be discarded, or sent to another tile registered
+ for the same queue_id (see @ref drops). This value must
+ be at least ::NETIO_MIN_RECEIVE_PKTS, can always be at least
+ ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain
+ interfaces.
+ */
+ int num_receive_packets;
+
+ /** The queue ID being requested. Legal values for this range from 0
+ to ::NETIO_MAX_QUEUE_ID, inclusive. ::NETIO_MAX_QUEUE_ID is always
+ greater than or equal to the number of tiles; this allows one queue
+ for each tile, plus at least one additional queue. Some applications
+ may wish to use the additional queue as a destination for unwanted
+ packets, since packets delivered to queues for which no tiles have
+ registered are discarded.
+ */
+ unsigned int queue_id;
+
+ /** Maximum number of small send buffers to be held in the local empty
+ buffer cache. This specifies the size of the area which holds
+ empty small egress buffers requested from the IPP but not yet
+ retrieved via @ref netio_get_buffer(). This value must be greater
+ than zero if the application will ever use @ref netio_get_buffer()
+ to allocate empty small egress buffers; it may be no larger than
+ ::NETIO_MAX_SEND_BUFFERS. See @ref epp for more details on empty
+ buffer caching.
+ */
+ int num_send_buffers_small_total;
+
+ /** Number of small send buffers to be preallocated at registration.
+ If this value is nonzero, the specified number of empty small egress
+ buffers will be requested from the IPP during the netio_input_register
+ operation; this may speed the execution of @ref netio_get_buffer().
+ This may be no larger than @ref num_send_buffers_small_total. See @ref
+ epp for more details on empty buffer caching.
+ */
+ int num_send_buffers_small_prealloc;
+
+ /** Maximum number of large send buffers to be held in the local empty
+ buffer cache. This specifies the size of the area which holds empty
+ large egress buffers requested from the IPP but not yet retrieved via
+ @ref netio_get_buffer(). This value must be greater than zero if the
+ application will ever use @ref netio_get_buffer() to allocate empty
+ large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS.
+ See @ref epp for more details on empty buffer caching.
+ */
+ int num_send_buffers_large_total;
+
+ /** Number of large send buffers to be preallocated at registration.
+ If this value is nonzero, the specified number of empty large egress
+ buffers will be requested from the IPP during the netio_input_register
+ operation; this may speed the execution of @ref netio_get_buffer().
+ This may be no larger than @ref num_send_buffers_large_total. See @ref
+ epp for more details on empty buffer caching.
+ */
+ int num_send_buffers_large_prealloc;
+
+ /** Maximum number of jumbo send buffers to be held in the local empty
+ buffer cache. This specifies the size of the area which holds empty
+ jumbo egress buffers requested from the IPP but not yet retrieved via
+ @ref netio_get_buffer(). This value must be greater than zero if the
+ application will ever use @ref netio_get_buffer() to allocate empty
+ jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS.
+ See @ref epp for more details on empty buffer caching.
+ */
+ int num_send_buffers_jumbo_total;
+
+ /** Number of jumbo send buffers to be preallocated at registration.
+ If this value is nonzero, the specified number of empty jumbo egress
+ buffers will be requested from the IPP during the netio_input_register
+ operation; this may speed the execution of @ref netio_get_buffer().
+ This may be no larger than @ref num_send_buffers_jumbo_total. See @ref
+ epp for more details on empty buffer caching.
+ */
+ int num_send_buffers_jumbo_prealloc;
+
+ /** Total packet buffer size. This determines the total size, in bytes,
+ of the NetIO buffer pool. Note that the maximum number of available
+ buffers of each size is determined during hypervisor configuration
+ (see the <em>System Programmer's Guide</em> for details); this just
+ influences how much host memory is allocated for those buffers.
+
+ The buffer pool is allocated from common memory, which will be
+ automatically initialized if needed. If your buffer pool is larger
+ than 240 MB, you might need to explicitly call @c tmc_cmem_init(),
+ as described in the Application Libraries Reference Manual (UG227).
+
+ Packet buffers are currently allocated in chunks of 16 MB; this
+ value will be rounded up to the next larger multiple of 16 MB.
+ If this value is zero, a default of 32 MB will be used; this was
+ the value used by previous versions of NetIO. Note that taking this
+ default also affects the placement of buffers on Linux NUMA nodes.
+ See @ref buffer_node_weights for an explanation of buffer placement.
+
+ In order to successfully allocate packet buffers, Linux must have
+ available huge pages on the relevant Linux NUMA nodes. See the
+ <em>System Programmer's Guide</em> for information on configuring
+ huge page support in Linux.
+ */
+ uint64_t total_buffer_size;
+
+ /** Buffer placement weighting factors.
+
+ This array specifies the relative amount of buffering to place
+ on each of the available Linux NUMA nodes. This array is
+ indexed by the NUMA node, and the values in the array are
+ proportional to the amount of buffer space to allocate on that
+ node.
+
+ If memory striping is enabled in the Hypervisor, then there is
+ only one logical NUMA node (node 0). In that case, NetIO will by
+ default ignore the suggested buffer node weights, and buffers
+ will be striped across the physical memory controllers. See
+ UG209 System Programmer's Guide for a description of the
+ hypervisor option that controls memory striping.
+
+ If memory striping is disabled, then there are up to four NUMA
+ nodes, corresponding to the four DDRAM controllers in the TILE
+ processor architecture. See UG100 Tile Processor Architecture
+ Overview for a diagram showing the location of each of the DDRAM
+ controllers relative to the tile array.
+
+ For instance, if memory striping is disabled, the following
+ configuration structure:
+
+ @code
+ netio_input_config_t config = {
+ .
+ .
+ .
+ .total_buffer_size = 4 * 16 * 1024 * 1024,
+ .buffer_node_weights = { 1, 0, 1, 0 },
+ };
+ @endcode
+
+ would result in 32 MB of buffers being placed on controller 0, and
+ 32 MB on controller 2. (Since buffers are allocated in units of
+ 16 MB, some sets of weights will not be able to be matched exactly.)
+
+ For the weights to be effective, @ref total_buffer_size must be
+ nonzero. If @ref total_buffer_size is zero, causing the default
+ 32 MB of buffer space to be used, then any specified weights will
+ be ignored, and buffers will be positioned as they were in previous
+ versions of NetIO:
+
+ - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1,
+ and the other 16 MB will be placed on controller 2.
+
+ - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2,
+ and the other 16 MB will be placed on controller 3.
+
+ If @ref total_buffer_size is nonzero, but all weights are zero,
+ then all buffer space will be allocated on Linux NUMA node zero.
+
+ By default, the specified buffer placement is treated as a hint;
+ if sufficient free memory is not available on the specified
+ controllers, the buffers will be allocated elsewhere. However,
+ if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a
+ failure to allocate buffer space exactly as requested will cause the
+ registration operation to fail with an error of ::NETIO_CANNOT_HOME.
+
+ Note that maximal network performance cannot be achieved with
+ only one memory controller.
+ */
+ uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS];
+
+ /** Fixed virtual address for packet buffers. Only valid when
+ ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the
+ description of that flag for details.
+ */
+ void* fixed_buffer_va;
+
+ /**
+ Maximum number of outstanding send packet requests. This value is
+ only relevant when an EPP is in use; it determines the number of
+ slots in the EPP's outgoing packet queue which this tile is allowed
+ to consume, and thus the number of packets which may be sent before
+ the sending tile must wait for an acknowledgment from the EPP.
+ Modifying this value is generally only helpful when using @ref
+ netio_send_packet_vector(), where it can help improve performance by
+ allowing a single vector send operation to process more packets.
+ Typically it is not specified, and the default, which divides the
+ outgoing packet slots evenly between all tiles on the chip, is used.
+
+ If a registration asks for more outgoing packet queue slots than are
+ available, ::NETIO_TOOMANY_XMIT will be returned. The total number
+ of packet queue slots which are available for all tiles for each EPP
+ is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING.
+
+
+ This value is ignored if ::NETIO_XMIT is not specified in flags.
+ If you want to specify a large value here for a specific tile, you are
+ advised to specify ::NETIO_NO_XMIT on other, non-transmitting tiles so
+ that they do not consume a default number of packet slots. Any tile
+ transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING
+ slots allocated to it; values less than that will be silently
+ increased by the NetIO library.
+ */
+ int num_sends_outstanding;
+}
+netio_input_config_t;
+
+
+/** Registration flags; used in the @ref netio_input_config_t structure.
+ * @addtogroup setup
+ */
+/** @{ */
+
+/** Fail a registration request if we can't put packet buffers
+ on the specified memory controllers. */
+#define NETIO_STRICT_HOMING 0x00000002
+
+/** This application expects no tags on its L2 headers. */
+#define NETIO_TAG_NONE 0x00000004
+
+/** This application expects Marvell extended tags on its L2 headers. */
+#define NETIO_TAG_MRVL 0x00000008
+
+/** This application expects Broadcom tags on its L2 headers. */
+#define NETIO_TAG_BRCM 0x00000010
+
+/** This registration may call routines which receive packets. */
+#define NETIO_RECV 0x00000020
+
+/** This registration may not call routines which receive packets. */
+#define NETIO_NO_RECV 0x00000040
+
+/** This registration may call routines which transmit packets. */
+#define NETIO_XMIT 0x00000080
+
+/** This registration may call routines which transmit packets with
+ checksum acceleration. */
+#define NETIO_XMIT_CSUM 0x00000100
+
+/** This registration may not call routines which transmit packets. */
+#define NETIO_NO_XMIT 0x00000200
+
+/** This registration wants NetIO buffers mapped at an application-specified
+ virtual address.
+
+ NetIO buffers are by default created by the TMC common memory facility,
+ which must be configured by a common ancestor of all processes sharing
+ a network interface. When this flag is specified, NetIO buffers are
+ instead mapped at an address chosen by the application (and specified
+ in @ref netio_input_config_t::fixed_buffer_va). This allows multiple
+ unrelated but cooperating processes to share a NetIO interface.
+ All processes sharing the same interface must specify this flag,
+ and all must specify the same fixed virtual address.
+
+ @ref netio_input_config_t::fixed_buffer_va must be a
+ multiple of 16 MB, and the packet buffers will occupy @ref
+ netio_input_config_t::total_buffer_size bytes of virtual address
+ space, beginning at that address. If any of those virtual addresses
+ are currently occupied by other memory objects, like application or
+ shared library code or data, @ref netio_input_register() will return
+ ::NETIO_FAULT. While it is impossible to provide a fixed_buffer_va
+ which will work for all applications, a good first guess might be to
+ use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size.
+ If that fails, it might be helpful to consult the running application's
+ virtual address description file (/proc/<em>pid</em>/maps) to see
+ which regions of virtual address space are available.
+ */
+#define NETIO_FIXED_BUFFER_VA 0x00000400
+
+/** This registration call will not complete unless the network link
+ is up. The process will wait several seconds for this to happen (the
+ precise interval is link-dependent), but if the link does not come up,
+ ::NETIO_LINK_DOWN will be returned. This flag is the default if
+ ::NETIO_NOREQUIRE_LINK_UP is not specified. Note that this flag by
+ itself does not request that the link be brought up; that can be done
+ with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the
+ latter is the default if no NETIO_AUTO_LINK_xxx flags are specified),
+ or by explicitly setting the link's desired state via netio_set().
+ If the link is not brought up by one of those methods, and this flag
+ is specified, the registration operation will return ::NETIO_LINK_DOWN.
+ This flag is ignored if it is specified along with ::NETIO_NO_XMIT and
+ ::NETIO_NO_RECV. See @ref link for more information on link
+ management.
+ */
+#define NETIO_REQUIRE_LINK_UP 0x00000800
+
+/** This registration call will complete even if the network link is not up.
+ Whenever the link is not up, packets will not be sent or received:
+ netio_get_packet() will return ::NETIO_NOPKT once all queued packets
+ have been drained, and netio_send_packet() and similar routines will
+ return ::NETIO_QUEUE_FULL once the outgoing packet queue in the EPP
+ or the I/O shim is full. See @ref link for more information on link
+ management.
+ */
+#define NETIO_NOREQUIRE_LINK_UP 0x00001000
+
+#ifndef __DOXYGEN__
+/*
+ * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags,
+ * but should not be used directly by applications, and are thus not
+ * documented.
+ */
+#define _NETIO_AUTO_UP 0x00002000
+#define _NETIO_AUTO_DN 0x00004000
+#define _NETIO_AUTO_PRESENT 0x00008000
+#endif
+
+/** Set the desired state of the link to up, allowing any speeds which are
+ supported by the link hardware, as part of this registration operation.
+ Do not take down the link automatically. This is the default if
+ no other NETIO_AUTO_LINK_xxx flags are specified. This flag is ignored
+ if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
+ See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_UP (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP)
+
+/** Set the desired state of the link to up, allowing any speeds which are
+ supported by the link hardware, as part of this registration operation.
+ Set the desired state of the link to down the next time no tiles are
+ registered for packet reception or transmission. This flag is ignored
+ if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
+ See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_UPDN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \
+ _NETIO_AUTO_DN)
+
+/** Set the desired state of the link to down the next time no tiles are
+ registered for packet reception or transmission. This flag is ignored
+ if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
+ See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_DN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN)
+
+/** Do not bring up the link automatically as part of this registration
+ operation. Do not take down the link automatically. This flag
+ is ignored if it is specified along with ::NETIO_NO_XMIT and
+ ::NETIO_NO_RECV. See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_NONE _NETIO_AUTO_PRESENT
+
+
+/** Minimum number of receive packets. */
+#define NETIO_MIN_RECEIVE_PKTS 16
+
+/** Lower bound on the maximum number of receive packets; may be higher
+ than this on some interfaces. */
+#define NETIO_MAX_RECEIVE_PKTS 128
+
+/** Maximum number of send buffers, per packet size. */
+#define NETIO_MAX_SEND_BUFFERS 16
+
+/** Number of EPP queue slots, and thus outstanding sends, per EPP. */
+#define NETIO_TOTAL_SENDS_OUTSTANDING 2015
+
+/** Minimum number of EPP queue slots, and thus outstanding sends, per
+ * transmitting tile. */
+#define NETIO_MIN_SENDS_OUTSTANDING 16
+
+
+/**@}*/
+
+#ifndef __DOXYGEN__
+
+/**
+ * An object for providing Ethernet packets to a process.
+ */
+struct __netio_queue_impl_t;
+
+/**
+ * An object for managing the user end of a NetIO queue.
+ */
+struct __netio_queue_user_impl_t;
+
+#endif /* !__DOXYGEN__ */
+
+
+/** A netio_queue_t describes a NetIO communications endpoint.
+ * @ingroup setup
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+ uint8_t opaque[8]; /**< This is an opaque structure. */
+#else
+ struct __netio_queue_impl_t* __system_part; /**< The system part. */
+ struct __netio_queue_user_impl_t* __user_part; /**< The user part. */
+#ifdef _NETIO_PTHREAD
+ _netio_percpu_mutex_t lock; /**< Queue lock. */
+#endif
+#endif
+}
+netio_queue_t;
+
+
+/**
+ * @brief Packet send context.
+ *
+ * @ingroup egress
+ *
+ * Packet send context for use with netio_send_packet_prepare and _commit.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+ uint8_t opaque[44]; /**< This is an opaque structure. */
+#else
+ uint8_t flags; /**< SEND_PKT_CTX_xxx flags, defined below. */
+ uint8_t datalen; /**< Number of valid words pointed to by data. */
+ uint32_t request[9]; /**< Request to be sent to the EPP or shim. Note
+ that this is smaller than the 11-word maximum
+ request size, since some constant values are
+ not saved in the context. */
+ uint32_t *data; /**< Data to be sent to the EPP or shim via IDN. */
+#endif
+}
+netio_send_pkt_context_t;
+
+
+#ifndef __DOXYGEN__
+#define SEND_PKT_CTX_USE_EPP 1 /**< We're sending to an EPP. */
+#define SEND_PKT_CTX_SEND_CSUM 2 /**< Request includes a checksum. */
+#endif
+
+/**
+ * @brief Packet vector entry.
+ *
+ * @ingroup egress
+ *
+ * This data structure is used with netio_send_packet_vector() to send multiple
+ * packets with one NetIO call. The structure should be initialized by
+ * calling netio_pkt_vector_set(), rather than by setting the fields
+ * directly.
+ *
+ * This structure is guaranteed to be a power of two in size, no
+ * bigger than one L2 cache line, and to be aligned modulo its size.
+ */
+typedef struct
+#ifndef __DOXYGEN__
+__attribute__((aligned(8)))
+#endif
+{
+ /** Reserved for use by the user application. When initialized with
+ * the netio_pkt_vector_set() function, this field is guaranteed
+ * to be visible to readers only after all other fields are already
+ * visible. This way it can be used as a valid flag or generation
+ * counter. */
+ uint8_t user_data;
+
+ /* Structure members below this point should not be accessed directly by
+ * applications, as they may change in the future. */
+
+ /** Low 8 bits of the packet address to send. The high bits are
+ * acquired from the 'handle' field. */
+ uint8_t buffer_address_low;
+
+ /** Number of bytes to transmit. */
+ uint16_t size;
+
+ /** The raw handle from a netio_pkt_t. If this is NETIO_PKT_HANDLE_NONE,
+ * this vector entry will be skipped and no packet will be transmitted. */
+ netio_pkt_handle_t handle;
+}
+netio_pkt_vector_entry_t;
+
+
+/**
+ * @brief Initialize fields in a packet vector entry.
+ *
+ * @ingroup egress
+ *
+ * @param[out] v Pointer to the vector entry to be initialized.
+ * @param[in] pkt Packet to be transmitted when the vector entry is passed to
+ * netio_send_packet_vector(). Note that the packet's attributes
+ * (e.g., its L2 offset and length) are captured at the time this
+ * routine is called; subsequent changes in those attributes will not
+ * be reflected in the packet which is actually transmitted.
+ * Changes in the packet's contents, however, will be so reflected.
+ * If this is NULL, no packet will be transmitted.
+ * @param[in] user_data User data to be set in the vector entry.
+ * This function guarantees that the "user_data" field will become
+ * visible to a reader only after all other fields have become visible.
+ * This allows a structure in a ring buffer to be written and read
+ * by a polling reader without any locks or other synchronization.
+ */
+static __inline void
+netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt,
+ uint8_t user_data)
+{
+ if (pkt)
+ {
+ if (NETIO_PKT_IS_MINIMAL(pkt))
+ {
+ netio_pkt_minimal_metadata_t* mmd =
+ (netio_pkt_minimal_metadata_t*) &pkt->__metadata;
+ v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF;
+ v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt);
+ }
+ else
+ {
+ netio_pkt_metadata_t* mda = &pkt->__metadata;
+ v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF;
+ v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt);
+ }
+ v->handle.word = pkt->__packet.word;
+ }
+ else
+ {
+ v->handle.word = 0; /* Set handle to NETIO_PKT_HANDLE_NONE. */
+ }
+ /* Compiler barrier (empty asm, no instruction emitted): user_data goes last. */
+ __asm__("" : : : "memory");
+
+ v->user_data = user_data;
+}
+
+
+/**
+ * Flags and structures for @ref netio_get() and @ref netio_set().
+ * @ingroup config
+ */
+
+/** @{ */
+/** Parameter class; addr is a NETIO_PARAM_xxx value. */
+#define NETIO_PARAM 0
+/** Interface MAC address. This address is only valid with @ref netio_get().
+ * The value is a 6-byte MAC address. Depending upon the overall system
+ * design, a MAC address may or may not be available for each interface. */
+#define NETIO_PARAM_MAC 0
+
+/** Determine whether to suspend output on the receipt of pause frames.
+ * If the value is nonzero, the I/O shim will suspend output when a pause
+ * frame is received. If the value is zero, pause frames will be ignored. */
+#define NETIO_PARAM_PAUSE_IN 1
+
+/** Determine whether to send pause frames if the I/O shim packet FIFOs are
+ * nearly full. If the value is zero, pause frames are not sent. If
+ * the value is nonzero, it is the delay value which will be sent in any
+ * pause frames which are output, in units of 512 bit times. */
+#define NETIO_PARAM_PAUSE_OUT 2
+
+/** Jumbo frame support. The value is a 4-byte integer. If the value is
+ * nonzero, the MAC will accept frames of up to 10240 bytes. If the value
+ * is zero, the MAC will only accept frames of up to 1544 bytes. */
+#define NETIO_PARAM_JUMBO 3
+
+/** I/O shim's overflow statistics register. The value is two 16-bit integers.
+ * The first 16-bit value (or the low 16 bits, if the value is treated as a
+ * 32-bit number) is the count of packets which were completely dropped and
+ * not delivered by the shim. The second 16-bit value (or the high 16 bits,
+ * if the value is treated as a 32-bit number) is the count of packets
+ * which were truncated and thus only partially delivered by the shim. This
+ * register is automatically reset to zero after it has been read.
+ */
+#define NETIO_PARAM_OVERFLOW 4
+
+/** IPP statistics. This address is only valid with @ref netio_get(). The
+ * value is a netio_stat_t structure. Unlike the I/O shim statistics, the
+ * IPP statistics are not all reset to zero on read; see the description
+ * of the netio_stat_t for details. */
+#define NETIO_PARAM_STAT 5
+
+/** Possible link state. The value is a combination of "NETIO_LINK_xxx"
+ * flags. With @ref netio_get(), this will indicate which flags are
+ * actually supported by the hardware.
+ *
+ * For historical reasons, specifying this value to netio_set() will have
+ * the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is
+ * discouraged.
+ */
+#define NETIO_PARAM_LINK_POSSIBLE_STATE 6
+
+/** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags.
+ * With @ref netio_set(), this will attempt to immediately bring up the
+ * link using whichever of the requested flags are supported by the
+ * hardware, or take down the link if the flags are zero; if this is
+ * not possible, an error will be returned. Many programs will want
+ * to use ::NETIO_PARAM_LINK_DESIRED_STATE instead.
+ *
+ * For historical reasons, specifying this value to netio_get() will
+ * have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE,
+ * but this usage is discouraged.
+ */
+#define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE
+
+/** Current link state. This address is only valid with @ref netio_get().
+ * The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together.
+ * If the link is down, the value ANDed with NETIO_LINK_SPEED will be
+ * zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will
+ * result in exactly one of the NETIO_LINK_xxx values, indicating the
+ * current speed. */
+#define NETIO_PARAM_LINK_CURRENT_STATE 7
+
+/** Variant symbol for current state, retained for compatibility with
+ * pre-MDE-2.1 programs. */
+#define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE
+
+/** Packet Coherence protocol. This address is only valid with @ref netio_get().
+ * The value is nonzero if the interface is configured for cache-coherent DMA.
+ */
+#define NETIO_PARAM_COHERENT 8
+
+/** Desired link state. The value is a combination of "NETIO_LINK_xxx"
+ * flags, which specify the desired state for the link. With @ref
+ * netio_set(), this will, in the background, attempt to bring up the link
+ * using whichever of the requested flags are reasonable, or take down the
+ * link if the flags are zero. The actual link up or down operation may
+ * happen after this call completes. If the link state changes in the
+ * future, the system will continue to try to get back to the desired link
+ * state; for instance, if the link is brought up successfully, and then
+ * the network cable is disconnected, the link will go down. However, the
+ * desired state of the link is still up, so if the cable is reconnected,
+ * the link will be brought up again.
+ *
+ * With @ref netio_get(), this will indicate the desired state for the
+ * link, as set with a previous netio_set() call, or implicitly by a
+ * netio_input_register() or netio_input_unregister() operation. This may
+ * not reflect the current state of the link; to get that, use
+ * ::NETIO_PARAM_LINK_CURRENT_STATE. */
+#define NETIO_PARAM_LINK_DESIRED_STATE 9
+
+/** NetIO statistics structure. Retrieved using the ::NETIO_PARAM_STAT
+ * address passed to @ref netio_get(). */
+typedef struct
+{
+ /** Number of packets which have been received by the IPP and forwarded
+ * to a tile's receive queue for processing. This value wraps at its
+ * maximum, and is not cleared upon read. */
+ uint32_t packets_received;
+
+ /** Number of packets which have been dropped by the IPP, because they could
+ * not be received, or could not be forwarded to a tile. The former happens
+ * when the IPP does not have a free packet buffer of suitable size for an
+ * incoming frame. The latter happens when all potential destination tiles
+ * for a packet, as defined by the group, bucket, and queue configuration,
+ * have full receive queues. This value wraps at its maximum, and is not
+ * cleared upon read. */
+ uint32_t packets_dropped;
+
+ /*
+ * Note: the #defines after each of the following four one-byte values
+ * denote their location within the third word of the netio_stat_t. They
+ * are intended for use only by the IPP implementation and are thus omitted
+ * from the Doxygen output.
+ */
+
+ /** Number of packets dropped because no worker was able to accept a new
+ * packet. This value saturates at its maximum, and is cleared upon
+ * read. */
+ uint8_t drops_no_worker;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_WORKER 0
+#endif
+
+ /** Number of packets dropped because no small buffers were available.
+ * This value saturates at its maximum, and is cleared upon read. */
+ uint8_t drops_no_smallbuf;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_SMALLBUF 1
+#endif
+
+ /** Number of packets dropped because no large buffers were available.
+ * This value saturates at its maximum, and is cleared upon read. */
+ uint8_t drops_no_largebuf;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_LARGEBUF 2
+#endif
+
+ /** Number of packets dropped because no jumbo buffers were available.
+ * This value saturates at its maximum, and is cleared upon read. */
+ uint8_t drops_no_jumbobuf;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_JUMBOBUF 3
+#endif
+}
+netio_stat_t;
+
+
+/** Link can run, should run, or is running at 10 Mbps. */
+#define NETIO_LINK_10M 0x01
+
+/** Link can run, should run, or is running at 100 Mbps. */
+#define NETIO_LINK_100M 0x02
+
+/** Link can run, should run, or is running at 1 Gbps. */
+#define NETIO_LINK_1G 0x04
+
+/** Link can run, should run, or is running at 10 Gbps. */
+#define NETIO_LINK_10G 0x08
+
+/** Link should run at the highest speed supported by the link and by
+ * the device connected to the link. Only usable as a value for
+ * the link's desired state; never returned as a value for the current
+ * or possible states. */
+#define NETIO_LINK_ANYSPEED 0x10
+
+/** All legal link speeds. */
+#define NETIO_LINK_SPEED (NETIO_LINK_10M | \
+ NETIO_LINK_100M | \
+ NETIO_LINK_1G | \
+ NETIO_LINK_10G | \
+ NETIO_LINK_ANYSPEED)
+
+
+/** MAC register class. Addr is a register offset within the MAC.
+ * Registers within the XGbE and GbE MACs are documented in the Tile
+ * Processor I/O Device Guide (UG104). MAC registers start at address
+ * 0x4000, and do not include the MAC_INTERFACE registers. */
+#define NETIO_MAC 1
+
+/** MDIO register class (IEEE 802.3 clause 22 format). Addr is the "addr"
+ * member of a netio_mdio_addr_t structure. */
+#define NETIO_MDIO 2
+
+/** MDIO register class (IEEE 802.3 clause 45 format). Addr is the "addr"
+ * member of a netio_mdio_addr_t structure. */
+#define NETIO_MDIO_CLAUSE45 3
+
+/** NetIO MDIO address type. Retrieved or provided using the ::NETIO_MDIO
+ * address passed to @ref netio_get() or @ref netio_set(). */
+typedef union
+{
+ struct
+ {
+ unsigned int reg:16; /**< MDIO register offset. For clause 22 access,
+ must be less than 32. */
+ unsigned int phy:5; /**< Which MDIO PHY to access. */
+ unsigned int dev:5; /**< Which MDIO device to access within that PHY.
+ Applicable for clause 45 access only; ignored
+ for clause 22 access. */
+ }
+ bits; /**< Container for bitfields. */
+ uint64_t addr; /**< Value to pass to @ref netio_get() or
+ * @ref netio_set(). */
+}
+netio_mdio_addr_t;
+
+/** @} */
+
+#endif /* __NETIO_INTF_H__ */
diff --git a/arch/tile/include/hv/syscall_public.h b/arch/tile/include/hv/syscall_public.h
new file mode 100644
index 00000000..9cc0837e
--- /dev/null
+++ b/arch/tile/include/hv/syscall_public.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file syscall_public.h
+ * Indices for the hypervisor system calls that are intended to be called
+ * directly, rather than only through hypervisor-generated "glue" code.
+ */
+
+#ifndef _SYS_HV_INCLUDE_SYSCALL_PUBLIC_H
+#define _SYS_HV_INCLUDE_SYSCALL_PUBLIC_H
+
+/** Fast syscall flag bit location. When this bit is set, the hypervisor
+ * handles the syscall specially.
+ */
+#define HV_SYS_FAST_SHIFT 14
+
+/** Fast syscall flag bit mask. */
+#define HV_SYS_FAST_MASK (1 << HV_SYS_FAST_SHIFT)
+
+/** Bit location for flagging fast syscalls that can be called from PL0. */
+#define HV_SYS_FAST_PLO_SHIFT 13
+
+/** Fast syscall allowing PL0 bit mask. */
+#define HV_SYS_FAST_PL0_MASK (1 << HV_SYS_FAST_PLO_SHIFT)
+
+/** Perform an MF that waits for all victims to reach DRAM. */
+#define HV_SYS_fence_incoherent (51 | HV_SYS_FAST_MASK \
+ | HV_SYS_FAST_PL0_MASK)
+
+#endif /* !_SYS_HV_INCLUDE_SYSCALL_PUBLIC_H */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
new file mode 100644
index 00000000..b4dbc057
--- /dev/null
+++ b/arch/tile/kernel/Makefile
@@ -0,0 +1,18 @@
+#
+# Makefile for the Linux/TILE kernel.
+#
+
+extra-y := vmlinux.lds head_$(BITS).o
+obj-y := backtrace.o entry.o init_task.o irq.o messaging.o \
+ pci-dma.o proc.o process.o ptrace.o reboot.o \
+ setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \
+ intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o
+
+obj-$(CONFIG_HARDWALL) += hardwall.o
+obj-$(CONFIG_TILEGX) += futex_64.o
+obj-$(CONFIG_COMPAT) += compat.o compat_signal.o
+obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o
+obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_PCI) += pci.o
diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c
new file mode 100644
index 00000000..01ddf19c
--- /dev/null
+++ b/arch/tile/kernel/asm-offsets.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Generates definitions from c-type structures used by assembly sources.
+ */
+
+#include <linux/kbuild.h>
+#include <linux/thread_info.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h>
+#include <linux/ptrace.h>
+#include <hv/hypervisor.h>
+
+/* Check for compatible compiler early in the build. */
+#ifdef CONFIG_TILEGX
+# ifndef __tilegx__
+# error Can only build TILE-Gx configurations with tilegx compiler
+# endif
+# ifndef __LP64__
+# error Must not specify -m32 when building the TILE-Gx kernel
+# endif
+#else
+# ifdef __tilegx__
+# error Can not build TILEPro/TILE64 configurations with tilegx compiler
+# endif
+#endif
+
+/*
+ * Container for the DEFINE() entries (from <linux/kbuild.h>) that export
+ * structure member offsets, under the given names, to assembly sources.
+ * Each entry pairs a symbolic name with an offsetof() expression.
+ */
+void foo(void)
+{
+	/* struct single_step_state members. */
+	DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET,
+	       offsetof(struct single_step_state, buffer));
+	DEFINE(SINGLESTEP_STATE_FLAGS_OFFSET,
+	       offsetof(struct single_step_state, flags));
+	DEFINE(SINGLESTEP_STATE_ORIG_PC_OFFSET,
+	       offsetof(struct single_step_state, orig_pc));
+	DEFINE(SINGLESTEP_STATE_NEXT_PC_OFFSET,
+	       offsetof(struct single_step_state, next_pc));
+	DEFINE(SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET,
+	       offsetof(struct single_step_state, branch_next_pc));
+	DEFINE(SINGLESTEP_STATE_UPDATE_VALUE_OFFSET,
+	       offsetof(struct single_step_state, update_value));
+
+	/* struct thread_info members. */
+	DEFINE(THREAD_INFO_TASK_OFFSET,
+	       offsetof(struct thread_info, task));
+	DEFINE(THREAD_INFO_FLAGS_OFFSET,
+	       offsetof(struct thread_info, flags));
+	DEFINE(THREAD_INFO_STATUS_OFFSET,
+	       offsetof(struct thread_info, status));
+	DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET,
+	       offsetof(struct thread_info, homecache_cpu));
+	DEFINE(THREAD_INFO_STEP_STATE_OFFSET,
+	       offsetof(struct thread_info, step_state));
+
+	/* struct task_struct members (inside the embedded 'thread'). */
+	DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET,
+	       offsetof(struct task_struct, thread.ksp));
+	DEFINE(TASK_STRUCT_THREAD_PC_OFFSET,
+	       offsetof(struct task_struct, thread.pc));
+
+	/* HV_Topology members. */
+	DEFINE(HV_TOPOLOGY_WIDTH_OFFSET,
+	       offsetof(HV_Topology, width));
+	DEFINE(HV_TOPOLOGY_HEIGHT_OFFSET,
+	       offsetof(HV_Topology, height));
+
+	/* irq_cpustat_t members. */
+	DEFINE(IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET,
+	       offsetof(irq_cpustat_t, irq_syscall_count));
+}
diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c
new file mode 100644
index 00000000..9092ce8a
--- /dev/null
+++ b/arch/tile/kernel/backtrace.c
@@ -0,0 +1,678 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <asm/backtrace.h>
+#include <asm/tile-desc.h>
+#include <arch/abi.h>
+
+/* Map the architecture-neutral names used in the rest of this file onto
+ * the TILE-Gx or TILEPro decoder symbols, depending on __tilegx__.
+ * bt_int_reg_t is the integer type used for the register-sized words
+ * read from a stack frame: long long for TILE-Gx, int otherwise.
+ */
+#ifdef __tilegx__
+#define TILE_MAX_INSTRUCTIONS_PER_BUNDLE TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE
+#define tile_decoded_instruction tilegx_decoded_instruction
+#define tile_mnemonic tilegx_mnemonic
+#define parse_insn_tile parse_insn_tilegx
+#define TILE_OPC_IRET TILEGX_OPC_IRET
+#define TILE_OPC_ADDI TILEGX_OPC_ADDI
+#define TILE_OPC_ADDLI TILEGX_OPC_ADDLI
+#define TILE_OPC_INFO TILEGX_OPC_INFO
+#define TILE_OPC_INFOL TILEGX_OPC_INFOL
+#define TILE_OPC_JRP TILEGX_OPC_JRP
+#define TILE_OPC_MOVE TILEGX_OPC_MOVE
+#define OPCODE_STORE TILEGX_OPC_ST
+typedef long long bt_int_reg_t;
+#else
+#define TILE_MAX_INSTRUCTIONS_PER_BUNDLE TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE
+#define tile_decoded_instruction tilepro_decoded_instruction
+#define tile_mnemonic tilepro_mnemonic
+#define parse_insn_tile parse_insn_tilepro
+#define TILE_OPC_IRET TILEPRO_OPC_IRET
+#define TILE_OPC_ADDI TILEPRO_OPC_ADDI
+#define TILE_OPC_ADDLI TILEPRO_OPC_ADDLI
+#define TILE_OPC_INFO TILEPRO_OPC_INFO
+#define TILE_OPC_INFOL TILEPRO_OPC_INFOL
+#define TILE_OPC_JRP TILEPRO_OPC_JRP
+#define TILE_OPC_MOVE TILEPRO_OPC_MOVE
+#define OPCODE_STORE TILEPRO_OPC_SW
+typedef int bt_int_reg_t;
+#endif
+
+/* A decoded bundle used for backtracer analysis. */
+struct BacktraceBundle {
+ /* Raw bundle word as read from memory. */
+ tile_bundle_bits bits;
+ /* Number of valid entries in insns[], as returned by parse_insn_tile(). */
+ int num_insns;
+ /* Decoded instructions of the bundle, filled in by parse_insn_tile(). */
+ struct tile_decoded_instruction
+ insns[TILE_MAX_INSTRUCTIONS_PER_BUNDLE];
+};
+
+
+/* Search 'bundle' for an instruction with the given mnemonic whose
+ * leading 'num_operands_to_match' operand values equal the entries of
+ * 'operand_values'. Returns the first such instruction in decode order,
+ * or NULL if there is none. 'operand_values' is not read when
+ * 'num_operands_to_match' is zero, so NULL may be passed in that case.
+ */
+static const struct tile_decoded_instruction *find_matching_insn(
+	const struct BacktraceBundle *bundle,
+	tile_mnemonic mnemonic,
+	const int *operand_values,
+	int num_operands_to_match)
+{
+	int idx;
+
+	for (idx = 0; idx < bundle->num_insns; idx++) {
+		const struct tile_decoded_instruction *candidate =
+			&bundle->insns[idx];
+		int op = 0;
+
+		if (candidate->opcode->mnemonic != mnemonic)
+			continue;
+
+		/* Advance past every leading operand that agrees. */
+		while (op < num_operands_to_match &&
+		       operand_values[op] == candidate->operand_values[op])
+			op++;
+
+		/* All requested operands agreed: this is the one. */
+		if (op >= num_operands_to_match)
+			return candidate;
+	}
+
+	return NULL;
+}
+
+/* Does this bundle contain an 'iret' instruction?
+ * Matches on the mnemonic alone; no operands are compared.
+ */
+static inline bool bt_has_iret(const struct BacktraceBundle *bundle)
+{
+ return find_matching_insn(bundle, TILE_OPC_IRET, NULL, 0) != NULL;
+}
+
+/* Look for an immediate add of the form 'OP sp, sp, OFFSET' in the
+ * bundle, trying addi and then addli (and, on TILE-Gx, addxli and then
+ * addxi). On a match, stores OFFSET through 'adjust' and returns true;
+ * otherwise returns false and leaves '*adjust' untouched.
+ */
+static bool bt_has_addi_sp(const struct BacktraceBundle *bundle, int *adjust)
+{
+	static const int sp_sp[2] = { TREG_SP, TREG_SP };
+	/* Opcodes are tried in this order; the first hit wins. */
+	static const tile_mnemonic add_imm_opcodes[] = {
+		TILE_OPC_ADDI,
+		TILE_OPC_ADDLI,
+#ifdef __tilegx__
+		TILEGX_OPC_ADDXLI,
+		TILEGX_OPC_ADDXI,
+#endif
+	};
+	unsigned int k;
+
+	for (k = 0; k < ARRAY_SIZE(add_imm_opcodes); k++) {
+		const struct tile_decoded_instruction *hit =
+			find_matching_insn(bundle, add_imm_opcodes[k],
+					   sp_sp, 2);
+
+		if (hit != NULL) {
+			*adjust = hit->operand_values[2];
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* Collect the operand of every 'info OP' or 'infol OP' instruction in
+ * the bundle.
+ *
+ * bundle: The decoded bundle to scan.
+ * operands: Receives the operands, in instruction order; holds at most
+ *   MAX_INFO_OPS_PER_BUNDLE entries.
+ *
+ * Returns the number of entries written to 'operands'. Scanning stops
+ * once 'operands' is full, so a bundle that decodes to more info ops
+ * than the array can hold cannot write past its end.
+ */
+static int bt_get_info_ops(const struct BacktraceBundle *bundle,
+			   int operands[MAX_INFO_OPS_PER_BUNDLE])
+{
+	int num_ops = 0;
+	int i;
+
+	for (i = 0; i < bundle->num_insns &&
+		    num_ops < MAX_INFO_OPS_PER_BUNDLE; i++) {
+		const struct tile_decoded_instruction *insn =
+			&bundle->insns[i];
+
+		if (insn->opcode->mnemonic == TILE_OPC_INFO ||
+		    insn->opcode->mnemonic == TILE_OPC_INFOL) {
+			operands[num_ops++] = insn->operand_values[0];
+		}
+	}
+
+	return num_ops;
+}
+
+/* Look for a jrp instruction in the bundle. If one is present, store the
+ * register it jumps through in '*target_reg' and return true; otherwise
+ * return false and leave '*target_reg' untouched.
+ */
+static bool bt_has_jrp(const struct BacktraceBundle *bundle, int *target_reg)
+{
+	const struct tile_decoded_instruction *jrp =
+		find_matching_insn(bundle, TILE_OPC_JRP, NULL, 0);
+
+	if (jrp != NULL)
+		*target_reg = jrp->operand_values[0];
+
+	return jrp != NULL;
+}
+
+/* Report whether any instruction in 'bundle' writes register 'reg',
+ * either as the opcode's implicitly written register or through an
+ * explicit destination-register operand.
+ */
+static bool bt_modifies_reg(const struct BacktraceBundle *bundle, int reg)
+{
+	int idx;
+
+	for (idx = 0; idx < bundle->num_insns; idx++) {
+		const struct tile_decoded_instruction *insn =
+			&bundle->insns[idx];
+		int op;
+
+		if (insn->opcode->implicitly_written_register == reg)
+			return true;
+
+		for (op = 0; op < insn->opcode->num_operands; op++) {
+			/* Only destination operands can modify 'reg'. */
+			if (!insn->operands[op]->is_dest_reg)
+				continue;
+			if (insn->operand_values[op] == reg)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+/* Does this bundle modify sp?
+ * Convenience wrapper around bt_modifies_reg() for TREG_SP.
+ */
+static inline bool bt_modifies_sp(const struct BacktraceBundle *bundle)
+{
+ return bt_modifies_reg(bundle, TREG_SP);
+}
+
+/* Does this bundle modify lr?
+ * Convenience wrapper around bt_modifies_reg() for TREG_LR.
+ */
+static inline bool bt_modifies_lr(const struct BacktraceBundle *bundle)
+{
+ return bt_modifies_reg(bundle, TREG_LR);
+}
+
+/* Does this bundle contain the instruction 'move fp, sp'?
+ * The frame pointer is matched as register number 52 (r52); the first
+ * two operands of the move must be exactly { 52, TREG_SP }.
+ */
+static inline bool bt_has_move_r52_sp(const struct BacktraceBundle *bundle)
+{
+ static const int vals[2] = { 52, TREG_SP };
+ return find_matching_insn(bundle, TILE_OPC_MOVE, vals, 2) != NULL;
+}
+
+/* Does this bundle contain a store of lr to sp?
+ * Matches OPCODE_STORE (the per-architecture store opcode selected
+ * above) whose first two operands are exactly { TREG_SP, TREG_LR }.
+ */
+static inline bool bt_has_sw_sp_lr(const struct BacktraceBundle *bundle)
+{
+ static const int vals[2] = { TREG_SP, TREG_LR };
+ return find_matching_insn(bundle, OPCODE_STORE, vals, 2) != NULL;
+}
+
+#ifdef __tilegx__
+/* Track moveli values placed into registers.
+ *
+ * For every moveli in the bundle, records the immediate (operand 1) in
+ * moveli_args[], indexed by the destination register (operand 0).
+ * Entries for registers not written by a moveli in this bundle are left
+ * as they were.
+ */
+static inline void bt_update_moveli(const struct BacktraceBundle *bundle,
+ int moveli_args[])
+{
+ int i;
+ for (i = 0; i < bundle->num_insns; i++) {
+ const struct tile_decoded_instruction *insn =
+ &bundle->insns[i];
+
+ if (insn->opcode->mnemonic == TILEGX_OPC_MOVELI) {
+ int reg = insn->operand_values[0];
+ moveli_args[reg] = insn->operand_values[1];
+ }
+ }
+}
+
+/* Does this bundle contain an 'add sp, sp, reg' instruction
+ * from a register that we saw a moveli into, and if so, what
+ * is the value in the register?
+ *
+ * The instruction matched is TILEGX_OPC_ADDX with sp as its first two
+ * operands. A moveli_args[] entry of zero is treated as "no moveli
+ * seen", so a recorded immediate of 0 never produces a match. On
+ * success the recorded value is stored through 'adjust'.
+ */
+static bool bt_has_add_sp(const struct BacktraceBundle *bundle, int *adjust,
+ int moveli_args[])
+{
+ static const int vals[2] = { TREG_SP, TREG_SP };
+
+ const struct tile_decoded_instruction *insn =
+ find_matching_insn(bundle, TILEGX_OPC_ADDX, vals, 2);
+ if (insn) {
+ /* Third operand is the source register being added to sp. */
+ int reg = insn->operand_values[2];
+ if (moveli_args[reg]) {
+ *adjust = moveli_args[reg];
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+/* Locates the caller's PC and SP for a program starting at the
+ * given address.
+ *
+ * Scans forward from 'start_pc' one bundle at a time, reading code
+ * through 'read_memory_func', and records its conclusion in 'location'.
+ * The scan ends when an info op describes the frame, when the bundle
+ * following an iret/jrp bundle has been checked for info ops, when an
+ * all-zero bundle is found, or when memory can no longer be read.
+ *
+ * location: Output. Starts out as "sp at offset 0, pc unknown" and is
+ *   refined as bundles are decoded.
+ * start_pc: Address to start decoding from. If it is not bundle-aligned
+ *   the defaults above are returned unchanged.
+ * read_memory_func: Callback used to fetch bundles; a zero (false)
+ *   return is treated as a failed read.
+ * read_memory_func_extra: Opaque argument passed to read_memory_func.
+ */
+static void find_caller_pc_and_caller_sp(CallerLocation *location,
+ const unsigned long start_pc,
+ BacktraceMemoryReader read_memory_func,
+ void *read_memory_func_extra)
+{
+ /* Have we explicitly decided what the sp is,
+ * rather than just the default?
+ */
+ bool sp_determined = false;
+
+ /* Has any bundle seen so far modified lr? */
+ bool lr_modified = false;
+
+ /* Have we seen a move from sp to fp? */
+ bool sp_moved_to_r52 = false;
+
+ /* Have we seen a terminating bundle? */
+ bool seen_terminating_bundle = false;
+
+ /* Cut down on round-trip reading overhead by reading several
+ * bundles at a time.
+ */
+ tile_bundle_bits prefetched_bundles[32];
+ int num_bundles_prefetched = 0;
+ int next_bundle = 0;
+ unsigned long pc;
+
+#ifdef __tilegx__
+ /* Naively try to track moveli values to support addx for -m32. */
+ int moveli_args[TILEGX_NUM_REGISTERS] = { 0 };
+#endif
+
+ /* Default to assuming that the caller's sp is the current sp.
+ * This is necessary to handle the case where we start backtracing
+ * right at the end of the epilog.
+ */
+ location->sp_location = SP_LOC_OFFSET;
+ location->sp_offset = 0;
+
+ /* Default to having no idea where the caller PC is. */
+ location->pc_location = PC_LOC_UNKNOWN;
+
+ /* Don't even try if the PC is not aligned. */
+ if (start_pc % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0)
+ return;
+
+ for (pc = start_pc;; pc += sizeof(tile_bundle_bits)) {
+
+ struct BacktraceBundle bundle;
+ int num_info_ops, info_operands[MAX_INFO_OPS_PER_BUNDLE];
+ int one_ago, jrp_reg;
+ bool has_jrp;
+
+ /* Refill the prefetch buffer once it has been consumed. */
+ if (next_bundle >= num_bundles_prefetched) {
+ /* Prefetch some bytes, but don't cross a page
+ * boundary since that might cause a read failure we
+ * don't care about if we only need the first few
+ * bytes. Note: we don't care what the actual page
+ * size is; using the minimum possible page size will
+ * prevent any problems.
+ */
+ unsigned int bytes_to_prefetch = 4096 - (pc & 4095);
+ if (bytes_to_prefetch > sizeof prefetched_bundles)
+ bytes_to_prefetch = sizeof prefetched_bundles;
+
+ if (!read_memory_func(prefetched_bundles, pc,
+ bytes_to_prefetch,
+ read_memory_func_extra)) {
+ if (pc == start_pc) {
+ /* The program probably called a bad
+ * address, such as a NULL pointer.
+ * So treat this as if we are at the
+ * start of the function prolog so the
+ * backtrace will show how we got here.
+ */
+ location->pc_location = PC_LOC_IN_LR;
+ return;
+ }
+
+ /* Unreadable address. Give up. */
+ break;
+ }
+
+ next_bundle = 0;
+ num_bundles_prefetched =
+ bytes_to_prefetch / sizeof(tile_bundle_bits);
+ }
+
+ /* Decode the next bundle. */
+ bundle.bits = prefetched_bundles[next_bundle++];
+ bundle.num_insns =
+ parse_insn_tile(bundle.bits, pc, bundle.insns);
+ num_info_ops = bt_get_info_ops(&bundle, info_operands);
+
+ /* First look at any one_ago info ops if they are interesting,
+ * since they should shadow any non-one-ago info ops.
+ * For the very first bundle only the non-one-ago pass runs.
+ */
+ for (one_ago = (pc != start_pc) ? 1 : 0;
+ one_ago >= 0; one_ago--) {
+ int i;
+ for (i = 0; i < num_info_ops; i++) {
+ int info_operand = info_operands[i];
+ if (info_operand < CALLER_UNKNOWN_BASE) {
+ /* Weird; reserved value, ignore it. */
+ continue;
+ }
+
+ /* Skip info ops which are not in the
+ * "one_ago" mode we want right now.
+ */
+ if (((info_operand & ONE_BUNDLE_AGO_FLAG) != 0)
+ != (one_ago != 0))
+ continue;
+
+ /* Clear the flag to make later checking
+ * easier. */
+ info_operand &= ~ONE_BUNDLE_AGO_FLAG;
+
+ /* Default to looking at PC_IN_LR_FLAG. */
+ if (info_operand & PC_IN_LR_FLAG)
+ location->pc_location =
+ PC_LOC_IN_LR;
+ else
+ location->pc_location =
+ PC_LOC_ON_STACK;
+
+ switch (info_operand) {
+ case CALLER_UNKNOWN_BASE:
+ location->pc_location = PC_LOC_UNKNOWN;
+ location->sp_location = SP_LOC_UNKNOWN;
+ return;
+
+ case CALLER_SP_IN_R52_BASE:
+ case CALLER_SP_IN_R52_BASE | PC_IN_LR_FLAG:
+ location->sp_location = SP_LOC_IN_R52;
+ return;
+
+ default:
+ {
+ /* Remaining bits above the flag bits
+ * encode the sp offset in 8-byte units.
+ */
+ const unsigned int val = info_operand
+ - CALLER_SP_OFFSET_BASE;
+ const unsigned int sp_offset =
+ (val >> NUM_INFO_OP_FLAGS) * 8;
+ if (sp_offset < 32768) {
+ /* This is a properly encoded
+ * SP offset. */
+ location->sp_location =
+ SP_LOC_OFFSET;
+ location->sp_offset =
+ sp_offset;
+ return;
+ } else {
+ /* This looked like an SP
+ * offset, but it's outside
+ * the legal range, so this
+ * must be an unrecognized
+ * info operand. Ignore it.
+ */
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ if (seen_terminating_bundle) {
+ /* We saw a terminating bundle during the previous
+ * iteration, so we were only looking for an info op.
+ */
+ break;
+ }
+
+ if (bundle.bits == 0) {
+ /* Wacky terminating bundle. Stop looping, and hope
+ * we've already seen enough to find the caller.
+ */
+ break;
+ }
+
+ /*
+ * Try to determine caller's SP.
+ */
+
+ if (!sp_determined) {
+ int adjust;
+ if (bt_has_addi_sp(&bundle, &adjust)
+#ifdef __tilegx__
+ || bt_has_add_sp(&bundle, &adjust, moveli_args)
+#endif
+ ) {
+ location->sp_location = SP_LOC_OFFSET;
+
+ if (adjust <= 0) {
+ /* We are in prolog about to adjust
+ * SP. */
+ location->sp_offset = 0;
+ } else {
+ /* We are in epilog restoring SP. */
+ location->sp_offset = adjust;
+ }
+
+ sp_determined = true;
+ } else {
+ if (bt_has_move_r52_sp(&bundle)) {
+ /* Maybe in prolog, creating an
+ * alloca-style frame. But maybe in
+ * the middle of a fixed-size frame
+ * clobbering r52 with SP.
+ */
+ sp_moved_to_r52 = true;
+ }
+
+ if (bt_modifies_sp(&bundle)) {
+ if (sp_moved_to_r52) {
+ /* We saw SP get saved into
+ * r52 earlier (or now), which
+ * must have been in the
+ * prolog, so we now know that
+ * SP is still holding the
+ * caller's sp value.
+ */
+ location->sp_location =
+ SP_LOC_OFFSET;
+ location->sp_offset = 0;
+ } else {
+ /* Someone must have saved
+ * aside the caller's SP value
+ * into r52, so r52 holds the
+ * current value.
+ */
+ location->sp_location =
+ SP_LOC_IN_R52;
+ }
+ sp_determined = true;
+ }
+ }
+
+#ifdef __tilegx__
+ /* Track moveli arguments for -m32 mode. */
+ bt_update_moveli(&bundle, moveli_args);
+#endif
+ }
+
+ if (bt_has_iret(&bundle)) {
+ /* This is a terminating bundle. */
+ seen_terminating_bundle = true;
+ continue;
+ }
+
+ /*
+ * Try to determine caller's PC.
+ */
+
+ jrp_reg = -1;
+ has_jrp = bt_has_jrp(&bundle, &jrp_reg);
+ if (has_jrp)
+ seen_terminating_bundle = true;
+
+ if (location->pc_location == PC_LOC_UNKNOWN) {
+ if (has_jrp) {
+ if (jrp_reg == TREG_LR && !lr_modified) {
+ /* Looks like a leaf function, or else
+ * lr is already restored. */
+ location->pc_location =
+ PC_LOC_IN_LR;
+ } else {
+ location->pc_location =
+ PC_LOC_ON_STACK;
+ }
+ } else if (bt_has_sw_sp_lr(&bundle)) {
+ /* In prolog, spilling initial lr to stack. */
+ location->pc_location = PC_LOC_IN_LR;
+ } else if (bt_modifies_lr(&bundle)) {
+ lr_modified = true;
+ }
+ }
+ }
+}
+
+/* Prepare 'state' for walking the stack starting at the given register
+ * values.
+ *
+ * The code at 'pc' is analysed to work out where the caller's frame and
+ * return address live. If either cannot be established, state->fp is
+ * set to -1 so that the first backtrace_next() call fails.
+ *
+ * state: The iterator to fill in.
+ * read_memory_func: Callback used to read memory.
+ * read_memory_func_extra: Opaque argument handed to read_memory_func.
+ * pc: The current PC.
+ * lr: The current value of the 'lr' register.
+ * sp: The current value of the 'sp' register.
+ * r52: The current value of the 'r52' register.
+ */
+void backtrace_init(BacktraceIterator *state,
+		    BacktraceMemoryReader read_memory_func,
+		    void *read_memory_func_extra,
+		    unsigned long pc, unsigned long lr,
+		    unsigned long sp, unsigned long r52)
+{
+	CallerLocation location;
+	unsigned long fp;
+	/* -1 means "not known yet; read it from the stack". */
+	unsigned long initial_frame_caller_pc = -1;
+
+	/* Analyse the code around pc to locate the caller's sp and pc. */
+	find_caller_pc_and_caller_sp(&location, pc,
+				     read_memory_func, read_memory_func_extra);
+
+	switch (location.sp_location) {
+	case SP_LOC_IN_R52:
+		fp = r52;
+		break;
+
+	case SP_LOC_OFFSET:
+		fp = sp + location.sp_offset;
+		break;
+
+	case SP_LOC_UNKNOWN:
+	default:
+		/* No usable frame pointer. */
+		fp = -1;
+		break;
+	}
+
+	/* A frame pointer that is not word-aligned cannot be right. */
+	if (fp % sizeof(bt_int_reg_t) != 0)
+		fp = -1;
+
+	switch (location.pc_location) {
+	case PC_LOC_ON_STACK:
+		/* Keep -1 so the caller PC is fetched from the stack. */
+		break;
+
+	case PC_LOC_IN_LR:
+		/* lr is only trusted if it is non-zero and bundle-aligned. */
+		if (lr != 0 && lr % TILE_BUNDLE_ALIGNMENT_IN_BYTES == 0)
+			initial_frame_caller_pc = lr;
+		else
+			fp = -1;
+		break;
+
+	case PC_LOC_UNKNOWN:
+	default:
+		/* No idea where the caller PC is: invalidate the frame. */
+		fp = -1;
+		break;
+	}
+
+	state->pc = pc;
+	state->sp = sp;
+	state->fp = fp;
+	state->initial_frame_caller_pc = initial_frame_caller_pc;
+	state->read_memory_func = read_memory_func;
+	state->read_memory_func_extra = read_memory_func_extra;
+}
+
+/* Handle the case where the register holds more bits than the VA.
+ *
+ * Returns true when 'reg' compares equal to itself after conversion to
+ * unsigned long, i.e. when no bits are lost by treating it as an address.
+ * NOTE(review): when bt_int_reg_t is no wider than unsigned long this is
+ * presumably always true -- confirm for the configurations that matter.
+ */
+static bool valid_addr_reg(bt_int_reg_t reg)
+{
+ return ((unsigned long)reg == reg);
+}
+
+/* Advances the backtracing state to the calling frame, returning
+ * true iff successful.
+ *
+ * Reads two register-sized words at state->fp: word 0 is used as the
+ * caller's PC and word 1 as the caller's frame pointer. On failure
+ * 'state' is left unmodified. On success state->pc/sp/fp describe the
+ * caller and state->initial_frame_caller_pc is -1.
+ */
+bool backtrace_next(BacktraceIterator *state)
+{
+ unsigned long next_fp, next_pc;
+ bt_int_reg_t next_frame[2];
+
+ if (state->fp == -1) {
+ /* No parent frame. */
+ return false;
+ }
+
+ /* Try to read the frame linkage data chaining to the next function. */
+ if (!state->read_memory_func(&next_frame, state->fp, sizeof next_frame,
+ state->read_memory_func_extra)) {
+ return false;
+ }
+
+ /* Word 1 of the linkage area is the caller's frame pointer. */
+ next_fp = next_frame[1];
+ if (!valid_addr_reg(next_frame[1]) ||
+ next_fp % sizeof(bt_int_reg_t) != 0) {
+ /* Caller's frame pointer is suspect, so give up. */
+ return false;
+ }
+
+ if (state->initial_frame_caller_pc != -1) {
+ /* We must be in the initial stack frame and already know the
+ * caller PC.
+ */
+ next_pc = state->initial_frame_caller_pc;
+
+ /* Force reading stack next time, in case we were in the
+ * initial frame. We don't do this above just to paranoidly
+ * avoid changing the struct at all when we return false.
+ */
+ state->initial_frame_caller_pc = -1;
+ } else {
+ /* Get the caller PC from the frame linkage area. */
+ next_pc = next_frame[0];
+ if (!valid_addr_reg(next_frame[0]) || next_pc == 0 ||
+ next_pc % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0) {
+ /* The PC is suspect, so give up. */
+ return false;
+ }
+ }
+
+ /* Update state to become the caller's stack frame. */
+ state->pc = next_pc;
+ state->sp = state->fp;
+ state->fp = next_fp;
+
+ return true;
+}
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
new file mode 100644
index 00000000..d67459b9
--- /dev/null
+++ b/arch/tile/kernel/compat.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Adjust unistd.h to provide 32-bit numbers and functions. */
+#define __SYSCALL_COMPAT
+
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <linux/kdev_t.h>
+#include <linux/fs.h>
+#include <linux/fcntl.h>
+#include <linux/uaccess.h>
+#include <linux/signal.h>
+#include <asm/syscalls.h>
+
+/*
+ * Syscalls that take 64-bit numbers traditionally take them in 32-bit
+ * "high" and "low" value parts on 32-bit architectures.
+ * In principle, one could imagine passing some register arguments as
+ * fully 64-bit on TILE-Gx in 32-bit mode, but it seems easier to
+ * adapt the usual convention.
+ */
+
+/* Join the 32-bit halves of a split 64-bit syscall argument.
+ *
+ * The join is done in unsigned arithmetic so that a 'high' half with
+ * its top bit set is never shifted into the sign bit of a signed type;
+ * callers cast the result to loff_t where a signed offset is wanted.
+ */
+static inline u64 join_compat_u64(u32 low, u32 high)
+{
+	return ((u64)high << 32) | low;
+}
+
+/* truncate64: 'dummy' pads the argument list so that the low/high pair
+ * lands in the expected argument slots; it is ignored.
+ */
+long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high)
+{
+	return sys_truncate(filename, (loff_t)join_compat_u64(low, high));
+}
+
+/* ftruncate64: see compat_sys_truncate64() for the role of 'dummy'. */
+long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high)
+{
+	return sys_ftruncate(fd, (loff_t)join_compat_u64(low, high));
+}
+
+/* pread64: the file position arrives as a low/high pair after 'dummy'. */
+long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count,
+			u32 dummy, u32 low, u32 high)
+{
+	return sys_pread64(fd, ubuf, count,
+			   (loff_t)join_compat_u64(low, high));
+}
+
+/* pwrite64: the file position arrives as a low/high pair after 'dummy'. */
+long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count,
+			 u32 dummy, u32 low, u32 high)
+{
+	return sys_pwrite64(fd, ubuf, count,
+			    (loff_t)join_compat_u64(low, high));
+}
+
+/* lookup_dcookie: the 64-bit cookie arrives as a low/high pair. */
+long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len)
+{
+	return sys_lookup_dcookie(join_compat_u64(low, high), buf, len);
+}
+
+/* sync_file_range2: both the offset and the byte count are split, and
+ * 'flags' is forwarded as the last argument of sys_sync_file_range().
+ */
+long compat_sys_sync_file_range2(int fd, unsigned int flags,
+				 u32 offset_lo, u32 offset_hi,
+				 u32 nbytes_lo, u32 nbytes_hi)
+{
+	return sys_sync_file_range(fd,
+			(loff_t)join_compat_u64(offset_lo, offset_hi),
+			(loff_t)join_compat_u64(nbytes_lo, nbytes_hi),
+			flags);
+}
+
+/* fallocate: both the offset and the length are split. */
+long compat_sys_fallocate(int fd, int mode,
+			  u32 offset_lo, u32 offset_hi,
+			  u32 len_lo, u32 len_hi)
+{
+	return sys_fallocate(fd, mode,
+			     (loff_t)join_compat_u64(offset_lo, offset_hi),
+			     (loff_t)join_compat_u64(len_lo, len_hi));
+}
+
+
+
+/* compat sched_rr_get_interval(): run the native syscall against a
+ * kernel-space timespec, then convert the result to the 32-bit layout.
+ *
+ * pid: Process to query.
+ * interval: User pointer that receives the compat timespec.
+ *
+ * Returns the native syscall's result, or -EFAULT if the syscall
+ * succeeded but the result could not be written to 'interval'.
+ */
+long compat_sys_sched_rr_get_interval(compat_pid_t pid,
+				struct compat_timespec __user *interval)
+{
+	struct timespec t;
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	/* Let the native syscall accept a kernel pointer as its buffer. */
+	set_fs(KERNEL_DS);
+	ret = sys_sched_rr_get_interval(pid,
+				(struct timespec __force __user *)&t);
+	set_fs(old_fs);
+
+	/* Only copy 't' out on success: after a failure it has not been
+	 * filled in, so copying it would expose uninitialized kernel stack
+	 * and could replace the real error code with -EFAULT.
+	 */
+	if (ret == 0 && put_compat_timespec(&t, interval))
+		return -EFAULT;
+	return ret;
+}
+
+/* Provide the compat syscall number to call mapping. */
+/* Each __SYSCALL(nr, call) pulled in from <asm/unistd.h> below expands
+ * to a designated initializer "[nr] = (call)," inside the table.
+ */
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = (call),
+
+/* See comments in sys.c */
+#define compat_sys_fadvise64_64 sys32_fadvise64_64
+#define compat_sys_readahead sys32_readahead
+
+/* Call the trampolines to manage pt_regs where necessary. */
+#define compat_sys_execve _compat_sys_execve
+#define compat_sys_sigaltstack _compat_sys_sigaltstack
+#define compat_sys_rt_sigreturn _compat_sys_rt_sigreturn
+#define sys_clone _sys_clone
+
+/*
+ * Note that we can't include <linux/unistd.h> here since the header
+ * guard will defeat us; <asm/unistd.h> checks for __SYSCALL as well.
+ */
+void *compat_sys_call_table[__NR_syscalls] = {
+ /* Default every slot to sys_ni_syscall; the entries generated by the
+  * include below then override individual slots.
+  */
+ [0 ... __NR_syscalls-1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
new file mode 100644
index 00000000..cdef6e5e
--- /dev/null
+++ b/arch/tile/kernel/compat_signal.c
@@ -0,0 +1,431 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/suspend.h>
+#include <linux/ptrace.h>
+#include <linux/elf.h>
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <asm/processor.h>
+#include <asm/ucontext.h>
+#include <asm/sigframe.h>
+#include <asm/syscalls.h>
+#include <arch/interrupts.h>
+
+/* 32-bit user-space view of a signal action, as read and written by
+ * compat_sys_rt_sigaction(). Pointers are carried as compat_uptr_t.
+ */
+struct compat_sigaction {
+ compat_uptr_t sa_handler;
+ compat_ulong_t sa_flags;
+ compat_uptr_t sa_restorer;
+ /* NOTE(review): __packed presumably keeps the mask from being padded
+  * to the native sigset_t alignment -- confirm against the 32-bit ABI.
+  */
+ sigset_t sa_mask __packed;
+};
+
+/* 32-bit user-space view of an alternate signal stack description, as
+ * read and written by compat_sys_sigaltstack().
+ */
+struct compat_sigaltstack {
+ compat_uptr_t ss_sp;
+ int ss_flags;
+ compat_size_t ss_size;
+};
+
+/* 32-bit user context stored in a compat signal frame (see
+ * struct compat_rt_sigframe).
+ */
+struct compat_ucontext {
+ compat_ulong_t uc_flags;
+ compat_uptr_t uc_link;
+ struct compat_sigaltstack uc_stack;
+ struct sigcontext uc_mcontext;
+ sigset_t uc_sigmask; /* mask last for extensibility */
+};
+
+/* Number of ints of padding in the _sifields union, chosen so that the
+ * three leading ints plus the union add up to SI_MAX_SIZE bytes.
+ */
+#define COMPAT_SI_PAD_SIZE ((SI_MAX_SIZE - 3 * sizeof(int)) / sizeof(int))
+
+/* 32-bit user-space view of siginfo. The member names inside _sifields
+ * match those used by the si_* accessor macros, which
+ * copy_siginfo_to_user32() and copy_siginfo_from_user32() rely on.
+ */
+struct compat_siginfo {
+ int si_signo;
+ int si_errno;
+ int si_code;
+
+ union {
+ int _pad[COMPAT_SI_PAD_SIZE];
+
+ /* kill() */
+ struct {
+ unsigned int _pid; /* sender's pid */
+ unsigned int _uid; /* sender's uid */
+ } _kill;
+
+ /* POSIX.1b timers */
+ struct {
+ compat_timer_t _tid; /* timer id */
+ int _overrun; /* overrun count */
+ compat_sigval_t _sigval; /* same as below */
+ int _sys_private; /* not to be passed to user */
+ int _overrun_incr; /* amount to add to overrun */
+ } _timer;
+
+ /* POSIX.1b signals */
+ struct {
+ unsigned int _pid; /* sender's pid */
+ unsigned int _uid; /* sender's uid */
+ compat_sigval_t _sigval;
+ } _rt;
+
+ /* SIGCHLD */
+ struct {
+ unsigned int _pid; /* which child */
+ unsigned int _uid; /* sender's uid */
+ int _status; /* exit code */
+ compat_clock_t _utime;
+ compat_clock_t _stime;
+ } _sigchld;
+
+ /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+ struct {
+ unsigned int _addr; /* faulting insn/memory ref. */
+#ifdef __ARCH_SI_TRAPNO
+ int _trapno; /* TRAP # which caused the signal */
+#endif
+ } _sigfault;
+
+ /* SIGPOLL */
+ struct {
+ int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
+ int _fd;
+ } _sigpoll;
+ } _sifields;
+};
+
+/* Layout of a compat signal frame on the user stack. On return from a
+ * handler, compat_sys_rt_sigreturn() reads it back from regs->sp.
+ */
+struct compat_rt_sigframe {
+ unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */
+ struct compat_siginfo info;
+ struct compat_ucontext uc;
+};
+
+/* Mask covering every signal except SIGKILL and SIGSTOP; its complement
+ * is stripped from a restored mask in compat_sys_rt_sigreturn().
+ */
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+/* compat rt_sigaction(): translate 32-bit sigaction structures to and
+ * from the kernel's k_sigaction around a call to do_sigaction().
+ *
+ * sig: Signal number.
+ * act: New action to install, or NULL to leave it unchanged.
+ * oact: Receives the previous action, or NULL if not wanted.
+ * sigsetsize: Must equal sizeof(sigset_t).
+ *
+ * Returns -EINVAL for a wrong sigsetsize, -EFAULT if a user structure
+ * cannot be accessed, otherwise the result of do_sigaction().
+ */
+long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
+			     struct compat_sigaction __user *oact,
+			     size_t sigsetsize)
+{
+	struct k_sigaction new_sa, old_sa;
+	struct k_sigaction *new_ka = NULL, *old_ka = NULL;
+	int ret;
+
+	/* XXX: Don't preclude handling different sized sigset_t's. */
+	if (sigsetsize != sizeof(sigset_t))
+		return -EINVAL;
+
+	if (act) {
+		compat_uptr_t handler, restorer;
+
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)))
+			return -EFAULT;
+		if (__get_user(handler, &act->sa_handler) ||
+		    __get_user(new_sa.sa.sa_flags, &act->sa_flags) ||
+		    __get_user(restorer, &act->sa_restorer) ||
+		    __copy_from_user(&new_sa.sa.sa_mask, &act->sa_mask,
+				     sizeof(sigset_t)))
+			return -EFAULT;
+
+		/* Widen the 32-bit user pointers to kernel pointers. */
+		new_sa.sa.sa_handler = compat_ptr(handler);
+		new_sa.sa.sa_restorer = compat_ptr(restorer);
+		new_ka = &new_sa;
+	}
+	if (oact)
+		old_ka = &old_sa;
+
+	ret = do_sigaction(sig, new_ka, old_ka);
+	if (ret || !oact)
+		return ret;
+
+	/* Success and the caller wants the old action back. */
+	if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)))
+		return -EFAULT;
+	if (__put_user(ptr_to_compat(old_sa.sa.sa_handler),
+		       &oact->sa_handler) ||
+	    __put_user(ptr_to_compat(old_sa.sa.sa_restorer),
+		       &oact->sa_restorer) ||
+	    __put_user(old_sa.sa.sa_flags, &oact->sa_flags) ||
+	    __copy_to_user(&oact->sa_mask, &old_sa.sa.sa_mask,
+			   sizeof(sigset_t)))
+		return -EFAULT;
+
+	return ret;
+}
+
+/* compat rt_sigqueueinfo(): convert the 32-bit siginfo to a native
+ * siginfo_t and hand it to sys_rt_sigqueueinfo().
+ *
+ * Returns -EFAULT if the user siginfo cannot be converted, otherwise the
+ * result of the native syscall.
+ */
+long compat_sys_rt_sigqueueinfo(int pid, int sig,
+ struct compat_siginfo __user *uinfo)
+{
+ siginfo_t info;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ if (copy_siginfo_from_user32(&info, uinfo))
+ return -EFAULT;
+ /* Let the native syscall accept the kernel-space copy as its buffer. */
+ set_fs(KERNEL_DS);
+ ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __force __user *)&info);
+ set_fs(old_fs);
+ return ret;
+}
+
+/* Copy a native siginfo_t out to a 32-bit user siginfo.
+ *
+ * Only the three leading ints and the union members relevant to
+ * from->si_code are written. Returns -EFAULT if 'to' fails access_ok(),
+ * otherwise the OR of the individual __put_user() results (zero on
+ * success).
+ */
+int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from)
+{
+ int err;
+
+ if (!access_ok(VERIFY_WRITE, to, sizeof(struct compat_siginfo)))
+ return -EFAULT;
+
+ /* If you change siginfo_t structure, please make sure that
+ this code is fixed accordingly.
+ It should never copy any pad contained in the structure
+ to avoid security leaks, but must copy the generic
+ 3 ints plus the relevant union member. */
+ err = __put_user(from->si_signo, &to->si_signo);
+ err |= __put_user(from->si_errno, &to->si_errno);
+ err |= __put_user((short)from->si_code, &to->si_code);
+
+ if (from->si_code < 0) {
+ /* Negative si_code: copy pid, uid and the pointer value. */
+ err |= __put_user(from->si_pid, &to->si_pid);
+ err |= __put_user(from->si_uid, &to->si_uid);
+ err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr);
+ } else {
+ /*
+ * First 32bits of unions are always present:
+ * si_pid === si_band === si_tid === si_addr(LS half)
+ */
+ err |= __put_user(from->_sifields._pad[0],
+ &to->_sifields._pad[0]);
+ /* The upper 16 bits of si_code select the union member in use. */
+ switch (from->si_code >> 16) {
+ case __SI_FAULT >> 16:
+ break;
+ case __SI_CHLD >> 16:
+ err |= __put_user(from->si_utime, &to->si_utime);
+ err |= __put_user(from->si_stime, &to->si_stime);
+ err |= __put_user(from->si_status, &to->si_status);
+ /* FALL THROUGH */
+ default:
+ case __SI_KILL >> 16:
+ err |= __put_user(from->si_uid, &to->si_uid);
+ break;
+ case __SI_POLL >> 16:
+ err |= __put_user(from->si_fd, &to->si_fd);
+ break;
+ case __SI_TIMER >> 16:
+ err |= __put_user(from->si_overrun, &to->si_overrun);
+ err |= __put_user(ptr_to_compat(from->si_ptr),
+ &to->si_ptr);
+ break;
+ /* This is not generated by the kernel as of now. */
+ case __SI_RT >> 16:
+ case __SI_MESGQ >> 16:
+ err |= __put_user(from->si_uid, &to->si_uid);
+ err |= __put_user(from->si_int, &to->si_int);
+ break;
+ }
+ }
+ return err;
+}
+
+/* Read a 32-bit user siginfo into a native siginfo_t.
+ *
+ * Only si_signo, si_errno, si_code, si_pid, si_uid and si_ptr are
+ * filled in; the rest of '*to' is not written by this function.
+ * Returns -EFAULT if 'from' fails access_ok(), otherwise the OR of the
+ * individual __get_user() results (zero on success).
+ */
+int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
+{
+ int err;
+ u32 ptr32;
+
+ if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo)))
+ return -EFAULT;
+
+ err = __get_user(to->si_signo, &from->si_signo);
+ err |= __get_user(to->si_errno, &from->si_errno);
+ err |= __get_user(to->si_code, &from->si_code);
+
+ err |= __get_user(to->si_pid, &from->si_pid);
+ err |= __get_user(to->si_uid, &from->si_uid);
+ /* The pointer is read as 32 bits and widened with compat_ptr(). */
+ err |= __get_user(ptr32, &from->si_ptr);
+ to->si_ptr = compat_ptr(ptr32);
+
+ return err;
+}
+
+/* compat sigaltstack(): convert the 32-bit stack descriptions to and
+ * from native stack_t around a call to do_sigaltstack().
+ *
+ * uss_ptr: New alternate stack, or NULL to leave it unchanged.
+ * uoss_ptr: Receives the previous alternate stack, or NULL.
+ * regs: Supplies the current user sp passed to do_sigaltstack().
+ *
+ * Returns -EFAULT if a user structure cannot be accessed, otherwise the
+ * result of do_sigaltstack().
+ */
+long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
+ struct compat_sigaltstack __user *uoss_ptr,
+ struct pt_regs *regs)
+{
+ stack_t uss, uoss;
+ int ret;
+ mm_segment_t seg;
+
+ if (uss_ptr) {
+ u32 ptr;
+
+ memset(&uss, 0, sizeof(stack_t));
+ if (!access_ok(VERIFY_READ, uss_ptr, sizeof(*uss_ptr)) ||
+ __get_user(ptr, &uss_ptr->ss_sp) ||
+ __get_user(uss.ss_flags, &uss_ptr->ss_flags) ||
+ __get_user(uss.ss_size, &uss_ptr->ss_size))
+ return -EFAULT;
+ uss.ss_sp = compat_ptr(ptr);
+ }
+ /* do_sigaltstack() is given kernel-space copies, so switch to
+  * KERNEL_DS around the call. The old stack is always requested,
+  * whether or not the caller asked for it.
+  */
+ seg = get_fs();
+ set_fs(KERNEL_DS);
+ ret = do_sigaltstack(uss_ptr ? (stack_t __user __force *)&uss : NULL,
+ (stack_t __user __force *)&uoss,
+ (unsigned long)compat_ptr(regs->sp));
+ set_fs(seg);
+ if (ret >= 0 && uoss_ptr) {
+ if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(*uoss_ptr)) ||
+ __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
+ __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
+ __put_user(uoss.ss_size, &uoss_ptr->ss_size))
+ ret = -EFAULT;
+ }
+ return ret;
+}
+
+/*
+ * Return from a compat signal handler: reload the blocked-signal mask,
+ * the saved machine context and the alternate-stack settings from the
+ * compat_rt_sigframe found at the user stack pointer.  Any failure is
+ * reported through signal_fault().
+ *
+ * The assembly shim for this function arranges to ignore the return value.
+ */
+long compat_sys_rt_sigreturn(struct pt_regs *regs)
+{
+	struct compat_rt_sigframe __user *frame;
+	sigset_t blocked;
+
+	frame = (struct compat_rt_sigframe __user *) compat_ptr(regs->sp);
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)) ||
+	    __copy_from_user(&blocked, &frame->uc.uc_sigmask, sizeof(blocked)))
+		goto badframe;
+
+	/* Drop the bits in ~_BLOCKABLE before installing the mask. */
+	sigdelsetmask(&blocked, ~_BLOCKABLE);
+	set_current_blocked(&blocked);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext) ||
+	    compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0)
+		goto badframe;
+
+	return 0;
+
+badframe:
+	signal_fault("bad sigreturn frame", regs, frame, 0);
+	return 0;
+}
+
+/*
+ * Choose the user address at which a signal frame of frame_size bytes
+ * will be built: on the alternate signal stack when SA_ONSTACK asks for
+ * it and we are not already there, otherwise below the current stack
+ * pointer.  The result is rounded down to a 16-byte boundary.
+ */
+static inline void __user *compat_get_sigframe(struct k_sigaction *ka,
+					       struct pt_regs *regs,
+					       size_t frame_size)
+{
+	/* Start from the interrupted context's stack pointer. */
+	unsigned long top = (unsigned long)compat_ptr(regs->sp);
+
+	/*
+	 * Already on the alternate stack without room for the frame:
+	 * return an always-bogus address so that we die with SIGSEGV
+	 * instead of overflowing it.
+	 */
+	if (on_sig_stack(top)) {
+		if (!likely(on_sig_stack(top - frame_size)))
+			return (void __user __force *)-1UL;
+	}
+
+	/* This is the X/Open sanctioned signal stack switching. */
+	if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags(top) == 0)
+		top = current->sas_ss_sp + current->sas_ss_size;
+
+	/*
+	 * Reserve the frame, then align according to the TILE ABI,
+	 * i.e. so that on function entry (sp & 15) == 0.
+	 */
+	top -= frame_size;
+	top &= -16UL;
+
+	return (void __user *) top;
+}
+
+/*
+ * Build a compat_rt_sigframe on the user stack and redirect the task
+ * to the signal handler.
+ *
+ * sig:  signal number being delivered.
+ * ka:   action for the signal (handler, flags, optional restorer).
+ * info: siginfo to hand to the handler.
+ * set:  signal mask stored in the frame's ucontext.
+ * regs: user registers; rewritten to enter the handler.
+ *
+ * Returns 0 on success.  If the frame is not writable or any store
+ * fails, signal_fault() is called and -EFAULT is returned.
+ */
+int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+{
+ unsigned long restorer;
+ struct compat_rt_sigframe __user *frame;
+ int err = 0;
+ int usig;
+
+ frame = compat_get_sigframe(ka, regs, sizeof(*frame));
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ goto give_sigsegv;
+
+ /*
+ * Translate the signal number through the exec domain's inverse
+ * map when one exists and sig is below 32; otherwise use it as is.
+ */
+ usig = current_thread_info()->exec_domain
+ && current_thread_info()->exec_domain->signal_invmap
+ && sig < 32
+ ? current_thread_info()->exec_domain->signal_invmap[sig]
+ : sig;
+
+ /* Always write at least the signal number for the stack backtracer. */
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ /* At sigreturn time, restore the callee-save registers too. */
+ err |= copy_siginfo_to_user32(&frame->info, info);
+ regs->flags |= PT_FLAGS_RESTORE_REGS;
+ } else {
+ err |= __put_user(info->si_signo, &frame->info.si_signo);
+ }
+
+ /* Create the ucontext. */
+ err |= __clear_user(&frame->save_area, sizeof(frame->save_area));
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(0, &frame->uc.uc_link);
+ err |= __put_user(ptr_to_compat((void *)(current->sas_ss_sp)),
+ &frame->uc.uc_stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->sp),
+ &frame->uc.uc_stack.ss_flags);
+ err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, regs);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ if (err)
+ goto give_sigsegv;
+
+ /* The handler returns to VDSO_BASE unless SA_RESTORER overrides it. */
+ restorer = VDSO_BASE;
+ if (ka->sa.sa_flags & SA_RESTORER)
+ restorer = ptr_to_compat_reg(ka->sa.sa_restorer);
+
+ /*
+ * Set up registers for signal handler.
+ * Registers that we don't modify keep the value they had from
+ * user-space at the time we took the signal.
+ * We always pass siginfo and mcontext, regardless of SA_SIGINFO,
+ * since some things rely on this (e.g. glibc's debug/segfault.c).
+ */
+ regs->pc = ptr_to_compat_reg(ka->sa.sa_handler);
+ regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */
+ regs->sp = ptr_to_compat_reg(frame);
+ regs->lr = restorer;
+ regs->regs[0] = (unsigned long) usig;
+ regs->regs[1] = ptr_to_compat_reg(&frame->info);
+ regs->regs[2] = ptr_to_compat_reg(&frame->uc);
+ regs->flags |= PT_FLAGS_CALLER_SAVES;
+
+ /*
+ * Notify any tracer that was single-stepping it.
+ * The tracer may want to single-step inside the
+ * handler too.
+ */
+ if (test_thread_flag(TIF_SINGLESTEP))
+ ptrace_notify(SIGTRAP);
+
+ return 0;
+
+give_sigsegv:
+ signal_fault("bad setup frame", regs, frame, sig);
+ return -EFAULT;
+}
diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c
new file mode 100644
index 00000000..afb9c9a0
--- /dev/null
+++ b/arch/tile/kernel/early_printk.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/irqflags.h>
+#include <asm/setup.h>
+#include <hv/hypervisor.h>
+
+/* Console write hook: pass the n bytes at s to hv_console_write(). */
+static void early_hv_write(struct console *con, const char *s, unsigned n)
+{
+	HV_VirtAddr text = (HV_VirtAddr) s;
+
+	hv_console_write(text, n);
+}
+
+/* Console descriptor that routes output through early_hv_write(). */
+static struct console early_hv_console = {
+	.name	= "earlyhv",
+	.index	= -1,
+	.flags	= CON_PRINTBUFFER,
+	.write	= early_hv_write,
+};
+
+/*
+ * Direct interface for emergencies: early_vprintk() and early_panic()
+ * call early_console->write() directly.
+ */
+static struct console *early_console = &early_hv_console;
+/* Set by setup_early_printk(); cleared when the console is unregistered. */
+static int early_console_initialized;
+/* Set once disable_early_printk() has run. */
+static int early_console_complete;
+
+/*
+ * Format into a 512-byte stack buffer (vscnprintf() truncates longer
+ * output) and write the result straight to the early console.
+ */
+static void early_vprintk(const char *fmt, va_list ap)
+{
+	char line[512];
+	int len;
+
+	len = vscnprintf(line, sizeof(line), fmt, ap);
+	early_console->write(early_console, line, len);
+}
+
+/* printf-style front end to early_vprintk(). */
+void early_printk(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	early_vprintk(fmt, args);
+	va_end(args);
+}
+
+/*
+ * Emit a panic banner and the formatted message on the early console,
+ * dump the stack and halt through the hypervisor.
+ */
+void early_panic(const char *fmt, ...)
+{
+ va_list ap;
+ /* Disable interrupts before producing any output. */
+ arch_local_irq_disable_all();
+ va_start(ap, fmt);
+ early_printk("Kernel panic - not syncing: ");
+ early_vprintk(fmt, ap);
+ /* Terminate the message line by writing to the console directly. */
+ early_console->write(early_console, "\n", 1);
+ va_end(ap);
+ dump_stack();
+ hv_halt();
+}
+
+/* Nonzero when the "earlyprintk" option value starts with "keep". */
+static int __initdata keep_early;
+
+/*
+ * Handler for the "earlyprintk" boot parameter: register the early
+ * hypervisor console.  Returns 1 if it was already set up, else 0.
+ */
+static int __init setup_early_printk(char *str)
+{
+	if (early_console_initialized)
+		return 1;
+
+	if (str && !strncmp(str, "keep", 4))
+		keep_early = 1;
+
+	early_console = &early_hv_console;
+	early_console_initialized = 1;
+	register_console(early_console);
+
+	return 0;
+}
+
+/*
+ * Record that early console use is complete and, unless "keep" was
+ * requested, unregister the early console.
+ */
+void __init disable_early_printk(void)
+{
+	early_console_complete = 1;
+
+	/* Nothing to do when no early console was ever registered. */
+	if (!(early_console_initialized && early_console))
+		return;
+
+	if (keep_early) {
+		early_printk("keeping early console\n");
+		return;
+	}
+
+	early_printk("disabling early console\n");
+	unregister_console(early_console);
+	early_console_initialized = 0;
+}
+
+/*
+ * Print a hint about the 'earlyprintk' option.  Does nothing once
+ * disable_early_printk() has run or while an early console is
+ * registered.
+ */
+void warn_early_printk(void)
+{
+	if (early_console_complete || early_console_initialized)
+		return;
+
+	early_printk(
+		"Machine shutting down before console output is fully initialized.\n"
+		"You may wish to reboot and add the option 'earlyprintk' to your\n"
+		"boot command line to see any diagnostic early console output.\n");
+}
+
+early_param("earlyprintk", setup_early_printk);
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
new file mode 100644
index 00000000..ec91568d
--- /dev/null
+++ b/arch/tile/kernel/entry.S
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/linkage.h>
+#include <linux/unistd.h>
+#include <asm/irqflags.h>
+#include <asm/processor.h>
+#include <arch/abi.h>
+#include <arch/spr_def.h>
+
+/* When building for tilegx, spell the bnzt mnemonic as bnezt. */
+#ifdef __tilegx__
+#define bnzt bnezt
+#endif
+
+/* Copy lr (the caller's return address) into r0 and return. */
+STD_ENTRY(current_text_addr)
+ { move r0, lr; jrp lr }
+ STD_ENDPROC(current_text_addr)
+
+/*
+ * Implement execve(). The i386 code has a note that forking from kernel
+ * space results in no copy on write until the execve, so we should be
+ * careful not to write to the stack here.
+ *
+ * Loads __NR_execve into the syscall-number register, issues swint1
+ * and returns to the caller.  No other register is written here, so
+ * the arguments are presumably left as the caller set them up.
+ */
+STD_ENTRY(kernel_execve)
+ moveli TREG_SYSCALL_NR_NAME, __NR_execve
+ swint1
+ jrp lr
+ STD_ENDPROC(kernel_execve)
+
+/*
+ * We don't run this function directly, but instead copy it to a page
+ * we map into every user process. See vdso_setup().
+ *
+ * Note that libc has a copy of this function that it uses to compare
+ * against the PC when a stack backtrace ends, so if this code is
+ * changed, the libc implementation(s) should also be updated.
+ *
+ * The stub is emitted into .data rather than the text section.
+ * __rt_sigreturn_end labels the first byte after it (presumably so the
+ * copy length can be computed; see vdso_setup()).
+ */
+ .pushsection .data
+ENTRY(__rt_sigreturn)
+ moveli TREG_SYSCALL_NR_NAME,__NR_rt_sigreturn
+ swint1
+ ENDPROC(__rt_sigreturn)
+ ENTRY(__rt_sigreturn_end)
+ .popsection
+
+/*
+ * Entry shim for _dump_stack(): capture the caller's state in
+ * registers and tail-jump.  r0 is not written here.
+ *   r1 = lnk result adjusted by (dump_stack - .), presumably the
+ *        address of dump_stack itself (confirm against lnk semantics)
+ *   r2 = lr, r3 = sp, r4 = r52
+ */
+STD_ENTRY(dump_stack)
+ { move r2, lr; lnk r1 }
+ { move r4, r52; addli r1, r1, dump_stack - . }
+ { move r3, sp; j _dump_stack }
+ jrp lr /* keep backtracer happy */
+ STD_ENDPROC(dump_stack)
+
+/*
+ * Entry shim for _KBacktraceIterator_init_current(), using the same
+ * register setup as dump_stack; r0 is not written here.
+ *   r1 = lnk result adjusted by (KBacktraceIterator_init_current - .),
+ *        presumably this routine's own address (confirm against lnk)
+ *   r2 = lr, r3 = sp, r4 = r52
+ */
+STD_ENTRY(KBacktraceIterator_init_current)
+ { move r2, lr; lnk r1 }
+ { move r4, r52; addli r1, r1, KBacktraceIterator_init_current - . }
+ { move r3, sp; j _KBacktraceIterator_init_current }
+ jrp lr /* keep backtracer happy */
+ STD_ENDPROC(KBacktraceIterator_init_current)
+
+/*
+ * Reset our stack to r1/r2 (sp and ksp0+cpu respectively), then
+ * free the old stack (passed in r0) and re-invoke cpu_idle().
+ * We update sp and ksp0 simultaneously to avoid backtracer warnings.
+ *
+ * r0 is not modified before the call, so it reaches free_thread_info()
+ * as passed in.  cpu_idle is entered with a plain jump, not a call.
+ */
+STD_ENTRY(cpu_idle_on_new_stack)
+ {
+ move sp, r1
+ mtspr SPR_SYSTEM_SAVE_K_0, r2
+ }
+ jal free_thread_info
+ j cpu_idle
+ STD_ENDPROC(cpu_idle_on_new_stack)
+
+/*
+ * Loop forever on a nap during SMP boot.
+ * If the nap ever exits, execution jumps back to smp_nap and naps again;
+ * the trailing jrp follows an unconditional jump and is never reached.
+ */
+STD_ENTRY(smp_nap)
+ nap
+ nop /* avoid provoking the icache prefetch with a jump */
+ j smp_nap /* we are not architecturally guaranteed not to exit nap */
+ jrp lr /* clue in the backtracer */
+ STD_ENDPROC(smp_nap)
+
+/*
+ * Enable interrupts racelessly and then nap until interrupted.
+ * Architecturally, we are guaranteed that enabling interrupts via
+ * mtspr to INTERRUPT_CRITICAL_SECTION only interrupts at the next PC.
+ * This function's _cpu_idle_nap address is special; see intvec.S.
+ * When interrupted at _cpu_idle_nap, we bump the PC forward 8, and
+ * as a result return to the function that called _cpu_idle().
+ *
+ * Uses r1, r2 and r3 as scratch registers.
+ */
+STD_ENTRY(_cpu_idle)
+ movei r1, 1
+ mtspr INTERRUPT_CRITICAL_SECTION, r1 /* set ICS to 1 */
+ IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */
+ mtspr INTERRUPT_CRITICAL_SECTION, zero /* clear ICS */
+ .global _cpu_idle_nap
+_cpu_idle_nap:
+ nap
+ nop /* avoid provoking the icache prefetch with a jump */
+ jrp lr
+ STD_ENDPROC(_cpu_idle)
diff --git a/arch/tile/kernel/futex_64.S b/arch/tile/kernel/futex_64.S
new file mode 100644
index 00000000..f465d1ed
--- /dev/null
+++ b/arch/tile/kernel/futex_64.S
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Atomically access user memory, but use MMU to avoid propagating
+ * kernel exceptions.
+ */
+
+#include <linux/linkage.h>
+#include <asm/errno.h>
+#include <asm/futex.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+
+/*
+ * Provide a set of atomic memory operations supporting <asm/futex.h>.
+ *
+ * r0: user address to manipulate
+ * r1: new value to write, or for cmpxchg, old value to compare against
+ * r2: (cmpxchg only) new value to write
+ *
+ * Return __get_user struct, r0 with value, r1 with error.
+ *
+ * FUTEX_OP(name, insns...) defines futex_<name>: it runs the supplied
+ * instructions, then sets r1 to zero and returns.  The instructions
+ * must place a local label "1:" on the user-memory access, because the
+ * macro emits an __ex_table entry pairing "1b" with get_user_fault.
+ */
+#define FUTEX_OP(name, ...) \
+STD_ENTRY(futex_##name) \
+ __VA_ARGS__; \
+ { \
+ move r1, zero; \
+ jrp lr \
+ }; \
+ STD_ENDPROC(futex_##name); \
+ .pushsection __ex_table,"a"; \
+ .quad 1b, get_user_fault; \
+ .popsection
+
+/*
+ * Fixup target named by every FUTEX_OP exception-table entry:
+ * set r1 to -EFAULT and return through lr.  Lives in .fixup.
+ */
+ .pushsection .fixup,"ax"
+get_user_fault:
+ { movei r1, -EFAULT; jrp lr }
+ ENDPROC(get_user_fault)
+ .popsection
+
+/* cmpxchg: load r1 into the CMPEXCH_VALUE SPR, then cmpexch4 at r0 with r2. */
+FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2)
+/* set: exch4 at r0 with r1. */
+FUTEX_OP(set, 1: exch4 r0, r0, r1)
+/* add: fetchadd4 at r0 with r1. */
+FUTEX_OP(add, 1: fetchadd4 r0, r0, r1)
+/* or: fetchor4 at r0 with r1. */
+FUTEX_OP(or, 1: fetchor4 r0, r0, r1)
+/* andn: complement r1 (nor with zero), then fetchand4 at r0 with it. */
+FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1)
diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c
new file mode 100644
index 00000000..8c41891a
--- /dev/null
+++ b/arch/tile/kernel/hardwall.c
@@ -0,0 +1,837 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/rwsem.h>
+#include <linux/kprobes.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/smp.h>
+#include <linux/cdev.h>
+#include <linux/compat.h>
+#include <asm/hardwall.h>
+#include <asm/traps.h>
+#include <asm/siginfo.h>
+#include <asm/irq_regs.h>
+
+#include <arch/interrupts.h>
+#include <arch/spr_def.h>
+
+
+/*
+ * This data structure tracks the rectangle data, etc., associated
+ * one-to-one with a "struct file *" from opening HARDWALL_FILE.
+ * Note that the file's private data points back to this structure.
+ *
+ * The geometry fields, cpumask and id are filled in by
+ * setup_rectangle(); "list" links the object into the global
+ * "rectangles" list and "task_head" anchors the activated tasks
+ * (linked through thread.hardwall_list).
+ */
+struct hardwall_info {
+ struct list_head list; /* "rectangles" list */
+ struct list_head task_head; /* head of tasks in this hardwall */
+ struct cpumask cpumask; /* cpus in the rectangle */
+ int ulhc_x; /* upper left hand corner x coord */
+ int ulhc_y; /* upper left hand corner y coord */
+ int width; /* rectangle width */
+ int height; /* rectangle height */
+ int id; /* integer id for this hardwall */
+ int teardown_in_progress; /* are we tearing this one down? */
+};
+
+/*
+ * Currently allocated hardwall rectangles, linked through
+ * hardwall_info.list; modified with hardwall_lock held.
+ */
+static LIST_HEAD(rectangles);
+
+/* /proc/tile/hardwall */
+static struct proc_dir_entry *hardwall_proc_dir;
+
+/*
+ * Functions to manage files in /proc/tile/hardwall
+ * (forward declarations; defined later in this file).
+ */
+static void hardwall_add_proc(struct hardwall_info *rect);
+static void hardwall_remove_proc(struct hardwall_info *rect);
+
+/*
+ * Guard changes to the hardwall data structures.
+ * This could be finer grained (e.g. one lock for the list of hardwall
+ * rectangles, then separate embedded locks for each one's list of tasks),
+ * but there are subtle correctness issues when trying to start with
+ * a task's "hardwall" pointer and lock the correct rectangle's embedded
+ * lock in the presence of a simultaneous deactivation, so it seems
+ * easier to have a single lock, given that none of these data
+ * structures are touched very frequently during normal operation.
+ */
+static DEFINE_SPINLOCK(hardwall_lock);
+
+/*
+ * "noudn" boot parameter: sets udn_disabled, which makes
+ * reset_network_state() return without touching the UDN registers.
+ */
+static int udn_disabled;
+static int __init noudn(char *str)
+{
+	udn_disabled = 1;
+	pr_info("User-space UDN access is disabled\n");
+	return 0;
+}
+early_param("noudn", noudn);
+
+
+/*
+ * Low-level primitives
+ */
+
+/*
+ * Set a CPU bit if the CPU is online.
+ * Note that "cpu" is evaluated twice, so callers must pass an
+ * expression without side effects.
+ */
+#define cpu_online_set(cpu, dst) do { \
+ if (cpu_online(cpu)) \
+ cpumask_set_cpu(cpu, dst); \
+} while (0)
+
+
+/*
+ * Return 1 if tile coordinate (x, y) lies inside rectangle r, else 0.
+ * The rectangle spans [ulhc_x, ulhc_x + width) by
+ * [ulhc_y, ulhc_y + height).
+ */
+static int contains(struct hardwall_info *r, int x, int y)
+{
+	int right = r->ulhc_x + r->width;
+	int bottom = r->ulhc_y + r->height;
+
+	if (x < r->ulhc_x || x >= right)
+		return 0;
+	if (y < r->ulhc_y || y >= bottom)
+		return 0;
+	return 1;
+}
+
+/*
+ * Compute the rectangle parameters and validate the cpumask.
+ *
+ * r:    hardwall_info whose geometry, cpumask and id are filled in.
+ * mask: requested set of cpus.
+ *
+ * Returns 0 if the mask is exactly one non-empty rectangle of tiles
+ * inside the smp_width x smp_height grid, otherwise -EINVAL.  On the
+ * early -EINVAL returns "r" is left untouched.
+ */
+static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask)
+{
+	int x, y, cpu, ulhc, lrhc;
+
+	/*
+	 * An empty mask has no corners: the find_*_bit() helpers would
+	 * return their "not found" size value, which is not a cpu.
+	 */
+	if (cpumask_empty(mask))
+		return -EINVAL;
+
+	/* The first cpu is the ULHC, the last the LRHC. */
+	ulhc = find_first_bit(cpumask_bits(mask), nr_cpumask_bits);
+	lrhc = find_last_bit(cpumask_bits(mask), nr_cpumask_bits);
+
+	/*
+	 * The confirmation loop below only visits cpus inside the grid,
+	 * so bits beyond it would never be checked; reject them here.
+	 */
+	if (lrhc >= smp_width * smp_height)
+		return -EINVAL;
+
+	/* Compute the rectangle attributes from the cpus. */
+	r->ulhc_x = cpu_x(ulhc);
+	r->ulhc_y = cpu_y(ulhc);
+	r->width = cpu_x(lrhc) - r->ulhc_x + 1;
+	r->height = cpu_y(lrhc) - r->ulhc_y + 1;
+	cpumask_copy(&r->cpumask, mask);
+	r->id = ulhc;  /* The ulhc cpu id can be the hardwall id. */
+
+	/* Width and height must be positive */
+	if (r->width <= 0 || r->height <= 0)
+		return -EINVAL;
+
+	/* Confirm that the cpumask is exactly the rectangle. */
+	for (y = 0, cpu = 0; y < smp_height; ++y)
+		for (x = 0; x < smp_width; ++x, ++cpu)
+			if (cpumask_test_cpu(cpu, mask) != contains(r, x, y))
+				return -EINVAL;
+
+	/*
+	 * Note that offline cpus can't be drained when this UDN
+	 * rectangle eventually closes.  We used to detect this
+	 * situation and print a warning, but it annoyed users and
+	 * they ignored it anyway, so now we just return without a
+	 * warning.
+	 */
+	return 0;
+}
+
+/*
+ * Return 1 if rectangles a and b share at least one tile, else 0.
+ * They are disjoint as soon as one lies entirely to the left of, or
+ * entirely above, the other.
+ */
+static int overlaps(struct hardwall_info *a, struct hardwall_info *b)
+{
+	if (a->ulhc_x + a->width <= b->ulhc_x)		/* A left of B */
+		return 0;
+	if (b->ulhc_x + b->width <= a->ulhc_x)		/* B left of A */
+		return 0;
+	if (a->ulhc_y + a->height <= b->ulhc_y)		/* A above B */
+		return 0;
+	if (b->ulhc_y + b->height <= a->ulhc_y)		/* B above A */
+		return 0;
+	return 1;
+}
+
+
+/*
+ * Hardware management of hardwall setup, teardown, trapping,
+ * and enabling/disabling PL0 access to the networks.
+ */
+
+/*
+ * Bit field values to OR together for writes to
+ * SPR_UDN_DIRECTION_PROTECT, one bit per compass direction.
+ */
+enum direction_protect {
+	N_PROTECT = 0x1,
+	E_PROTECT = 0x2,
+	S_PROTECT = 0x4,
+	W_PROTECT = 0x8
+};
+
+/* Unmask the UDN firewall interrupt (INT_UDN_FIREWALL) on this cpu. */
+static void enable_firewall_interrupts(void)
+{
+ arch_local_irq_unmask_now(INT_UDN_FIREWALL);
+}
+
+/* Mask the UDN firewall interrupt (INT_UDN_FIREWALL) on this cpu. */
+static void disable_firewall_interrupts(void)
+{
+ arch_local_irq_mask_now(INT_UDN_FIREWALL);
+}
+
+/*
+ * Per-cpu callback for hardwall_setup(): program this cpu's direction
+ * protection for every rectangle edge it sits on, then unmask the
+ * firewall interrupt.  "info" is the struct hardwall_info.  It is a
+ * bug to run this on a cpu that is on no edge of the rectangle.
+ */
+static void hardwall_setup_ipi_func(void *info)
+{
+	struct hardwall_info *rect = info;
+	int me = smp_processor_id();
+	int col = me % smp_width;
+	int row = me / smp_width;
+	int last_col = rect->ulhc_x + rect->width - 1;
+	int last_row = rect->ulhc_y + rect->height - 1;
+	int protect = 0;
+
+	if (col == rect->ulhc_x)
+		protect |= W_PROTECT;
+	if (col == last_col)
+		protect |= E_PROTECT;
+	if (row == rect->ulhc_y)
+		protect |= N_PROTECT;
+	if (row == last_row)
+		protect |= S_PROTECT;
+
+	BUG_ON(protect == 0);
+	__insn_mtspr(SPR_UDN_DIRECTION_PROTECT, protect);
+	enable_firewall_interrupts();
+}
+
+/*
+ * Collect every online cpu on the border of rectangle r and run
+ * hardwall_setup_ipi_func() on each of them, waiting for completion.
+ */
+static void hardwall_setup(struct hardwall_info *r)
+{
+	struct cpumask edge_cpus;
+	int origin = r->ulhc_y * smp_width + r->ulhc_x;
+	int to_bottom = (r->height - 1) * smp_width;
+	int to_right = r->width - 1;
+	int col, row;
+
+	cpumask_clear(&edge_cpus);
+
+	/* Top and bottom rows. */
+	for (col = 0; col < r->width; ++col) {
+		cpu_online_set(origin + col, &edge_cpus);
+		cpu_online_set(origin + col + to_bottom, &edge_cpus);
+	}
+
+	/* Left and right columns. */
+	for (row = 0; row < r->height; ++row) {
+		cpu_online_set(origin + row * smp_width, &edge_cpus);
+		cpu_online_set(origin + row * smp_width + to_right,
+			       &edge_cpus);
+	}
+
+	/* Then tell all the cpus to set up their protection SPR */
+	on_each_cpu_mask(&edge_cpus, hardwall_setup_ipi_func, r, 1);
+}
+
+/*
+ * Handle a UDN firewall trap taken on this cpu.
+ *
+ * Finds the rectangle containing this cpu, marks it as being torn
+ * down, and sends SIGILL/ILL_HARDWALL to every non-exiting task that
+ * is activated in it.  If teardown had already started, only a notice
+ * is logged.  In both cases the firewall interrupt is masked before
+ * returning.
+ *
+ * regs:      registers at the time of the trap.
+ * fault_num: not used by this function.
+ */
+void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
+{
+	struct hardwall_info *rect;
+	struct task_struct *p;
+	struct siginfo info;
+	int x, y;
+	int cpu = smp_processor_id();
+	int found_processes;
+	unsigned long flags;
+
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	irq_enter();
+
+	/* This tile trapped a network access; find the rectangle. */
+	x = cpu % smp_width;
+	y = cpu / smp_width;
+	spin_lock_irqsave(&hardwall_lock, flags);
+	list_for_each_entry(rect, &rectangles, list) {
+		if (contains(rect, x, y))
+			break;
+	}
+
+	/*
+	 * It shouldn't be possible not to find this cpu on the
+	 * rectangle list, since only cpus in rectangles get hardwalled.
+	 * The hardwall is only removed after the UDN is drained.
+	 */
+	BUG_ON(&rect->list == &rectangles);
+
+	/*
+	 * If we already started teardown on this hardwall, don't worry;
+	 * the abort signal has been sent and we are just waiting for things
+	 * to quiesce.
+	 */
+	if (rect->teardown_in_progress) {
+		pr_notice("cpu %d: detected hardwall violation %#lx"
+		       " while teardown already in progress\n",
+		       cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT));
+		goto done;
+	}
+
+	/*
+	 * Kill off any process that is activated in this rectangle.
+	 * We bypass security to deliver the signal, since it must be
+	 * one of the activated processes that generated the UDN
+	 * message that caused this trap, and all the activated
+	 * processes shared a single open file so are pretty tightly
+	 * bound together from a security point of view to begin with.
+	 */
+	rect->teardown_in_progress = 1;
+	wmb(); /* Ensure visibility of rectangle before notifying processes. */
+	pr_notice("cpu %d: detected hardwall violation %#lx...\n",
+	       cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT));
+
+	/*
+	 * Zero the whole siginfo before filling it in: only three fields
+	 * are set explicitly, and the remainder would otherwise carry
+	 * uninitialized kernel stack contents to the signalled tasks.
+	 */
+	memset(&info, 0, sizeof(info));
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code = ILL_HARDWALL;
+	found_processes = 0;
+	list_for_each_entry(p, &rect->task_head, thread.hardwall_list) {
+		BUG_ON(p->thread.hardwall != rect);
+		if (!(p->flags & PF_EXITING)) {
+			found_processes = 1;
+			pr_notice("hardwall: killing %d\n", p->pid);
+			do_send_sig_info(info.si_signo, &info, p, false);
+		}
+	}
+	if (!found_processes)
+		pr_notice("hardwall: no associated processes!\n");
+
+ done:
+	spin_unlock_irqrestore(&hardwall_lock, flags);
+
+	/*
+	 * We have to disable firewall interrupts now, or else when we
+	 * return from this handler, we will simply re-interrupt back to
+	 * it.  However, we can't clear the protection bits, since we
+	 * haven't yet drained the network, and that would allow packets
+	 * to cross out of the hardwall region.
+	 */
+	disable_firewall_interrupts();
+
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
+/*
+ * Allow access from user space to the UDN.
+ * Writes 1 to the *_SET_0 SPR of each UDN interrupt; the refill and
+ * catch-all ones are written only when CHIP_HAS_REV1_XDN() is zero.
+ * NOTE(review): "_SET_0" presumably selects protection level 0 (user);
+ * confirm against the SPR documentation.
+ */
+void grant_network_mpls(void)
+{
+ __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_0, 1);
+ __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_0, 1);
+ __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_0, 1);
+ __insn_mtspr(SPR_MPL_UDN_TIMER_SET_0, 1);
+#if !CHIP_HAS_REV1_XDN()
+ __insn_mtspr(SPR_MPL_UDN_REFILL_SET_0, 1);
+ __insn_mtspr(SPR_MPL_UDN_CA_SET_0, 1);
+#endif
+}
+
+/*
+ * Deny access from user space to the UDN.
+ * Mirror image of grant_network_mpls(): writes 1 to the *_SET_1 SPR of
+ * each UDN interrupt; the refill and catch-all ones are written only
+ * when CHIP_HAS_REV1_XDN() is zero.
+ * NOTE(review): "_SET_1" presumably selects protection level 1;
+ * confirm against the SPR documentation.
+ */
+void restrict_network_mpls(void)
+{
+ __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_1, 1);
+ __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_1, 1);
+ __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_1, 1);
+ __insn_mtspr(SPR_MPL_UDN_TIMER_SET_1, 1);
+#if !CHIP_HAS_REV1_XDN()
+ __insn_mtspr(SPR_MPL_UDN_REFILL_SET_1, 1);
+ __insn_mtspr(SPR_MPL_UDN_CA_SET_1, 1);
+#endif
+}
+
+
+/*
+ * Code to create, activate, deactivate, and destroy hardwall rectangles.
+ */
+
+/*
+ * Create a hardwall for the rectangle described by a user cpumask.
+ *
+ * size: number of bytes available at "bits".
+ * bits: user-space bitmap of the cpus in the rectangle.
+ *
+ * Returns the new hardwall_info, or an ERR_PTR():
+ *   -EINVAL  size exceeds PAGE_SIZE, a byte beyond sizeof(struct
+ *            cpumask) is nonzero, or setup_rectangle() rejected the mask
+ *   -EFAULT  the user bitmap could not be read
+ *   -ENOMEM  allocation failed
+ *   -EBUSY   the rectangle overlaps an existing one
+ */
+static struct hardwall_info *hardwall_create(
+	size_t size, const unsigned char __user *bits)
+{
+	struct hardwall_info *rect, *other;
+	struct cpumask mask;
+	unsigned long flags;
+	size_t off;
+	int rc;
+
+	/* Reject crazy sizes out of hand, a la sys_mbind(). */
+	if (size > PAGE_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	/* Copy whatever fits into a cpumask. */
+	if (copy_from_user(&mask, bits, min(sizeof(struct cpumask), size)))
+		return ERR_PTR(-EFAULT);
+
+	/* A short user mask leaves the tail of ours to be cleared. */
+	if (size < sizeof(struct cpumask))
+		memset((char *)&mask + size, 0, sizeof(struct cpumask) - size);
+
+	/*
+	 * A long user mask must be zero beyond what we copied; this loop
+	 * runs only when size exceeds sizeof(struct cpumask).  (We don't
+	 * try hard to be efficient when validating huge masks.)
+	 */
+	for (off = sizeof(struct cpumask); off < size; ++off) {
+		char c;
+
+		if (get_user(c, &bits[off]))
+			return ERR_PTR(-EFAULT);
+		if (c)
+			return ERR_PTR(-EINVAL);
+	}
+
+	/* Allocate a new rectangle optimistically. */
+	rect = kmalloc(sizeof(struct hardwall_info),
+		       GFP_KERNEL | __GFP_ZERO);
+	if (rect == NULL)
+		return ERR_PTR(-ENOMEM);
+	INIT_LIST_HEAD(&rect->task_head);
+
+	/* Compute the rectangle size and validate that it's plausible. */
+	rc = setup_rectangle(rect, &mask);
+	if (rc != 0)
+		goto fail;
+
+	/* Confirm it doesn't overlap and add it to the list. */
+	spin_lock_irqsave(&hardwall_lock, flags);
+	list_for_each_entry(other, &rectangles, list) {
+		if (overlaps(other, rect)) {
+			spin_unlock_irqrestore(&hardwall_lock, flags);
+			rc = -EBUSY;
+			goto fail;
+		}
+	}
+	list_add_tail(&rect->list, &rectangles);
+	spin_unlock_irqrestore(&hardwall_lock, flags);
+
+	/* Set up appropriate hardwalling on all affected cpus. */
+	hardwall_setup(rect);
+
+	/* Create a /proc/tile/hardwall entry. */
+	hardwall_add_proc(rect);
+
+	return rect;
+
+fail:
+	kfree(rect);
+	return ERR_PTR(rc);
+}
+
+/*
+ * Activate a given hardwall on this cpu for the current process.
+ *
+ * Returns 0 on success (including when already activated), or:
+ *   -ENODATA  rect is NULL
+ *   -EINVAL   the rectangle is being torn down, or this cpu is
+ *             outside it
+ *   -EPERM    the task is not bound to exactly one cpu
+ */
+static int hardwall_activate(struct hardwall_info *rect)
+{
+	struct task_struct *me = current;
+	struct thread_struct *thr = &me->thread;
+	unsigned long flags;
+	int cpu;
+
+	/* Require a rectangle. */
+	if (rect == NULL)
+		return -ENODATA;
+
+	/* Not allowed to activate a rectangle that is being torn down. */
+	if (rect->teardown_in_progress)
+		return -EINVAL;
+
+	/*
+	 * Get our affinity; if we're not bound to this tile uniquely,
+	 * we can't access the network registers.
+	 */
+	if (cpumask_weight(&me->cpus_allowed) != 1)
+		return -EPERM;
+
+	/* Make sure we are bound to a cpu in this rectangle. */
+	cpu = smp_processor_id();
+	BUG_ON(cpumask_first(&me->cpus_allowed) != cpu);
+	if (!contains(rect, cpu_x(cpu), cpu_y(cpu)))
+		return -EINVAL;
+
+	/* If we are already bound to this hardwall, it's a no-op. */
+	if (thr->hardwall) {
+		BUG_ON(thr->hardwall != rect);
+		return 0;
+	}
+
+	/* Success!  This process gets to use the user networks on this cpu. */
+	thr->hardwall = rect;
+	spin_lock_irqsave(&hardwall_lock, flags);
+	list_add(&thr->hardwall_list, &rect->task_head);
+	spin_unlock_irqrestore(&hardwall_lock, flags);
+	grant_network_mpls();
+	printk(KERN_DEBUG "Pid %d (%s) activated for hardwall: cpu %d\n",
+	       me->pid, me->comm, cpu);
+	return 0;
+}
+
+/*
+ * Deactivate a task's hardwall.  Must hold hardwall_lock.
+ * This method may be called from free_task(), so we don't want to
+ * rely on too many fields of struct task_struct still being valid.
+ * We assume the cpus_allowed, pid, and comm fields are still valid.
+ *
+ * Clears the task's hardwall pointer and unlinks it from the
+ * rectangle's task list; network access is revoked here only when
+ * the task is the one currently running.
+ */
+static void _hardwall_deactivate(struct task_struct *task)
+{
+	struct thread_struct *thr = &task->thread;
+	unsigned int ncpus = cpumask_weight(&task->cpus_allowed);
+
+	if (ncpus != 1) {
+		pr_err("pid %d (%s) releasing networks with"
+		       " an affinity mask containing %d cpus!\n",
+		       task->pid, task->comm, ncpus);
+		BUG();
+	}
+
+	BUG_ON(thr->hardwall == NULL);
+	thr->hardwall = NULL;
+	list_del(&thr->hardwall_list);
+
+	if (task == current)
+		restrict_network_mpls();
+}
+
+/*
+ * Deactivate a task's hardwall, taking hardwall_lock.
+ * Returns 0 on success, or -EINVAL if the task had no hardwall.
+ */
+int hardwall_deactivate(struct task_struct *task)
+{
+	unsigned long flags;
+	int was_active = 0;
+
+	spin_lock_irqsave(&hardwall_lock, flags);
+	if (task->thread.hardwall != NULL) {
+		was_active = 1;
+		_hardwall_deactivate(task);
+	}
+	spin_unlock_irqrestore(&hardwall_lock, flags);
+
+	if (!was_active)
+		return -EINVAL;
+
+	printk(KERN_DEBUG "Pid %d (%s) deactivated for hardwall: cpu %d\n",
+	       task->pid, task->comm, smp_processor_id());
+	return 0;
+}
+
+/*
+ * Stop a UDN switch before draining the network.
+ * Per-cpu callback (the argument is unused).  Compiles to an empty
+ * function when CHIP_HAS_REV1_XDN() is nonzero.
+ */
+static void stop_udn_switch(void *ignored)
+{
+#if !CHIP_HAS_REV1_XDN()
+ /* Freeze the switch and the demux. */
+ __insn_mtspr(SPR_UDN_SP_FREEZE,
+ SPR_UDN_SP_FREEZE__SP_FRZ_MASK |
+ SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK |
+ SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK);
+#endif
+}
+
+/*
+ * Drain all the state from a stopped switch.
+ * Per-cpu callback (the argument is unused).  Compiles to an empty
+ * function when CHIP_HAS_REV1_XDN() is nonzero.  Reads and discards
+ * the contents of the switch-point fifos, the from-tile fifo, the four
+ * demux queues and the catch-all queue, then resets the demux logic
+ * and writes an idle state to each switch point.
+ */
+static void drain_udn_switch(void *ignored)
+{
+#if !CHIP_HAS_REV1_XDN()
+ int i;
+ int from_tile_words, ca_count;
+
+ /* Empty out the 5 switch point fifos. */
+ for (i = 0; i < 5; i++) {
+ int words, j;
+ __insn_mtspr(SPR_UDN_SP_FIFO_SEL, i);
+ /* The low four bits of the state are used as the word count. */
+ words = __insn_mfspr(SPR_UDN_SP_STATE) & 0xF;
+ for (j = 0; j < words; j++)
+ (void) __insn_mfspr(SPR_UDN_SP_FIFO_DATA);
+ BUG_ON((__insn_mfspr(SPR_UDN_SP_STATE) & 0xF) != 0);
+ }
+
+ /* Dump out the 3 word fifo at top. */
+ from_tile_words = (__insn_mfspr(SPR_UDN_DEMUX_STATUS) >> 10) & 0x3;
+ for (i = 0; i < from_tile_words; i++)
+ (void) __insn_mfspr(SPR_UDN_DEMUX_WRITE_FIFO);
+
+ /* Empty out demuxes. */
+ while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0))
+ (void) __tile_udn0_receive();
+ while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1))
+ (void) __tile_udn1_receive();
+ while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2))
+ (void) __tile_udn2_receive();
+ while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3))
+ (void) __tile_udn3_receive();
+ BUG_ON((__insn_mfspr(SPR_UDN_DATA_AVAIL) & 0xF) != 0);
+
+ /* Empty out catch all. */
+ ca_count = __insn_mfspr(SPR_UDN_DEMUX_CA_COUNT);
+ for (i = 0; i < ca_count; i++)
+ (void) __insn_mfspr(SPR_UDN_CA_DATA);
+ BUG_ON(__insn_mfspr(SPR_UDN_DEMUX_CA_COUNT) != 0);
+
+ /* Clear demux logic. */
+ __insn_mtspr(SPR_UDN_DEMUX_CTL, 1);
+
+ /*
+ * Write switch state; experimentation indicates that 0xc3000
+ * is an idle switch point.
+ */
+ for (i = 0; i < 5; i++) {
+ __insn_mtspr(SPR_UDN_SP_FIFO_SEL, i);
+ __insn_mtspr(SPR_UDN_SP_STATE, 0xc3000);
+ }
+#endif
+}
+
+/*
+ * Reset random UDN state registers at boot up and during hardwall teardown.
+ * Does nothing when UDN access was disabled with "noudn".  The tile
+ * coordinate, demux tag, refill, queue/fifo select and freeze registers
+ * are touched only when CHIP_HAS_REV1_XDN() is zero.
+ */
+void reset_network_state(void)
+{
+#if !CHIP_HAS_REV1_XDN()
+ /* Reset UDN coordinates to their standard value */
+ unsigned int cpu = smp_processor_id();
+ unsigned int x = cpu % smp_width;
+ unsigned int y = cpu / smp_width;
+#endif
+
+ if (udn_disabled)
+ return;
+
+#if !CHIP_HAS_REV1_XDN()
+ /* x is placed at bit 18 and y at bit 7 of the coordinate register. */
+ __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7));
+
+ /* Set demux tags to predefined values and enable them. */
+ __insn_mtspr(SPR_UDN_TAG_VALID, 0xf);
+ __insn_mtspr(SPR_UDN_TAG_0, (1 << 0));
+ __insn_mtspr(SPR_UDN_TAG_1, (1 << 1));
+ __insn_mtspr(SPR_UDN_TAG_2, (1 << 2));
+ __insn_mtspr(SPR_UDN_TAG_3, (1 << 3));
+#endif
+
+ /* Clear out other random registers so we have a clean slate. */
+ __insn_mtspr(SPR_UDN_AVAIL_EN, 0);
+ __insn_mtspr(SPR_UDN_DEADLOCK_TIMEOUT, 0);
+#if !CHIP_HAS_REV1_XDN()
+ __insn_mtspr(SPR_UDN_REFILL_EN, 0);
+ __insn_mtspr(SPR_UDN_DEMUX_QUEUE_SEL, 0);
+ __insn_mtspr(SPR_UDN_SP_FIFO_SEL, 0);
+#endif
+
+ /* Start the switch and demux. */
+#if !CHIP_HAS_REV1_XDN()
+ __insn_mtspr(SPR_UDN_SP_FREEZE, 0);
+#endif
+}
+
+/*
+ * Restart a UDN switch after draining.
+ * Per-cpu callback (the argument is unused): resets the network state,
+ * clears this cpu's direction-protect bits and masks the firewall
+ * interrupt.
+ */
+static void restart_udn_switch(void *ignored)
+{
+ reset_network_state();
+
+ /* Remove the hardwall protection and disable firewall interrupts. */
+ __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, 0);
+ disable_firewall_interrupts();
+}
+
+/*
+ * Build a struct cpumask containing every online cpu inside the
+ * bounding rectangle r.  "result" is cleared first.
+ */
+static void fill_mask(struct hardwall_info *r, struct cpumask *result)
+{
+	int row, col;
+
+	cpumask_clear(result);
+
+	for (row = 0; row < r->height; ++row) {
+		/* Cpu number of the leftmost tile in this row. */
+		int first = (r->ulhc_y + row) * smp_width + r->ulhc_x;
+
+		for (col = 0; col < r->width; ++col)
+			cpu_online_set(first + col, result);
+	}
+}
+
+/*
+ * Last reference to a hardwall is gone, so clear the network.
+ *
+ * Deactivates any tasks still attached to the rectangle, then stops,
+ * drains and restarts the UDN switch on every cpu in it, removes the
+ * rectangle's /proc/tile/hardwall entry, and finally unlinks and frees
+ * "rect". A NULL "rect" (no hardwall was created on the file) is a no-op.
+ */
+static void hardwall_destroy(struct hardwall_info *rect)
+{
+	struct task_struct *task, *tmp;
+	unsigned long flags;
+	struct cpumask mask;
+
+	/* Make sure this file actually represents a rectangle. */
+	if (rect == NULL)
+		return;
+
+	/*
+	 * Deactivate any remaining tasks.  It's possible to race with
+	 * some other thread that is exiting and hasn't yet called
+	 * deactivate (when freeing its thread_info), so we carefully
+	 * deactivate any remaining tasks before freeing the
+	 * hardwall_info object itself.
+	 *
+	 * Use the _safe iterator, as hardwall_flush() does: the list is
+	 * required to be empty afterwards (see the BUG_ON below), so each
+	 * task is unlinked from it while we are still walking it.
+	 */
+	spin_lock_irqsave(&hardwall_lock, flags);
+	list_for_each_entry_safe(task, tmp, &rect->task_head,
+				 thread.hardwall_list)
+		_hardwall_deactivate(task);
+	spin_unlock_irqrestore(&hardwall_lock, flags);
+
+	/* Drain the UDN. */
+	printk(KERN_DEBUG "Clearing hardwall rectangle %dx%d %d,%d\n",
+	       rect->width, rect->height, rect->ulhc_x, rect->ulhc_y);
+	fill_mask(rect, &mask);
+	on_each_cpu_mask(&mask, stop_udn_switch, NULL, 1);
+	on_each_cpu_mask(&mask, drain_udn_switch, NULL, 1);
+
+	/* Restart switch and disable firewall. */
+	on_each_cpu_mask(&mask, restart_udn_switch, NULL, 1);
+
+	/* Remove the /proc/tile/hardwall entry. */
+	hardwall_remove_proc(rect);
+
+	/* Now free the rectangle from the list. */
+	spin_lock_irqsave(&hardwall_lock, flags);
+	BUG_ON(!list_empty(&rect->task_head));
+	list_del(&rect->list);
+	spin_unlock_irqrestore(&hardwall_lock, flags);
+	kfree(rect);
+}
+
+
+/*
+ * seq_file show routine for a hardwall's /proc entry: emits the cpu list
+ * of the rectangle stored in sf->private, followed by a newline.
+ */
+static int hardwall_proc_show(struct seq_file *sf, void *v)
+{
+	char line[256];
+	struct hardwall_info *info = sf->private;
+	int len;
+
+	/* Format the cpu list, then put a newline right after it. */
+	len = cpulist_scnprintf(line, sizeof(line), &info->cpumask);
+	line[len] = '\n';
+	seq_write(sf, line, len + 1);
+
+	return 0;
+}
+
+/*
+ * Open a hardwall /proc entry as a single-record seq_file, handing the
+ * data pointer stored in the proc entry to hardwall_proc_show().
+ */
+static int hardwall_proc_open(struct inode *inode,
+			      struct file *file)
+{
+	void *info = PDE(inode)->data;
+
+	return single_open(file, hardwall_proc_show, info);
+}
+
+/*
+ * File operations for a hardwall's /proc entry: opened through
+ * hardwall_proc_open(), with the seq_file helpers doing the rest.
+ */
+static const struct file_operations hardwall_proc_fops = {
+ .open = hardwall_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * Create the proc entry for "rect" under hardwall_proc_dir. The entry is
+ * named after the decimal rectangle id, has mode 0444, and carries "rect"
+ * as its data pointer.
+ */
+static void hardwall_add_proc(struct hardwall_info *rect)
+{
+	char name[64];
+
+	snprintf(name, sizeof(name), "%d", rect->id);
+	proc_create_data(name, 0444, hardwall_proc_dir,
+			 &hardwall_proc_fops, rect);
+}
+
+/*
+ * Remove the proc entry for "rect" from hardwall_proc_dir, looking it up
+ * by the same decimal-id name that hardwall_add_proc() uses.
+ */
+static void hardwall_remove_proc(struct hardwall_info *rect)
+{
+	char name[64];
+
+	snprintf(name, sizeof(name), "%d", rect->id);
+	remove_proc_entry(name, hardwall_proc_dir);
+}
+
+/*
+ * Write the id of the hardwall "task" is attached to, as "<id>\n", into
+ * "buffer" and return the number of characters written. Returns 0 and
+ * leaves "buffer" untouched if the task has no hardwall.
+ */
+int proc_pid_hardwall(struct task_struct *task, char *buffer)
+{
+	struct hardwall_info *info = task->thread.hardwall;
+
+	if (info == NULL)
+		return 0;
+
+	return sprintf(buffer, "%d\n", info->id);
+}
+
+/*
+ * Create the "hardwall" directory under "root" and remember it in
+ * hardwall_proc_dir. Skipped entirely when the UDN is disabled.
+ */
+void proc_tile_hardwall_init(struct proc_dir_entry *root)
+{
+	if (udn_disabled)
+		return;
+
+	hardwall_proc_dir = proc_mkdir("hardwall", root);
+}
+
+
+/*
+ * Character device support via ioctl/close.
+ */
+
+/*
+ * ioctl handler for the hardwall device.
+ *
+ * "a" is the ioctl command and "b" its argument. Commands whose type is
+ * not HARDWALL_IOCTL_BASE, or whose number is not one of the four handled
+ * below, fail with -EINVAL.
+ */
+static long hardwall_ioctl(struct file *file, unsigned int a, unsigned long b)
+{
+	struct hardwall_info *info = file->private_data;
+	unsigned int nr;
+
+	if (_IOC_TYPE(a) != HARDWALL_IOCTL_BASE)
+		return -EINVAL;
+
+	nr = _IOC_NR(a);
+
+	if (nr == _HARDWALL_CREATE) {
+		/* At most one hardwall may be created per open file. */
+		if (udn_disabled)
+			return -ENOSYS;
+		if (info != NULL)
+			return -EALREADY;
+
+		/* The ioctl size field carries the size of the user data. */
+		info = hardwall_create(_IOC_SIZE(a),
+				       (const unsigned char __user *)b);
+		if (IS_ERR(info))
+			return PTR_ERR(info);
+
+		file->private_data = info;
+		return 0;
+	}
+
+	if (nr == _HARDWALL_ACTIVATE)
+		return hardwall_activate(info);
+
+	if (nr == _HARDWALL_DEACTIVATE) {
+		/* Only the hardwall the caller is attached to may be left. */
+		if (current->thread.hardwall != info)
+			return -EINVAL;
+		return hardwall_deactivate(current);
+	}
+
+	if (nr == _HARDWALL_GET_ID) {
+		if (info == NULL)
+			return -EINVAL;
+		return info->id;
+	}
+
+	return -EINVAL;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat ioctl entry point: converts the argument with compat_ptr() and
+ * forwards everything to hardwall_ioctl().
+ */
+static long hardwall_compat_ioctl(struct file *file,
+				  unsigned int a, unsigned long b)
+{
+	/* Sign-extend the argument so it can be used as a pointer. */
+	unsigned long arg = (unsigned long)compat_ptr(b);
+
+	return hardwall_ioctl(file, a, arg);
+}
+#endif
+
+/*
+ * The user process closed the file; revoke access to user networks.
+ *
+ * Deactivates every task on the hardwall whose files pointer is either
+ * "owner" or NULL. Always returns 0.
+ */
+static int hardwall_flush(struct file *file, fl_owner_t owner)
+{
+	struct hardwall_info *info = file->private_data;
+	struct task_struct *pos, *next;
+	unsigned long irqflags;
+
+	/* No hardwall was created on this file, so nothing to revoke. */
+	if (info == NULL)
+		return 0;
+
+	/*
+	 * NOTE: if multiple threads are activated on this hardwall
+	 * file, the other threads will continue having access to the
+	 * UDN until they are context-switched out and back in again.
+	 *
+	 * NOTE: A NULL files pointer means the task is being torn
+	 * down, so in that case we also deactivate it.
+	 */
+	spin_lock_irqsave(&hardwall_lock, irqflags);
+	list_for_each_entry_safe(pos, next, &info->task_head,
+				 thread.hardwall_list) {
+		if (pos->files != owner && pos->files != NULL)
+			continue;
+		_hardwall_deactivate(pos);
+	}
+	spin_unlock_irqrestore(&hardwall_lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * This hardwall is gone, so destroy it. The file's private_data (NULL if
+ * no hardwall was ever created on it) is handed to hardwall_destroy().
+ */
+static int hardwall_release(struct inode *inode, struct file *file)
+{
+	struct hardwall_info *info = file->private_data;
+
+	hardwall_destroy(info);
+	return 0;
+}
+
+/*
+ * File operations for the hardwall character device. All control goes
+ * through ioctl; flush and release handle descriptors being closed.
+ */
+static const struct file_operations dev_hardwall_fops = {
+ .open = nonseekable_open,
+ .unlocked_ioctl = hardwall_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = hardwall_compat_ioctl,
+#endif
+ .flush = hardwall_flush,
+ .release = hardwall_release,
+};
+
+static struct cdev hardwall_dev;
+
+/*
+ * Register the hardwall character device.
+ *
+ * Allocates a one-minor chrdev region named "hardwall" and adds the cdev
+ * backed by dev_hardwall_fops. Returns 0 on success, or the negative
+ * error from alloc_chrdev_region() or cdev_add(). If cdev_add() fails,
+ * the region is released again so that it is not leaked.
+ */
+static int __init dev_hardwall_init(void)
+{
+	int rc;
+	dev_t dev;
+
+	rc = alloc_chrdev_region(&dev, 0, 1, "hardwall");
+	if (rc < 0)
+		return rc;
+
+	cdev_init(&hardwall_dev, &dev_hardwall_fops);
+	rc = cdev_add(&hardwall_dev, dev, 1);
+	if (rc < 0) {
+		/* Give back the device number we just allocated. */
+		unregister_chrdev_region(dev, 1);
+		return rc;
+	}
+
+	return 0;
+}
+late_initcall(dev_hardwall_init);
diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S
new file mode 100644
index 00000000..1a39b7c1
--- /dev/null
+++ b/arch/tile/kernel/head_32.S
@@ -0,0 +1,184 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE startup code.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/thread_info.h>
+#include <asm/processor.h>
+#include <asm/asm-offsets.h>
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+#include <arch/spr_def.h>
+
+/*
+ * This module contains the entry code for kernel images. It performs the
+ * minimal setup needed to call the generic C routines.
+ */
+
+ __HEAD
+ENTRY(_start)
+ /* Notify the hypervisor of what version of the API we want */
+ {
+ movei r1, TILE_CHIP
+ movei r2, TILE_CHIP_REV
+ }
+ {
+ moveli r0, _HV_VERSION
+ jal hv_init
+ }
+ /* Get a reasonable default ASID in r0 */
+ {
+ move r0, zero
+ jal hv_inquire_asid
+ }
+ /* Install the default page table */
+ {
+ moveli r6, lo16(swapper_pgprot - PAGE_OFFSET)
+ move r4, r0 /* use starting ASID of range for this page table */
+ }
+ {
+ moveli r0, lo16(swapper_pg_dir - PAGE_OFFSET)
+ auli r6, r6, ha16(swapper_pgprot - PAGE_OFFSET)
+ }
+ {
+ lw r2, r6
+ addi r6, r6, 4
+ }
+ {
+ lw r3, r6
+ auli r0, r0, ha16(swapper_pg_dir - PAGE_OFFSET)
+ }
+ {
+ inv r6
+ move r1, zero /* high 32 bits of CPA is zero */
+ }
+ {
+ moveli lr, lo16(1f)
+ move r5, zero
+ }
+ {
+ auli lr, lr, ha16(1f)
+ j hv_install_context
+ }
+1:
+
+ /* Get our processor number and save it away in SAVE_K_0. */
+ jal hv_inquire_topology
+ mulll_uu r4, r1, r2 /* r1 == y, r2 == width */
+ add r4, r4, r0 /* r0 == x, so r4 == cpu == y*width + x */
+
+#ifdef CONFIG_SMP
+ /*
+ * Load up our per-cpu offset. When the first (master) tile
+ * boots, this value is still zero, so we will load boot_pc
+ * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
+ * The master tile initializes the per-cpu offset array, so that
+ * when subsequent (secondary) tiles boot, they will instead load
+ * from their per-cpu versions of boot_sp and boot_pc.
+ */
+ moveli r5, lo16(__per_cpu_offset)
+ auli r5, r5, ha16(__per_cpu_offset)
+ s2a r5, r4, r5
+ lw r5, r5
+ bnz r5, 1f
+
+ /*
+ * Save the width and height to the smp_topology variable
+ * for later use.
+ */
+ moveli r0, lo16(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
+ auli r0, r0, ha16(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
+ {
+ sw r0, r2
+ addi r0, r0, (HV_TOPOLOGY_HEIGHT_OFFSET - HV_TOPOLOGY_WIDTH_OFFSET)
+ }
+ sw r0, r3
+1:
+#else
+ move r5, zero
+#endif
+
+ /* Load and go with the correct pc and sp. */
+ {
+ addli r1, r5, lo16(boot_sp)
+ addli r0, r5, lo16(boot_pc)
+ }
+ {
+ auli r1, r1, ha16(boot_sp)
+ auli r0, r0, ha16(boot_pc)
+ }
+ lw r0, r0
+ lw sp, r1
+ or r4, sp, r4
+ mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */
+ addi sp, sp, -STACK_TOP_DELTA
+ {
+ move lr, zero /* stop backtraces in the called function */
+ jr r0
+ }
+ ENDPROC(_start)
+
+__PAGE_ALIGNED_BSS
+ .align PAGE_SIZE
+ENTRY(empty_zero_page)
+ .fill PAGE_SIZE,1,0
+ END(empty_zero_page)
+
+ /*
+ * Emit one page table entry as two 32-bit words. The first word holds
+ * the common page/dirty/present/accessed flags and the cache mode; the
+ * second holds \bits1 and the PFN for \cpa, both expressed relative to
+ * bit 32. When \no_org is zero (the default) the entry is first placed
+ * at the swapper_pg_dir slot selected by HV_L1_INDEX(\va).
+ */
+ .macro PTE va, cpa, bits1, no_org=0
+ .ifeq \no_org
+ .org swapper_pg_dir + HV_L1_INDEX(\va) * HV_PTE_SIZE
+ .endif
+ .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \
+ (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE)
+ .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << (HV_PTE_INDEX_PFN - 32))
+ .endm
+
+__PAGE_ALIGNED_DATA
+ .align PAGE_SIZE
+ENTRY(swapper_pg_dir)
+ /*
+ * All data pages from PAGE_OFFSET to MEM_USER_INTRPT are mapped as
+ * VA = PA + PAGE_OFFSET. We remap things with more precise access
+ * permissions and more respect for size of RAM later.
+ */
+ .set addr, 0
+ .rept (MEM_USER_INTRPT - PAGE_OFFSET) >> PGDIR_SHIFT
+ PTE addr + PAGE_OFFSET, addr, (1 << (HV_PTE_INDEX_READABLE - 32)) | \
+ (1 << (HV_PTE_INDEX_WRITABLE - 32))
+ .set addr, addr + PGDIR_SIZE
+ .endr
+
+ /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */
+ PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \
+ (1 << (HV_PTE_INDEX_EXECUTABLE - 32))
+ .org swapper_pg_dir + HV_L1_SIZE
+ END(swapper_pg_dir)
+
+ /*
+ * Isolate swapper_pgprot to its own cache line, since each cpu
+ * starting up will read it using VA-is-PA and local homing.
+ * This would otherwise likely conflict with other data on the cache
+ * line, once we have set its permanent home in the page tables.
+ */
+ __INITDATA
+ .align CHIP_L2_LINE_SIZE()
+ENTRY(swapper_pgprot)
+ PTE 0, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \
+ (1 << (HV_PTE_INDEX_WRITABLE - 32)), 1
+ .align CHIP_L2_LINE_SIZE()
+ END(swapper_pgprot)
diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S
new file mode 100644
index 00000000..6bc3a932
--- /dev/null
+++ b/arch/tile/kernel/head_64.S
@@ -0,0 +1,269 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE startup code.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/thread_info.h>
+#include <asm/processor.h>
+#include <asm/asm-offsets.h>
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+#include <arch/spr_def.h>
+
+/*
+ * This module contains the entry code for kernel images. It performs the
+ * minimal setup needed to call the generic C routines.
+ */
+
+ __HEAD
+ENTRY(_start)
+ /* Notify the hypervisor of what version of the API we want */
+ {
+ movei r1, TILE_CHIP
+ movei r2, TILE_CHIP_REV
+ }
+ {
+ moveli r0, _HV_VERSION
+ jal hv_init
+ }
+ /* Get a reasonable default ASID in r0 */
+ {
+ move r0, zero
+ jal hv_inquire_asid
+ }
+
+ /*
+ * Install the default page table. The relocation required to
+ * statically define the table is a bit too complex, so we have
+ * to plug in the pointer from the L0 to the L1 table by hand.
+ * We only do this on the first cpu to boot, though, since the
+ * other CPUs should see a properly-constructed page table.
+ */
+ {
+ v4int_l r2, zero, r0 /* ASID for hv_install_context */
+ moveli r4, hw1_last(swapper_pgprot - PAGE_OFFSET)
+ }
+ {
+ shl16insli r4, r4, hw0(swapper_pgprot - PAGE_OFFSET)
+ }
+ {
+ ld r1, r4 /* access_pte for hv_install_context */
+ }
+ {
+ moveli r0, hw1_last(.Lsv_data_pmd - PAGE_OFFSET)
+ moveli r6, hw1_last(temp_data_pmd - PAGE_OFFSET)
+ }
+ {
+ /* After initializing swapper_pgprot, HV_PTE_GLOBAL is set. */
+ bfextu r7, r1, HV_PTE_INDEX_GLOBAL, HV_PTE_INDEX_GLOBAL
+ inv r4
+ }
+ bnez r7, .Lno_write
+ {
+ shl16insli r0, r0, hw0(.Lsv_data_pmd - PAGE_OFFSET)
+ shl16insli r6, r6, hw0(temp_data_pmd - PAGE_OFFSET)
+ }
+ {
+ /* Cut off the low bits of the PT address. */
+ shrui r6, r6, HV_LOG2_PAGE_TABLE_ALIGN
+ /* Start with our access pte. */
+ move r5, r1
+ }
+ {
+ /* Stuff the address into the page table pointer slot of the PTE. */
+ bfins r5, r6, HV_PTE_INDEX_PTFN, \
+ HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1
+ }
+ {
+ /* Store the L0 data PTE. */
+ st r0, r5
+ addli r6, r6, (temp_code_pmd - temp_data_pmd) >> \
+ HV_LOG2_PAGE_TABLE_ALIGN
+ }
+ {
+ addli r0, r0, .Lsv_code_pmd - .Lsv_data_pmd
+ bfins r5, r6, HV_PTE_INDEX_PTFN, \
+ HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1
+ }
+ /* Store the L0 code PTE. */
+ st r0, r5
+
+.Lno_write:
+ moveli lr, hw2_last(1f)
+ {
+ shl16insli lr, lr, hw1(1f)
+ moveli r0, hw1_last(swapper_pg_dir - PAGE_OFFSET)
+ }
+ {
+ shl16insli lr, lr, hw0(1f)
+ shl16insli r0, r0, hw0(swapper_pg_dir - PAGE_OFFSET)
+ }
+ {
+ move r3, zero
+ j hv_install_context
+ }
+1:
+
+ /* Install the interrupt base. */
+ moveli r0, hw2_last(MEM_SV_START)
+ shl16insli r0, r0, hw1(MEM_SV_START)
+ shl16insli r0, r0, hw0(MEM_SV_START)
+ mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0
+
+ /*
+ * Get our processor number and save it away in SAVE_K_0.
+ * Extract stuff from the topology structure: r4 = y, r6 = x,
+ * r5 = width. FIXME: consider whether we want to just make these
+ * 64-bit values (and if so fix smp_topology write below, too).
+ */
+ jal hv_inquire_topology
+ {
+ v4int_l r5, zero, r1 /* r5 = width */
+ shrui r4, r0, 32 /* r4 = y */
+ }
+ {
+ v4int_l r6, zero, r0 /* r6 = x */
+ mul_lu_lu r4, r4, r5
+ }
+ {
+ add r4, r4, r6 /* r4 == cpu == y*width + x */
+ }
+
+#ifdef CONFIG_SMP
+ /*
+ * Load up our per-cpu offset. When the first (master) tile
+ * boots, this value is still zero, so we will load boot_pc
+ * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
+ * The master tile initializes the per-cpu offset array, so that
+ * when subsequent (secondary) tiles boot, they will instead load
+ * from their per-cpu versions of boot_sp and boot_pc.
+ */
+ moveli r5, hw2_last(__per_cpu_offset)
+ shl16insli r5, r5, hw1(__per_cpu_offset)
+ shl16insli r5, r5, hw0(__per_cpu_offset)
+ shl3add r5, r4, r5
+ ld r5, r5
+ bnez r5, 1f
+
+ /*
+ * Save the width and height to the smp_topology variable
+ * for later use.
+ */
+ moveli r0, hw2_last(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
+ shl16insli r0, r0, hw1(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
+ shl16insli r0, r0, hw0(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
+ st r0, r1
+1:
+#else
+ move r5, zero
+#endif
+
+ /* Load and go with the correct pc and sp. */
+ {
+ moveli r1, hw2_last(boot_sp)
+ moveli r0, hw2_last(boot_pc)
+ }
+ {
+ shl16insli r1, r1, hw1(boot_sp)
+ shl16insli r0, r0, hw1(boot_pc)
+ }
+ {
+ shl16insli r1, r1, hw0(boot_sp)
+ shl16insli r0, r0, hw0(boot_pc)
+ }
+ {
+ add r1, r1, r5
+ add r0, r0, r5
+ }
+ ld r0, r0
+ ld sp, r1
+ or r4, sp, r4
+ mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */
+ addi sp, sp, -STACK_TOP_DELTA
+ {
+ move lr, zero /* stop backtraces in the called function */
+ jr r0
+ }
+ ENDPROC(_start)
+
+__PAGE_ALIGNED_BSS
+ .align PAGE_SIZE
+ENTRY(empty_zero_page)
+ .fill PAGE_SIZE,1,0
+ END(empty_zero_page)
+
+ /*
+ * Emit one 64-bit page table entry for client physical address \cpa:
+ * the common page/dirty/present/accessed/global flags and cache mode,
+ * OR'ed with the caller's access bits in \bits1 and the PFN of \cpa.
+ */
+ .macro PTE cpa, bits1
+ .quad HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |\
+ HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |\
+ (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN)
+ .endm
+
+__PAGE_ALIGNED_DATA
+ .align PAGE_SIZE
+ENTRY(swapper_pg_dir)
+ .org swapper_pg_dir + HV_L0_INDEX(PAGE_OFFSET) * HV_PTE_SIZE
+.Lsv_data_pmd:
+ .quad 0 /* PTE temp_data_pmd - PAGE_OFFSET, 0 */
+ .org swapper_pg_dir + HV_L0_INDEX(MEM_SV_START) * HV_PTE_SIZE
+.Lsv_code_pmd:
+ .quad 0 /* PTE temp_code_pmd - PAGE_OFFSET, 0 */
+ .org swapper_pg_dir + HV_L0_SIZE
+ END(swapper_pg_dir)
+
+ .align HV_PAGE_TABLE_ALIGN
+ENTRY(temp_data_pmd)
+ /*
+ * We fill the PAGE_OFFSET pmd with huge pages with
+ * VA = PA + PAGE_OFFSET. We remap things with more precise access
+ * permissions later.
+ */
+ .set addr, 0
+ .rept HV_L1_ENTRIES
+ PTE addr, HV_PTE_READABLE | HV_PTE_WRITABLE
+ .set addr, addr + HV_PAGE_SIZE_LARGE
+ .endr
+ .org temp_data_pmd + HV_L1_SIZE
+ END(temp_data_pmd)
+
+ .align HV_PAGE_TABLE_ALIGN
+ENTRY(temp_code_pmd)
+ /*
+ * We fill the MEM_SV_START pmd with huge pages with
+ * VA = PA + MEM_SV_START. We remap things with more precise access
+ * permissions later.
+ */
+ .set addr, 0
+ .rept HV_L1_ENTRIES
+ PTE addr, HV_PTE_READABLE | HV_PTE_EXECUTABLE
+ .set addr, addr + HV_PAGE_SIZE_LARGE
+ .endr
+ .org temp_code_pmd + HV_L1_SIZE
+ END(temp_code_pmd)
+
+ /*
+ * Isolate swapper_pgprot to its own cache line, since each cpu
+ * starting up will read it using VA-is-PA and local homing.
+ * This would otherwise likely conflict with other data on the cache
+ * line, once we have set its permanent home in the page tables.
+ */
+ __INITDATA
+ .align CHIP_L2_LINE_SIZE()
+ENTRY(swapper_pgprot)
+ .quad HV_PTE_PRESENT | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE)
+ .align CHIP_L2_LINE_SIZE()
+ END(swapper_pgprot)
diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds
new file mode 100644
index 00000000..2b7cd0a6
--- /dev/null
+++ b/arch/tile/kernel/hvglue.lds
@@ -0,0 +1,58 @@
+/* Hypervisor call vector addresses; see <hv/hypervisor.h> */
+hv_init = TEXT_OFFSET + 0x10020;
+hv_install_context = TEXT_OFFSET + 0x10040;
+hv_sysconf = TEXT_OFFSET + 0x10060;
+hv_get_rtc = TEXT_OFFSET + 0x10080;
+hv_set_rtc = TEXT_OFFSET + 0x100a0;
+hv_flush_asid = TEXT_OFFSET + 0x100c0;
+hv_flush_page = TEXT_OFFSET + 0x100e0;
+hv_flush_pages = TEXT_OFFSET + 0x10100;
+hv_restart = TEXT_OFFSET + 0x10120;
+hv_halt = TEXT_OFFSET + 0x10140;
+hv_power_off = TEXT_OFFSET + 0x10160;
+hv_inquire_physical = TEXT_OFFSET + 0x10180;
+hv_inquire_memory_controller = TEXT_OFFSET + 0x101a0;
+hv_inquire_virtual = TEXT_OFFSET + 0x101c0;
+hv_inquire_asid = TEXT_OFFSET + 0x101e0;
+hv_nanosleep = TEXT_OFFSET + 0x10200;
+hv_console_read_if_ready = TEXT_OFFSET + 0x10220;
+hv_console_write = TEXT_OFFSET + 0x10240;
+hv_downcall_dispatch = TEXT_OFFSET + 0x10260;
+hv_inquire_topology = TEXT_OFFSET + 0x10280;
+hv_fs_findfile = TEXT_OFFSET + 0x102a0;
+hv_fs_fstat = TEXT_OFFSET + 0x102c0;
+hv_fs_pread = TEXT_OFFSET + 0x102e0;
+hv_physaddr_read64 = TEXT_OFFSET + 0x10300;
+hv_physaddr_write64 = TEXT_OFFSET + 0x10320;
+hv_get_command_line = TEXT_OFFSET + 0x10340;
+hv_set_caching = TEXT_OFFSET + 0x10360;
+hv_bzero_page = TEXT_OFFSET + 0x10380;
+hv_register_message_state = TEXT_OFFSET + 0x103a0;
+hv_send_message = TEXT_OFFSET + 0x103c0;
+hv_receive_message = TEXT_OFFSET + 0x103e0;
+hv_inquire_context = TEXT_OFFSET + 0x10400;
+hv_start_all_tiles = TEXT_OFFSET + 0x10420;
+hv_dev_open = TEXT_OFFSET + 0x10440;
+hv_dev_close = TEXT_OFFSET + 0x10460;
+hv_dev_pread = TEXT_OFFSET + 0x10480;
+hv_dev_pwrite = TEXT_OFFSET + 0x104a0;
+hv_dev_poll = TEXT_OFFSET + 0x104c0;
+hv_dev_poll_cancel = TEXT_OFFSET + 0x104e0;
+hv_dev_preada = TEXT_OFFSET + 0x10500;
+hv_dev_pwritea = TEXT_OFFSET + 0x10520;
+hv_flush_remote = TEXT_OFFSET + 0x10540;
+hv_console_putc = TEXT_OFFSET + 0x10560;
+hv_inquire_tiles = TEXT_OFFSET + 0x10580;
+hv_confstr = TEXT_OFFSET + 0x105a0;
+hv_reexec = TEXT_OFFSET + 0x105c0;
+hv_set_command_line = TEXT_OFFSET + 0x105e0;
+hv_clear_intr = TEXT_OFFSET + 0x10600;
+hv_enable_intr = TEXT_OFFSET + 0x10620;
+hv_disable_intr = TEXT_OFFSET + 0x10640;
+hv_raise_intr = TEXT_OFFSET + 0x10660;
+hv_trigger_ipi = TEXT_OFFSET + 0x10680;
+hv_store_mapping = TEXT_OFFSET + 0x106a0;
+hv_inquire_realpa = TEXT_OFFSET + 0x106c0;
+hv_flush_all = TEXT_OFFSET + 0x106e0;
+hv_get_ipi_pte = TEXT_OFFSET + 0x10700;
+hv_glue_internals = TEXT_OFFSET + 0x10720;
diff --git a/arch/tile/kernel/init_task.c b/arch/tile/kernel/init_task.c
new file mode 100644
index 00000000..928b3187
--- /dev/null
+++ b/arch/tile/kernel/init_task.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/init_task.h>
+#include <linux/mqueue.h>
+#include <linux/module.h>
+#include <linux/start_kernel.h>
+#include <linux/uaccess.h>
+
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is THREAD_SIZE aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry.
+ */
+union thread_union init_thread_union __init_task_data = {
+ INIT_THREAD_INFO(init_task)
+};
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+EXPORT_SYMBOL(init_task);
+
+/*
+ * per-CPU stack and boot info.
+ */
+DEFINE_PER_CPU(unsigned long, boot_sp) =
+ (unsigned long)init_stack + THREAD_SIZE;
+
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel;
+#else
+/*
+ * The variable must be __initdata since it references __init code.
+ * With CONFIG_SMP it is per-cpu data, which is exempt from validation.
+ */
+unsigned long __initdata boot_pc = (unsigned long)start_kernel;
+#endif
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
new file mode 100644
index 00000000..69435151
--- /dev/null
+++ b/arch/tile/kernel/intvec_32.S
@@ -0,0 +1,1944 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Linux interrupt vectors.
+ */
+
+#include <linux/linkage.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/unistd.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/irqflags.h>
+#include <asm/atomic_32.h>
+#include <asm/asm-offsets.h>
+#include <hv/hypervisor.h>
+#include <arch/abi.h>
+#include <arch/interrupts.h>
+#include <arch/spr_def.h>
+
+#ifdef CONFIG_PREEMPT
+# error "No support for kernel preemption currently"
+#endif
+
+#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg)
+
+#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
+
+#if !CHIP_HAS_WH64()
+ /* By making this an empty macro, we can use wh64 in the code. */
+ .macro wh64 reg
+ .endm
+#endif
+
+ /*
+ * Store \reg at the address currently in \ptr (default sp) and add
+ * \delta (default -4) to \ptr, both in one bundle.
+ */
+ .macro push_reg reg, ptr=sp, delta=-4
+ {
+ sw \ptr, \reg
+ addli \ptr, \ptr, \delta
+ }
+ .endm
+
+ /*
+ * Load \reg from the address currently in \ptr (default sp) and add
+ * \delta (default 4) to \ptr, both in one bundle.
+ */
+ .macro pop_reg reg, ptr=sp, delta=4
+ {
+ lw \reg, \ptr
+ addli \ptr, \ptr, \delta
+ }
+ .endm
+
+ /*
+ * Like pop_reg, but the same bundle also clears \zreg to zero.
+ * Note that \delta is applied with addi here rather than addli.
+ */
+ .macro pop_reg_zero reg, zreg, ptr=sp, delta=4
+ {
+ move \zreg, zero
+ lw \reg, \ptr
+ addi \ptr, \ptr, \delta
+ }
+ .endm
+
+ /*
+ * Save r51 down to r34 into their pt_regs slots, using \reg as the
+ * scratch pointer. \reg is first pointed at the r51 slot and steps
+ * down one word per push_reg; the final push instead adjusts \reg by
+ * PTREGS_OFFSET_BASE - PTREGS_OFFSET_REG(34).
+ */
+ .macro push_extra_callee_saves reg
+ PTREGS_PTR(\reg, PTREGS_OFFSET_REG(51))
+ push_reg r51, \reg
+ push_reg r50, \reg
+ push_reg r49, \reg
+ push_reg r48, \reg
+ push_reg r47, \reg
+ push_reg r46, \reg
+ push_reg r45, \reg
+ push_reg r44, \reg
+ push_reg r43, \reg
+ push_reg r42, \reg
+ push_reg r41, \reg
+ push_reg r40, \reg
+ push_reg r39, \reg
+ push_reg r38, \reg
+ push_reg r37, \reg
+ push_reg r36, \reg
+ push_reg r35, \reg
+ push_reg r34, \reg, PTREGS_OFFSET_BASE - PTREGS_OFFSET_REG(34)
+ .endm
+
+ /*
+ * Call panic() with the message \str: the string is emitted into
+ * .rodata, its address is built in r0 (lo16 then ha16), and the second
+ * bundle jumps-and-links to panic.
+ */
+ .macro panic str
+ .pushsection .rodata, "a"
+1:
+ .asciz "\str"
+ .popsection
+ {
+ moveli r0, lo16(1b)
+ }
+ {
+ auli r0, r0, ha16(1b)
+ jal panic
+ }
+ .endm
+
+#ifdef __COLLECT_LINKER_FEEDBACK__
+ .pushsection .text.intvec_feedback,"ax"
+intvec_feedback:
+ .popsection
+#endif
+
+ /*
+ * Default interrupt handler.
+ *
+ * vecnum is where we'll put this code (at offset vecnum << 8).
+ * vecname is the suffix of the intvec_<vecname> label for this entry.
+ * c_routine is the C routine we'll call.
+ *
+ * The C routine is passed two arguments:
+ * - A pointer to the pt_regs state.
+ * - The interrupt vector number.
+ *
+ * The "processing" argument specifies the code for processing
+ * the interrupt. Defaults to "handle_interrupt".
+ */
+ .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt
+ .org (\vecnum << 8)
+intvec_\vecname:
+ .ifc \vecnum, INT_SWINT_1
+ blz TREG_SYSCALL_NR_NAME, sys_cmpxchg
+ .endif
+
+ /* Temporarily save a register so we have somewhere to work. */
+
+ mtspr SPR_SYSTEM_SAVE_K_1, r0
+ mfspr r0, SPR_EX_CONTEXT_K_1
+
+ /* The cmpxchg code clears sp to force us to reset it here on fault. */
+ {
+ bz sp, 2f
+ andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ }
+
+ .ifc \vecnum, INT_DOUBLE_FAULT
+ /*
+ * For double-faults from user-space, fall through to the normal
+ * register save and stack setup path. Otherwise, it's the
+ * hypervisor giving us one last chance to dump diagnostics, and we
+ * branch to the kernel_double_fault routine to do so.
+ */
+ bz r0, 1f
+ j _kernel_double_fault
+1:
+ .else
+ /*
+ * If we're coming from user-space, then set sp to the top of
+ * the kernel stack. Otherwise, assume sp is already valid.
+ */
+ {
+ bnz r0, 0f
+ move r0, sp
+ }
+ .endif
+
+ .ifc \c_routine, do_page_fault
+ /*
+ * The page_fault handler may be downcalled directly by the
+ * hypervisor even when Linux is running and has ICS set.
+ *
+ * In this case the contents of EX_CONTEXT_K_1 reflect the
+ * previous fault and can't be relied on to choose whether or
+ * not to reinitialize the stack pointer. So we add a test
+ * to see whether SYSTEM_SAVE_K_2 has the high bit set,
+ * and if so we don't reinitialize sp, since we must be coming
+ * from Linux. (In fact the precise case is !(val & ~1),
+ * but any Linux PC has to have the high bit set.)
+ *
+ * Note that the hypervisor *always* sets SYSTEM_SAVE_K_2 for
+ * any path that turns into a downcall to one of our TLB handlers.
+ */
+ mfspr r0, SPR_SYSTEM_SAVE_K_2
+ {
+ blz r0, 0f /* high bit in S_S_1_2 is for a PC to use */
+ move r0, sp
+ }
+ .endif
+
+2:
+ /*
+ * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and
+ * the current stack top in the higher bits. So we recover
+ * our stack top by just masking off the low bits, then
+ * point sp at the top aligned address on the actual stack page.
+ */
+ mfspr r0, SPR_SYSTEM_SAVE_K_0
+ mm r0, r0, zero, LOG2_THREAD_SIZE, 31
+
+0:
+ /*
+ * Align the stack mod 64 so we can properly predict what
+ * cache lines we need to write-hint to reduce memory fetch
+ * latency as we enter the kernel. The layout of memory is
+ * as follows, with cache line 0 at the lowest VA, and cache
+ * line 4 just below the r0 value this "andi" computes.
+ * Note that we never write to cache line 4, and we skip
+ * cache line 1 for syscalls.
+ *
+ * cache line 4: ptregs padding (two words)
+ * cache line 3: r46...lr, pc, ex1, faultnum, orig_r0, flags, pad
+ * cache line 2: r30...r45
+ * cache line 1: r14...r29
+ * cache line 0: 2 x frame, r0..r13
+ */
+ andi r0, r0, -64
+
+ /*
+ * Push the first four registers on the stack, so that we can set
+ * them to vector-unique values before we jump to the common code.
+ *
+ * Registers are pushed on the stack as a struct pt_regs,
+ * with the sp initially just above the struct, and when we're
+ * done, sp points to the base of the struct, minus
+ * C_ABI_SAVE_AREA_SIZE, so we can directly jal to C code.
+ *
+ * This routine saves just the first four registers, plus the
+ * stack context so we can do proper backtracing right away,
+ * and defers to handle_interrupt to save the rest.
+ * The backtracer needs pc, ex1, lr, sp, r52, and faultnum.
+ */
+ addli r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP)
+ wh64 r0 /* cache line 3 */
+ {
+ sw r0, lr
+ addli r0, r0, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR
+ }
+ {
+ sw r0, sp
+ addli sp, r0, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_SP
+ }
+ {
+ sw sp, r52
+ addli sp, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(52)
+ }
+ wh64 sp /* cache line 0 */
+ {
+ sw sp, r1
+ addli sp, sp, PTREGS_OFFSET_REG(2) - PTREGS_OFFSET_REG(1)
+ }
+ {
+ sw sp, r2
+ addli sp, sp, PTREGS_OFFSET_REG(3) - PTREGS_OFFSET_REG(2)
+ }
+ {
+ sw sp, r3
+ addli sp, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(3)
+ }
+ mfspr r0, SPR_EX_CONTEXT_K_0
+ .ifc \processing,handle_syscall
+ /*
+ * Bump the saved PC by one bundle so that when we return, we won't
+ * execute the same swint instruction again. We need to do this while
+ * we're in the critical section.
+ */
+ addi r0, r0, 8
+ .endif
+ {
+ sw sp, r0
+ addli sp, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
+ }
+ mfspr r0, SPR_EX_CONTEXT_K_1
+ {
+ sw sp, r0
+ addi sp, sp, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1
+ /*
+ * Use r0 for syscalls so it's a temporary; use r1 for interrupts
+ * so that it gets passed through unchanged to the handler routine.
+ * Note that the .if conditional confusingly spans bundles.
+ */
+ .ifc \processing,handle_syscall
+ movei r0, \vecnum
+ }
+ {
+ sw sp, r0
+ .else
+ movei r1, \vecnum
+ }
+ {
+ sw sp, r1
+ .endif
+ addli sp, sp, PTREGS_OFFSET_REG(0) - PTREGS_OFFSET_FAULTNUM
+ }
+ mfspr r0, SPR_SYSTEM_SAVE_K_1 /* Original r0 */
+ {
+ sw sp, r0
+ addi sp, sp, -PTREGS_OFFSET_REG(0) - 4
+ }
+ {
+ sw sp, zero /* write zero into "Next SP" frame pointer */
+ addi sp, sp, -4 /* leave SP pointing at bottom of frame */
+ }
+ .ifc \processing,handle_syscall
+ j handle_syscall
+ .else
+ /*
+ * Capture per-interrupt SPR context to registers.
+ * We overload the meaning of r3 on this path such that if its bit 31
+ * is set, we have to mask all interrupts including NMIs before
+ * clearing the interrupt critical section bit.
+ * See discussion below at "finish_interrupt_save".
+ */
+ .ifc \c_routine, do_page_fault
+ mfspr r2, SPR_SYSTEM_SAVE_K_3 /* address of page fault */
+ mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */
+ .else
+ .ifc \vecnum, INT_DOUBLE_FAULT
+ {
+ mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */
+ movei r3, 0
+ }
+ .else
+ .ifc \c_routine, do_trap
+ {
+ mfspr r2, GPV_REASON
+ movei r3, 0
+ }
+ .else
+ .ifc \c_routine, op_handle_perf_interrupt
+ {
+ mfspr r2, PERF_COUNT_STS
+ movei r3, -1 /* not used, but set for consistency */
+ }
+ .else
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+ .ifc \c_routine, op_handle_aux_perf_interrupt
+ {
+ mfspr r2, AUX_PERF_COUNT_STS
+ movei r3, -1 /* not used, but set for consistency */
+ }
+ .else
+#endif
+ movei r3, 0
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+ .endif
+#endif
+ .endif
+ .endif
+ .endif
+ .endif
+ /* Put function pointer in r0 */
+ moveli r0, lo16(\c_routine)
+ {
+ auli r0, r0, ha16(\c_routine)
+ j \processing
+ }
+ .endif
+ ENDPROC(intvec_\vecname)
+
+#ifdef __COLLECT_LINKER_FEEDBACK__
+ .pushsection .text.intvec_feedback,"ax"
+ .org (\vecnum << 5)
+ FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8)
+ jrp lr
+ .popsection
+#endif
+
+ .endm
+
+
+ /*
+ * Save the rest of the registers that we didn't save in the actual
+ * vector itself. We can't use r0-r10 inclusive here.
+ *
+ * The "function" argument is the name of the handler expanding this
+ * macro; it is compared against handle_syscall and handle_nmi to pick
+ * which registers are saved and how interrupts are masked.
+ *
+ * In order, the macro:
+ * - stores orig_r0 (r0 for syscalls, zero otherwise) and tp;
+ * - pushes r33..r30, then either the syscall number register
+ * (syscalls) or r29..r6 (everything else), then r5 and r4;
+ * - loads tp from the __per_cpu_offset table (zero if !CONFIG_SMP);
+ * - computes the pt_regs flags word, stores it, and leaves it in r32;
+ * - for non-syscall, non-NMI callers, calls do_page_fault_ics() when
+ * bit 31 of r3 is set;
+ * - invokes IRQ_DISABLE_ALL (NMI) or IRQ_DISABLE (otherwise) and then
+ * writes zero to INTERRUPT_CRITICAL_SECTION;
+ * - write-hints the stack below sp if CHIP_HAS_WH64();
+ * - calls TRACE_IRQS_OFF if CONFIG_TRACE_IRQFLAGS (not for NMIs).
+ */
+ .macro finish_interrupt_save, function
+
+ /* If it's a syscall, save a proper orig_r0, otherwise just zero. */
+ PTREGS_PTR(r52, PTREGS_OFFSET_ORIG_R0)
+ {
+ .ifc \function,handle_syscall
+ sw r52, r0
+ .else
+ sw r52, zero
+ .endif
+ PTREGS_PTR(r52, PTREGS_OFFSET_TP)
+ }
+
+ /*
+ * For ordinary syscalls, we save neither caller- nor callee-
+ * save registers, since the syscall invoker doesn't expect the
+ * caller-saves to be saved, and the called kernel functions will
+ * take care of saving the callee-saves for us.
+ *
+ * For interrupts we save just the caller-save registers. Saving
+ * them is required (since the "caller" can't save them). Again,
+ * the called kernel functions will restore the callee-save
+ * registers for us appropriately.
+ *
+ * On return, we normally restore nothing special for syscalls,
+ * and just the caller-save registers for interrupts.
+ *
+ * However, there are some important caveats to all this:
+ *
+ * - We always save a few callee-save registers to give us
+ * some scratchpad registers to carry across function calls.
+ *
+ * - fork/vfork/etc require us to save all the callee-save
+ * registers, which we do in PTREGS_SYSCALL_ALL_REGS, below.
+ *
+ * - We always save r0..r5 and r10 for syscalls, since we need
+ * to reload them a bit later for the actual kernel call, and
+ * since we might need them for -ERESTARTNOINTR, etc.
+ *
+ * - Before invoking a signal handler, we save the unsaved
+ * callee-save registers so they are visible to the
+ * signal handler or any ptracer.
+ *
+ * - If the unsaved callee-save registers are modified, we set
+ * a bit in pt_regs so we know to reload them from pt_regs
+ * and not just rely on the kernel function unwinding.
+ * (Done for ptrace register writes and SA_SIGINFO handler.)
+ */
+ {
+ sw r52, tp
+ PTREGS_PTR(r52, PTREGS_OFFSET_REG(33))
+ }
+ wh64 r52 /* cache line 2 */
+ push_reg r33, r52
+ push_reg r32, r52
+ push_reg r31, r52
+ .ifc \function,handle_syscall
+ push_reg r30, r52, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(30)
+ push_reg TREG_SYSCALL_NR_NAME, r52, \
+ PTREGS_OFFSET_REG(5) - PTREGS_OFFSET_SYSCALL
+ .else
+
+ push_reg r30, r52, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(30)
+ wh64 r52 /* cache line 1 */
+ push_reg r29, r52
+ push_reg r28, r52
+ push_reg r27, r52
+ push_reg r26, r52
+ push_reg r25, r52
+ push_reg r24, r52
+ push_reg r23, r52
+ push_reg r22, r52
+ push_reg r21, r52
+ push_reg r20, r52
+ push_reg r19, r52
+ push_reg r18, r52
+ push_reg r17, r52
+ push_reg r16, r52
+ push_reg r15, r52
+ push_reg r14, r52
+ push_reg r13, r52
+ push_reg r12, r52
+ push_reg r11, r52
+ push_reg r10, r52
+ push_reg r9, r52
+ push_reg r8, r52
+ push_reg r7, r52
+ push_reg r6, r52
+
+ .endif
+
+ push_reg r5, r52
+ sw r52, r4
+
+ /* Load tp with our per-cpu offset. */
+#ifdef CONFIG_SMP
+ {
+ mfspr r20, SPR_SYSTEM_SAVE_K_0
+ moveli r21, lo16(__per_cpu_offset)
+ }
+ {
+ auli r21, r21, ha16(__per_cpu_offset)
+ mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1
+ }
+ s2a r20, r20, r21
+ lw tp, r20
+#else
+ move tp, zero
+#endif
+
+ /*
+ * If we will be returning to the kernel, we will need to
+ * reset the interrupt masks to the state they had before.
+ * Set DISABLE_IRQ in flags iff we came from PL1 with irqs disabled.
+ * We load flags in r32 here so we can jump to .Lrestore_regs
+ * directly after do_page_fault_ics() if necessary.
+ */
+ mfspr r32, SPR_EX_CONTEXT_K_1
+ {
+ andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS)
+ }
+ bzt r32, 1f /* zero if from user space */
+ IRQS_DISABLED(r32) /* zero if irqs enabled */
+#if PT_FLAGS_DISABLE_IRQ != 1
+# error Value of IRQS_DISABLED used to set PT_FLAGS_DISABLE_IRQ; fix
+#endif
+1:
+ .ifnc \function,handle_syscall
+ /* Record the fact that we saved the caller-save registers above. */
+ ori r32, r32, PT_FLAGS_CALLER_SAVES
+ .endif
+ sw r21, r32
+
+#ifdef __COLLECT_LINKER_FEEDBACK__
+ /*
+ * Notify the feedback routines that we were in the
+ * appropriate fixed interrupt vector area. Note that we
+ * still have ICS set at this point, so we can't invoke any
+ * atomic operations or we will panic. The feedback
+ * routines internally preserve r0..r10 and r30 up.
+ */
+ .ifnc \function,handle_syscall
+ shli r20, r1, 5
+ .else
+ moveli r20, INT_SWINT_1 << 5
+ .endif
+ addli r20, r20, lo16(intvec_feedback)
+ auli r20, r20, ha16(intvec_feedback)
+ jalr r20
+
+ /* And now notify the feedback routines that we are here. */
+ FEEDBACK_ENTER(\function)
+#endif
+
+ /*
+ * We've captured enough state to the stack (including in
+ * particular our EX_CONTEXT state) that we can now release
+ * the interrupt critical section and replace it with our
+ * standard "interrupts disabled" mask value. This allows
+ * synchronous interrupts (and profile interrupts) to punch
+ * through from this point onwards.
+ *
+ * If bit 31 of r3 is set during a non-NMI interrupt, we know we
+ * are on the path where the hypervisor has punched through our
+ * ICS with a page fault, so we call out to do_page_fault_ics()
+ * to figure out what to do with it. If the fault was in
+ * an atomic op, we unlock the atomic lock, adjust the
+ * saved register state a little, and return "zero" in r4,
+ * falling through into the normal page-fault interrupt code.
+ * If the fault was in a kernel-space atomic operation, then
+ * do_page_fault_ics() resolves it itself, returns "one" in r4,
+ * and as a result goes directly to restoring registers and iret,
+ * without trying to adjust the interrupt masks at all.
+ * The do_page_fault_ics() API involves passing and returning
+ * a five-word struct (in registers) to avoid writing the
+ * save and restore code here.
+ */
+ .ifc \function,handle_nmi
+ IRQ_DISABLE_ALL(r20)
+ .else
+ .ifnc \function,handle_syscall
+ bgezt r3, 1f
+ {
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ jal do_page_fault_ics
+ }
+ FEEDBACK_REENTER(\function)
+ bzt r4, 1f
+ j .Lrestore_regs
+1:
+ .endif
+ IRQ_DISABLE(r20, r21)
+ .endif
+ mtspr INTERRUPT_CRITICAL_SECTION, zero
+
+#if CHIP_HAS_WH64()
+ /*
+ * Prepare the first 256 stack bytes to be rapidly accessible
+ * without having to fetch the background data. We don't really
+ * know how far to write-hint, but kernel stacks generally
+ * aren't that big, and write-hinting here does take some time.
+ */
+ addi r52, sp, -64
+ {
+ wh64 r52
+ addi r52, r52, -64
+ }
+ {
+ wh64 r52
+ addi r52, r52, -64
+ }
+ {
+ wh64 r52
+ addi r52, r52, -64
+ }
+ wh64 r52
+#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ .ifnc \function,handle_nmi
+ /*
+ * We finally have enough state set up to notify the irq
+ * tracing code that irqs were disabled on entry to the handler.
+ * The TRACE_IRQS_OFF call clobbers registers r0-r29.
+ * For syscalls, we already have the register state saved away
+ * on the stack, so we don't bother to do any register saves here,
+ * and later we pop the registers back off the kernel stack.
+ * For interrupt handlers, save r0-r3 in callee-saved registers.
+ */
+ .ifnc \function,handle_syscall
+ { move r30, r0; move r31, r1 }
+ { move r32, r2; move r33, r3 }
+ .endif
+ TRACE_IRQS_OFF
+ .ifnc \function,handle_syscall
+ { move r0, r30; move r1, r31 }
+ { move r2, r32; move r3, r33 }
+ .endif
+ .endif
+#endif
+
+ .endm
+
+ .macro check_single_stepping, kind, not_single_stepping
+ /*
+ * Check for single stepping in user-level priv
+ * kind can be "normal", "ill", or "syscall"
+ * At end, if fall-thru
+ * r29: thread_info->step_state
+ * r28: &pt_regs->pc
+ * r27: pt_regs->pc
+ * r26: thread_info->step_state->buffer
+ *
+ * The macro branches to \not_single_stepping when there is no
+ * step state, when the saved EX1 privilege bits are nonzero, when
+ * the enabled flag is clear, or when the saved PC does not match
+ * the single-step buffer as required for the given kind.
+ * Once the enabled flag has been seen set, the flags word is always
+ * written back with that flag cleared.
+ *
+ * Per-kind behavior on the fall-through path:
+ * normal: the PC equals the buffer start; pt_regs->pc is
+ * overwritten with the saved original PC.
+ * syscall: the PC equals the buffer start plus 8; pt_regs->pc is
+ * overwritten with the saved next PC. Note that r26 has
+ * been advanced by 8 in this case.
+ * other: (used for "ill") the PC lies within [buffer, buffer+16];
+ * pt_regs->pc is left for the caller to fix up.
+ *
+ * Scratch registers r22-r25 are also clobbered.
+ */
+
+ /* Check for single stepping */
+ GET_THREAD_INFO(r29)
+ {
+ /* Get pointer to field holding step state */
+ addi r29, r29, THREAD_INFO_STEP_STATE_OFFSET
+
+ /* Get pointer to EX1 in register state */
+ PTREGS_PTR(r27, PTREGS_OFFSET_EX1)
+ }
+ {
+ /* Get pointer to field holding PC */
+ PTREGS_PTR(r28, PTREGS_OFFSET_PC)
+
+ /* Load the pointer to the step state */
+ lw r29, r29
+ }
+ /* Load EX1 */
+ lw r27, r27
+ {
+ /* Points to flags */
+ addi r23, r29, SINGLESTEP_STATE_FLAGS_OFFSET
+
+ /* No single stepping if there is no step state structure */
+ bzt r29, \not_single_stepping
+ }
+ {
+ /* mask off ICS and any other high bits */
+ andi r27, r27, SPR_EX_CONTEXT_1_1__PL_MASK
+
+ /* Load pointer to single step instruction buffer */
+ lw r26, r29
+ }
+ /* Check priv state */
+ bnz r27, \not_single_stepping
+
+ /* Get flags */
+ lw r22, r23
+ {
+ /* Branch if single-step mode not enabled */
+ bbnst r22, \not_single_stepping
+
+ /* Clear enabled flag */
+ andi r22, r22, ~SINGLESTEP_STATE_MASK_IS_ENABLED
+ }
+ .ifc \kind,normal
+ {
+ /* Load PC */
+ lw r27, r28
+
+ /* Point to the entry containing the original PC */
+ addi r24, r29, SINGLESTEP_STATE_ORIG_PC_OFFSET
+ }
+ {
+ /* Disable single stepping flag */
+ sw r23, r22
+ }
+ {
+ /* Get the original pc */
+ lw r24, r24
+
+ /* See if the PC is at the start of the single step buffer */
+ seq r25, r26, r27
+ }
+ /*
+ * NOTE: it is really expected that the PC be in the single step buffer
+ * at this point
+ */
+ bzt r25, \not_single_stepping
+
+ /* Restore the original PC */
+ sw r28, r24
+ .else
+ .ifc \kind,syscall
+ {
+ /* Load PC */
+ lw r27, r28
+
+ /* Point to the entry containing the next PC */
+ addi r24, r29, SINGLESTEP_STATE_NEXT_PC_OFFSET
+ }
+ {
+ /* Increment the stopped PC by the bundle size */
+ addi r26, r26, 8
+
+ /* Disable single stepping flag */
+ sw r23, r22
+ }
+ {
+ /* Get the next pc */
+ lw r24, r24
+
+ /*
+ * See if the PC is one bundle past the start of the
+ * single step buffer
+ */
+ seq r25, r26, r27
+ }
+ {
+ /*
+ * NOTE: it is really expected that the PC be in the
+ * single step buffer at this point
+ */
+ bzt r25, \not_single_stepping
+ }
+ /* Set to the next PC */
+ sw r28, r24
+ .else
+ {
+ /* Point to 3rd bundle in buffer */
+ addi r25, r26, 16
+
+ /* Load PC */
+ lw r27, r28
+ }
+ {
+ /* Disable single stepping flag */
+ sw r23, r22
+
+ /* See if the PC is in the single step buffer */
+ slte_u r24, r26, r27
+ }
+ {
+ slte_u r25, r27, r25
+
+ /*
+ * NOTE: it is really expected that the PC be in the
+ * single step buffer at this point
+ */
+ bzt r24, \not_single_stepping
+ }
+ bzt r25, \not_single_stepping
+ .endif
+ .endif
+ .endm
+
+ /*
+ * Redispatch a downcall.
+ *
+ * Emits the label intvec_\vecname at offset (\vecnum << 8) within the
+ * current section; the entry consists solely of a jump to
+ * hv_downcall_dispatch.
+ */
+ .macro dc_dispatch vecnum, vecname
+ .org (\vecnum << 8)
+intvec_\vecname:
+ j hv_downcall_dispatch
+ ENDPROC(intvec_\vecname)
+ .endm
+
+ /*
+ * Common code for most interrupts. The C function we're eventually
+ * going to is in r0, and the faultnum is in r1; the original
+ * values for those registers are on the stack.
+ *
+ * Flow: finish saving state, fix up the saved PC if we were single
+ * stepping, call the C routine with the pt_regs pointer in r0, then
+ * leave through interrupt_return with r30 = 0.
+ */
+ .pushsection .text.handle_interrupt,"ax"
+handle_interrupt:
+ finish_interrupt_save handle_interrupt
+
+ /*
+ * Check whether we are single stepping in user level. If so, then
+ * we need to restore the PC. Both outcomes continue at
+ * .Ldispatch_interrupt, since that label immediately follows.
+ */
+
+ check_single_stepping normal, .Ldispatch_interrupt
+.Ldispatch_interrupt:
+
+ /* Jump to the C routine; it should enable irqs as soon as possible. */
+ {
+ jalr r0
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ }
+ FEEDBACK_REENTER(handle_interrupt)
+ {
+ movei r30, 0 /* not an NMI */
+ j interrupt_return
+ }
+ STD_ENDPROC(handle_interrupt)
+
+/*
+ * This routine takes a boolean in r30 indicating if this is an NMI.
+ * If so, we also expect a boolean in r31 indicating whether to
+ * re-enable the oprofile interrupts.
+ *
+ * Note that .Lresume_userspace is jumped to directly in several
+ * places, and we need to make sure r30 is set correctly in those
+ * callers as well.
+ *
+ * Internal entry points, in order of appearance:
+ * .Lresume_userspace - returning to user space; loops on pending work
+ * .Lretry_work_pending - top of the do_work_pending() loop
+ * .Lrestore_all - reset interrupt masks and set ICS
+ * .Lrestore_regs - reload registers from pt_regs and iret
+ * .Lkernel_return - final restore when returning to kernel mode
+ * .Lrestore_callees - reload r34..r51, then rejoin the restore path
+ */
+STD_ENTRY(interrupt_return)
+ /* If we're resuming to kernel space, don't check thread flags. */
+ {
+ bnz r30, .Lrestore_all /* NMIs don't special-case user-space */
+ PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
+ }
+ lw r29, r29
+ andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ {
+ bzt r29, .Lresume_userspace
+ PTREGS_PTR(r29, PTREGS_OFFSET_PC)
+ }
+
+ /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */
+ {
+ lw r28, r29
+ moveli r27, lo16(_cpu_idle_nap)
+ }
+ {
+ auli r27, r27, ha16(_cpu_idle_nap)
+ }
+ {
+ seq r27, r27, r28
+ }
+ {
+ bbns r27, .Lrestore_all
+ addi r28, r28, 8
+ }
+ sw r29, r28
+ j .Lrestore_all
+
+.Lresume_userspace:
+ FEEDBACK_REENTER(interrupt_return)
+
+ /*
+ * Use r33 to hold whether we have already loaded the callee-saves
+ * into ptregs. We don't want to do it twice in this loop, since
+ * then we'd clobber whatever changes are made by ptrace, etc.
+ * Get base of stack in r32.
+ */
+ {
+ GET_THREAD_INFO(r32)
+ movei r33, 0
+ }
+
+.Lretry_work_pending:
+ /*
+ * Disable interrupts so as to make sure we don't
+ * miss an interrupt that sets any of the thread flags (like
+ * need_resched or sigpending) between sampling and the iret.
+ * Routines like schedule() or do_signal() may re-enable
+ * interrupts before returning.
+ */
+ IRQ_DISABLE(r20, r21)
+ TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */
+
+
+ /* Check to see if there is any work to do before returning to user. */
+ {
+ addi r29, r32, THREAD_INFO_FLAGS_OFFSET
+ moveli r1, lo16(_TIF_ALLWORK_MASK)
+ }
+ {
+ lw r29, r29
+ auli r1, r1, ha16(_TIF_ALLWORK_MASK)
+ }
+ and r1, r29, r1
+ bzt r1, .Lrestore_all
+
+ /*
+ * Make sure we have all the registers saved for signal
+ * handling, notify-resume, or single-step. Call out to C
+ * code to figure out exactly what we need to do for each flag bit,
+ * then if necessary, reload the flags and recheck.
+ */
+ {
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ bnz r33, 1f
+ }
+ push_extra_callee_saves r0
+ movei r33, 1
+1: jal do_work_pending
+ bnz r0, .Lretry_work_pending
+
+ /*
+ * In the NMI case we
+ * omit the call to single_process_check_nohz, which normally checks
+ * to see if we should start or stop the scheduler tick, because
+ * we can't call arbitrary Linux code from an NMI context.
+ * We always call the homecache TLB deferral code to re-trigger
+ * the deferral mechanism.
+ *
+ * The other chunk of responsibility this code has is to reset the
+ * interrupt masks appropriately to reset irqs and NMIs. We have
+ * to call TRACE_IRQS_OFF and TRACE_IRQS_ON to support all the
+ * lockdep-type stuff, but we can't set ICS until afterwards, since
+ * ICS can only be used in very tight chunks of code to avoid
+ * tripping over various assertions that it is off.
+ *
+ * (There is what looks like a window of vulnerability here since
+ * we might take a profile interrupt between the two SPR writes
+ * that set the mask, but since we write the low SPR word first,
+ * and our interrupt entry code checks the low SPR word, any
+ * profile interrupt will actually disable interrupts in both SPRs
+ * before returning, which is OK.)
+ */
+.Lrestore_all:
+ PTREGS_PTR(r0, PTREGS_OFFSET_EX1)
+ {
+ lw r0, r0
+ PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS)
+ }
+ {
+ andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK
+ lw r32, r32
+ }
+ bnz r0, 1f
+ j 2f
+#if PT_FLAGS_DISABLE_IRQ != 1
+# error Assuming PT_FLAGS_DISABLE_IRQ == 1 so we can use bbnst below
+#endif
+1: bbnst r32, 2f
+ IRQ_DISABLE(r20,r21)
+ TRACE_IRQS_OFF
+ movei r0, 1
+ mtspr INTERRUPT_CRITICAL_SECTION, r0
+ bzt r30, .Lrestore_regs
+ j 3f
+2: TRACE_IRQS_ON
+ movei r0, 1
+ mtspr INTERRUPT_CRITICAL_SECTION, r0
+ IRQ_ENABLE(r20, r21)
+ bzt r30, .Lrestore_regs
+3:
+
+
+ /*
+ * We now commit to returning from this interrupt, since we will be
+ * doing things like setting EX_CONTEXT SPRs and unwinding the stack
+ * frame. No calls should be made to any other code after this point.
+ * This code should only be entered with ICS set.
+ * r32 must still be set to ptregs.flags.
+ * We launch loads to each cache line separately first, so we can
+ * get some parallelism out of the memory subsystem.
+ * We start zeroing caller-saved registers throughout, since
+ * that will save some cycles if this turns out to be a syscall.
+ */
+.Lrestore_regs:
+ FEEDBACK_REENTER(interrupt_return) /* called from elsewhere */
+
+ /*
+ * Rotate so we have one high bit and one low bit to test.
+ * - low bit says whether to restore all the callee-saved registers,
+ * or just r30-r33, and r52 up.
+ * - high bit (i.e. sign bit) says whether to restore all the
+ * caller-saved registers, or just r0.
+ */
+#if PT_FLAGS_CALLER_SAVES != 2 || PT_FLAGS_RESTORE_REGS != 4
+# error Rotate trick does not work :-)
+#endif
+ {
+ rli r20, r32, 30
+ PTREGS_PTR(sp, PTREGS_OFFSET_REG(0))
+ }
+
+ /*
+ * Load cache lines 0, 2, and 3 in that order, then use
+ * the last loaded value, which makes it likely that the other
+ * cache lines have also loaded, at which point we should be
+ * able to safely read all the remaining words on those cache
+ * lines without waiting for the memory subsystem.
+ */
+ pop_reg_zero r0, r28, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0)
+ pop_reg_zero r30, r2, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(30)
+ pop_reg_zero r21, r3, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
+ pop_reg_zero lr, r4, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_EX1
+ {
+ mtspr SPR_EX_CONTEXT_K_0, r21
+ move r5, zero
+ }
+ {
+ mtspr SPR_EX_CONTEXT_K_1, lr
+ andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ }
+
+ /* Restore callee-saveds that we actually use. */
+ pop_reg_zero r52, r6, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_REG(52)
+ pop_reg_zero r31, r7
+ pop_reg_zero r32, r8
+ pop_reg_zero r33, r9, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(33)
+
+ /*
+ * If we modified other callee-saveds, restore them now.
+ * This is rare, but could be via ptrace or signal handler.
+ */
+ {
+ move r10, zero
+ bbs r20, .Lrestore_callees
+ }
+.Lcontinue_restore_regs:
+
+ /* Check if we're returning from a syscall. */
+ {
+ move r11, zero
+ blzt r20, 1f /* no, so go restore caller-save registers */
+ }
+
+ /*
+ * Check if we're returning to userspace.
+ * Note that if we're not, we don't worry about zeroing everything.
+ */
+ {
+ addli sp, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(29)
+ bnz lr, .Lkernel_return
+ }
+
+ /*
+ * On return from syscall, we've restored r0 from pt_regs, but we
+ * clear the remainder of the caller-saved registers. We could
+ * restore the syscall arguments, but there's not much point,
+ * and it ensures user programs aren't trying to use the
+ * caller-saves if we clear them, as well as avoiding leaking
+ * kernel pointers into userspace.
+ */
+ pop_reg_zero lr, r12, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR
+ pop_reg_zero tp, r13, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP
+ {
+ lw sp, sp
+ move r14, zero
+ move r15, zero
+ }
+ { move r16, zero; move r17, zero }
+ { move r18, zero; move r19, zero }
+ { move r20, zero; move r21, zero }
+ { move r22, zero; move r23, zero }
+ { move r24, zero; move r25, zero }
+ { move r26, zero; move r27, zero }
+
+ /* Set r1 to errno if we are returning an error, otherwise zero. */
+ {
+ moveli r29, 4096
+ sub r1, zero, r0
+ }
+ slt_u r29, r1, r29
+ {
+ mnz r1, r29, r1
+ move r29, zero
+ }
+ iret
+
+ /*
+ * Not a syscall, so restore caller-saved registers.
+ * First kick off a load for cache line 1, which we're touching
+ * for the first time here.
+ */
+ .align 64
+1: pop_reg r29, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(29)
+ pop_reg r1
+ pop_reg r2
+ pop_reg r3
+ pop_reg r4
+ pop_reg r5
+ pop_reg r6
+ pop_reg r7
+ pop_reg r8
+ pop_reg r9
+ pop_reg r10
+ pop_reg r11
+ pop_reg r12
+ pop_reg r13
+ pop_reg r14
+ pop_reg r15
+ pop_reg r16
+ pop_reg r17
+ pop_reg r18
+ pop_reg r19
+ pop_reg r20
+ pop_reg r21
+ pop_reg r22
+ pop_reg r23
+ pop_reg r24
+ pop_reg r25
+ pop_reg r26
+ pop_reg r27
+ pop_reg r28, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(28)
+ /* r29 already restored above */
+ bnz lr, .Lkernel_return
+ pop_reg lr, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR
+ pop_reg tp, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP
+ lw sp, sp
+ iret
+
+ /*
+ * We can't restore tp when in kernel mode, since a thread might
+ * have migrated from another cpu and brought a stale tp value.
+ */
+.Lkernel_return:
+ pop_reg lr, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR
+ lw sp, sp
+ iret
+
+ /* Restore callee-saved registers from r34 to r51. */
+.Lrestore_callees:
+ addli sp, sp, PTREGS_OFFSET_REG(34) - PTREGS_OFFSET_REG(29)
+ pop_reg r34
+ pop_reg r35
+ pop_reg r36
+ pop_reg r37
+ pop_reg r38
+ pop_reg r39
+ pop_reg r40
+ pop_reg r41
+ pop_reg r42
+ pop_reg r43
+ pop_reg r44
+ pop_reg r45
+ pop_reg r46
+ pop_reg r47
+ pop_reg r48
+ pop_reg r49
+ pop_reg r50
+ pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51)
+ j .Lcontinue_restore_regs
+ STD_ENDPROC(interrupt_return)
+
+ /*
+ * Some interrupts don't check for single stepping
+ *
+ * Same sequence as handle_interrupt minus check_single_stepping:
+ * finish saving state, call the C routine whose address is in r0
+ * (with r0 replaced by the pt_regs pointer in the same bundle), then
+ * leave through interrupt_return with r30 = 0.
+ */
+ .pushsection .text.handle_interrupt_no_single_step,"ax"
+handle_interrupt_no_single_step:
+ finish_interrupt_save handle_interrupt_no_single_step
+ {
+ jalr r0
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ }
+ FEEDBACK_REENTER(handle_interrupt_no_single_step)
+ {
+ movei r30, 0 /* not an NMI */
+ j interrupt_return
+ }
+ STD_ENDPROC(handle_interrupt_no_single_step)
+
+ /*
+ * "NMI" interrupts mask ALL interrupts before calling the
+ * handler, and don't check thread flags, etc., on the way
+ * back out. In general, the only things we do here for NMIs
+ * are the register save/restore, fixing the PC if we were
+ * doing single step, and the dataplane kernel-TLB management.
+ * We don't (for example) deal with start/stop of the sched tick.
+ *
+ * NOTE(review): nothing visible in this routine sets r30 before the
+ * jump to interrupt_return, which is documented as taking an "is NMI"
+ * boolean in r30 (plus r31 for NMIs). Confirm how r30/r31 are
+ * established on this path.
+ */
+ .pushsection .text.handle_nmi,"ax"
+handle_nmi:
+ finish_interrupt_save handle_nmi
+ check_single_stepping normal, .Ldispatch_nmi
+.Ldispatch_nmi:
+ {
+ jalr r0
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ }
+ FEEDBACK_REENTER(handle_nmi)
+ j interrupt_return
+ STD_ENDPROC(handle_nmi)
+
+ /*
+ * Parallel code for syscalls to handle_interrupt.
+ *
+ * Flow: finish saving state, fix up the saved PC if single stepping,
+ * enable irqs, bump the per-cpu syscall counter, optionally call
+ * do_syscall_trace, reload r0..r5 and the syscall number from
+ * pt_regs, range-check the number against __NR_syscalls, and call
+ * through sys_call_table. The result in r0 is written back to
+ * pt_regs before leaving via .Lresume_userspace. An out-of-range
+ * syscall number stores -ENOSYS in the saved r0 instead.
+ */
+ .pushsection .text.handle_syscall,"ax"
+handle_syscall:
+ finish_interrupt_save handle_syscall
+
+ /*
+ * Check whether we are single stepping in user level. If so, then
+ * we need to restore the PC.
+ */
+ check_single_stepping syscall, .Ldispatch_syscall
+.Ldispatch_syscall:
+
+ /* Enable irqs. */
+ TRACE_IRQS_ON
+ IRQ_ENABLE(r20, r21)
+
+ /* Bump the counter for syscalls made on this tile. */
+ moveli r20, lo16(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET)
+ auli r20, r20, ha16(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET)
+ add r20, r20, tp
+ lw r21, r20
+ addi r21, r21, 1
+ {
+ sw r20, r21
+ GET_THREAD_INFO(r31)
+ }
+
+ /* Trace syscalls, if requested. */
+ addi r31, r31, THREAD_INFO_FLAGS_OFFSET
+ lw r30, r31
+ andi r30, r30, _TIF_SYSCALL_TRACE
+ bzt r30, .Lrestore_syscall_regs
+ jal do_syscall_trace
+ FEEDBACK_REENTER(handle_syscall)
+
+ /*
+ * We always reload our registers from the stack at this
+ * point. They might be valid, if we didn't build with
+ * TRACE_IRQFLAGS, and this isn't a dataplane tile, and we're not
+ * doing syscall tracing, but there are enough cases now that it
+ * seems simplest just to do the reload unconditionally.
+ */
+.Lrestore_syscall_regs:
+ PTREGS_PTR(r11, PTREGS_OFFSET_REG(0))
+ pop_reg r0, r11
+ pop_reg r1, r11
+ pop_reg r2, r11
+ pop_reg r3, r11
+ pop_reg r4, r11
+ pop_reg r5, r11, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(5)
+ pop_reg TREG_SYSCALL_NR_NAME, r11
+
+ /* Ensure that the syscall number is within the legal range. */
+ moveli r21, __NR_syscalls
+ {
+ slt_u r21, TREG_SYSCALL_NR_NAME, r21
+ moveli r20, lo16(sys_call_table)
+ }
+ {
+ bbns r21, .Linvalid_syscall
+ auli r20, r20, ha16(sys_call_table)
+ }
+ s2a r20, TREG_SYSCALL_NR_NAME, r20
+ lw r20, r20
+
+ /* Jump to syscall handler. */
+ jalr r20
+.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */
+
+ /*
+ * Write our r0 onto the stack so it gets restored instead
+ * of whatever the user had there before.
+ */
+ PTREGS_PTR(r29, PTREGS_OFFSET_REG(0))
+ sw r29, r0
+
+.Lsyscall_sigreturn_skip:
+ FEEDBACK_REENTER(handle_syscall)
+
+ /* Do syscall trace again, if requested. */
+ lw r30, r31
+ andi r30, r30, _TIF_SYSCALL_TRACE
+ bzt r30, 1f
+ jal do_syscall_trace
+ FEEDBACK_REENTER(handle_syscall)
+1: {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
+
+.Linvalid_syscall:
+ /* Report an invalid syscall back to the user program */
+ {
+ PTREGS_PTR(r29, PTREGS_OFFSET_REG(0))
+ movei r28, -ENOSYS
+ }
+ sw r29, r28
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
+ STD_ENDPROC(handle_syscall)
+
+ /*
+ * Return the address for oprofile to suppress in backtraces.
+ *
+ * The value returned in r0 is the address of .Lhandle_syscall_link,
+ * formed from the "lnk r0" result plus the assembly-time offset
+ * (.Lhandle_syscall_link - .). This presumably relies on lnk yielding
+ * the address of the following bundle -- confirm against the ISA.
+ */
+STD_ENTRY_SECTION(handle_syscall_link_address, .text.handle_syscall)
+ lnk r0
+ {
+ addli r0, r0, .Lhandle_syscall_link - .
+ jrp lr
+ }
+ STD_ENDPROC(handle_syscall_link_address)
+
+STD_ENTRY(ret_from_fork)
+ jal sim_notify_fork /* call out to sim_notify_fork first */
+ jal schedule_tail /* then to schedule_tail */
+ FEEDBACK_REENTER(ret_from_fork)
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
+ STD_ENDPROC(ret_from_fork)
+
+ /*
+ * Code for ill interrupt.
+ *
+ * If check_single_stepping decides we are not single stepping, the
+ * C routine in r0 is called at .Ldispatch_normal_ill exactly as in
+ * handle_interrupt. Otherwise the saved PC (r27) lies within the
+ * single-step buffer (r26) and we pick the PC to resume at:
+ * - PC at the 1st bundle: the saved original PC;
+ * - PC at the 2nd bundle: the saved next PC, after optionally
+ * writing the saved update value into the target register's
+ * slot in pt_regs;
+ * - otherwise (3rd bundle): the saved branch-next PC.
+ * The chosen PC is stored into pt_regs, _TIF_SINGLESTEP is cleared,
+ * send_sigtrap is called, and we leave via .Lresume_userspace.
+ */
+ .pushsection .text.handle_ill,"ax"
+handle_ill:
+ finish_interrupt_save handle_ill
+
+ /*
+ * Check whether we are single stepping in user level. If so, then
+ * we need to restore the PC.
+ */
+ check_single_stepping ill, .Ldispatch_normal_ill
+
+ {
+ /* See if the PC is the 1st bundle in the buffer */
+ seq r25, r27, r26
+
+ /* Point to the 2nd bundle in the buffer */
+ addi r26, r26, 8
+ }
+ {
+ /* Point to the original pc */
+ addi r24, r29, SINGLESTEP_STATE_ORIG_PC_OFFSET
+
+ /* Branch if the PC is the 1st bundle in the buffer */
+ bnz r25, 3f
+ }
+ {
+ /* See if the PC is the 2nd bundle of the buffer */
+ seq r25, r27, r26
+
+ /* Set PC to next instruction */
+ addi r24, r29, SINGLESTEP_STATE_NEXT_PC_OFFSET
+ }
+ {
+ /* Point to flags */
+ addi r25, r29, SINGLESTEP_STATE_FLAGS_OFFSET
+
+ /*
+ * Branch if the PC is not in the second bundle, i.e. the
+ * seq result above was zero. (This presumably relies on bz
+ * reading r25 before the addi in this bundle overwrites it.)
+ */
+ bz r25, 2f
+ }
+ /* Load flags */
+ lw r25, r25
+ {
+ /*
+ * Get the offset for the register to restore
+ * Note: the lower bound is 2, so we have implicit scaling by 4.
+ * No multiplication of the register number by the size of a register
+ * is needed.
+ */
+ mm r27, r25, zero, SINGLESTEP_STATE_TARGET_LB, \
+ SINGLESTEP_STATE_TARGET_UB
+
+ /* Mask Rewrite_LR */
+ andi r25, r25, SINGLESTEP_STATE_MASK_UPDATE
+ }
+ {
+ addi r29, r29, SINGLESTEP_STATE_UPDATE_VALUE_OFFSET
+
+ /* Don't rewrite temp register */
+ bz r25, 3f
+ }
+ {
+ /* Get the temp value */
+ lw r29, r29
+
+ /* Point to where the register is stored */
+ add r27, r27, sp
+ }
+
+ /* Add in the C ABI save area size to the register offset */
+ addi r27, r27, C_ABI_SAVE_AREA_SIZE
+
+ /* Restore the user's register with the temp value */
+ sw r27, r29
+ j 3f
+
+2:
+ /* Must be in the third bundle */
+ addi r24, r29, SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET
+
+3:
+ /* set PC and continue */
+ lw r26, r24
+ {
+ sw r28, r26
+ GET_THREAD_INFO(r0)
+ }
+
+ /*
+ * Clear TIF_SINGLESTEP to prevent recursion if we execute an ill.
+ * The normal non-arch flow redundantly clears TIF_SINGLESTEP, but we
+ * need to clear it here and can't really impose on all other arches.
+ * So what's another write between friends?
+ */
+
+ addi r1, r0, THREAD_INFO_FLAGS_OFFSET
+ {
+ lw r2, r1
+ addi r0, r0, THREAD_INFO_TASK_OFFSET /* currently a no-op */
+ }
+ andi r2, r2, ~_TIF_SINGLESTEP
+ sw r1, r2
+
+ /* Issue a sigtrap */
+ {
+ lw r0, r0 /* indirect thru thread_info to get task_info */
+ addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */
+ move r2, zero /* load error code into r2 */
+ }
+
+ jal send_sigtrap /* issue a SIGTRAP */
+ FEEDBACK_REENTER(handle_ill)
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
+
+.Ldispatch_normal_ill:
+ {
+ jalr r0
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ }
+ FEEDBACK_REENTER(handle_ill)
+ {
+ movei r30, 0 /* not an NMI */
+ j interrupt_return
+ }
+ STD_ENDPROC(handle_ill)
+
+/* Various stub interrupt handlers and syscall handlers */
+
+STD_ENTRY_LOCAL(_kernel_double_fault)
+ mfspr r1, SPR_EX_CONTEXT_K_0 /* r1 = EX_CONTEXT_K_0 (bad_intr below reports this SPR as the PC) */
+ move r2, lr /* r2 = lr at entry */
+ move r3, sp /* r3 = sp at entry */
+ move r4, r52 /* r4 = r52 at entry */
+ addi sp, sp, -C_ABI_SAVE_AREA_SIZE /* make room for the C ABI save area */
+ j kernel_double_fault /* tail-jump; r0 is passed through untouched */
+ STD_ENDPROC(_kernel_double_fault)
+
+STD_ENTRY_LOCAL(bad_intr)
+ mfspr r2, SPR_EX_CONTEXT_K_0 /* presumably the "PC" value for the message below -- confirm against the panic macro */
+ panic "Unhandled interrupt %#x: PC %#lx"
+ STD_ENDPROC(bad_intr)
+
+/*
+ * Put address of pt_regs in reg and jump.
+ *
+ * PTREGS_SYSCALL(x, reg) defines an entry point named _x that, in a
+ * single bundle, loads the pt_regs base pointer into "reg" and jumps
+ * to x.
+ */
+#define PTREGS_SYSCALL(x, reg) \
+ STD_ENTRY(_##x); \
+ { \
+ PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \
+ j x \
+ }; \
+ STD_ENDPROC(_##x)
+
+/*
+ * Special-case sigreturn to not write r0 to the stack on return.
+ * This is technically more efficient, but it also avoids difficulties
+ * in the 64-bit OS when handling 32-bit compat code, since we must not
+ * sign-extend r0 for the sigreturn return-value case.
+ *
+ * Like PTREGS_SYSCALL, but first advances lr by the distance from
+ * .Lhandle_syscall_link to .Lsyscall_sigreturn_skip, so that when x
+ * returns it lands past the store of r0 into pt_regs in handle_syscall.
+ */
+#define PTREGS_SYSCALL_SIGRETURN(x, reg) \
+ STD_ENTRY(_##x); \
+ addli lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \
+ { \
+ PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \
+ j x \
+ }; \
+ STD_ENDPROC(_##x)
+
+PTREGS_SYSCALL(sys_execve, r3) /* _sys_execve: pt_regs pointer in r3 */
+PTREGS_SYSCALL(sys_sigaltstack, r2) /* _sys_sigaltstack: pt_regs pointer in r2 */
+PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0) /* _sys_rt_sigreturn: pt_regs pointer in r0, skips r0 write-back */
+PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1) /* _sys_cmpxchg_badaddr: pt_regs pointer in r1 */
+
+/*
+ * Save additional callee-saves to pt_regs, put address in r4 and jump.
+ *
+ * The saving is done by the push_extra_callee_saves macro, invoked
+ * here with r4 as its register argument; control then transfers to
+ * sys_clone with a plain jump, so lr is left as the caller set it.
+ */
+STD_ENTRY(_sys_clone)
+ push_extra_callee_saves r4
+ j sys_clone
+ STD_ENDPROC(_sys_clone)
+
+/*
+ * This entrypoint is taken for the cmpxchg and atomic_update fast
+ * swints. We may wish to generalize it to other fast swints at some
+ * point, but for now there are just two very similar ones, which
+ * makes it faster.
+ *
+ * The fast swint code is designed to have a small footprint. It does
+ * not save or restore any GPRs, counting on the caller-save registers
+ * to be available to it on entry. It does not modify any callee-save
+ * registers (including "lr"). It does not check what PL it is being
+ * called at, so you'd better not call it other than at PL0.
+ * The <atomic.h> wrapper assumes it only clobbers r20-r29, so if
+ * it ever is necessary to use more registers, be aware.
+ *
+ * It does not use the stack, but since it might be re-interrupted by
+ * a page fault which would assume the stack was valid, it does
+ * save/restore the stack pointer and zero it out to make sure it gets reset.
+ * Since we always keep interrupts disabled, the hypervisor won't
+ * clobber our EX_CONTEXT_K_x registers, so we don't save/restore them
+ * (other than to advance the PC on return).
+ *
+ * We have to manually validate the user vs kernel address range
+ * (since at PL1 we can read/write both), and for performance reasons
+ * we don't allow cmpxchg on the fc000000 memory region, since we only
+ * validate that the user address is below PAGE_OFFSET.
+ *
+ * We place it in the __HEAD section to ensure it is relatively
+ * near to the intvec_SWINT_1 code (reachable by a conditional branch).
+ *
+ * Our use of ATOMIC_LOCK_REG here must match do_page_fault_ics().
+ *
+ * As we do in lib/atomic_asm_32.S, we bypass a store if the value we
+ * would store is the same as the value we just loaded.
+ */
+ __HEAD
+ .align 64
+ /* Align much later jump on the start of a cache line. */
+#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
+ nop
+#if PAGE_SIZE >= 0x10000
+ nop
+#endif
+#endif
+ENTRY(sys_cmpxchg)
+
+ /*
+ * Save "sp" and set it zero for any possible page fault.
+ *
+ * HACK: We want to both zero sp and check r0's alignment,
+ * so we do both at once. If "sp" becomes nonzero we
+ * know r0 is unaligned and branch to the error handler that
+ * restores sp, so this is OK.
+ *
+ * ICS is disabled right now so having a garbage but nonzero
+ * sp is OK, since we won't execute any faulting instructions
+ * when it is nonzero.
+ */
+ {
+ move r27, sp
+ andi sp, r0, 3
+ }
+
+ /*
+ * Get the lock address in ATOMIC_LOCK_REG, and also validate that the
+ * address is less than PAGE_OFFSET, since that won't trap at PL1.
+ * We only use bits less than PAGE_SHIFT to avoid having to worry
+ * about aliasing among multiple mappings of the same physical page,
+ * and we ignore the low 3 bits so we have one lock that covers
+ * both a cmpxchg64() and a cmpxchg() on either its low or high word.
+ * NOTE: this must match __atomic_hashed_lock() in lib/atomic_32.c.
+ */
+
+#if (PAGE_OFFSET & 0xffff) != 0
+# error Code here assumes PAGE_OFFSET can be loaded with just hi16()
+#endif
+
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+ {
+ /* Check for unaligned input. */
+ bnz sp, .Lcmpxchg_badaddr
+ mm r25, r0, zero, 3, PAGE_SHIFT-1
+ }
+ {
+ crc32_32 r25, zero, r25
+ moveli r21, lo16(atomic_lock_ptr)
+ }
+ {
+ auli r21, r21, ha16(atomic_lock_ptr)
+ auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */
+ }
+ {
+ shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
+ slt_u r23, r0, r23
+ lw r26, r0 /* see comment in the "#else" for the "lw r26". */
+ }
+ {
+ s2a r21, r20, r21
+ bbns r23, .Lcmpxchg_badaddr
+ }
+ {
+ lw r21, r21
+ seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64
+ andi r25, r25, ATOMIC_HASH_L2_SIZE - 1
+ }
+ {
+ /* Branch away at this point if we're doing a 64-bit cmpxchg. */
+ bbs r23, .Lcmpxchg64
+ andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */
+ }
+ {
+ s2a ATOMIC_LOCK_REG_NAME, r25, r21
+ j .Lcmpxchg32_tns /* see comment in the #else for the jump. */
+ }
+
+#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+ {
+ /* Check for unaligned input. */
+ bnz sp, .Lcmpxchg_badaddr
+ auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */
+ }
+ {
+ /*
+ * Slide bits into position for 'mm'. We want to ignore
+ * the low 3 bits of r0, and consider only the next
+ * ATOMIC_HASH_SHIFT bits.
+ * Because of C pointer arithmetic, we want to compute this:
+ *
+ * ((char*)atomic_locks +
+ * (((r0 >> 3) & ((1 << ATOMIC_HASH_SHIFT) - 1)) << 2))
+ *
+ * Instead of two shifts we just ">> 1", and use 'mm'
+ * to ignore the low and high bits we don't want.
+ */
+ shri r25, r0, 1
+
+ slt_u r23, r0, r23
+
+ /*
+ * Ensure that the TLB is loaded before we take out the lock.
+ * On tilepro, this will start fetching the value all the way
+ * into our L1 as well (and if it gets modified before we
+ * grab the lock, it will be invalidated from our cache
+ * before we reload it). On tile64, we'll start fetching it
+ * into our L1 if we're the home, and if we're not, we'll
+ * still at least start fetching it into the home's L2.
+ */
+ lw r26, r0
+ }
+ {
+ auli r21, zero, ha16(atomic_locks)
+
+ bbns r23, .Lcmpxchg_badaddr
+ }
+#if PAGE_SIZE < 0x10000
+ /* atomic_locks is page-aligned so for big pages we don't need this. */
+ addli r21, r21, lo16(atomic_locks)
+#endif
+ {
+ /*
+ * Insert the hash bits into the page-aligned pointer.
+ * ATOMIC_HASH_SHIFT is so big that we don't actually hash
+ * the unmasked address bits, as that may cause unnecessary
+ * collisions.
+ */
+ mm ATOMIC_LOCK_REG_NAME, r25, r21, 2, (ATOMIC_HASH_SHIFT + 2) - 1
+
+ seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64
+ }
+ {
+ /* Branch away at this point if we're doing a 64-bit cmpxchg. */
+ bbs r23, .Lcmpxchg64
+ andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */
+ }
+ {
+ /*
+ * We very carefully align the code that actually runs with
+ * the lock held (twelve bundles) so that we know it is all in
+ * the icache when we start. This instruction (the jump) is
+ * at the start of the first cache line, address zero mod 64;
+ * we jump to the very end of the second cache line to get that
+ * line loaded in the icache, then fall through to issue the tns
+ * in the third cache line, at which point it's all cached.
+ * Note that this is for performance, not correctness.
+ */
+ j .Lcmpxchg32_tns
+ }
+
+#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+/* Symbol for do_page_fault_ics() to use to compare against the PC. */
+.global __sys_cmpxchg_grab_lock
+__sys_cmpxchg_grab_lock:
+
+ /*
+ * Perform the actual cmpxchg or atomic_update.
+ */
+.Ldo_cmpxchg32:
+ {
+ lw r21, r0
+ seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_atomic_update
+ move r24, r2
+ }
+ {
+ seq r22, r21, r1 /* See if cmpxchg matches. */
+ and r25, r21, r1 /* If atomic_update, compute (*mem & mask) */
+ }
+ {
+ or r22, r22, r23 /* Skip compare branch for atomic_update. */
+ add r25, r25, r2 /* Compute (*mem & mask) + addend. */
+ }
+ {
+ mvnz r24, r23, r25 /* Use atomic_update value if appropriate. */
+ bbns r22, .Lcmpxchg32_nostore
+ }
+ seq r22, r24, r21 /* Are we storing the value we loaded? */
+ bbs r22, .Lcmpxchg32_nostore
+ sw r0, r24
+
+ /* The following instruction is the start of the second cache line. */
+ /* Do slow mtspr here so the following "mf" waits less. */
+ {
+ move sp, r27
+ mtspr SPR_EX_CONTEXT_K_0, r28
+ }
+ mf
+
+ {
+ move r0, r21
+ sw ATOMIC_LOCK_REG_NAME, zero
+ }
+ iret
+
+ /* Duplicated code here in the case where we don't overlap "mf" */
+.Lcmpxchg32_nostore:
+ {
+ move r0, r21
+ sw ATOMIC_LOCK_REG_NAME, zero
+ }
+ {
+ move sp, r27
+ mtspr SPR_EX_CONTEXT_K_0, r28
+ }
+ iret
+
+ /*
+ * The locking code is the same for 32-bit cmpxchg/atomic_update,
+ * and for 64-bit cmpxchg. We provide it as a macro and put
+ * it into both versions. We can't share the code literally
+ * since it depends on having the right branch-back address.
+ */
+ .macro cmpxchg_lock, bitwidth
+
+ /* Lock; if we succeed, jump back up to the read-modify-write. */
+#ifdef CONFIG_SMP
+ tns r21, ATOMIC_LOCK_REG_NAME
+#else
+ /*
+ * Non-SMP preserves all the lock infrastructure, to keep the
+ * code simpler for the interesting (SMP) case. However, we do
+ * one small optimization here and in atomic_asm.S, which is
+ * to fake out acquiring the actual lock in the atomic_lock table.
+ */
+ movei r21, 0
+#endif
+
+ /* Issue the slow SPR here while the tns result is in flight. */
+ mfspr r28, SPR_EX_CONTEXT_K_0
+
+ {
+ addi r28, r28, 8 /* return to the instruction after the swint1 */
+ bzt r21, .Ldo_cmpxchg\bitwidth
+ }
+ /*
+ * The preceding instruction is the last thing that must be
+ * hot in the icache before we do the "tns" above.
+ */
+
+#ifdef CONFIG_SMP
+ /*
+ * We failed to acquire the tns lock on our first try. Now use
+ * bounded exponential backoff to retry, like __atomic_spinlock().
+ */
+ {
+ moveli r23, 2048 /* maximum backoff time in cycles */
+ moveli r25, 32 /* starting backoff time in cycles */
+ }
+1: mfspr r26, CYCLE_LOW /* get start point for this backoff */
+2: mfspr r22, CYCLE_LOW /* test to see if we've backed off enough */
+ sub r22, r22, r26
+ slt r22, r22, r25
+ bbst r22, 2b
+ {
+ shli r25, r25, 1 /* double the backoff; retry the tns */
+ tns r21, ATOMIC_LOCK_REG_NAME
+ }
+ slt r26, r23, r25 /* is the proposed backoff too big? */
+ {
+ mvnz r25, r26, r23
+ bzt r21, .Ldo_cmpxchg\bitwidth
+ }
+ j 1b
+#endif /* CONFIG_SMP */
+ .endm
+
+.Lcmpxchg32_tns:
+ /*
+ * This is the last instruction on the second cache line.
+ * The nop here loads the second line, then we fall through
+ * to the tns to load the third line before we take the lock.
+ */
+ nop
+ cmpxchg_lock 32
+
+ /*
+ * This code is invoked from sys_cmpxchg after most of the
+ * preconditions have been checked. We still need to check
+ * that r0 is 8-byte aligned, since if it's not we won't
+ * actually be atomic. However, ATOMIC_LOCK_REG has the atomic
+ * lock pointer and r27/r28 have the saved SP/PC.
+ * r23 is holding "r0 & 7" so we can test for alignment.
+ * The compare value is in r2/r3; the new value is in r4/r5.
+ * On return, we must put the old value in r0/r1.
+ */
+ .align 64
+.Lcmpxchg64:
+ {
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+ s2a ATOMIC_LOCK_REG_NAME, r25, r21
+#endif
+ bzt r23, .Lcmpxchg64_tns
+ }
+ j .Lcmpxchg_badaddr
+
+.Ldo_cmpxchg64:
+ {
+ lw r21, r0
+ addi r25, r0, 4
+ }
+ {
+ lw r1, r25
+ }
+ seq r26, r21, r2
+ {
+ bz r26, .Lcmpxchg64_mismatch
+ seq r26, r1, r3
+ }
+ {
+ bz r26, .Lcmpxchg64_mismatch
+ }
+ sw r0, r4
+ sw r25, r5
+
+ /*
+ * The 32-bit path provides optimized "match" and "mismatch"
+ * iret paths, but we don't have enough bundles in this cache line
+ * to do that, so we just make even the "mismatch" path do an "mf".
+ */
+.Lcmpxchg64_mismatch:
+ {
+ move sp, r27
+ mtspr SPR_EX_CONTEXT_K_0, r28
+ }
+ mf
+ {
+ move r0, r21
+ sw ATOMIC_LOCK_REG_NAME, zero
+ }
+ iret
+
+.Lcmpxchg64_tns:
+ cmpxchg_lock 64
+
+
+ /*
+ * Reset sp and revector to sys_cmpxchg_badaddr(), which will
+ * just raise the appropriate signal and exit. Doing it this
+ * way means we don't have to duplicate the code in intvec.S's
+ * int_hand macro that locates the top of the stack.
+ */
+.Lcmpxchg_badaddr:
+ {
+ moveli TREG_SYSCALL_NR_NAME, __NR_cmpxchg_badaddr
+ move sp, r27
+ }
+ j intvec_SWINT_1
+ ENDPROC(sys_cmpxchg)
+ ENTRY(__sys_cmpxchg_end)
+
+
+/* The single-step support may need to read all the registers. */
+int_unalign:
+ push_extra_callee_saves r0
+ j do_trap
+
+/* Include .intrpt1 array of interrupt vectors */
+ .section ".intrpt1", "ax"
+
+#define op_handle_perf_interrupt bad_intr
+#define op_handle_aux_perf_interrupt bad_intr
+
+#ifndef CONFIG_HARDWALL
+#define do_hardwall_trap bad_intr
+#endif
+
+ int_hand INT_ITLB_MISS, ITLB_MISS, \
+ do_page_fault, handle_interrupt_no_single_step
+ int_hand INT_MEM_ERROR, MEM_ERROR, bad_intr
+ int_hand INT_ILL, ILL, do_trap, handle_ill
+ int_hand INT_GPV, GPV, do_trap
+ int_hand INT_SN_ACCESS, SN_ACCESS, do_trap
+ int_hand INT_IDN_ACCESS, IDN_ACCESS, do_trap
+ int_hand INT_UDN_ACCESS, UDN_ACCESS, do_trap
+ int_hand INT_IDN_REFILL, IDN_REFILL, bad_intr
+ int_hand INT_UDN_REFILL, UDN_REFILL, bad_intr
+ int_hand INT_IDN_COMPLETE, IDN_COMPLETE, bad_intr
+ int_hand INT_UDN_COMPLETE, UDN_COMPLETE, bad_intr
+ int_hand INT_SWINT_3, SWINT_3, do_trap
+ int_hand INT_SWINT_2, SWINT_2, do_trap
+ int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall
+ int_hand INT_SWINT_0, SWINT_0, do_trap
+ int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign
+ int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault
+ int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault
+ int_hand INT_DMATLB_MISS, DMATLB_MISS, do_page_fault
+ int_hand INT_DMATLB_ACCESS, DMATLB_ACCESS, do_page_fault
+ int_hand INT_SNITLB_MISS, SNITLB_MISS, do_page_fault
+ int_hand INT_SN_NOTIFY, SN_NOTIFY, bad_intr
+ int_hand INT_SN_FIREWALL, SN_FIREWALL, do_hardwall_trap
+ int_hand INT_IDN_FIREWALL, IDN_FIREWALL, bad_intr
+ int_hand INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap
+ int_hand INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt
+ int_hand INT_IDN_TIMER, IDN_TIMER, bad_intr
+ int_hand INT_UDN_TIMER, UDN_TIMER, bad_intr
+ int_hand INT_DMA_NOTIFY, DMA_NOTIFY, bad_intr
+ int_hand INT_IDN_CA, IDN_CA, bad_intr
+ int_hand INT_UDN_CA, UDN_CA, bad_intr
+ int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr
+ int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr
+ int_hand INT_PERF_COUNT, PERF_COUNT, \
+ op_handle_perf_interrupt, handle_nmi
+ int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr
+#if CONFIG_KERNEL_PL == 2
+ dc_dispatch INT_INTCTRL_2, INTCTRL_2
+ int_hand INT_INTCTRL_1, INTCTRL_1, bad_intr
+#else
+ int_hand INT_INTCTRL_2, INTCTRL_2, bad_intr
+ dc_dispatch INT_INTCTRL_1, INTCTRL_1
+#endif
+ int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr
+ int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \
+ hv_message_intr
+ int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \
+ tile_dev_intr
+ int_hand INT_I_ASID, I_ASID, bad_intr
+ int_hand INT_D_ASID, D_ASID, bad_intr
+ int_hand INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \
+ do_page_fault
+ int_hand INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \
+ do_page_fault
+ int_hand INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \
+ do_page_fault
+ int_hand INT_SN_CPL, SN_CPL, bad_intr
+ int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+ int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \
+ op_handle_aux_perf_interrupt, handle_nmi
+#endif
+
+ /* Synthetic interrupt delivered only by the simulator */
+ int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
new file mode 100644
index 00000000..30ae76e5
--- /dev/null
+++ b/arch/tile/kernel/intvec_64.S
@@ -0,0 +1,1289 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Linux interrupt vectors.
+ */
+
+#include <linux/linkage.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/irqflags.h>
+#include <asm/asm-offsets.h>
+#include <asm/types.h>
+#include <asm/signal.h>
+#include <hv/hypervisor.h>
+#include <arch/abi.h>
+#include <arch/interrupts.h>
+#include <arch/spr_def.h>
+
+#ifdef CONFIG_PREEMPT
+# error "No support for kernel preemption currently"
+#endif
+
+#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg)
+
+#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
+
+
+ .macro push_reg reg, ptr=sp, delta=-8
+ {
+ st \ptr, \reg
+ addli \ptr, \ptr, \delta
+ }
+ .endm
+
+ .macro pop_reg reg, ptr=sp, delta=8
+ {
+ ld \reg, \ptr
+ addli \ptr, \ptr, \delta
+ }
+ .endm
+
+ .macro pop_reg_zero reg, zreg, ptr=sp, delta=8
+ {
+ move \zreg, zero
+ ld \reg, \ptr
+ addi \ptr, \ptr, \delta
+ }
+ .endm
+
+ .macro push_extra_callee_saves reg
+ PTREGS_PTR(\reg, PTREGS_OFFSET_REG(51))
+ push_reg r51, \reg
+ push_reg r50, \reg
+ push_reg r49, \reg
+ push_reg r48, \reg
+ push_reg r47, \reg
+ push_reg r46, \reg
+ push_reg r45, \reg
+ push_reg r44, \reg
+ push_reg r43, \reg
+ push_reg r42, \reg
+ push_reg r41, \reg
+ push_reg r40, \reg
+ push_reg r39, \reg
+ push_reg r38, \reg
+ push_reg r37, \reg
+ push_reg r36, \reg
+ push_reg r35, \reg
+ push_reg r34, \reg, PTREGS_OFFSET_BASE - PTREGS_OFFSET_REG(34)
+ .endm
+
+ .macro panic str
+ .pushsection .rodata, "a"
+1:
+ .asciz "\str"
+ .popsection
+ {
+ moveli r0, hw2_last(1b)
+ }
+ {
+ shl16insli r0, r0, hw1(1b)
+ }
+ {
+ shl16insli r0, r0, hw0(1b)
+ jal panic
+ }
+ .endm
+
+
+#ifdef __COLLECT_LINKER_FEEDBACK__
+ .pushsection .text.intvec_feedback,"ax"
+intvec_feedback:
+ .popsection
+#endif
+
+ /*
+ * Default interrupt handler.
+ *
+ * vecnum is where we'll put this code.
+ * c_routine is the C routine we'll call.
+ *
+ * The C routine is passed two arguments:
+ * - A pointer to the pt_regs state.
+ * - The interrupt vector number.
+ *
+ * The "processing" argument specifies the code for processing
+ * the interrupt. Defaults to "handle_interrupt".
+ */
+ .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt
+ .org (\vecnum << 8)
+intvec_\vecname:
+ /* Temporarily save a register so we have somewhere to work. */
+
+ mtspr SPR_SYSTEM_SAVE_K_1, r0
+ mfspr r0, SPR_EX_CONTEXT_K_1
+
+ andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+
+ .ifc \vecnum, INT_DOUBLE_FAULT
+ /*
+ * For double-faults from user-space, fall through to the normal
+ * register save and stack setup path. Otherwise, it's the
+ * hypervisor giving us one last chance to dump diagnostics, and we
+ * branch to the kernel_double_fault routine to do so.
+ */
+ beqz r0, 1f
+ j _kernel_double_fault
+1:
+ .else
+ /*
+ * If we're coming from user-space, then set sp to the top of
+ * the kernel stack. Otherwise, assume sp is already valid.
+ */
+ {
+ bnez r0, 0f
+ move r0, sp
+ }
+ .endif
+
+ .ifc \c_routine, do_page_fault
+ /*
+ * The page_fault handler may be downcalled directly by the
+ * hypervisor even when Linux is running and has ICS set.
+ *
+ * In this case the contents of EX_CONTEXT_K_1 reflect the
+ * previous fault and can't be relied on to choose whether or
+ * not to reinitialize the stack pointer. So we add a test
+ * to see whether SYSTEM_SAVE_K_2 has the high bit set,
+ * and if so we don't reinitialize sp, since we must be coming
+ * from Linux. (In fact the precise case is !(val & ~1),
+ * but any Linux PC has to have the high bit set.)
+ *
+ * Note that the hypervisor *always* sets SYSTEM_SAVE_K_2 for
+ * any path that turns into a downcall to one of our TLB handlers.
+ *
+ * FIXME: if we end up never using this path, perhaps we should
+ * prevent the hypervisor from generating downcalls in this case.
+ * The advantage of getting a downcall is we can panic in Linux.
+ */
+ mfspr r0, SPR_SYSTEM_SAVE_K_2
+ {
+ bltz r0, 0f /* high bit in S_S_1_2 is for a PC to use */
+ move r0, sp
+ }
+ .endif
+
+
+ /*
+ * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and
+ * the current stack top in the higher bits. So we recover
+ * our stack top by just masking off the low bits, then
+ * point sp at the top aligned address on the actual stack page.
+ */
+ mfspr r0, SPR_SYSTEM_SAVE_K_0
+ mm r0, zero, LOG2_THREAD_SIZE, 63
+
+0:
+ /*
+ * Align the stack mod 64 so we can properly predict what
+ * cache lines we need to write-hint to reduce memory fetch
+ * latency as we enter the kernel. The layout of memory is
+ * as follows, with cache line 0 at the lowest VA, and cache
+ * line 8 just below the r0 value this "andi" computes.
+ * Note that we never write to cache line 8, and we skip
+ * cache lines 1-3 for syscalls.
+ *
+ * cache line 8: ptregs padding (two words)
+ * cache line 7: sp, lr, pc, ex1, faultnum, orig_r0, flags, cmpexch
+ * cache line 6: r46...r53 (tp)
+ * cache line 5: r38...r45
+ * cache line 4: r30...r37
+ * cache line 3: r22...r29
+ * cache line 2: r14...r21
+ * cache line 1: r6...r13
+ * cache line 0: 2 x frame, r0..r5
+ */
+ andi r0, r0, -64
+
+ /*
+ * Push the first four registers on the stack, so that we can set
+ * them to vector-unique values before we jump to the common code.
+ *
+ * Registers are pushed on the stack as a struct pt_regs,
+ * with the sp initially just above the struct, and when we're
+ * done, sp points to the base of the struct, minus
+ * C_ABI_SAVE_AREA_SIZE, so we can directly jal to C code.
+ *
+ * This routine saves just the first four registers, plus the
+ * stack context so we can do proper backtracing right away,
+ * and defers to handle_interrupt to save the rest.
+ * The backtracer needs pc, ex1, lr, sp, r52, and faultnum.
+ */
+ addli r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP)
+ wh64 r0 /* cache line 7 */
+ {
+ st r0, lr
+ addli r0, r0, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR
+ }
+ {
+ st r0, sp
+ addli sp, r0, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_SP
+ }
+ wh64 sp /* cache line 6 */
+ {
+ st sp, r52
+ addli sp, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(52)
+ }
+ wh64 sp /* cache line 0 */
+ {
+ st sp, r1
+ addli sp, sp, PTREGS_OFFSET_REG(2) - PTREGS_OFFSET_REG(1)
+ }
+ {
+ st sp, r2
+ addli sp, sp, PTREGS_OFFSET_REG(3) - PTREGS_OFFSET_REG(2)
+ }
+ {
+ st sp, r3
+ addli sp, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(3)
+ }
+ mfspr r0, SPR_EX_CONTEXT_K_0
+ .ifc \processing,handle_syscall
+ /*
+ * Bump the saved PC by one bundle so that when we return, we won't
+ * execute the same swint instruction again. We need to do this while
+ * we're in the critical section.
+ */
+ addi r0, r0, 8
+ .endif
+ {
+ st sp, r0
+ addli sp, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
+ }
+ mfspr r0, SPR_EX_CONTEXT_K_1
+ {
+ st sp, r0
+ addi sp, sp, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1
+ /*
+ * Use r0 for syscalls so it's a temporary; use r1 for interrupts
+ * so that it gets passed through unchanged to the handler routine.
+ * Note that the .if conditional confusingly spans bundles.
+ */
+ .ifc \processing,handle_syscall
+ movei r0, \vecnum
+ }
+ {
+ st sp, r0
+ .else
+ movei r1, \vecnum
+ }
+ {
+ st sp, r1
+ .endif
+ addli sp, sp, PTREGS_OFFSET_REG(0) - PTREGS_OFFSET_FAULTNUM
+ }
+ mfspr r0, SPR_SYSTEM_SAVE_K_1 /* Original r0 */
+ {
+ st sp, r0
+ addi sp, sp, -PTREGS_OFFSET_REG(0) - 8
+ }
+ {
+ st sp, zero /* write zero into "Next SP" frame pointer */
+ addi sp, sp, -8 /* leave SP pointing at bottom of frame */
+ }
+ .ifc \processing,handle_syscall
+ j handle_syscall
+ .else
+ /* Capture per-interrupt SPR context to registers. */
+ .ifc \c_routine, do_page_fault
+ mfspr r2, SPR_SYSTEM_SAVE_K_3 /* address of page fault */
+ mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */
+ .else
+ .ifc \vecnum, INT_ILL_TRANS
+ mfspr r2, ILL_TRANS_REASON
+ .else
+ .ifc \vecnum, INT_DOUBLE_FAULT
+ mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */
+ .else
+ .ifc \c_routine, do_trap
+ mfspr r2, GPV_REASON
+ .else
+ .ifc \c_routine, op_handle_perf_interrupt
+ mfspr r2, PERF_COUNT_STS
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+ .else
+ .ifc \c_routine, op_handle_aux_perf_interrupt
+ mfspr r2, AUX_PERF_COUNT_STS
+ .endif
+#endif
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ /* Put function pointer in r0 */
+ moveli r0, hw2_last(\c_routine)
+ shl16insli r0, r0, hw1(\c_routine)
+ {
+ shl16insli r0, r0, hw0(\c_routine)
+ j \processing
+ }
+ .endif
+ ENDPROC(intvec_\vecname)
+
+#ifdef __COLLECT_LINKER_FEEDBACK__
+ .pushsection .text.intvec_feedback,"ax"
+ .org (\vecnum << 5)
+ FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8)
+ jrp lr
+ .popsection
+#endif
+
+ .endm
+
+
+ /*
+ * Save the rest of the registers that we didn't save in the actual
+ * vector itself. We can't use r0-r10 inclusive here.
+ */
+ .macro finish_interrupt_save, function
+
+ /* If it's a syscall, save a proper orig_r0, otherwise just zero. */
+ PTREGS_PTR(r52, PTREGS_OFFSET_ORIG_R0)
+ {
+ .ifc \function,handle_syscall
+ st r52, r0
+ .else
+ st r52, zero
+ .endif
+ PTREGS_PTR(r52, PTREGS_OFFSET_TP)
+ }
+ st r52, tp
+ {
+ mfspr tp, CMPEXCH_VALUE
+ PTREGS_PTR(r52, PTREGS_OFFSET_CMPEXCH)
+ }
+
+ /*
+ * For ordinary syscalls, we save neither caller- nor callee-
+ * save registers, since the syscall invoker doesn't expect the
+ * caller-saves to be saved, and the called kernel functions will
+ * take care of saving the callee-saves for us.
+ *
+ * For interrupts we save just the caller-save registers. Saving
+ * them is required (since the "caller" can't save them). Again,
+ * the called kernel functions will restore the callee-save
+ * registers for us appropriately.
+ *
+ * On return, we normally restore nothing special for syscalls,
+ * and just the caller-save registers for interrupts.
+ *
+ * However, there are some important caveats to all this:
+ *
+ * - We always save a few callee-save registers to give us
+ * some scratchpad registers to carry across function calls.
+ *
+ * - fork/vfork/etc require us to save all the callee-save
+ * registers, which we do in PTREGS_SYSCALL_ALL_REGS, below.
+ *
+ * - We always save r0..r5 and r10 for syscalls, since we need
+ * to reload them a bit later for the actual kernel call, and
+ * since we might need them for -ERESTARTNOINTR, etc.
+ *
+ * - Before invoking a signal handler, we save the unsaved
+ * callee-save registers so they are visible to the
+ * signal handler or any ptracer.
+ *
+ * - If the unsaved callee-save registers are modified, we set
+ * a bit in pt_regs so we know to reload them from pt_regs
+ * and not just rely on the kernel function unwinding.
+ * (Done for ptrace register writes and SA_SIGINFO handler.)
+ */
+ {
+ st r52, tp
+ PTREGS_PTR(r52, PTREGS_OFFSET_REG(33))
+ }
+ wh64 r52 /* cache line 4 */
+ push_reg r33, r52
+ push_reg r32, r52
+ push_reg r31, r52
+ .ifc \function,handle_syscall
+ push_reg r30, r52, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(30)
+ push_reg TREG_SYSCALL_NR_NAME, r52, \
+ PTREGS_OFFSET_REG(5) - PTREGS_OFFSET_SYSCALL
+ .else
+
+ push_reg r30, r52, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(30)
+ wh64 r52 /* cache line 3 */
+ push_reg r29, r52
+ push_reg r28, r52
+ push_reg r27, r52
+ push_reg r26, r52
+ push_reg r25, r52
+ push_reg r24, r52
+ push_reg r23, r52
+ push_reg r22, r52
+ wh64 r52 /* cache line 2 */
+ push_reg r21, r52
+ push_reg r20, r52
+ push_reg r19, r52
+ push_reg r18, r52
+ push_reg r17, r52
+ push_reg r16, r52
+ push_reg r15, r52
+ push_reg r14, r52
+ wh64 r52 /* cache line 1 */
+ push_reg r13, r52
+ push_reg r12, r52
+ push_reg r11, r52
+ push_reg r10, r52
+ push_reg r9, r52
+ push_reg r8, r52
+ push_reg r7, r52
+ push_reg r6, r52
+
+ .endif
+
+ push_reg r5, r52
+ st r52, r4
+
+ /* Load tp with our per-cpu offset. */
+#ifdef CONFIG_SMP
+ {
+ mfspr r20, SPR_SYSTEM_SAVE_K_0
+ moveli r21, hw2_last(__per_cpu_offset)
+ }
+ {
+ shl16insli r21, r21, hw1(__per_cpu_offset)
+ bfextu r20, r20, 0, LOG2_THREAD_SIZE-1
+ }
+ shl16insli r21, r21, hw0(__per_cpu_offset)
+ shl3add r20, r20, r21
+ ld tp, r20
+#else
+ move tp, zero
+#endif
+
+ /*
+ * If we will be returning to the kernel, we will need to
+ * reset the interrupt masks to the state they had before.
+ * Set DISABLE_IRQ in flags iff we came from PL1 with irqs disabled.
+ */
+ mfspr r32, SPR_EX_CONTEXT_K_1
+ {
+ andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS)
+ }
+ beqzt r32, 1f /* zero if from user space */
+ IRQS_DISABLED(r32) /* zero if irqs enabled */
+#if PT_FLAGS_DISABLE_IRQ != 1
+# error Value of IRQS_DISABLED used to set PT_FLAGS_DISABLE_IRQ; fix
+#endif
+1:
+ .ifnc \function,handle_syscall
+ /* Record the fact that we saved the caller-save registers above. */
+ ori r32, r32, PT_FLAGS_CALLER_SAVES
+ .endif
+ st r21, r32
+
+#ifdef __COLLECT_LINKER_FEEDBACK__
+ /*
+ * Notify the feedback routines that we were in the
+ * appropriate fixed interrupt vector area. Note that we
+ * still have ICS set at this point, so we can't invoke any
+ * atomic operations or we will panic. The feedback
+ * routines internally preserve r0..r10 and r30 up.
+ */
+ .ifnc \function,handle_syscall
+ shli r20, r1, 5
+ .else
+ moveli r20, INT_SWINT_1 << 5
+ .endif
+ moveli r21, hw2_last(intvec_feedback)
+ shl16insli r21, r21, hw1(intvec_feedback)
+ shl16insli r21, r21, hw0(intvec_feedback)
+ add r20, r20, r21
+ jalr r20
+
+ /* And now notify the feedback routines that we are here. */
+ FEEDBACK_ENTER(\function)
+#endif
+
+ /*
+ * We've captured enough state to the stack (including in
+ * particular our EX_CONTEXT state) that we can now release
+ * the interrupt critical section and replace it with our
+ * standard "interrupts disabled" mask value. This allows
+ * synchronous interrupts (and profile interrupts) to punch
+ * through from this point onwards.
+ */
+ .ifc \function,handle_nmi
+ IRQ_DISABLE_ALL(r20)
+ .else
+ IRQ_DISABLE(r20, r21)
+ .endif
+ mtspr INTERRUPT_CRITICAL_SECTION, zero
+
+ /*
+ * Prepare the first 256 stack bytes to be rapidly accessible
+ * without having to fetch the background data.
+ */
+ addi r52, sp, -64
+ {
+ wh64 r52
+ addi r52, r52, -64
+ }
+ {
+ wh64 r52
+ addi r52, r52, -64
+ }
+ {
+ wh64 r52
+ addi r52, r52, -64
+ }
+ wh64 r52
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ .ifnc \function,handle_nmi
+ /*
+ * We finally have enough state set up to notify the irq
+ * tracing code that irqs were disabled on entry to the handler.
+ * The TRACE_IRQS_OFF call clobbers registers r0-r29.
+ * For syscalls, we already have the register state saved away
+ * on the stack, so we don't bother to do any register saves here,
+ * and later we pop the registers back off the kernel stack.
+ * For interrupt handlers, save r0-r3 in callee-saved registers.
+ */
+ .ifnc \function,handle_syscall
+ { move r30, r0; move r31, r1 }
+ { move r32, r2; move r33, r3 }
+ .endif
+ TRACE_IRQS_OFF
+ .ifnc \function,handle_syscall
+ { move r0, r30; move r1, r31 }
+ { move r2, r32; move r3, r33 }
+ .endif
+ .endif
+#endif
+
+ .endm
+
+ /*
+ * Redispatch a downcall.
+ */
+ .macro dc_dispatch vecnum, vecname
+ .org (\vecnum << 8)
+intvec_\vecname:
+ j hv_downcall_dispatch
+ ENDPROC(intvec_\vecname)
+ .endm
+
+ /*
+ * Common code for most interrupts. The C function we're eventually
+ * going to is in r0, and the faultnum is in r1; the original
+ * values for those registers are on the stack.
+ */
+ .pushsection .text.handle_interrupt,"ax"
+handle_interrupt:
+ finish_interrupt_save handle_interrupt
+
+ /* Jump to the C routine; it should enable irqs as soon as possible. */
+ {
+ jalr r0
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ }
+ FEEDBACK_REENTER(handle_interrupt)
+ {
+ movei r30, 0 /* not an NMI */
+ j interrupt_return
+ }
+ STD_ENDPROC(handle_interrupt)
+
+/*
+ * This routine takes a boolean in r30 indicating if this is an NMI.
+ * If so, we also expect a boolean in r31 indicating whether to
+ * re-enable the oprofile interrupts.
+ *
+ * Note that .Lresume_userspace is jumped to directly in several
+ * places, and we need to make sure r30 is set correctly in those
+ * callers as well.
+ */
+STD_ENTRY(interrupt_return)
+ /* If we're resuming to kernel space, don't check thread flags. */
+ {
+ bnez r30, .Lrestore_all /* NMIs don't special-case user-space */
+ PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
+ }
+ ld r29, r29
+ andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ {
+ beqzt r29, .Lresume_userspace
+ PTREGS_PTR(r29, PTREGS_OFFSET_PC)
+ }
+
+ /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */
+ moveli r27, hw2_last(_cpu_idle_nap)
+ {
+ ld r28, r29
+ shl16insli r27, r27, hw1(_cpu_idle_nap)
+ }
+ {
+ shl16insli r27, r27, hw0(_cpu_idle_nap)
+ }
+ {
+ cmpeq r27, r27, r28
+ }
+ {
+ blbc r27, .Lrestore_all
+ addi r28, r28, 8
+ }
+ st r29, r28
+ j .Lrestore_all
+
+.Lresume_userspace:
+ FEEDBACK_REENTER(interrupt_return)
+
+ /*
+ * Use r33 to hold whether we have already loaded the callee-saves
+ * into ptregs. We don't want to do it twice in this loop, since
+ * then we'd clobber whatever changes are made by ptrace, etc.
+ */
+ {
+ movei r33, 0
+ move r32, sp
+ }
+
+ /* Get base of stack in r32. */
+ EXTRACT_THREAD_INFO(r32)
+
+.Lretry_work_pending:
+ /*
+ * Disable interrupts so as to make sure we don't
+ * miss an interrupt that sets any of the thread flags (like
+ * need_resched or sigpending) between sampling and the iret.
+ * Routines like schedule() or do_signal() may re-enable
+ * interrupts before returning.
+ */
+ IRQ_DISABLE(r20, r21)
+ TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */
+
+
+ /* Check to see if there is any work to do before returning to user. */
+ {
+ addi r29, r32, THREAD_INFO_FLAGS_OFFSET
+ moveli r1, hw1_last(_TIF_ALLWORK_MASK)
+ }
+ {
+ ld r29, r29
+ shl16insli r1, r1, hw0(_TIF_ALLWORK_MASK)
+ }
+ and r1, r29, r1
+ beqzt r1, .Lrestore_all
+
+ /*
+ * Make sure we have all the registers saved for signal
+ * handling or notify-resume. Call out to C code to figure out
+ * exactly what we need to do for each flag bit, then if
+ * necessary, reload the flags and recheck.
+ */
+ {
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ bnez r33, 1f
+ }
+ push_extra_callee_saves r0
+ movei r33, 1
+1: jal do_work_pending
+ bnez r0, .Lretry_work_pending
+
+ /*
+ * In the NMI case we
+ * omit the call to single_process_check_nohz, which normally checks
+ * to see if we should start or stop the scheduler tick, because
+ * we can't call arbitrary Linux code from an NMI context.
+ * We always call the homecache TLB deferral code to re-trigger
+ * the deferral mechanism.
+ *
+ * The other chunk of responsibility this code has is to reset the
+ * interrupt masks appropriately to reset irqs and NMIs. We have
+ * to call TRACE_IRQS_OFF and TRACE_IRQS_ON to support all the
+ * lockdep-type stuff, but we can't set ICS until afterwards, since
+ * ICS can only be used in very tight chunks of code to avoid
+ * tripping over various assertions that it is off.
+ */
+.Lrestore_all:
+ PTREGS_PTR(r0, PTREGS_OFFSET_EX1)
+ {
+ ld r0, r0
+ PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS)
+ }
+ {
+ andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK
+ ld r32, r32
+ }
+ bnez r0, 1f
+ j 2f
+#if PT_FLAGS_DISABLE_IRQ != 1
+# error Assuming PT_FLAGS_DISABLE_IRQ == 1 so we can use blbct below
+#endif
+1: blbct r32, 2f
+ IRQ_DISABLE(r20,r21)
+ TRACE_IRQS_OFF
+ movei r0, 1
+ mtspr INTERRUPT_CRITICAL_SECTION, r0
+ beqzt r30, .Lrestore_regs
+ j 3f
+2: TRACE_IRQS_ON
+ movei r0, 1
+ mtspr INTERRUPT_CRITICAL_SECTION, r0
+ IRQ_ENABLE(r20, r21)
+ beqzt r30, .Lrestore_regs
+3:
+
+
+ /*
+ * We now commit to returning from this interrupt, since we will be
+ * doing things like setting EX_CONTEXT SPRs and unwinding the stack
+ * frame. No calls should be made to any other code after this point.
+ * This code should only be entered with ICS set.
+ * r32 must still be set to ptregs.flags.
+ * We launch loads to each cache line separately first, so we can
+ * get some parallelism out of the memory subsystem.
+ * We start zeroing caller-saved registers throughout, since
+ * that will save some cycles if this turns out to be a syscall.
+ */
+.Lrestore_regs:
+ FEEDBACK_REENTER(interrupt_return) /* called from elsewhere */
+
+ /*
+ * Rotate so we have one high bit and one low bit to test.
+ * - low bit says whether to restore all the callee-saved registers,
+ * or just r30-r33, and r52 up.
+ * - high bit (i.e. sign bit) says whether to restore all the
+ * caller-saved registers, or just r0.
+ */
+#if PT_FLAGS_CALLER_SAVES != 2 || PT_FLAGS_RESTORE_REGS != 4
+# error Rotate trick does not work :-)
+#endif
+ {
+ rotli r20, r32, 62
+ PTREGS_PTR(sp, PTREGS_OFFSET_REG(0))
+ }
+
+ /*
+ * Load cache lines 0, 4, 6 and 7, in that order, then use
+ * the last loaded value, which makes it likely that the other
+ * cache lines have also loaded, at which point we should be
+ * able to safely read all the remaining words on those cache
+ * lines without waiting for the memory subsystem.
+ */
+ pop_reg r0, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0)
+ pop_reg r30, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_REG(30)
+ pop_reg_zero r52, r3, sp, PTREGS_OFFSET_CMPEXCH - PTREGS_OFFSET_REG(52)
+ pop_reg_zero r21, r27, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_CMPEXCH
+ pop_reg_zero lr, r2, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_EX1
+ {
+ mtspr CMPEXCH_VALUE, r21
+ move r4, zero
+ }
+ pop_reg r21, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_PC
+ {
+ mtspr SPR_EX_CONTEXT_K_1, lr
+ andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ }
+ {
+ mtspr SPR_EX_CONTEXT_K_0, r21
+ move r5, zero
+ }
+
+ /* Restore callee-saveds that we actually use. */
+ pop_reg_zero r31, r6
+ pop_reg_zero r32, r7
+ pop_reg_zero r33, r8, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(33)
+
+ /*
+ * If we modified other callee-saveds, restore them now.
+ * This is rare, but could be via ptrace or signal handler.
+ */
+ {
+ move r9, zero
+ blbs r20, .Lrestore_callees
+ }
+.Lcontinue_restore_regs:
+
+ /* Check if we're returning from a syscall. */
+ {
+ move r10, zero
+ bltzt r20, 1f /* no, so go restore callee-save registers */
+ }
+
+ /*
+ * Check if we're returning to userspace.
+ * Note that if we're not, we don't worry about zeroing everything.
+ */
+ {
+ addli sp, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(29)
+ bnez lr, .Lkernel_return
+ }
+
+ /*
+ * On return from syscall, we've restored r0 from pt_regs, but we
+ * clear the remainder of the caller-saved registers. We could
+ * restore the syscall arguments, but there's not much point,
+ * and it ensures user programs aren't trying to use the
+ * caller-saves if we clear them, as well as avoiding leaking
+ * kernel pointers into userspace.
+ */
+ pop_reg_zero lr, r11, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR
+ pop_reg_zero tp, r12, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP
+ {
+ ld sp, sp
+ move r13, zero
+ move r14, zero
+ }
+ { move r15, zero; move r16, zero }
+ { move r17, zero; move r18, zero }
+ { move r19, zero; move r20, zero }
+ { move r21, zero; move r22, zero }
+ { move r23, zero; move r24, zero }
+ { move r25, zero; move r26, zero }
+
+ /* Set r1 to errno if we are returning an error, otherwise zero. */
+ {
+ moveli r29, 4096
+ sub r1, zero, r0
+ }
+ {
+ move r28, zero
+ cmpltu r29, r1, r29
+ }
+ {
+ mnz r1, r29, r1
+ move r29, zero
+ }
+ iret
+
+ /*
+ * Not a syscall, so restore caller-saved registers.
+ * First kick off loads for cache lines 1-3, which we're touching
+ * for the first time here.
+ */
+ .align 64
+1: pop_reg r29, sp, PTREGS_OFFSET_REG(21) - PTREGS_OFFSET_REG(29)
+ pop_reg r21, sp, PTREGS_OFFSET_REG(13) - PTREGS_OFFSET_REG(21)
+ pop_reg r13, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(13)
+ pop_reg r1
+ pop_reg r2
+ pop_reg r3
+ pop_reg r4
+ pop_reg r5
+ pop_reg r6
+ pop_reg r7
+ pop_reg r8
+ pop_reg r9
+ pop_reg r10
+ pop_reg r11
+ pop_reg r12, sp, 16
+ /* r13 already restored above */
+ pop_reg r14
+ pop_reg r15
+ pop_reg r16
+ pop_reg r17
+ pop_reg r18
+ pop_reg r19
+ pop_reg r20, sp, 16
+ /* r21 already restored above */
+ pop_reg r22
+ pop_reg r23
+ pop_reg r24
+ pop_reg r25
+ pop_reg r26
+ pop_reg r27
+ pop_reg r28, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(28)
+ /* r29 already restored above */
+ bnez lr, .Lkernel_return
+ pop_reg lr, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR
+ pop_reg tp, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP
+ ld sp, sp
+ iret
+
+ /*
+ * We can't restore tp when in kernel mode, since a thread might
+ * have migrated from another cpu and brought a stale tp value.
+ */
+.Lkernel_return:
+ pop_reg lr, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR
+ ld sp, sp
+ iret
+
+ /* Restore callee-saved registers from r34 to r51. */
+.Lrestore_callees:
+ addli sp, sp, PTREGS_OFFSET_REG(34) - PTREGS_OFFSET_REG(29)
+ pop_reg r34
+ pop_reg r35
+ pop_reg r36
+ pop_reg r37
+ pop_reg r38
+ pop_reg r39
+ pop_reg r40
+ pop_reg r41
+ pop_reg r42
+ pop_reg r43
+ pop_reg r44
+ pop_reg r45
+ pop_reg r46
+ pop_reg r47
+ pop_reg r48
+ pop_reg r49
+ pop_reg r50
+ pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51)
+ j .Lcontinue_restore_regs
+ STD_ENDPROC(interrupt_return)
+
+ /*
+ * "NMI" interrupts mask ALL interrupts before calling the
+ * handler, and don't check thread flags, etc., on the way
+ * back out. In general, the only things we do here for NMIs
+ * are register save/restore and dataplane kernel-TLB management.
+ * We don't (for example) deal with start/stop of the sched tick.
+ *
+ * The handler address is taken from r0, and the handler is passed
+ * the pt_regs pointer in r0. We then enter interrupt_return with
+ * r30 set to 1; the syscall and fork paths in this file enter the
+ * return code with r30 set to 0, meaning "not an NMI".
+ */
+ .pushsection .text.handle_nmi,"ax"
+handle_nmi:
+ finish_interrupt_save handle_nmi
+ {
+ jalr r0 /* NOTE(review): relies on r0 being read before the bundled write below -- confirm */
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ }
+ FEEDBACK_REENTER(handle_nmi)
+ {
+ movei r30, 1 /* flag the NMI return path */
+ move r31, r0 /* keep the handler's r0; its consumer is not visible here */
+ }
+ j interrupt_return
+ STD_ENDPROC(handle_nmi)
+
+ /*
+ * Parallel code for syscalls to handle_interrupt.
+ *
+ * Outline: bump the per-tile syscall counter, optionally call
+ * do_syscall_trace, reload the argument registers from pt_regs,
+ * range-check the syscall number against __NR_syscalls, call through
+ * sys_call_table (or compat_sys_call_table when the low bit of the
+ * thread_info status word is set), store the result into the saved
+ * r0, optionally trace / report single-step, and finally join the
+ * interrupt return path at .Lresume_userspace with r30 == 0.
+ */
+ .pushsection .text.handle_syscall,"ax"
+handle_syscall:
+ finish_interrupt_save handle_syscall
+
+ /* Enable irqs. */
+ TRACE_IRQS_ON
+ IRQ_ENABLE(r20, r21)
+
+ /* Bump the counter for syscalls made on this tile. */
+ moveli r20, hw2_last(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET)
+ shl16insli r20, r20, hw1(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET)
+ shl16insli r20, r20, hw0(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET)
+ add r20, r20, tp /* NOTE(review): tp presumably holds this cpu's per-cpu offset -- confirm */
+ ld4s r21, r20
+ {
+ addi r21, r21, 1
+ move r31, sp
+ }
+ {
+ st4 r20, r21
+ EXTRACT_THREAD_INFO(r31) /* presumably turns the sp copy in r31 into the thread_info pointer */
+ }
+
+ /*
+ * Trace syscalls, if requested.
+ * r31 is left pointing at the thread_info flags word; it is read
+ * again after the syscall returns. The branch bundle also loads
+ * r30 with the address of the thread_info status word.
+ * NOTE(review): the beqzt must see the trace-bit value of r30 from
+ * before the bundled addi overwrites it -- confirm bundle semantics.
+ */
+ addi r31, r31, THREAD_INFO_FLAGS_OFFSET
+ ld r30, r31
+ andi r30, r30, _TIF_SYSCALL_TRACE
+ {
+ addi r30, r31, THREAD_INFO_STATUS_OFFSET - THREAD_INFO_FLAGS_OFFSET
+ beqzt r30, .Lrestore_syscall_regs
+ }
+ jal do_syscall_trace
+ FEEDBACK_REENTER(handle_syscall)
+
+ /*
+ * We always reload our registers from the stack at this
+ * point. They might be valid, if we didn't build with
+ * TRACE_IRQFLAGS, and this isn't a dataplane tile, and we're not
+ * doing syscall tracing, but there are enough cases now that it
+ * seems simplest just to do the reload unconditionally.
+ */
+.Lrestore_syscall_regs:
+ {
+ ld r30, r30 /* r30 = thread_info status word */
+ PTREGS_PTR(r11, PTREGS_OFFSET_REG(0))
+ }
+ pop_reg r0, r11
+ pop_reg r1, r11
+ pop_reg r2, r11
+ pop_reg r3, r11
+ pop_reg r4, r11
+ pop_reg r5, r11, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(5)
+ {
+ ld TREG_SYSCALL_NR_NAME, r11
+ moveli r21, __NR_syscalls
+ }
+
+ /* Ensure that the syscall number is within the legal range. */
+ {
+ moveli r20, hw2(sys_call_table)
+ blbs r30, .Lcompat_syscall /* low status bit set: take the compat path */
+ }
+ {
+ cmpltu r21, TREG_SYSCALL_NR_NAME, r21 /* r21 = (nr < __NR_syscalls), unsigned compare */
+ shl16insli r20, r20, hw1(sys_call_table)
+ }
+ {
+ blbc r21, .Linvalid_syscall
+ shl16insli r20, r20, hw0(sys_call_table)
+ }
+.Lload_syscall_pointer:
+ shl3add r20, TREG_SYSCALL_NR_NAME, r20 /* presumably r20 = table base + 8 * nr */
+ ld r20, r20
+
+ /* Jump to syscall handler. */
+ jalr r20
+.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */
+
+ /*
+ * Write our r0 onto the stack so it gets restored instead
+ * of whatever the user had there before.
+ * In compat mode, sign-extend r0 before storing it.
+ * (The low bit of r30, the status word, selects compat mode here
+ * just as it did for the table choice above.)
+ */
+ {
+ PTREGS_PTR(r29, PTREGS_OFFSET_REG(0))
+ blbct r30, 1f
+ }
+ addxi r0, r0, 0
+1: st r29, r0
+
+.Lsyscall_sigreturn_skip: /* the sigreturn stubs adjust lr to land here, skipping the r0 store */
+ FEEDBACK_REENTER(handle_syscall)
+
+ /*
+ * Do syscall trace again, if requested.
+ * r31 still points at the thread_info flags word. The bundled
+ * andi replaces r0 with the single-step bit for the test at 1:
+ * below; it is recomputed after the do_syscall_trace call.
+ * NOTE(review): as above, the beqzt must see the trace-bit r0
+ * from before the bundled andi -- confirm bundle semantics.
+ */
+ ld r30, r31
+ andi r0, r30, _TIF_SYSCALL_TRACE
+ {
+ andi r0, r30, _TIF_SINGLESTEP
+ beqzt r0, 1f
+ }
+ jal do_syscall_trace
+ FEEDBACK_REENTER(handle_syscall)
+ andi r0, r30, _TIF_SINGLESTEP
+
+1: beqzt r0, 2f
+
+ /* Single stepping -- notify ptrace. */
+ {
+ movei r0, SIGTRAP
+ jal ptrace_notify
+ }
+ FEEDBACK_REENTER(handle_syscall)
+
+2: {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
+
+.Lcompat_syscall:
+ /*
+ * Load the base of the compat syscall table in r20, and
+ * range-check the syscall number (duplicated from 64-bit path).
+ * Sign-extend all the user's passed arguments to make them consistent.
+ * Also save the original "r(n)" values away in "r(11+n)" in
+ * case the syscall table entry wants to validate them.
+ */
+ moveli r20, hw2(compat_sys_call_table)
+ {
+ cmpltu r21, TREG_SYSCALL_NR_NAME, r21
+ shl16insli r20, r20, hw1(compat_sys_call_table)
+ }
+ {
+ blbc r21, .Linvalid_syscall
+ shl16insli r20, r20, hw0(compat_sys_call_table)
+ }
+ { move r11, r0; addxi r0, r0, 0 }
+ { move r12, r1; addxi r1, r1, 0 }
+ { move r13, r2; addxi r2, r2, 0 }
+ { move r14, r3; addxi r3, r3, 0 }
+ { move r15, r4; addxi r4, r4, 0 }
+ { move r16, r5; addxi r5, r5, 0 }
+ j .Lload_syscall_pointer
+
+.Linvalid_syscall:
+ /*
+ * Report an invalid syscall back to the user program:
+ * store -ENOSYS into the saved r0 slot of pt_regs.
+ */
+ {
+ PTREGS_PTR(r29, PTREGS_OFFSET_REG(0))
+ movei r28, -ENOSYS
+ }
+ st r29, r28
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
+ STD_ENDPROC(handle_syscall)
+
+ /*
+ * Return the address for oprofile to suppress in backtraces.
+ * The value returned in r0 is the address of .Lhandle_syscall_link,
+ * formed position-independently from a lnk result plus an
+ * assemble-time offset. This routine is placed in the
+ * .text.handle_syscall section alongside handle_syscall.
+ */
+STD_ENTRY_SECTION(handle_syscall_link_address, .text.handle_syscall)
+ lnk r0
+ {
+ addli r0, r0, .Lhandle_syscall_link - .
+ jrp lr
+ }
+ STD_ENDPROC(handle_syscall_link_address)
+
+STD_ENTRY(ret_from_fork) /* calls sim_notify_fork, then schedule_tail, then joins the interrupt return path */
+ jal sim_notify_fork
+ jal schedule_tail
+ FEEDBACK_REENTER(ret_from_fork)
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
+ STD_ENDPROC(ret_from_fork)
+
+/* Various stub interrupt handlers and syscall handlers */
+
+STD_ENTRY_LOCAL(_kernel_double_fault) /* marshals state into r1-r4 and tail-jumps to kernel_double_fault */
+ mfspr r1, SPR_EX_CONTEXT_K_0 /* r1 = EX_CONTEXT_K_0 (interrupt_return reloads this SPR from the pt_regs PC slot) */
+ move r2, lr /* r2 = lr on entry */
+ move r3, sp /* r3 = sp on entry, before the adjustment below */
+ move r4, r52 /* r4 = r52 on entry */
+ addi sp, sp, -C_ABI_SAVE_AREA_SIZE /* reserve C_ABI_SAVE_AREA_SIZE bytes of stack */
+ j kernel_double_fault /* plain jump, so lr is unchanged; r0 is passed through untouched */
+ STD_ENDPROC(_kernel_double_fault)
+
+STD_ENTRY_LOCAL(bad_intr) /* vector-table target for interrupts that have no handler: panics */
+ mfspr r2, SPR_EX_CONTEXT_K_0 /* presumably feeds the "PC %#lx" field; the panic macro is defined outside this view */
+ panic "Unhandled interrupt %#x: PC %#lx"
+ STD_ENDPROC(bad_intr)
+
+/*
+ * Put address of pt_regs in reg and jump.
+ *
+ * PTREGS_SYSCALL(x, reg) defines the entry point _x, which loads the
+ * pt_regs base address into "reg" and jumps (not calls) to x in the
+ * same bundle, so lr is left as the caller set it.
+ */
+#define PTREGS_SYSCALL(x, reg) \
+ STD_ENTRY(_##x); \
+ { \
+ PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \
+ j x \
+ }; \
+ STD_ENDPROC(_##x)
+
+/*
+ * Special-case sigreturn to not write r0 to the stack on return.
+ * This is technically more efficient, but it also avoids difficulties
+ * in the 64-bit OS when handling 32-bit compat code, since we must not
+ * sign-extend r0 for the sigreturn return-value case.
+ *
+ * The stub advances lr by (.Lsyscall_sigreturn_skip - .Lhandle_syscall_link)
+ * before jumping to x, so that when x returns, control resumes at
+ * .Lsyscall_sigreturn_skip in handle_syscall instead of at
+ * .Lhandle_syscall_link, bypassing the store of r0 into pt_regs.
+ * As with PTREGS_SYSCALL, the entry point defined is _x and the
+ * pt_regs base address is placed in "reg".
+ */
+#define PTREGS_SYSCALL_SIGRETURN(x, reg) \
+ STD_ENTRY(_##x); \
+ addli lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \
+ { \
+ PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \
+ j x \
+ }; \
+ STD_ENDPROC(_##x)
+
+PTREGS_SYSCALL(sys_execve, r3) /* defines _sys_execve; pt_regs address in r3 */
+PTREGS_SYSCALL(sys_sigaltstack, r2) /* defines _sys_sigaltstack; pt_regs address in r2 */
+PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0) /* defines _sys_rt_sigreturn; pt_regs address in r0 */
+#ifdef CONFIG_COMPAT
+PTREGS_SYSCALL(compat_sys_execve, r3) /* defines _compat_sys_execve; pt_regs address in r3 */
+PTREGS_SYSCALL(compat_sys_sigaltstack, r2) /* defines _compat_sys_sigaltstack; pt_regs address in r2 */
+PTREGS_SYSCALL_SIGRETURN(compat_sys_rt_sigreturn, r0) /* defines _compat_sys_rt_sigreturn; pt_regs address in r0 */
+#endif
+
+/*
+ * Save additional callee-saves to pt_regs, put address in r4 and jump.
+ * The jump to sys_clone is a plain "j", so sys_clone returns directly
+ * to whoever called _sys_clone.
+ */
+STD_ENTRY(_sys_clone)
+ push_extra_callee_saves r4
+ j sys_clone
+ STD_ENDPROC(_sys_clone)
+
+/*
+ * The single-step support may need to read all the registers.
+ * This is the vector-table target for INT_UNALIGN_DATA: it runs
+ * push_extra_callee_saves with r0 as its operand and then continues
+ * in do_trap.
+ */
+int_unalign:
+ push_extra_callee_saves r0
+ j do_trap
+
+/*
+ * Fill the return address stack with nonzero entries.
+ * The caller's lr is saved in r0, then a chain of "jal" instructions
+ * each targets the instruction that follows it; finally we return to
+ * the caller through the saved value in r0. lr itself is clobbered
+ * by the jal chain.
+ */
+STD_ENTRY(fill_ra_stack)
+ {
+ move r0, lr
+ jal 1f
+ }
+1: jal 2f
+2: jal 3f
+3: jal 4f
+4: jrp r0
+ STD_ENDPROC(fill_ra_stack)
+
+/*
+ * Include .intrpt1 array of interrupt vectors
+ *
+ * Each entry names an interrupt number, its vector name and the routine
+ * to run for it. Some entries (SWINT_1, PERF_COUNT, AUX_PERF_COUNT)
+ * pass an extra trailing operand naming handle_syscall or handle_nmi.
+ * NOTE(review): int_hand and dc_dispatch are defined outside this view,
+ * so the operand meanings above are inferred from the call sites only.
+ *
+ * Which of the *_1 / *_2 vectors carries the real handler for single
+ * step, IPI and INTCTRL depends on CONFIG_KERNEL_PL; the other one is
+ * routed to bad_intr.
+ *
+ * NOTE(review): op_handle_aux_perf_interrupt is defined below but the
+ * AUX_PERF_COUNT entry names op_handle_perf_interrupt. Both currently
+ * resolve to bad_intr, so there is no difference today -- confirm
+ * whether the aux name was intended.
+ */
+ .section ".intrpt1", "ax"
+
+#define op_handle_perf_interrupt bad_intr
+#define op_handle_aux_perf_interrupt bad_intr
+
+#ifndef CONFIG_HARDWALL
+#define do_hardwall_trap bad_intr
+#endif
+
+ int_hand INT_MEM_ERROR, MEM_ERROR, do_trap
+ int_hand INT_SINGLE_STEP_3, SINGLE_STEP_3, bad_intr
+#if CONFIG_KERNEL_PL == 2
+ int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, gx_singlestep_handle
+ int_hand INT_SINGLE_STEP_1, SINGLE_STEP_1, bad_intr
+#else
+ int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, bad_intr
+ int_hand INT_SINGLE_STEP_1, SINGLE_STEP_1, gx_singlestep_handle
+#endif
+ int_hand INT_SINGLE_STEP_0, SINGLE_STEP_0, bad_intr
+ int_hand INT_IDN_COMPLETE, IDN_COMPLETE, bad_intr
+ int_hand INT_UDN_COMPLETE, UDN_COMPLETE, bad_intr
+ int_hand INT_ITLB_MISS, ITLB_MISS, do_page_fault
+ int_hand INT_ILL, ILL, do_trap
+ int_hand INT_GPV, GPV, do_trap
+ int_hand INT_IDN_ACCESS, IDN_ACCESS, do_trap
+ int_hand INT_UDN_ACCESS, UDN_ACCESS, do_trap
+ int_hand INT_SWINT_3, SWINT_3, do_trap
+ int_hand INT_SWINT_2, SWINT_2, do_trap
+ int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall /* the only entry routed to handle_syscall */
+ int_hand INT_SWINT_0, SWINT_0, do_trap
+ int_hand INT_ILL_TRANS, ILL_TRANS, do_trap
+ int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign
+ int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault
+ int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault
+ int_hand INT_IDN_FIREWALL, IDN_FIREWALL, bad_intr
+ int_hand INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap
+ int_hand INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt
+ int_hand INT_IDN_TIMER, IDN_TIMER, bad_intr
+ int_hand INT_UDN_TIMER, UDN_TIMER, bad_intr
+ int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr
+ int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr
+ int_hand INT_IPI_3, IPI_3, bad_intr
+#if CONFIG_KERNEL_PL == 2
+ int_hand INT_IPI_2, IPI_2, tile_dev_intr
+ int_hand INT_IPI_1, IPI_1, bad_intr
+#else
+ int_hand INT_IPI_2, IPI_2, bad_intr
+ int_hand INT_IPI_1, IPI_1, tile_dev_intr
+#endif
+ int_hand INT_IPI_0, IPI_0, bad_intr
+ int_hand INT_PERF_COUNT, PERF_COUNT, \
+ op_handle_perf_interrupt, handle_nmi
+ int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \
+ op_handle_perf_interrupt, handle_nmi
+ int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr
+#if CONFIG_KERNEL_PL == 2
+ dc_dispatch INT_INTCTRL_2, INTCTRL_2
+ int_hand INT_INTCTRL_1, INTCTRL_1, bad_intr
+#else
+ int_hand INT_INTCTRL_2, INTCTRL_2, bad_intr
+ dc_dispatch INT_INTCTRL_1, INTCTRL_1
+#endif
+ int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr
+ int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \
+ hv_message_intr
+ int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, bad_intr
+ int_hand INT_I_ASID, I_ASID, bad_intr
+ int_hand INT_D_ASID, D_ASID, bad_intr
+ int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap
+
+ /* Synthetic interrupt delivered only by the simulator */
+ int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c
new file mode 100644
index 00000000..02e62806
--- /dev/null
+++ b/arch/tile/kernel/irq.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/uaccess.h>
+#include <hv/drv_pcie_rc_intf.h>
+#include <arch/spr_def.h>
+#include <asm/traps.h>
+
+/* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */
+#define IS_HW_CLEARED 1
+
+/*
+ * The set of interrupts we enable for arch_local_irq_enable().
+ * As a sentinel, this is initialized to contain just a single
+ * interrupt that the kernel doesn't actually use. During kernel init,
+ * interrupts are added as the kernel gets prepared to support them.
+ * NOTE: we could probably initialize them all statically up front.
+ */
+DEFINE_PER_CPU(unsigned long long, interrupts_enabled_mask) =
+ INITIAL_INTERRUPTS_ENABLED;
+EXPORT_PER_CPU_SYMBOL(interrupts_enabled_mask);
+
+/* Define per-tile device interrupt statistics state. */
+DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
+/*
+ * Define per-tile irq disable mask; the hardware/HV only has a single
+ * mask that we use to implement both masking and disabling.
+ */
+static DEFINE_PER_CPU(unsigned long, irq_disable_mask)
+ ____cacheline_internodealigned_in_smp;
+
+/*
+ * Per-tile IRQ nesting depth. Used to make sure we enable newly
+ * enabled IRQs before exiting the outermost interrupt.
+ */
+static DEFINE_PER_CPU(int, irq_depth);
+
+/*
+ * State for allocating IRQs on Gx.
+ * A set bit in available_irqs marks an irq number that create_irq()
+ * may hand out; the IRQ_RESCHEDULE bit starts out clear, so that
+ * number is never allocated. All updates are made while holding
+ * available_irqs_lock.
+ */
+#if CHIP_HAS_IPI()
+static unsigned long available_irqs = ~(1UL << IRQ_RESCHEDULE);
+static DEFINE_SPINLOCK(available_irqs_lock);
+#endif
+
+#if CHIP_HAS_IPI()
+/*
+ * Use SPRs to manipulate device interrupts.
+ * Each macro takes a bitmask of irqs (bit n for irq n) and writes it
+ * to the corresponding set/reset SPR.
+ */
+#define mask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_SET_K, irq_mask)
+#define unmask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_RESET_K, irq_mask)
+#define clear_irqs(irq_mask) __insn_mtspr(SPR_IPI_EVENT_RESET_K, irq_mask)
+#else
+/*
+ * Use HV to manipulate device interrupts.
+ * Same interface as above, implemented with hypervisor calls.
+ */
+#define mask_irqs(irq_mask) hv_disable_intr(irq_mask)
+#define unmask_irqs(irq_mask) hv_enable_intr(irq_mask)
+#define clear_irqs(irq_mask) hv_clear_intr(irq_mask)
+#endif
+
+/*
+ * The interrupt handling path, implemented in terms of HV interrupt
+ * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx.
+ *
+ * regs: register state at interrupt time; installed with
+ * set_irq_regs() for the duration of the call, after which the
+ * previous value is restored.
+ * intnum: interrupt number; not used by this function.
+ *
+ * Every pending irq bit is dispatched through generic_handle_irq(),
+ * lowest bit first. The per-cpu irq_depth is incremented on entry and
+ * decremented before irq_exit(); only the outermost invocation
+ * (depth 0) unmasks every irq that is not in irq_disable_mask.
+ */
+void tile_dev_intr(struct pt_regs *regs, int intnum)
+{
+ int depth = __get_cpu_var(irq_depth)++; /* nesting level before this entry */
+ unsigned long original_irqs;
+ unsigned long remaining_irqs;
+ struct pt_regs *old_regs;
+
+#if CHIP_HAS_IPI()
+ /*
+ * Pending interrupts are listed in an SPR. We might be
+ * nested, so be sure to only handle irqs that weren't already
+ * masked by a previous interrupt. Then, mask out the ones
+ * we're going to handle.
+ */
+ unsigned long masked = __insn_mfspr(SPR_IPI_MASK_K);
+ original_irqs = __insn_mfspr(SPR_IPI_EVENT_K) & ~masked;
+ __insn_mtspr(SPR_IPI_MASK_SET_K, original_irqs);
+#else
+ /*
+ * Hypervisor performs the equivalent of the Gx code above and
+ * then puts the pending interrupt mask into a system save reg
+ * for us to find.
+ */
+ original_irqs = __insn_mfspr(SPR_SYSTEM_SAVE_K_3);
+#endif
+ remaining_irqs = original_irqs;
+
+ /* Track time spent here in an interrupt context. */
+ old_regs = set_irq_regs(regs);
+ irq_enter();
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /*
+ * Debugging check for stack overflow: less than 1/8th stack free?
+ * NOTE(review): "sp" is a long but is compared against, and
+ * subtracted from, an unsigned sizeof expression, and the result
+ * is printed with %ld -- confirm the signedness is as intended.
+ */
+ {
+ long sp = stack_pointer - (long) current_thread_info();
+ if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) {
+ pr_emerg("tile_dev_intr: "
+ "stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+ while (remaining_irqs) {
+ unsigned long irq = __ffs(remaining_irqs); /* lowest pending irq number */
+ remaining_irqs &= ~(1UL << irq);
+
+ /* Count device irqs; Linux IPIs are counted elsewhere. */
+ if (irq != IRQ_RESCHEDULE)
+ __get_cpu_var(irq_stat).irq_dev_intr_count++;
+
+ generic_handle_irq(irq);
+ }
+
+ /*
+ * If we weren't nested, turn on all enabled interrupts,
+ * including any that were reenabled during interrupt
+ * handling.
+ */
+ if (depth == 0)
+ unmask_irqs(~__get_cpu_var(irq_disable_mask));
+
+ __get_cpu_var(irq_depth)--;
+
+ /*
+ * Track time spent against the current process again and
+ * process any softirqs if they are waiting.
+ */
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
+
+/*
+ * irq_chip .irq_enable hook: drop the irq's bit from this cpu's
+ * irq_disable_mask.  The hardware unmask happens here only when no
+ * interrupt is being handled on this cpu (irq_depth == 0); otherwise
+ * it is left to the exit path of the interrupt handler.
+ */
+static void tile_irq_chip_enable(struct irq_data *d)
+{
+	const unsigned long irq_bit = 1UL << d->irq;
+
+	get_cpu_var(irq_disable_mask) &= ~irq_bit;
+	if (!__get_cpu_var(irq_depth))
+		unmask_irqs(irq_bit);
+	put_cpu_var(irq_disable_mask);
+}
+
+/*
+ * irq_chip .irq_disable hook: record the irq in this cpu's
+ * irq_disable_mask and mask it in hardware straight away, so it cannot
+ * fire.  Because the bit is now in irq_disable_mask, the unmask done
+ * when the outermost interrupt handler exits will leave it masked.
+ */
+static void tile_irq_chip_disable(struct irq_data *d)
+{
+	const unsigned long irq_bit = 1UL << d->irq;
+
+	get_cpu_var(irq_disable_mask) |= irq_bit;
+	mask_irqs(irq_bit);
+	put_cpu_var(irq_disable_mask);
+}
+
+/* irq_chip .irq_mask hook: mask just this irq; irq_disable_mask is untouched. */
+static void tile_irq_chip_mask(struct irq_data *d)
+{
+	const unsigned long irq_bit = 1UL << d->irq;
+
+	mask_irqs(irq_bit);
+}
+
+/* irq_chip .irq_unmask hook: unmask just this irq, unconditionally. */
+static void tile_irq_chip_unmask(struct irq_data *d)
+{
+	const unsigned long irq_bit = 1UL << d->irq;
+
+	unmask_irqs(irq_bit);
+}
+
+/*
+ * irq_chip .irq_ack hook: clear the pending bit before the irq is
+ * processed, so that a new assertion raises a fresh interrupt.  Irqs
+ * whose chip data is IS_HW_CLEARED are skipped.
+ */
+static void tile_irq_chip_ack(struct irq_data *d)
+{
+	unsigned long chip_flags =
+		(unsigned long)irq_data_get_irq_chip_data(d);
+
+	if (chip_flags == IS_HW_CLEARED)
+		return;
+
+	clear_irqs(1UL << d->irq);
+}
+
+/*
+ * irq_chip .irq_eoi hook, used for per-cpu interrupts: unmask the irq
+ * again unless it is in this cpu's irq_disable_mask, i.e. unless it
+ * was disabled via disable_percpu_irq().
+ */
+static void tile_irq_chip_eoi(struct irq_data *d)
+{
+	const unsigned long irq_bit = 1UL << d->irq;
+
+	if (__get_cpu_var(irq_disable_mask) & irq_bit)
+		return;
+
+	unmask_irqs(irq_bit);
+}
+
+static struct irq_chip tile_irq_chip = { /* installed on each irq by tile_irq_activate() */
+ .name = "tile_irq_chip",
+ .irq_enable = tile_irq_chip_enable,
+ .irq_disable = tile_irq_chip_disable,
+ .irq_ack = tile_irq_chip_ack,
+ .irq_eoi = tile_irq_chip_eoi,
+ .irq_mask = tile_irq_chip_mask,
+ .irq_unmask = tile_irq_chip_unmask,
+};
+
+void __init init_IRQ(void)
+{
+ ipi_init(); /* the only boot-time irq setup done here; ipi_init() is defined elsewhere */
+}
+
+void __cpuinit setup_irq_regs(void)
+{
+ /*
+ * Enable interrupt delivery: unmask every device irq bit and, on
+ * chips with IPI hardware, also unmask the INT_IPI_K interrupt.
+ */
+ unmask_irqs(~0UL);
+#if CHIP_HAS_IPI()
+ arch_local_irq_unmask(INT_IPI_K);
+#endif
+}
+
+/*
+ * Attach tile_irq_chip and a flow handler to "irq".
+ *
+ * TILE_IRQ_PERCPU irqs get handle_percpu_irq().  Everything else gets
+ * handle_level_irq(), because each bit of the pending interrupt vector
+ * (modeled by the HV on TILE64 and TILEPro, implemented in hardware on
+ * TILE-Gx) has level-style semantics: an interrupt fires whenever the
+ * bit is high, not just at edges.
+ *
+ * TILE_IRQ_HW_CLEAR irqs are additionally tagged with IS_HW_CLEARED in
+ * their chip data so that the ack hook leaves them alone.
+ */
+void tile_irq_activate(unsigned int irq, int tile_irq_type)
+{
+	irq_flow_handler_t flow = (tile_irq_type == TILE_IRQ_PERCPU) ?
+		handle_percpu_irq : handle_level_irq;
+
+	irq_set_chip_and_handler(irq, &tile_irq_chip, flow);
+
+	if (tile_irq_type == TILE_IRQ_HW_CLEAR)
+		irq_set_chip_data(irq, (void *)IS_HW_CLEARED);
+}
+EXPORT_SYMBOL(tile_irq_activate);
+
+
+void ack_bad_irq(unsigned int irq)
+{
+ pr_err("unexpected IRQ trap at vector %02x\n", irq); /* log only; nothing is acknowledged or masked here */
+}
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+#if CHIP_HAS_IPI()
+/*
+ * Allocate the lowest-numbered free irq.
+ *
+ * Returns the irq number after running dynamic_irq_init() on it, or
+ * -ENOMEM when every bit of available_irqs is already taken.
+ */
+int create_irq(void)
+{
+	unsigned long flags;
+	int result = -ENOMEM;
+
+	spin_lock_irqsave(&available_irqs_lock, flags);
+	if (available_irqs != 0) {
+		result = __ffs(available_irqs);
+		available_irqs &= ~(1UL << result);
+		dynamic_irq_init(result);
+	}
+	spin_unlock_irqrestore(&available_irqs_lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(create_irq);
+
+/*
+ * Return "irq" to the free pool and run dynamic_irq_cleanup() on it,
+ * both under available_irqs_lock.  The irq number is not validated.
+ */
+void destroy_irq(unsigned int irq)
+{
+	const unsigned long irq_bit = 1UL << irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&available_irqs_lock, flags);
+	available_irqs |= irq_bit;
+	dynamic_irq_cleanup(irq);
+	spin_unlock_irqrestore(&available_irqs_lock, flags);
+}
+EXPORT_SYMBOL(destroy_irq);
+#endif
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c
new file mode 100644
index 00000000..6255f2ea
--- /dev/null
+++ b/arch/tile/kernel/machine_kexec.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * based on machine_kexec.c from other architectures in linux-2.6.18
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/errno.h>
+#include <linux/vmalloc.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/elf.h>
+#include <linux/highmem.h>
+#include <linux/mmu_context.h>
+#include <linux/io.h>
+#include <linux/timex.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/checksum.h>
+#include <hv/hypervisor.h>
+
+
+/*
+ * This stuff is not in elf.h and is not in any other kernel include.
+ * This stuff is needed below in the little boot notes parser to
+ * extract the command line so we can pass it to the hypervisor.
+ *
+ * struct Elf32_Bhdr is the header expected at the very start of a
+ * boot-notes page: b_signature must equal ELF_BOOT_MAGIC, and b_size
+ * is the number of bytes (header included) that the parser checksums
+ * and walks. b_checksum and b_records are not read individually by
+ * the parser in this file.
+ */
+struct Elf32_Bhdr {
+ Elf32_Word b_signature;
+ Elf32_Word b_size;
+ Elf32_Half b_checksum;
+ Elf32_Half b_records;
+};
+#define ELF_BOOT_MAGIC 0x0E1FB007
+#define EBN_COMMAND_LINE 0x00000004 /* n_type of the note holding the command line */
+#define roundupsz(X) (((X) + 3) & ~3) /* round X up to a multiple of 4; wraps if X + 3 overflows X's type */
+
+/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+
+void machine_shutdown(void)
+{
+ /*
+ * Normally we would stop all the other processors here, but
+ * the check in machine_kexec_prepare below (which refuses to
+ * proceed when more than one cpu is online) ensures we'll only
+ * get this far if we've been booted with "nosmp" on the
+ * command line or without CONFIG_SMP so there's nothing to do
+ * here (for now).
+ */
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ /*
+ * Cannot happen. This type of kexec is disabled on this
+ * architecture (and enforced in machine_kexec_prepare below,
+ * which rejects every image type except KEXEC_TYPE_DEFAULT).
+ * "regs" is therefore unused.
+ */
+}
+
+
+/*
+ * Validate a kexec request before anything is loaded.
+ *
+ * Returns 0 when exactly one cpu is online and the image type is
+ * KEXEC_TYPE_DEFAULT; otherwise logs a warning and returns -ENOSYS.
+ * The cpu-count check takes precedence over the image-type check.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+	int rc = 0;
+
+	if (num_online_cpus() > 1) {
+		pr_warning("%s: detected attempt to kexec "
+			   "with num_online_cpus() > 1\n",
+			   __func__);
+		rc = -ENOSYS;
+	} else if (image->type != KEXEC_TYPE_DEFAULT) {
+		pr_warning("%s: detected attempt to kexec "
+			   "with unsupported type: %d\n",
+			   __func__,
+			   image->type);
+		rc = -ENOSYS;
+	}
+
+	return rc;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+ /*
+ * We did nothing in machine_kexec_prepare (it only validates
+ * the request and acquires no resources),
+ * so we have nothing to do here.
+ */
+}
+
+/*
+ * If we can find elf boot notes on this page, return the command
+ * line.  Otherwise, return NULL.  Somewhat kludgy, but no
+ * good way to do this without significantly rearchitecting the
+ * architecture-independent kexec code.
+ *
+ * "pg" must be the page-aligned kernel mapping of one source page.
+ * The returned pointer refers into that same page, so the caller has
+ * to keep the page mapped for as long as it uses the string.
+ *
+ * Pages without the boot-notes signature, or with an implausible
+ * b_size, are ignored silently; a bad checksum or a malformed note
+ * chain is logged.
+ *
+ * NOTE(review): the note walk skips only n_descsz bytes after each
+ * note header, i.e. it assumes notes carry no name (n_namesz == 0) --
+ * confirm against whatever generates these boot notes.
+ */
+static unsigned char *kexec_bn2cl(void *pg)
+{
+	struct Elf32_Bhdr *bhdrp = pg;
+	unsigned char *page_end = (unsigned char *)pg + PAGE_SIZE;
+	unsigned char *image_end;
+	Elf32_Nhdr *nhdrp;
+	unsigned char *desc;
+	unsigned char *command_line;
+	__sum16 csum;
+
+	/*
+	 * This routine is invoked for every source page, so make
+	 * sure to quietly ignore every impossible page.  A b_size that
+	 * cannot even hold the boot header is as impossible as one that
+	 * exceeds the page.
+	 */
+	if (bhdrp->b_signature != ELF_BOOT_MAGIC ||
+	    bhdrp->b_size < sizeof(*bhdrp) ||
+	    bhdrp->b_size > PAGE_SIZE)
+		return NULL;
+
+	/*
+	 * If we get a checksum mismatch, warn with the checksum
+	 * so we can diagnose better.
+	 */
+	csum = ip_compute_csum(pg, bhdrp->b_size);
+	if (csum != 0) {
+		pr_warning("%s: bad checksum %#x (size %u)\n",
+			   __func__, csum, bhdrp->b_size);
+		return NULL;
+	}
+
+	image_end = (unsigned char *)pg + bhdrp->b_size;
+	nhdrp = (Elf32_Nhdr *)(bhdrp + 1);
+
+	for (;;) {
+		desc = (unsigned char *)(nhdrp + 1);
+
+		/* The note header must be in bounds before we read it. */
+		if (desc > image_end) {
+			pr_info("%s: out of bounds\n", __func__);
+			return NULL;
+		}
+
+		if (nhdrp->n_type == EBN_COMMAND_LINE)
+			break;
+
+		/*
+		 * Reject oversized payloads before rounding: roundupsz()
+		 * wraps to zero for sizes within 3 of the 32-bit maximum,
+		 * which would otherwise leave us spinning on this note.
+		 */
+		if (nhdrp->n_descsz > (size_t)(image_end - desc)) {
+			pr_info("%s: out of bounds\n", __func__);
+			return NULL;
+		}
+
+		desc += roundupsz(nhdrp->n_descsz);
+		nhdrp = (Elf32_Nhdr *)desc;
+	}
+
+	command_line = (unsigned char *)(nhdrp + 1);
+
+	/* Find the terminator, checking the page limit before each read. */
+	for (desc = command_line; ; desc++) {
+		if (desc >= page_end) {
+			pr_info("%s: ran off end of page\n",
+				__func__);
+			return NULL;
+		}
+		if (*desc == '\0')
+			break;
+	}
+
+	return command_line;
+}
+
+/*
+ * Walk the kimage entry list looking for a source page that holds
+ * boot notes with a command line, and hand that command line to the
+ * hypervisor.  When none is found, an empty command line is set.
+ * A hypervisor error is logged but not otherwise acted upon.
+ */
+static void kexec_find_and_set_command_line(struct kimage *image)
+{
+	kimage_entry_t *ptr = &image->head;
+	kimage_entry_t entry;
+	unsigned char *command_line = NULL;
+	HV_Errno hverr;
+
+	while ((entry = *ptr) != 0 && !(entry & IND_DONE)) {
+		if (entry & IND_SOURCE) {
+			void *va = kmap_atomic_pfn(entry >> PAGE_SHIFT);
+
+			command_line = kexec_bn2cl(va);
+			if (command_line)
+				break;	/* page stays mapped for use below */
+			kunmap_atomic(va);
+		}
+
+		/* An indirection entry redirects us to another entry page. */
+		if (entry & IND_INDIRECTION)
+			ptr = phys_to_virt((entry & PAGE_MASK));
+		else
+			ptr++;
+	}
+
+	if (command_line == NULL) {
+		pr_info("%s: no command line found; making empty\n",
+			__func__);
+		hverr = hv_set_command_line((HV_VirtAddr) command_line, 0);
+	} else {
+		pr_info("setting new command line to \"%s\"\n",
+			command_line);
+
+		hverr = hv_set_command_line(
+			(HV_VirtAddr) command_line, strlen(command_line));
+		kunmap_atomic(command_line);
+	}
+
+	if (hverr)
+		pr_warning("%s: hv_set_command_line returned error: %d\n",
+			   __func__, hverr);
+}
+
+/*
+ * Page allocator hook for kexec.  The kexec code range-checks all its
+ * PAs; rather than let it allocate memory from every controller and
+ * then sequester it, all allocations are forced onto controller (node)
+ * zero with __GFP_THISNODE.  __GFP_NORETRY keeps the oom-killer out of
+ * it, since killing tasks almost certainly won't help if we do run out.
+ */
+struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order)
+{
+	const gfp_t node0_mask = gfp_mask | __GFP_THISNODE | __GFP_NORETRY;
+
+	return alloc_pages_node(0, node0_mask, order);
+}
+
+/*
+ * Rewrite the low part of the current mm's top-level page table so
+ * that virtual addresses below PAGE_OFFSET map to the identical
+ * physical addresses, using huge-page kernel PTEs in
+ * HV_PTE_MODE_CACHE_NO_L3 mode.  Slots whose first pfn is not valid
+ * are left untouched.
+ */
+static void setup_quasi_va_is_pa(void)
+{
+	HV_PTE *pgd_base;
+	HV_PTE proto;
+	int slot;
+
+	/*
+	 * Start from a clean TLB so stale translations cannot conflict
+	 * with the mappings installed below.
+	 */
+	local_flush_tlb_all();
+
+	pgd_base = (HV_PTE *)current->mm->pgd;
+	proto = hv_pte_set_mode(hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE),
+				HV_PTE_MODE_CACHE_NO_L3);
+
+	/* One huge page per top-level slot, up to PAGE_OFFSET. */
+	for (slot = 0; slot < pgd_index(PAGE_OFFSET); slot++) {
+		unsigned long pfn = slot << (HPAGE_SHIFT - PAGE_SHIFT);
+
+		if (!pfn_valid(pfn))
+			continue;
+		__set_pte(&pgd_base[slot], pfn_pte(pfn, proto));
+	}
+}
+
+
+/*
+ * Transfer control to the new kernel image.  Does not return: the
+ * final call goes through a __noreturn pointer into the relocation
+ * stub that was copied into the image's control code page.
+ *
+ * Sequence: mask all interrupts, pass the new command line to the
+ * hypervisor, re-home and map the control page, copy the stub and
+ * flush the icache over it, install the VA == PA mappings, and jump.
+ */
+void machine_kexec(struct kimage *image)
+{
+	void *reboot_code_buffer;
+	void (*rnk)(unsigned long, void *, unsigned long)
+		__noreturn;
+
+	/* Mask all interrupts before starting to reboot. */
+	interrupt_mask_set_mask(~0ULL);
+
+	kexec_find_and_set_command_line(image);
+
+	/*
+	 * Adjust the home caching of the control page to be cached on
+	 * this cpu, and copy the assembly helper into the control
+	 * code page, which we map in the vmalloc area.
+	 */
+	homecache_change_page_home(image->control_code_page, 0,
+				   smp_processor_id());
+	reboot_code_buffer = vmap(&image->control_code_page, 1, 0,
+				  __pgprot(_PAGE_KERNEL | _PAGE_EXECUTABLE));
+
+	/*
+	 * vmap() returns NULL on failure.  There is no way to back out
+	 * at this point, so stop with a clear diagnostic rather than
+	 * copying to, and then jumping through, a NULL pointer.
+	 */
+	BUG_ON(reboot_code_buffer == NULL);
+
+	memcpy(reboot_code_buffer, relocate_new_kernel,
+	       relocate_new_kernel_size);
+	__flush_icache_range(
+		(unsigned long) reboot_code_buffer,
+		(unsigned long) reboot_code_buffer + relocate_new_kernel_size);
+
+	setup_quasi_va_is_pa();
+
+	/* now call it */
+	rnk = reboot_code_buffer;
+	(*rnk)(image->head, reboot_code_buffer, image->start);
+}
diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c
new file mode 100644
index 00000000..0858ee6b
--- /dev/null
+++ b/arch/tile/kernel/messaging.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/hardirq.h>
+#include <linux/ptrace.h>
+#include <asm/hv_driver.h>
+#include <asm/irq_regs.h>
+#include <asm/traps.h>
+#include <hv/hypervisor.h>
+#include <arch/interrupts.h>
+
+/* All messages are stored here */
+static DEFINE_PER_CPU(HV_MsgState, msg_state);
+
+void __cpuinit init_messaging(void)
+{
+ /* Allocate storage for messages in kernel space */
+ HV_MsgState *state = &__get_cpu_var(msg_state);
+ int rc = hv_register_message_state(state);
+ if (rc != HV_OK)
+ panic("hv_register_message_state: error %d", rc);
+
+ /* Make sure downcall interrupts will be enabled. */
+ arch_local_irq_unmask(INT_INTCTRL_K);
+}
+
+void hv_message_intr(struct pt_regs *regs, int intnum)
+{
+ /*
+ * We enter with interrupts disabled and leave them disabled,
+ * to match expectations of called functions (e.g.
+ * do_ccupdate_local() in mm/slab.c). This is also consistent
+ * with normal call entry for device interrupts.
+ */
+
+ int message[HV_MAX_MESSAGE_SIZE/sizeof(int)];
+ HV_RcvMsgInfo rmi;
+ int nmsgs = 0;
+
+ /* Track time spent here in an interrupt context */
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ irq_enter();
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: less than 1/8th stack free? */
+ {
+ long sp = stack_pointer - (long) current_thread_info();
+ if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) {
+ pr_emerg("hv_message_intr: "
+ "stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+
+ while (1) {
+ rmi = hv_receive_message(__get_cpu_var(msg_state),
+ (HV_VirtAddr) message,
+ sizeof(message));
+ if (rmi.msglen == 0)
+ break;
+
+ if (rmi.msglen < 0)
+ panic("hv_receive_message failed: %d", rmi.msglen);
+
+ ++nmsgs;
+
+ if (rmi.source == HV_MSG_TILE) {
+ int tag;
+
+ /* we just send tags for now */
+ BUG_ON(rmi.msglen != sizeof(int));
+
+ tag = message[0];
+#ifdef CONFIG_SMP
+ evaluate_message(message[0]);
+#else
+ panic("Received IPI message %d in UP mode", tag);
+#endif
+ } else if (rmi.source == HV_MSG_INTR) {
+ HV_IntrMsg *him = (HV_IntrMsg *)message;
+ struct hv_driver_cb *cb =
+ (struct hv_driver_cb *)him->intarg;
+ cb->callback(cb, him->intdata);
+ __get_cpu_var(irq_stat).irq_hv_msg_count++;
+ }
+ }
+
+ /*
+ * We shouldn't have gotten a message downcall with no
+ * messages available.
+ */
+ if (nmsgs == 0)
+ panic("Message downcall invoked with no messages!");
+
+ /*
+ * Track time spent against the current process again and
+ * process any softirqs if they are waiting.
+ */
+ irq_exit();
+ set_irq_regs(old_regs);
+}
diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
new file mode 100644
index 00000000..98d47692
--- /dev/null
+++ b/arch/tile/kernel/module.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Based on i386 version, copyright (C) 2001 Rusty Russell.
+ */
+
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <asm/pgtable.h>
+#include <asm/homecache.h>
+#include <arch/opcode.h>
+
+/*
+ * Select the 64-bit ELF RELA type and accessor macros when building
+ * for __tilegx__, and the 32-bit ones otherwise.
+ */
+#ifdef __tilegx__
+# define Elf_Rela Elf64_Rela
+# define ELF_R_SYM ELF64_R_SYM
+# define ELF_R_TYPE ELF64_R_TYPE
+#else
+# define Elf_Rela Elf32_Rela
+# define ELF_R_SYM ELF32_R_SYM
+# define ELF_R_TYPE ELF32_R_TYPE
+#endif
+
+/* DEBUGP() is printk when MODULE_DEBUG is defined and expands to nothing otherwise. */
+#ifdef MODULE_DEBUG
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt...)
+#endif
+
+/*
+ * Allocate some address space in the range MEM_MODULE_START to
+ * MEM_MODULE_END and populate it with memory.
+ *
+ * The pages are allocated one at a time (highmem allowed), recorded in
+ * a kmalloc'ed array that is attached to the vm area, and mapped with
+ * kernel-executable protections.
+ *
+ * Returns the mapped address, or NULL if size is 0 or any allocation
+ * or mapping step fails; on failure every page obtained so far and the
+ * page array are released.
+ */
+void *module_alloc(unsigned long size)
+{
+ struct page **pages;
+ pgprot_t prot_rwx = __pgprot(_PAGE_KERNEL | _PAGE_KERNEL_EXEC);
+ struct vm_struct *area;
+ int i = 0;
+ int npages;
+
+ if (size == 0)
+ return NULL;
+ /* Round the request up to a whole number of pages. */
+ npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+ pages = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
+ if (pages == NULL)
+ return NULL;
+ /* On failure, i counts the pages already allocated (see "error"). */
+ for (; i < npages; ++i) {
+ pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+ if (!pages[i])
+ goto error;
+ }
+
+ area = __get_vm_area(size, VM_ALLOC, MEM_MODULE_START, MEM_MODULE_END);
+ if (!area)
+ goto error;
+ area->nr_pages = npages;
+ area->pages = pages;
+
+ if (map_vm_area(area, prot_rwx, &pages)) {
+ /* Drop the vm area here; the pages are freed at "error". */
+ vunmap(area->addr);
+ goto error;
+ }
+
+ return area->addr;
+
+error:
+ /* Free pages[i-1] down to pages[0], then the array itself. */
+ while (--i >= 0)
+ __free_page(pages[i]);
+ kfree(pages);
+ return NULL;
+}
+
+
+/*
+ * Free memory returned from module_alloc.
+ *
+ * After the region is vfree'd, an L1 icache evict is requested on all
+ * online cpus.  The "mod" argument is not used by the current code.
+ */
+void module_free(struct module *mod, void *module_region)
+{
+ vfree(module_region);
+
+ /* Globally flush the L1 icache. */
+ flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask,
+ 0, 0, 0, NULL, NULL, 0);
+
+ /*
+ * FIXME: If module_region == mod->module_init, trim exception
+ * table entries.
+ */
+}
+
+#ifdef __tilegx__
+/*
+ * Check that "value" fits in 48 bits as a signed quantity, i.e. that
+ * its top 16 bits merely replicate the sign of the low 48 bits.
+ *
+ * Returns 1 when the value is in range; otherwise logs a warning that
+ * names the module and returns 0.
+ */
+static int validate_hw2_last(long value, struct module *me)
+{
+	/* Discard the top 16 bits, then sign-extend from bit 47. */
+	long sign_extended = (value << 16) >> 16;
+
+	if (sign_extended == value)
+		return 1;
+
+	pr_warning("module %s: Out of range HW2_LAST value %#lx\n",
+		   me->name, value);
+	return 0;
+}
+
+/*
+ * Check that "value" survives a round trip through the JumpOff_X1
+ * instruction field.  Returns non-zero when it does.
+ */
+static int validate_jumpoff(long value)
+{
+	/* Leading zero bits of an all-ones field give the shift width. */
+	int shift = __builtin_clzl(get_JumpOff_X1(create_JumpOff_X1(-1)));
+
+	/* Encode and decode the value, then sign-extend what came back. */
+	long field = get_JumpOff_X1(create_JumpOff_X1(value));
+	long decoded = (field << shift) >> shift;
+
+	return decoded == value;
+}
+#endif
+
+/*
+ * Apply every RELA entry in section "relsec" to the section named by
+ * that section's sh_info field.
+ *
+ * For each entry the target location and the symbol value plus addend
+ * are computed, then the relocation type selects how the value is
+ * merged into the instruction bundle (or stored whole).
+ *
+ * Returns 0 on success, or -ENOEXEC for an unknown relocation type or
+ * (on tilegx) a value that fails the HW2_LAST or JumpOff range checks.
+ * The "strtab" argument is not used by the current code.
+ */
+int apply_relocate_add(Elf_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ unsigned int i;
+ Elf_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+ Elf_Sym *sym;
+ u64 *location;
+ unsigned long value;
+
+ DEBUGP("Applying relocate section %u to %u\n", relsec,
+ sechdrs[relsec].sh_info);
+ for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+ /* This is where to make the change */
+ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ + rel[i].r_offset;
+ /*
+ * This is the symbol it is referring to.
+ * Note that all undefined symbols have been resolved.
+ */
+ sym = (Elf_Sym *)sechdrs[symindex].sh_addr
+ + ELF_R_SYM(rel[i].r_info);
+ value = sym->st_value + rel[i].r_addend;
+
+ switch (ELF_R_TYPE(rel[i].r_info)) {
+
+/* Replace the bits of *location selected by func(-1) with func(value). */
+#define MUNGE(func) (*location = ((*location & ~func(-1)) | func(value)))
+
+#ifndef __tilegx__
+ case R_TILE_32:
+ *(uint32_t *)location = value;
+ break;
+ case R_TILE_IMM16_X0_HA:
+ /* Round by adding 0x8000, then keep the upper half. */
+ value = (value + 0x8000) >> 16;
+ /*FALLTHROUGH*/
+ case R_TILE_IMM16_X0_LO:
+ MUNGE(create_Imm16_X0);
+ break;
+ case R_TILE_IMM16_X1_HA:
+ value = (value + 0x8000) >> 16;
+ /*FALLTHROUGH*/
+ case R_TILE_IMM16_X1_LO:
+ MUNGE(create_Imm16_X1);
+ break;
+ case R_TILE_JOFFLONG_X1:
+ value -= (unsigned long) location; /* pc-relative */
+ value = (long) value >> 3; /* count by instrs */
+ MUNGE(create_JOffLong_X1);
+ break;
+#else
+ case R_TILEGX_64:
+ *location = value;
+ break;
+ /*
+ * The HW2_LAST, HW1 and HW0 cases fall through so that the
+ * value is shifted right by 32, 16 or 0 bits respectively
+ * before being merged.
+ */
+ case R_TILEGX_IMM16_X0_HW2_LAST:
+ if (!validate_hw2_last(value, me))
+ return -ENOEXEC;
+ value >>= 16;
+ /*FALLTHROUGH*/
+ case R_TILEGX_IMM16_X0_HW1:
+ value >>= 16;
+ /*FALLTHROUGH*/
+ case R_TILEGX_IMM16_X0_HW0:
+ MUNGE(create_Imm16_X0);
+ break;
+ case R_TILEGX_IMM16_X1_HW2_LAST:
+ if (!validate_hw2_last(value, me))
+ return -ENOEXEC;
+ value >>= 16;
+ /*FALLTHROUGH*/
+ case R_TILEGX_IMM16_X1_HW1:
+ value >>= 16;
+ /*FALLTHROUGH*/
+ case R_TILEGX_IMM16_X1_HW0:
+ MUNGE(create_Imm16_X1);
+ break;
+ case R_TILEGX_JUMPOFF_X1:
+ value -= (unsigned long) location; /* pc-relative */
+ value = (long) value >> 3; /* count by instrs */
+ if (!validate_jumpoff(value)) {
+ pr_warning("module %s: Out of range jump to"
+ " %#llx at %#llx (%p)\n", me->name,
+ sym->st_value + rel[i].r_addend,
+ rel[i].r_offset, location);
+ return -ENOEXEC;
+ }
+ MUNGE(create_JumpOff_X1);
+ break;
+#endif
+
+#undef MUNGE
+
+ default:
+ pr_err("module %s: Unknown relocation: %d\n",
+ me->name, (int) ELF_R_TYPE(rel[i].r_info));
+ return -ENOEXEC;
+ }
+ }
+ return 0;
+}
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
new file mode 100644
index 00000000..b3ed19f8
--- /dev/null
+++ b/arch/tile/kernel/pci-dma.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/export.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
+
+/* Generic DMA mapping functions: */
+
+/*
+ * Allocate what Linux calls "coherent" memory, which for us just
+ * means uncached.
+ *
+ * The pages are zeroed and homed as PAGE_HOME_UNCACHED.  On success
+ * the physical address is stored in *dma_handle and the kernel
+ * virtual address is returned.  Returns NULL if the allocation fails
+ * or if the buffer would extend beyond the device's coherent DMA mask
+ * (a 32-bit mask is assumed when the device has not set one).
+ */
+void *dma_alloc_coherent(struct device *dev,
+			 size_t size,
+			 dma_addr_t *dma_handle,
+			 gfp_t gfp)
+{
+	u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32);
+	int node = dev_to_node(dev);
+	int order = get_order(size);
+	struct page *pg;
+	dma_addr_t addr;
+
+	gfp |= __GFP_ZERO;
+
+	/*
+	 * By forcing NUMA node 0 for 32-bit masks we ensure that the
+	 * high 32 bits of the resulting PA will be zero.  If the mask
+	 * size is, e.g., 24, we may still not be able to guarantee a
+	 * suitable memory address, in which case we will return NULL.
+	 * But such devices are uncommon.
+	 */
+	if (dma_mask <= DMA_BIT_MASK(32))
+		node = 0;
+
+	pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_UNCACHED);
+	if (pg == NULL)
+		return NULL;
+
+	addr = page_to_phys(pg);
+	if (addr + size > dma_mask) {
+		/*
+		 * Free by kernel virtual address, exactly as
+		 * dma_free_coherent() does; passing the physical
+		 * address here would release the wrong memory.
+		 */
+		homecache_free_pages((unsigned long)page_address(pg), order);
+		return NULL;
+	}
+
+	*dma_handle = addr;
+	return page_address(pg);
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+/*
+ * Free memory that was allocated with dma_alloc_coherent.
+ *
+ * Only "vaddr" and "size" are used: the pages are released by kernel
+ * virtual address with the order derived from size.
+ */
+void dma_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ homecache_free_pages((unsigned long)vaddr, get_order(size));
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+/*
+ * The map routines "map" the specified address range for DMA
+ * accesses. The memory belongs to the device after this call is
+ * issued, until it is unmapped with dma_unmap_single.
+ *
+ * We don't need to do any mapping, we just flush the address range
+ * out of the cache and return a DMA address.
+ *
+ * The unmap routines do whatever is necessary before the processor
+ * accesses the memory again, and must be called before the driver
+ * touches the memory. We can get away with a cache invalidate if we
+ * can count on nothing having been touched.
+ */
+
+/*
+ * Flush every page touched by the physical range
+ * [dma_addr, dma_addr + size) out of the cache, one page at a time.
+ */
+static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size)
+{
+	struct page *pg = pfn_to_page(PFN_DOWN(dma_addr));
+	/* The first chunk only runs to the end of the starting page. */
+	size_t chunk = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1));
+
+	/* "size" may wrap below zero on the last step, hence the cast. */
+	for (; (ssize_t)size > 0; size -= chunk, chunk = PAGE_SIZE)
+		homecache_flush_cache(pg++, 0);
+}
+
+/*
+ * Map a single kernel-virtual buffer for DMA.  Any address may be
+ * passed and no alignment appears to be required.
+ *
+ * Note that the first cache line of the buffer may be shared with
+ * unrelated data that has been touched in the meantime.
+ *
+ * Returns the physical address of "ptr" after flushing the range.
+ */
+dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
+			  enum dma_data_direction direction)
+{
+	dma_addr_t pa;
+
+	BUG_ON(!valid_dma_direction(direction));
+	WARN_ON(size == 0);
+
+	pa = __pa(ptr);
+	__dma_map_pa_range(pa, size);
+
+	return pa;
+}
+EXPORT_SYMBOL(dma_map_single);
+
+/* Unmap a single buffer: only validates the direction argument. */
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+EXPORT_SYMBOL(dma_unmap_single);
+
+/*
+ * Map a scatterlist for DMA: record each entry's physical address as
+ * its dma_address and flush that range from the cache.
+ *
+ * Returns nents unchanged.
+ */
+int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
+	       enum dma_data_direction direction)
+{
+	struct scatterlist *entry;
+	int idx;
+
+	BUG_ON(!valid_dma_direction(direction));
+
+	WARN_ON(nents == 0 || sglist->length == 0);
+
+	for_each_sg(sglist, entry, nents, idx) {
+		entry->dma_address = sg_phys(entry);
+		__dma_map_pa_range(entry->dma_address, entry->length);
+	}
+
+	return nents;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+/* Unmap a scatterlist: only validates the direction argument. */
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+EXPORT_SYMBOL(dma_unmap_sg);
+
+/*
+ * Map part of a single page for DMA.  The range offset..offset+size
+ * must not extend past the page (enforced with BUG_ON), so flushing
+ * that one page covers it.
+ *
+ * Returns the page's physical address plus "offset".
+ */
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+
+ BUG_ON(offset + size > PAGE_SIZE);
+ homecache_flush_cache(page, 0);
+
+ return page_to_pa(page) + offset;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+/* Unmap a page mapping: only validates the direction argument. */
+void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+EXPORT_SYMBOL(dma_unmap_page);
+
+/* Sync a mapping for CPU access: only validates the direction argument. */
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
+
+/*
+ * Sync a mapping for device access by flushing every page that the
+ * range [dma_handle, dma_handle + size) touches.
+ */
+void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+				size_t size, enum dma_data_direction direction)
+{
+	unsigned long first_pfn = PFN_DOWN(dma_handle);
+	unsigned long last_pfn = PFN_DOWN(dma_handle + size - 1);
+	unsigned long pfn;
+
+	BUG_ON(!valid_dma_direction(direction));
+
+	pfn = first_pfn;
+	while (pfn <= last_pfn) {
+		homecache_flush_cache(pfn_to_page(pfn), 0);
+		++pfn;
+	}
+}
+EXPORT_SYMBOL(dma_sync_single_for_device);
+
+/*
+ * Sync a scatterlist for CPU access: validates the direction and warns
+ * on an empty list or a zero-length first entry; nothing is flushed.
+ */
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+ WARN_ON(nelems == 0 || sg[0].length == 0);
+}
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+/*
+ * Flush and invalidate cache for scatterlist.
+ *
+ * Each entry is handed to dma_sync_single_for_device() using its
+ * dma_address and sg_dma_len().
+ */
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ int i;
+
+ BUG_ON(!valid_dma_direction(direction));
+ WARN_ON(nelems == 0 || sglist->length == 0);
+
+ for_each_sg(sglist, sg, nelems, i) {
+ dma_sync_single_for_device(dev, sg->dma_address,
+ sg_dma_len(sg), direction);
+ }
+}
+EXPORT_SYMBOL(dma_sync_sg_for_device);
+
+/* Range variant: forwards to dma_sync_single_for_cpu() at dma_handle + offset. */
+void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ dma_sync_single_for_cpu(dev, dma_handle + offset, size, direction);
+}
+EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
+
+/* Range variant: forwards to dma_sync_single_for_device() at dma_handle + offset. */
+void dma_sync_single_range_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ dma_sync_single_for_device(dev, dma_handle + offset, size, direction);
+}
+EXPORT_SYMBOL(dma_sync_single_range_for_device);
+
+/*
+ * dma_alloc_noncoherent() returns non-cacheable memory, so there's no
+ * need to do any flushing here.  All arguments are ignored.
+ */
+void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+ enum dma_data_direction direction)
+{
+}
+EXPORT_SYMBOL(dma_cache_sync);
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
new file mode 100644
index 00000000..b56d12bf
--- /dev/null
+++ b/arch/tile/kernel/pci.c
@@ -0,0 +1,632 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/capability.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/byteorder.h>
+#include <asm/hv_driver.h>
+#include <hv/drv_pcie_rc_intf.h>
+
+
+/*
+ * Initialization flow and process
+ * -------------------------------
+ *
+ * This file contains the routines to search for PCI buses,
+ * enumerate the buses, and configure any attached devices.
+ *
+ * There are two entry points here:
+ * 1) tile_pci_init
+ * This sets up the pci_controller structs, and opens the
+ * FDs to the hypervisor. This is called from setup_arch() early
+ * in the boot process.
+ * 2) pcibios_init
+ * This probes the PCI bus(es) for any attached hardware. It's
+ * called by subsys_initcall. All of the real work is done by the
+ * generic Linux PCI layer.
+ *
+ */
+
+/*
+ * This flag tells if the platform is TILEmpower that needs
+ * special configuration for the PLX switch chip.
+ */
+int __write_once tile_plx_gen1;
+
+/* Controller state, indexed by PCIe controller number (0..TILE_NUM_PCIE-1). */
+static struct pci_controller controllers[TILE_NUM_PCIE];
+/* Number of controllers found by the most recent tile_pci_init() call. */
+static int num_controllers;
+/* Set to 1 by pcibios_init() once a controller's resources are recorded. */
+static int pci_scan_flags[TILE_NUM_PCIE];
+
+/* Config-space accessors; defined after tile_cfg_read()/tile_cfg_write(). */
+static struct pci_ops tile_cfg_ops;
+
+
+/*
+ * We don't need to worry about the alignment of resources.
+ *
+ * Returns the resource's current start address unchanged; "data",
+ * "size" and "align" are ignored.
+ */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+ resource_size_t size, resource_size_t align)
+{
+ return res->start;
+}
+EXPORT_SYMBOL(pcibios_align_resource);
+
+/*
+ * Open a FD to the hypervisor PCI device.
+ *
+ * controller_id is the controller number, config type is 0 or 1 for
+ * config0 or config1 operations.
+ *
+ * Returns the value of hv_dev_open(); callers treat a negative value
+ * as failure.
+ */
+static int __devinit tile_pcie_open(int controller_id, int config_type)
+{
+	char filename[32];
+
+	/* Bound the write: two full-width ints would overflow 32 bytes. */
+	snprintf(filename, sizeof(filename), "pcie/%d/config%d",
+		 controller_id, config_type);
+
+	return hv_dev_open((HV_VirtAddr)filename, 0);
+}
+
+
+/*
+ * Get the IRQ numbers from the HV and set up the handlers for them.
+ *
+ * Reads a pcie_rc_config record from the controller's "ctl" device,
+ * records its interrupt base in the controller, activates the four
+ * consecutive IRQs starting at that base, and latches the plx_gen1
+ * flag if the record sets it.
+ *
+ * Returns 0 on success, -1 if the device cannot be opened or the read
+ * does not return a full record.
+ */
+static int __devinit tile_init_irqs(int controller_id,
+ struct pci_controller *controller)
+{
+ char filename[32];
+ int fd;
+ int ret;
+ int x;
+ struct pcie_rc_config rc_config;
+
+ sprintf(filename, "pcie/%d/ctl", controller_id);
+ fd = hv_dev_open((HV_VirtAddr)filename, 0);
+ if (fd < 0) {
+ pr_err("PCI: hv_dev_open(%s) failed\n", filename);
+ return -1;
+ }
+ ret = hv_dev_pread(fd, 0, (HV_VirtAddr)(&rc_config),
+ sizeof(rc_config), PCIE_RC_CONFIG_MASK_OFF);
+ /* The fd is only needed for this one read. */
+ hv_dev_close(fd);
+ if (ret != sizeof(rc_config)) {
+ pr_err("PCI: wanted %zd bytes, got %d\n",
+ sizeof(rc_config), ret);
+ return -1;
+ }
+ /* Record irq_base so that we can map INTx to IRQ # later. */
+ controller->irq_base = rc_config.intr;
+
+ /* One IRQ per INTx pin (four in total). */
+ for (x = 0; x < 4; x++)
+ tile_irq_activate(rc_config.intr + x,
+ TILE_IRQ_HW_CLEAR);
+
+ if (rc_config.plx_gen1)
+ controller->plx_gen1 = 1;
+
+ return 0;
+}
+
+/*
+ * First initialization entry point, called from setup_arch().
+ *
+ * Find valid controllers and fill in pci_controller structs for each
+ * of them.  A controller is considered present when its config0,
+ * config1 and mem hypervisor devices can all be opened; its state is
+ * stored in controllers[] at the controller's own index, so the array
+ * may be sparsely populated.
+ *
+ * Returns the number of controllers discovered.
+ */
+int __init tile_pci_init(void)
+{
+	int i;
+
+	pr_info("PCI: Searching for controllers...\n");
+
+	/* Re-init number of PCIe controllers to support hot-plug feature. */
+	num_controllers = 0;
+
+	/* Do any configuration we need before using the PCIe */
+
+	for (i = 0; i < TILE_NUM_PCIE; i++) {
+		int hv_cfg_fd0 = -1;
+		int hv_cfg_fd1 = -1;
+		int hv_mem_fd = -1;
+		char name[32];
+		struct pci_controller *controller;
+
+		/*
+		 * To see whether we need a real config op based on
+		 * the results of pcibios_init(), to support PCIe hot-plug.
+		 */
+		if (pci_scan_flags[i] != 0)
+			continue;
+
+		/*
+		 * Open the fd to the HV.  If it fails then this
+		 * device doesn't exist.
+		 */
+		hv_cfg_fd0 = tile_pcie_open(i, 0);
+		if (hv_cfg_fd0 < 0)
+			continue;
+		hv_cfg_fd1 = tile_pcie_open(i, 1);
+		if (hv_cfg_fd1 < 0) {
+			pr_err("PCI: Couldn't open config fd to HV "
+			       "for controller %d\n", i);
+			goto err_cont;
+		}
+
+		sprintf(name, "pcie/%d/mem", i);
+		hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0);
+		if (hv_mem_fd < 0) {
+			pr_err("PCI: Could not open mem fd to HV!\n");
+			goto err_cont;
+		}
+
+		pr_info("PCI: Found PCI controller #%d\n", i);
+
+		controller = &controllers[i];
+
+		controller->index = i;
+		controller->hv_cfg_fd[0] = hv_cfg_fd0;
+		controller->hv_cfg_fd[1] = hv_cfg_fd1;
+		controller->hv_mem_fd = hv_mem_fd;
+		controller->first_busno = 0;
+		controller->last_busno = 0xff;
+		controller->ops = &tile_cfg_ops;
+
+		num_controllers++;
+		continue;
+
+err_cont:
+		/* Release whichever fds were opened before the failure. */
+		if (hv_cfg_fd0 >= 0)
+			hv_dev_close(hv_cfg_fd0);
+		if (hv_cfg_fd1 >= 0)
+			hv_dev_close(hv_cfg_fd1);
+		if (hv_mem_fd >= 0)
+			hv_dev_close(hv_mem_fd);
+	}
+
+	/*
+	 * Before using the PCIe, see if we need to do any platform-specific
+	 * configuration, such as the PLX switch Gen 1 issue on TILEmpower.
+	 *
+	 * controllers[] is indexed by controller number rather than
+	 * packed, so every slot has to be examined: looping only up to
+	 * num_controllers would miss a controller that sits above an
+	 * absent one.  Slots without ops were never set up and are
+	 * skipped.
+	 */
+	for (i = 0; i < TILE_NUM_PCIE; i++) {
+		if (controllers[i].ops != NULL && controllers[i].plx_gen1)
+			tile_plx_gen1 = 1;
+	}
+
+	return num_controllers;
+}
+
+/*
+ * Translate a PCI interrupt pin into an IRQ number for the controller
+ * that owns the device.  The PCI standard numbers pins [1:4]; they are
+ * rebased to [0:3] and offset by the controller's irq_base.  "slot" is
+ * not used.
+ */
+static int tile_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	struct pci_controller *ctrl = (struct pci_controller *)dev->sysdata;
+	int intx = pin - 1;
+
+	return intx + ctrl->irq_base;
+}
+
+
+/*
+ * Program a common max-payload size and max-read-request size into
+ * every PCI Express device.
+ *
+ * The first pass finds the smallest max-payload encoding advertised in
+ * any device's DEVCAP register, starting from the Tile limit (0x1).
+ * The second pass rewrites the payload and read-request fields of each
+ * device's DEVCTL register with that value and a fixed read-size
+ * encoding of 0x2.  Devices without a PCIe capability are skipped.
+ */
+static void __devinit fixup_read_and_payload_sizes(void)
+{
+ struct pci_dev *dev = NULL;
+ int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */
+ int max_read_size = 0x2; /* Limit to 512 byte reads. */
+ u16 new_values;
+
+ /* Scan for the smallest maximum payload size. */
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ int pcie_caps_offset;
+ u32 devcap;
+ int max_payload;
+
+ pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ if (pcie_caps_offset == 0)
+ continue;
+
+ pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP,
+ &devcap);
+ max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD;
+ if (max_payload < smallest_max_payload)
+ smallest_max_payload = max_payload;
+ }
+
+ /* Now, set the max_payload_size for all devices to that value. */
+ /* Read-request size is shifted to bit 12, payload size to bit 5. */
+ new_values = (max_read_size << 12) | (smallest_max_payload << 5);
+ /* dev is NULL again here, so this walk restarts from the first device. */
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ int pcie_caps_offset;
+ u16 devctl;
+
+ pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ if (pcie_caps_offset == 0)
+ continue;
+
+ pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL,
+ &devctl);
+ devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ);
+ devctl |= new_values;
+ pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL,
+ devctl);
+ }
+}
+
+
+/*
+ * Second PCI initialization entry point, called by subsys_initcall.
+ *
+ * The controllers have been set up by the time we get here, by a call to
+ * tile_pci_init.
+ *
+ * For every controller that was set up but not yet scanned, this
+ * initializes its IRQs, scans its bus, and afterwards records the
+ * resources of the first bridge on the root bus.  A controller whose
+ * IRQ setup or bus scan fails is left without a root bus and is
+ * skipped by the resource-recording pass.
+ *
+ * Always returns 0.
+ */
+int __init pcibios_init(void)
+{
+	int i;
+
+	pr_info("PCI: Probing PCI hardware\n");
+
+	/*
+	 * Delay a bit in case devices aren't ready.  Some devices are
+	 * known to require at least 20ms here, but we use a more
+	 * conservative value.
+	 */
+	mdelay(250);
+
+	/* Scan all of the recorded PCI controllers. */
+	for (i = 0; i < TILE_NUM_PCIE; i++) {
+		/*
+		 * Do real pcibios init ops if the controller is initialized
+		 * by tile_pci_init() successfully and not initialized by
+		 * pcibios_init() yet to support PCIe hot-plug.
+		 */
+		if (pci_scan_flags[i] == 0 && controllers[i].ops != NULL) {
+			struct pci_controller *controller = &controllers[i];
+			struct pci_bus *bus;
+
+			if (tile_init_irqs(i, controller)) {
+				pr_err("PCI: Could not initialize IRQs\n");
+				continue;
+			}
+
+			pr_info("PCI: initializing controller #%d\n", i);
+
+			/*
+			 * This comes from the generic Linux PCI driver.
+			 *
+			 * It reads the PCI tree for this bus into the Linux
+			 * data structures.
+			 *
+			 * This is inlined in linux/pci.h and calls into
+			 * pci_scan_bus_parented() in probe.c.
+			 */
+			bus = pci_scan_bus(0, controller->ops, controller);
+			if (bus == NULL) {
+				/* Don't dereference a failed scan result. */
+				pr_err("PCI: Could not scan controller #%d\n",
+				       i);
+				continue;
+			}
+			controller->root_bus = bus;
+			controller->last_busno = bus->subordinate;
+		}
+	}
+
+	/* Do machine dependent PCI interrupt routing */
+	pci_fixup_irqs(pci_common_swizzle, tile_map_irq);
+
+	/*
+	 * This comes from the generic Linux PCI driver.
+	 *
+	 * It allocates all of the resources (I/O memory, etc)
+	 * associated with the devices read in above.
+	 */
+	pci_assign_unassigned_resources();
+
+	/* Configure the max_read_size and max_payload_size values. */
+	fixup_read_and_payload_sizes();
+
+	/* Record the I/O resources in the PCI controller structure. */
+	for (i = 0; i < TILE_NUM_PCIE; i++) {
+		/*
+		 * Do real pcibios init ops if the controller is initialized
+		 * by tile_pci_init() successfully and not initialized by
+		 * pcibios_init() yet to support PCIe hot-plug.
+		 */
+		if (pci_scan_flags[i] == 0 && controllers[i].ops != NULL) {
+			struct pci_bus *root_bus = controllers[i].root_bus;
+			struct pci_bus *next_bus;
+			struct pci_dev *dev;
+
+			/*
+			 * No root bus means IRQ setup or the bus scan
+			 * failed above; there is nothing to record.
+			 */
+			if (root_bus == NULL)
+				continue;
+
+			list_for_each_entry(dev, &root_bus->devices, bus_list) {
+				/*
+				 * Find the PCI host controller, ie. the 1st
+				 * bridge.
+				 */
+				if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
+					(PCI_SLOT(dev->devfn) == 0)) {
+					next_bus = dev->subordinate;
+					/* A bridge without a child bus has
+					 * no resources to copy. */
+					if (next_bus == NULL)
+						continue;
+					controllers[i].mem_resources[0] =
+						*next_bus->resource[0];
+					controllers[i].mem_resources[1] =
+						*next_bus->resource[1];
+					controllers[i].mem_resources[2] =
+						*next_bus->resource[2];
+
+					/* Setup flags. */
+					pci_scan_flags[i] = 1;
+
+					break;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+subsys_initcall(pcibios_init);
+
+/*
+ * No bus fixups needed; this is an intentionally empty hook.
+ */
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+{
+ /* Nothing needs to be done. */
+}
+
+/* Bus-mastering hook; intentionally empty. */
+void pcibios_set_master(struct pci_dev *dev)
+{
+ /* No special bus mastering setup handling. */
+}
+
+/*
+ * This can be called from the generic PCI layer, but doesn't need to
+ * do anything.  The option string is returned unchanged.
+ */
+char __devinit *pcibios_setup(char *str)
+{
+ /* Nothing needs to be done. */
+ return str;
+}
+
+/*
+ * This is called from the generic Linux layer.
+ *
+ * Writes "irq" into the device's PCI_INTERRUPT_LINE config register.
+ */
+void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+}
+
+/*
+ * Enable memory and/or address decoding, as appropriate, for the
+ * device described by the 'dev' struct.
+ *
+ * This is called from the generic PCI layer, and can be called
+ * for bridges or endpoints.
+ */
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+ u16 cmd, old_cmd;
+ u8 header_type;
+ int i;
+ struct resource *r;
+
+ pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ old_cmd = cmd;
+ if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+ /*
+ * For bridges, we enable both memory and I/O decoding
+ * in call cases.
+ */
+ cmd |= PCI_COMMAND_IO;
+ cmd |= PCI_COMMAND_MEMORY;
+ } else {
+ /*
+ * For endpoints, we enable memory and/or I/O decoding
+ * only if they have a memory resource of that type.
+ */
+ for (i = 0; i < 6; i++) {
+ r = &dev->resource[i];
+ if (r->flags & IORESOURCE_UNSET) {
+ pr_err("PCI: Device %s not available "
+ "because of resource collisions\n",
+ pci_name(dev));
+ return -EINVAL;
+ }
+ if (r->flags & IORESOURCE_IO)
+ cmd |= PCI_COMMAND_IO;
+ if (r->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+ }
+
+ /*
+ * We only write the command if it changed.
+ */
+ if (cmd != old_cmd)
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ return 0;
+}
+
+/****************************************************************
+ *
+ * Tile PCI config space read/write routines
+ *
+ ****************************************************************/
+
+/*
+ * These are the normal read and write ops
+ * These are expanded with macros from pci_bus_read_config_byte() etc.
+ *
+ * devfn is the combined PCI slot & function.
+ *
+ * offset is in bytes, from the start of config space for the
+ * specified bus & slot.
+ */
+
+static int __devinit tile_cfg_read(struct pci_bus *bus,
+ unsigned int devfn,
+ int offset,
+ int size,
+ u32 *val)
+{
+ struct pci_controller *controller = bus->sysdata;
+ int busnum = bus->number & 0xff;
+ int slot = (devfn >> 3) & 0x1f;
+ int function = devfn & 0x7;
+ u32 addr;
+ int config_mode = 1;
+
+ /*
+ * There is no bridge between the Tile and bus 0, so we
+ * use config0 to talk to bus 0.
+ *
+ * If we're talking to a bus other than zero then we
+ * must have found a bridge.
+ */
+ if (busnum == 0) {
+ /*
+ * We fake an empty slot for (busnum == 0) && (slot > 0),
+ * since there is only one slot on bus 0.
+ */
+ if (slot) {
+ *val = 0xFFFFFFFF;
+ return 0;
+ }
+ config_mode = 0;
+ }
+
+ addr = busnum << 20; /* Bus in 27:20 */
+ addr |= slot << 15; /* Slot (device) in 19:15 */
+ addr |= function << 12; /* Function is in 14:12 */
+ addr |= (offset & 0xFFF); /* byte address in 0:11 */
+
+ return hv_dev_pread(controller->hv_cfg_fd[config_mode], 0,
+ (HV_VirtAddr)(val), size, addr);
+}
+
+
+/*
+ * See tile_cfg_read() for relevant comments.
+ * Note that "val" is the value to write, not a pointer to that value.
+ *
+ * Writes to bus 0 with a non-zero slot are silently dropped and report
+ * 0.  Otherwise the result of hv_dev_pwrite() is returned as-is.
+ */
+static int __devinit tile_cfg_write(struct pci_bus *bus,
+ unsigned int devfn,
+ int offset,
+ int size,
+ u32 val)
+{
+ struct pci_controller *controller = bus->sysdata;
+ int busnum = bus->number & 0xff;
+ int slot = (devfn >> 3) & 0x1f;
+ int function = devfn & 0x7;
+ u32 addr;
+ int config_mode = 1;
+ HV_VirtAddr valp = (HV_VirtAddr)&val;
+
+ /*
+ * For bus 0 slot 0 we use config 0 accesses.
+ */
+ if (busnum == 0) {
+ /*
+ * We fake an empty slot for (busnum == 0) && (slot > 0),
+ * since there is only one slot on bus 0.
+ */
+ if (slot)
+ return 0;
+ config_mode = 0;
+ }
+
+ /* Same address packing as tile_cfg_read(). */
+ addr = busnum << 20; /* Bus in 27:20 */
+ addr |= slot << 15; /* Slot (device) in 19:15 */
+ addr |= function << 12; /* Function is in 14:12 */
+ addr |= (offset & 0xFFF); /* byte address in 0:11 */
+
+#ifdef __BIG_ENDIAN
+ /* Point to the correct part of the 32-bit "val". */
+ valp += 4 - size;
+#endif
+
+ return hv_dev_pwrite(controller->hv_cfg_fd[config_mode], 0,
+ valp, size, addr);
+}
+
+
+/* Config-space accessors installed as controller->ops by tile_pci_init(). */
+static struct pci_ops tile_cfg_ops = {
+ .read = tile_cfg_read,
+ .write = tile_cfg_write,
+};
+
+
+/*
+ * In the following, each PCI controller's mem_resources[1]
+ * represents its (non-prefetchable) PCI memory resource.
+ * mem_resources[0] and mem_resources[2] refer to its PCI I/O and
+ * prefetchable PCI memory resources, respectively.
+ * For more details, see pci_setup_bridge() in setup-bus.c.
+ * By comparing the target PCI memory address against the
+ * end address of controller 0, we can determine the controller
+ * that should accept the PCI memory access.
+ *
+ * TILE_READ(size, type) defines and exports _tile_read<size>(), which
+ * reads sizeof(type) bytes at "addr" through the selected controller's
+ * hv_mem_fd.  Only controllers 0 and 1 are distinguished.  A non-zero
+ * return from hv_dev_pread() is logged as a failure.
+ *
+ * NOTE(review): "val" is returned even when the read is reported as
+ * failed, in which case it may never have been written -- confirm the
+ * intended value for that case.
+ */
+#define TILE_READ(size, type) \
+type _tile_read##size(unsigned long addr) \
+{ \
+ type val; \
+ int idx = 0; \
+ if (addr > controllers[0].mem_resources[1].end && \
+ addr > controllers[0].mem_resources[2].end) \
+ idx = 1; \
+ if (hv_dev_pread(controllers[idx].hv_mem_fd, 0, \
+ (HV_VirtAddr)(&val), sizeof(type), addr)) \
+ pr_err("PCI: read %zd bytes at 0x%lX failed\n", \
+ sizeof(type), addr); \
+ return val; \
+} \
+EXPORT_SYMBOL(_tile_read##size)
+
+TILE_READ(b, u8);
+TILE_READ(w, u16);
+TILE_READ(l, u32);
+TILE_READ(q, u64);
+
+/*
+ * TILE_WRITE(size, type) defines and exports _tile_write<size>(), which
+ * writes "val" (sizeof(type) bytes) to "addr" through the hv_mem_fd of
+ * controller 0 or 1.  Controller 1 is chosen when the address lies
+ * above the end of both mem_resources[1] and mem_resources[2] of
+ * controller 0.  A non-zero return from hv_dev_pwrite() is logged as a
+ * failure; nothing is reported back to the caller.
+ */
+#define TILE_WRITE(size, type) \
+void _tile_write##size(type val, unsigned long addr) \
+{ \
+ int idx = 0; \
+ if (addr > controllers[0].mem_resources[1].end && \
+ addr > controllers[0].mem_resources[2].end) \
+ idx = 1; \
+ if (hv_dev_pwrite(controllers[idx].hv_mem_fd, 0, \
+ (HV_VirtAddr)(&val), sizeof(type), addr)) \
+ pr_err("PCI: write %zd bytes at 0x%lX failed\n", \
+ sizeof(type), addr); \
+} \
+EXPORT_SYMBOL(_tile_write##size)
+
+TILE_WRITE(b, u8);
+TILE_WRITE(w, u16);
+TILE_WRITE(l, u32);
+TILE_WRITE(q, u64);
diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
new file mode 100644
index 00000000..446a7f52
--- /dev/null
+++ b/arch/tile/kernel/proc.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/smp.h>
+#include <linux/seq_file.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/timex.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/sysctl.h>
+#include <linux/hardirq.h>
+#include <linux/mman.h>
+#include <asm/unaligned.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/homecache.h>
+#include <asm/hardwall.h>
+#include <arch/chip.h>
+
+
+/*
+ * Support /proc/cpuinfo
+ *
+ * The iterator cookie handed to the seq_file callbacks is the cpu
+ * number plus one, so that cpu 0 does not turn into a NULL pointer
+ * (c_start() returns NULL when there are no more cpus).
+ */
+
+#define cpu_to_ptr(n) ((void *)((long)(n)+1))
+#define ptr_to_cpu(p) ((long)(p) - 1)
+
+/*
+ * seq_file "show" callback for /proc/cpuinfo.
+ * "v" is a cookie built by cpu_to_ptr(). For cpu 0 a chip-wide
+ * summary is printed first; then, for an online cpu, one "processor"
+ * line is printed. Always returns 0.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+ int n = ptr_to_cpu(v);
+
+ /* The chip-wide header is emitted once, with the first entry. */
+ if (n == 0) {
+ char buf[NR_CPUS*5];
+ cpulist_scnprintf(buf, sizeof(buf), cpu_online_mask);
+ seq_printf(m, "cpu count\t: %d\n", num_online_cpus());
+ seq_printf(m, "cpu list\t: %s\n", buf);
+ seq_printf(m, "model name\t: %s\n", chip_model);
+ seq_printf(m, "flags\t\t:\n"); /* nothing for now */
+ seq_printf(m, "cpu MHz\t\t: %llu.%06llu\n",
+ get_clock_rate() / 1000000,
+ (get_clock_rate() % 1000000));
+ seq_printf(m, "bogomips\t: %lu.%02lu\n\n",
+ loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ)) % 100);
+ }
+
+#ifdef CONFIG_SMP
+ /* Offline cpus get no "processor" entry. */
+ if (!cpu_online(n))
+ return 0;
+#endif
+
+ seq_printf(m, "processor\t: %d\n", n);
+
+ /* Print only num_online_cpus() blank lines total. */
+ if (cpumask_next(n, cpu_online_mask) < nr_cpu_ids)
+ seq_printf(m, "\n");
+
+ return 0;
+}
+
+/*
+ * seq_file "start" callback: produce the cookie for the cpu at *pos,
+ * or NULL once *pos has reached nr_cpu_ids.
+ */
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	if (*pos >= nr_cpu_ids)
+		return NULL;
+
+	return cpu_to_ptr(*pos);
+}
+/* seq_file "next" callback: advance *pos by one and reuse c_start() for the bounds check. */
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return c_start(m, pos);
+}
+/* seq_file "stop" callback: nothing to release, since c_start() acquires nothing. */
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+/* Iterator operations for /proc/cpuinfo, built from the callbacks above. */
+const struct seq_operations cpuinfo_op = {
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
+};
+
+/*
+ * Support /proc/tile directory
+ */
+
+/*
+ * Create the "tile" proc directory and let the hardwall code populate
+ * it. Returns 0 even if the directory cannot be created; in that case
+ * proc_tile_hardwall_init() is simply not called.
+ */
+static int __init proc_tile_init(void)
+{
+ struct proc_dir_entry *root = proc_mkdir("tile", NULL);
+ if (root == NULL)
+ return 0;
+
+ proc_tile_hardwall_init(root);
+
+ return 0;
+}
+
+arch_initcall(proc_tile_init);
+
+/*
+ * Support /proc/sys/tile directory
+ */
+
+#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */
+/*
+ * Entries below "unaligned_fixup": three int-sized values ("enabled",
+ * "printk", "count"), each mode 0644 and handled by proc_dointvec,
+ * backed by unaligned_fixup, unaligned_printk and
+ * unaligned_fixup_count respectively. The empty entry ends the table.
+ */
+static ctl_table unaligned_subtable[] = {
+ {
+ .procname = "enabled",
+ .data = &unaligned_fixup,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .procname = "printk",
+ .data = &unaligned_printk,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .procname = "count",
+ .data = &unaligned_fixup_count,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {}
+};
+
+/* The "unaligned_fixup" directory (mode 0555) whose children are unaligned_subtable. */
+static ctl_table unaligned_table[] = {
+ {
+ .procname = "unaligned_fixup",
+ .mode = 0555,
+ .child = unaligned_subtable
+ },
+ {}
+};
+
+/* Path under which unaligned_table is registered: the single component "tile". */
+static struct ctl_path tile_path[] = {
+ { .procname = "tile" },
+ { }
+};
+
+/*
+ * Register unaligned_table under tile_path.
+ * The result of register_sysctl_paths() is not checked; this always
+ * returns 0.
+ */
+static int __init proc_sys_tile_init(void)
+{
+ register_sysctl_paths(tile_path, unaligned_table);
+ return 0;
+}
+
+arch_initcall(proc_sys_tile_init);
+#endif
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
new file mode 100644
index 00000000..54e6c64b
--- /dev/null
+++ b/arch/tile/kernel/process.c
@@ -0,0 +1,749 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/preempt.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/kprobes.h>
+#include <linux/elfcore.h>
+#include <linux/tick.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/compat.h>
+#include <linux/hardirq.h>
+#include <linux/syscalls.h>
+#include <linux/kernel.h>
+#include <linux/tracehook.h>
+#include <linux/signal.h>
+#include <asm/stack.h>
+#include <asm/switch_to.h>
+#include <asm/homecache.h>
+#include <asm/syscalls.h>
+#include <asm/traps.h>
+#include <asm/setup.h>
+#ifdef CONFIG_HARDWALL
+#include <asm/hardwall.h>
+#endif
+#include <arch/chip.h>
+#include <arch/abi.h>
+#include <arch/sim_def.h>
+
+
+/*
+ * Use the (x86) "idle=poll" option to prefer low latency when leaving the
+ * idle loop over low power while in the idle loop, e.g. if we have
+ * one thread per core and we want to get threads out of futex waits fast.
+ * "idle=halt" selects the napping idle loop (no_idle_nap == 0).
+ */
+static int no_idle_nap;
+
+/*
+ * Parse the "idle=" early parameter.
+ * Returns 0 for "poll" or "halt", and -EINVAL for a missing or
+ * unrecognized value (both error cases now use the same errno).
+ */
+static int __init idle_setup(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "poll")) {
+ pr_info("using polling idle threads.\n");
+ no_idle_nap = 1;
+ } else if (!strcmp(str, "halt"))
+ no_idle_nap = 0;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+early_param("idle", idle_setup);
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ *
+ * With "idle=poll" (no_idle_nap set) the cpu spins on need_resched();
+ * otherwise it naps in _cpu_idle() with TS_POLLING cleared. Neither
+ * loop ever returns.
+ */
+void cpu_idle(void)
+{
+ int cpu = smp_processor_id();
+
+
+ current_thread_info()->status |= TS_POLLING;
+
+ if (no_idle_nap) {
+ while (1) {
+ while (!need_resched())
+ cpu_relax();
+ /*
+ * Use the same helper as the napping loop below so
+ * that both paths enter the scheduler in the same
+ * preemption state.
+ */
+ schedule_preempt_disabled();
+ }
+ }
+
+ /* endless idle loop with no priority at all */
+ while (1) {
+ tick_nohz_idle_enter();
+ rcu_idle_enter();
+ while (!need_resched()) {
+ if (cpu_is_offline(cpu))
+ BUG(); /* no HOTPLUG_CPU */
+
+ local_irq_disable();
+ __get_cpu_var(irq_stat).idle_timestamp = jiffies;
+ current_thread_info()->status &= ~TS_POLLING;
+ /*
+ * TS_POLLING-cleared state must be visible before we
+ * test NEED_RESCHED:
+ */
+ smp_mb();
+
+ /* Re-check with interrupts off before napping. */
+ if (!need_resched())
+ _cpu_idle();
+ else
+ local_irq_enable();
+ current_thread_info()->status |= TS_POLLING;
+ }
+ rcu_idle_exit();
+ tick_nohz_idle_exit();
+ schedule_preempt_disabled();
+ }
+}
+
+/*
+ * Allocate THREAD_SIZE_ORDER pages on "node" to hold a thread_info.
+ * Returns the page_address() of the allocation, or NULL if
+ * alloc_pages_node() fails. "task" is not used.
+ */
+struct thread_info *alloc_thread_info_node(struct task_struct *task, int node)
+{
+ struct page *page;
+ gfp_t flags = GFP_KERNEL;
+
+#ifdef CONFIG_DEBUG_STACK_USAGE
+ /* Request zeroed pages when stack-usage debugging is configured. */
+ flags |= __GFP_ZERO;
+#endif
+
+ page = alloc_pages_node(node, flags, THREAD_SIZE_ORDER);
+ if (!page)
+ return NULL;
+
+ return (struct thread_info *)page_address(page);
+}
+
+/*
+ * Free a thread_info node, and all of its derivative
+ * data structures.
+ *
+ * This deactivates any hardwall still attached to the task, frees the
+ * single-step state (but not its user buffer, see below), and returns
+ * the THREAD_SIZE_ORDER pages that hold "info".
+ */
+void free_thread_info(struct thread_info *info)
+{
+ struct single_step_state *step_state = info->step_state;
+
+#ifdef CONFIG_HARDWALL
+ /*
+ * We free a thread_info from the context of the task that has
+ * been scheduled next, so the original task is already dead.
+ * Calling deactivate here just frees up the data structures.
+ * If the task we're freeing held the last reference to a
+ * hardwall fd, it would have been released prior to this point
+ * anyway via exit_files(), and "hardwall" would be NULL by now.
+ */
+ if (info->task->thread.hardwall)
+ hardwall_deactivate(info->task);
+#endif
+
+ if (step_state) {
+
+ /*
+ * FIXME: we don't munmap step_state->buffer
+ * because the mm_struct for this process (info->task->mm)
+ * has already been zeroed in exit_mm(). Keeping a
+ * reference to it here seems like a bad move, so this
+ * means we can't munmap() the buffer, and therefore if we
+ * ptrace multiple threads in a process, we will slowly
+ * leak user memory. (Note that as soon as the last
+ * thread in a process dies, we will reclaim all user
+ * memory including single-step buffers in the usual way.)
+ * We should either assign a kernel VA to this buffer
+ * somehow, or we should associate the buffer(s) with the
+ * mm itself so we can clean them up that way.
+ */
+ kfree(step_state);
+ }
+
+ free_pages((unsigned long)info, THREAD_SIZE_ORDER);
+}
+
+static void save_arch_state(struct thread_struct *t);
+
+/*
+ * Set up the architecture-specific state of the new task "p" from the
+ * parent's "regs": the child's pt_regs, its initial kernel stack
+ * frames, and its thread_struct fields. "sp" is the stack pointer for
+ * the child (0 for a kernel thread); "stack_size" is not used.
+ * Always returns 0.
+ */
+int copy_thread(unsigned long clone_flags, unsigned long sp,
+ unsigned long stack_size,
+ struct task_struct *p, struct pt_regs *regs)
+{
+ struct pt_regs *childregs;
+ unsigned long ksp;
+
+ /*
+ * When creating a new kernel thread we pass sp as zero.
+ * Assign it to a reasonable value now that we have the stack.
+ */
+ if (sp == 0 && regs->ex1 == PL_ICS_EX1(KERNEL_PL, 0))
+ sp = KSTK_TOP(p);
+
+ /*
+ * Do not clone step state from the parent; each thread
+ * must make its own lazily.
+ */
+ task_thread_info(p)->step_state = NULL;
+
+ /*
+ * Start new thread in ret_from_fork so it schedules properly
+ * and then return from interrupt like the parent.
+ */
+ p->thread.pc = (unsigned long) ret_from_fork;
+
+ /* Save user stack top pointer so we can ID the stack vm area later. */
+ p->thread.usp0 = sp;
+
+ /* Record the pid of the process that created this one. */
+ p->thread.creator_pid = current->pid;
+
+ /*
+ * Copy the registers onto the kernel stack so the
+ * return-from-interrupt code will reload it into registers.
+ */
+ childregs = task_pt_regs(p);
+ *childregs = *regs;
+ childregs->regs[0] = 0; /* return value is zero */
+ childregs->sp = sp; /* override with new user stack pointer */
+
+ /*
+ * If CLONE_SETTLS is set, set "tp" in the new task to "r4",
+ * which is passed in as arg #5 to sys_clone().
+ */
+ if (clone_flags & CLONE_SETTLS)
+ childregs->tp = regs->regs[4];
+
+ /*
+ * Copy the callee-saved registers from the passed pt_regs struct
+ * into the context-switch callee-saved registers area.
+ * This way when we start the interrupt-return sequence, the
+ * callee-save registers will be correctly in registers, which
+ * is how we assume the compiler leaves them as we start doing
+ * the normal return-from-interrupt path after calling C code.
+ * Zero out the C ABI save area to mark the top of the stack.
+ */
+ /* Layout, from childregs downward: save area, callee-saved regs, save area. */
+ ksp = (unsigned long) childregs;
+ ksp -= C_ABI_SAVE_AREA_SIZE; /* interrupt-entry save area */
+ ((long *)ksp)[0] = ((long *)ksp)[1] = 0;
+ ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long);
+ memcpy((void *)ksp, &regs->regs[CALLEE_SAVED_FIRST_REG],
+ CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long));
+ ksp -= C_ABI_SAVE_AREA_SIZE; /* __switch_to() save area */
+ ((long *)ksp)[0] = ((long *)ksp)[1] = 0;
+ p->thread.ksp = ksp;
+
+#if CHIP_HAS_TILE_DMA()
+ /*
+ * No DMA in the new thread. We model this on the fact that
+ * fork() clears the pending signals, alarms, and aio for the child.
+ */
+ memset(&p->thread.tile_dma_state, 0, sizeof(struct tile_dma_state));
+ memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb));
+#endif
+
+#if CHIP_HAS_SN_PROC()
+ /* Likewise, the new thread is not running static processor code. */
+ p->thread.sn_proc_running = 0;
+ memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb));
+#endif
+
+#if CHIP_HAS_PROC_STATUS_SPR()
+ /* New thread has its miscellaneous processor state bits clear. */
+ p->thread.proc_status = 0;
+#endif
+
+#ifdef CONFIG_HARDWALL
+ /* New thread does not own any networks. */
+ p->thread.hardwall = NULL;
+#endif
+
+
+ /*
+ * Start the new thread with the current architecture state
+ * (user interrupt masks, etc.).
+ */
+ save_arch_state(&p->thread);
+
+ return 0;
+}
+
+/*
+ * Return "current" if it looks plausible, or else a pointer to a dummy.
+ * This can be helpful if we are just trying to emit a clean panic.
+ *
+ * "Plausible" means: not below PAGE_OFFSET, not above high_memory
+ * (when high_memory is set), and aligned for a task_struct. On
+ * failure the bad pointer is logged and a static task named
+ * "<corrupt>" is returned instead.
+ */
+struct task_struct *validate_current(void)
+{
+ static struct task_struct corrupt = { .comm = "<corrupt>" };
+ struct task_struct *tsk = current;
+ if (unlikely((unsigned long)tsk < PAGE_OFFSET ||
+ (high_memory && (void *)tsk > high_memory) ||
+ ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) {
+ pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer);
+ tsk = &corrupt;
+ }
+ return tsk;
+}
+
+/*
+ * Take and return the pointer to the previous task, for schedule_tail().
+ * Writes two values to SPR_SIM_CONTROL: first the creator's pid tagged
+ * with SIM_CONTROL_OS_FORK_PARENT, then the current pid tagged with
+ * SIM_CONTROL_OS_FORK. "prev" is passed through untouched.
+ */
+struct task_struct *sim_notify_fork(struct task_struct *prev)
+{
+ struct task_struct *tsk = current;
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_FORK_PARENT |
+ (tsk->thread.creator_pid << _SIM_CONTROL_OPERATOR_BITS));
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_FORK |
+ (tsk->pid << _SIM_CONTROL_OPERATOR_BITS));
+ return prev;
+}
+
+/*
+ * Copy the saved pt_regs of "tsk" into the ELF register set "regs"
+ * via elf_core_copy_regs(). Always returns 1.
+ */
+int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
+{
+ struct pt_regs *ptregs = task_pt_regs(tsk);
+ elf_core_copy_regs(regs, ptregs);
+ return 1;
+}
+
+#if CHIP_HAS_TILE_DMA()
+
+/*
+ * Allow user processes to access the DMA SPRs
+ *
+ * Writes 1 to the DMA_CPL and DMA_NOTIFY "MPL set" SPRs one level
+ * below the kernel's: _SET_1 when CONFIG_KERNEL_PL is 2, else _SET_0.
+ */
+void grant_dma_mpls(void)
+{
+#if CONFIG_KERNEL_PL == 2
+ __insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1);
+ __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1);
+#else
+ __insn_mtspr(SPR_MPL_DMA_CPL_SET_0, 1);
+ __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_0, 1);
+#endif
+}
+
+/*
+ * Forbid user processes from accessing the DMA SPRs
+ *
+ * Writes 1 to the DMA_CPL and DMA_NOTIFY "MPL set" SPRs for the level
+ * matching CONFIG_KERNEL_PL: _SET_2 when it is 2, else _SET_1.
+ */
+void restrict_dma_mpls(void)
+{
+#if CONFIG_KERNEL_PL == 2
+ __insn_mtspr(SPR_MPL_DMA_CPL_SET_2, 1);
+ __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_2, 1);
+#else
+ __insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1);
+ __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1);
+#endif
+}
+
+/*
+ * Pause the DMA engine, then save off its state registers.
+ * "dma" receives the address, stride, chunk-size and byte registers
+ * plus a status word assembled from two reads of SPR_DMA_USER_STATUS.
+ */
+static void save_tile_dma_state(struct tile_dma_state *dma)
+{
+ unsigned long state = __insn_mfspr(SPR_DMA_USER_STATUS);
+ unsigned long post_suspend_state;
+
+ /* If we're running, suspend the engine. */
+ if ((state & DMA_STATUS_MASK) == SPR_DMA_STATUS__RUNNING_MASK)
+ __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK);
+
+ /*
+ * Wait for the engine to idle, then save regs. Note that we
+ * want to record the "running" bit from before suspension,
+ * and the "done" bit from after, so that we can properly
+ * distinguish a case where the user suspended the engine from
+ * the case where the kernel suspended as part of the context
+ * swap.
+ */
+ do {
+ post_suspend_state = __insn_mfspr(SPR_DMA_USER_STATUS);
+ } while (post_suspend_state & SPR_DMA_STATUS__BUSY_MASK);
+
+ dma->src = __insn_mfspr(SPR_DMA_SRC_ADDR);
+ dma->src_chunk = __insn_mfspr(SPR_DMA_SRC_CHUNK_ADDR);
+ dma->dest = __insn_mfspr(SPR_DMA_DST_ADDR);
+ dma->dest_chunk = __insn_mfspr(SPR_DMA_DST_CHUNK_ADDR);
+ dma->strides = __insn_mfspr(SPR_DMA_STRIDE);
+ dma->chunk_size = __insn_mfspr(SPR_DMA_CHUNK_SIZE);
+ dma->byte = __insn_mfspr(SPR_DMA_BYTE);
+ /* "running" from before the suspend, "done" from after it. */
+ dma->status = (state & SPR_DMA_STATUS__RUNNING_MASK) |
+ (post_suspend_state & SPR_DMA_STATUS__DONE_MASK);
+}
+
+/*
+ * Restart a DMA that was running before we were context-switched out.
+ * Reloads the DMA SPRs from t->tile_dma_state, re-creating the "done"
+ * bit first if it was saved as set, and re-issues the request if the
+ * saved status shows the engine was running.
+ */
+static void restore_tile_dma_state(struct thread_struct *t)
+{
+ const struct tile_dma_state *dma = &t->tile_dma_state;
+
+ /*
+ * The only way to restore the done bit is to run a zero
+ * length transaction.
+ */
+ if ((dma->status & SPR_DMA_STATUS__DONE_MASK) &&
+ !(__insn_mfspr(SPR_DMA_USER_STATUS) & SPR_DMA_STATUS__DONE_MASK)) {
+ __insn_mtspr(SPR_DMA_BYTE, 0);
+ __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
+ /* Spin until the zero-length transaction is no longer busy. */
+ while (__insn_mfspr(SPR_DMA_USER_STATUS) &
+ SPR_DMA_STATUS__BUSY_MASK)
+ ;
+ }
+
+ __insn_mtspr(SPR_DMA_SRC_ADDR, dma->src);
+ __insn_mtspr(SPR_DMA_SRC_CHUNK_ADDR, dma->src_chunk);
+ __insn_mtspr(SPR_DMA_DST_ADDR, dma->dest);
+ __insn_mtspr(SPR_DMA_DST_CHUNK_ADDR, dma->dest_chunk);
+ __insn_mtspr(SPR_DMA_STRIDE, dma->strides);
+ __insn_mtspr(SPR_DMA_CHUNK_SIZE, dma->chunk_size);
+ __insn_mtspr(SPR_DMA_BYTE, dma->byte);
+
+ /*
+ * Restart the engine if we were running and not done.
+ * Clear a pending async DMA fault that we were waiting on return
+ * to user space to execute, since we expect the DMA engine
+ * to regenerate those faults for us now. Note that we don't
+ * try to clear the TIF_ASYNC_TLB flag, since it's relatively
+ * harmless if set, and it covers both DMA and the SN processor.
+ */
+ if ((dma->status & DMA_STATUS_MASK) == SPR_DMA_STATUS__RUNNING_MASK) {
+ t->dma_async_tlb.fault_num = 0;
+ __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
+ }
+}
+
+#endif
+
+/*
+ * Read the per-thread SPRs (interrupt mask, EX_CONTEXT_0_x,
+ * SYSTEM_SAVE_0_x, INTCTRL_0_STATUS and the chip-dependent extras)
+ * into "t". restore_arch_state() writes the same set back.
+ */
+static void save_arch_state(struct thread_struct *t)
+{
+#if CHIP_HAS_SPLIT_INTR_MASK()
+ /* The mask is split across two SPRs; combine them into one u64. */
+ t->interrupt_mask = __insn_mfspr(SPR_INTERRUPT_MASK_0_0) |
+ ((u64)__insn_mfspr(SPR_INTERRUPT_MASK_0_1) << 32);
+#else
+ t->interrupt_mask = __insn_mfspr(SPR_INTERRUPT_MASK_0);
+#endif
+ t->ex_context[0] = __insn_mfspr(SPR_EX_CONTEXT_0_0);
+ t->ex_context[1] = __insn_mfspr(SPR_EX_CONTEXT_0_1);
+ t->system_save[0] = __insn_mfspr(SPR_SYSTEM_SAVE_0_0);
+ t->system_save[1] = __insn_mfspr(SPR_SYSTEM_SAVE_0_1);
+ t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2);
+ t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3);
+ t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS);
+#if CHIP_HAS_PROC_STATUS_SPR()
+ t->proc_status = __insn_mfspr(SPR_PROC_STATUS);
+#endif
+#if !CHIP_HAS_FIXED_INTVEC_BASE()
+ t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0);
+#endif
+#if CHIP_HAS_TILE_RTF_HWM()
+ t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM);
+#endif
+#if CHIP_HAS_DSTREAM_PF()
+ t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
+#endif
+}
+
+/*
+ * Write the per-thread SPR values held in "t" back to the hardware;
+ * the counterpart of save_arch_state().
+ */
+static void restore_arch_state(const struct thread_struct *t)
+{
+#if CHIP_HAS_SPLIT_INTR_MASK()
+ /* Low 32 bits go to MASK_0_0, high 32 bits to MASK_0_1. */
+ __insn_mtspr(SPR_INTERRUPT_MASK_0_0, (u32) t->interrupt_mask);
+ __insn_mtspr(SPR_INTERRUPT_MASK_0_1, t->interrupt_mask >> 32);
+#else
+ __insn_mtspr(SPR_INTERRUPT_MASK_0, t->interrupt_mask);
+#endif
+ __insn_mtspr(SPR_EX_CONTEXT_0_0, t->ex_context[0]);
+ __insn_mtspr(SPR_EX_CONTEXT_0_1, t->ex_context[1]);
+ __insn_mtspr(SPR_SYSTEM_SAVE_0_0, t->system_save[0]);
+ __insn_mtspr(SPR_SYSTEM_SAVE_0_1, t->system_save[1]);
+ __insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]);
+ __insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]);
+ __insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0);
+#if CHIP_HAS_PROC_STATUS_SPR()
+ __insn_mtspr(SPR_PROC_STATUS, t->proc_status);
+#endif
+#if !CHIP_HAS_FIXED_INTVEC_BASE()
+ __insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base);
+#endif
+#if CHIP_HAS_TILE_RTF_HWM()
+ __insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm);
+#endif
+#if CHIP_HAS_DSTREAM_PF()
+ __insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf);
+#endif
+}
+
+
+/*
+ * Quiesce the outgoing ("current") task's DMA engine and static
+ * network processor, on chips that have them, saving what is needed
+ * to restart them later. "next" is not used.
+ */
+void _prepare_arch_switch(struct task_struct *next)
+{
+#if CHIP_HAS_SN_PROC()
+ int snctl;
+#endif
+#if CHIP_HAS_TILE_DMA()
+ struct tile_dma_state *dma = &current->thread.tile_dma_state;
+ if (dma->enabled)
+ save_tile_dma_state(dma);
+#endif
+#if CHIP_HAS_SN_PROC()
+ /*
+ * Suspend the static network processor if it was running.
+ * We do not suspend the fabric itself, just like we don't
+ * try to suspend the UDN.
+ */
+ snctl = __insn_mfspr(SPR_SNCTL);
+ /* Remember whether it was running so _switch_to() can restart it. */
+ current->thread.sn_proc_running =
+ (snctl & SPR_SNCTL__FRZPROC_MASK) == 0;
+ if (current->thread.sn_proc_running)
+ __insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK);
+#endif
+}
+
+
+/*
+ * Architecture part of a context switch from "prev" to "next": save
+ * prev's SPR state, set up DMA, SPR, static-network and hardwall
+ * state for next, then hand off to __switch_to() for the stack and
+ * register switch. Returns whatever __switch_to() returns.
+ */
+struct task_struct *__sched _switch_to(struct task_struct *prev,
+ struct task_struct *next)
+{
+ /* DMA state is already saved; save off other arch state. */
+ save_arch_state(&prev->thread);
+
+#if CHIP_HAS_TILE_DMA()
+ /*
+ * Restore DMA in new task if desired.
+ * Note that it is only safe to restart here since interrupts
+ * are disabled, so we can't take any DMATLB miss or access
+ * interrupts before we have finished switching stacks.
+ */
+ if (next->thread.tile_dma_state.enabled) {
+ restore_tile_dma_state(&next->thread);
+ grant_dma_mpls();
+ } else {
+ restrict_dma_mpls();
+ }
+#endif
+
+ /* Restore other arch state. */
+ restore_arch_state(&next->thread);
+
+#if CHIP_HAS_SN_PROC()
+ /*
+ * Restart static network processor in the new process
+ * if it was running before.
+ */
+ if (next->thread.sn_proc_running) {
+ int snctl = __insn_mfspr(SPR_SNCTL);
+ __insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK);
+ }
+#endif
+
+#ifdef CONFIG_HARDWALL
+ /* Enable or disable access to the network registers appropriately. */
+ /* Nothing is written when prev and next agree on having a hardwall. */
+ if (prev->thread.hardwall != NULL) {
+ if (next->thread.hardwall == NULL)
+ restrict_network_mpls();
+ } else if (next->thread.hardwall != NULL) {
+ grant_network_mpls();
+ }
+#endif
+
+ /*
+ * Switch kernel SP, PC, and callee-saved registers.
+ * In the context of the new task, return the old task pointer
+ * (i.e. the task that actually called __switch_to).
+ * Pass the value to use for SYSTEM_SAVE_K_0 when we reset our sp.
+ */
+ return __switch_to(prev, next, next_current_ksp0(next));
+}
+
+/*
+ * This routine is called on return from interrupt if any of the
+ * TIF_WORK_MASK flags are set in thread_info->flags. It is
+ * entered with interrupts disabled so we don't miss an event
+ * that modified the thread_info flags. If any flag is set, we
+ * handle it and return, and the calling assembly code will
+ * re-disable interrupts, reload the thread flags, and call back
+ * if more flags need to be handled.
+ *
+ * We return whether we need to check the thread_info flags again
+ * or not. Note that we don't clear TIF_SINGLESTEP here, so it's
+ * important that it be tested last, and then claim that we don't
+ * need to recheck the flags.
+ *
+ * At most one flag is handled per call, in the order tested below.
+ * Returns 1 to request a recheck and 0 to stop; panics if none of
+ * the known flags is set in "thread_info_flags".
+ */
+int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
+{
+ /* If we enter in kernel mode, do nothing and exit the caller loop. */
+ if (!user_mode(regs))
+ return 0;
+
+ if (thread_info_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ return 1;
+ }
+#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+ if (thread_info_flags & _TIF_ASYNC_TLB) {
+ do_async_page_fault(regs);
+ return 1;
+ }
+#endif
+ if (thread_info_flags & _TIF_SIGPENDING) {
+ do_signal(regs);
+ return 1;
+ }
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
+ return 1;
+ }
+ if (thread_info_flags & _TIF_SINGLESTEP) {
+ single_step_once(regs);
+ return 0;
+ }
+ panic("work_pending: bad flags %#x\n", thread_info_flags);
+}
+
+/*
+ * Note there is an implicit fifth argument if (clone_flags & CLONE_SETTLS).
+ * A zero "newsp" means "use the caller's current stack pointer"
+ * (regs->sp). Returns the result of do_fork().
+ */
+SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
+ void __user *, parent_tidptr, void __user *, child_tidptr,
+ struct pt_regs *, regs)
+{
+ if (!newsp)
+ newsp = regs->sp;
+ return do_fork(clone_flags, newsp, regs, 0,
+ parent_tidptr, child_tidptr);
+}
+
+/*
+ * sys_execve(): replace the current program image.
+ *
+ * Copies the path in with getname(), runs do_execve(), and on success
+ * calls single_step_execve(). Returns the getname() error if the path
+ * cannot be fetched, otherwise the do_execve() result.
+ */
+SYSCALL_DEFINE4(execve, const char __user *, path,
+		const char __user *const __user *, argv,
+		const char __user *const __user *, envp,
+		struct pt_regs *, regs)
+{
+	char *filename = getname(path);
+	long error;
+
+	if (IS_ERR(filename))
+		return PTR_ERR(filename);
+
+	error = do_execve(filename, argv, envp, regs);
+	putname(filename);
+	if (!error)
+		single_step_execve();
+
+	return error;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat flavour of execve: same flow as the native syscall, but the
+ * argument and environment vectors are compat pointers and the work
+ * is done by compat_do_execve().
+ */
+long compat_sys_execve(const char __user *path,
+		       compat_uptr_t __user *argv,
+		       compat_uptr_t __user *envp,
+		       struct pt_regs *regs)
+{
+	char *filename = getname(path);
+	long error;
+
+	if (IS_ERR(filename))
+		return PTR_ERR(filename);
+
+	error = compat_do_execve(filename, argv, envp, regs);
+	putname(filename);
+	if (!error)
+		single_step_execve();
+
+	return error;
+}
+#endif
+
+/*
+ * Report where a sleeping task is waiting: walk its kernel backtrace
+ * and return the first pc that is not inside a scheduler function.
+ * Returns 0 for a NULL, current or running task, or when every frame
+ * is in the scheduler.
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+	struct KBacktraceIterator kbt;
+
+	if (p == NULL || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	KBacktraceIterator_init(&kbt, p, NULL);
+	while (!KBacktraceIterator_end(&kbt)) {
+		if (!in_sched_functions(kbt.it.pc))
+			return kbt.it.pc;
+		KBacktraceIterator_next(&kbt);
+	}
+
+	return 0;
+}
+
+/*
+ * We pass in lr as zero (cleared in kernel_thread) and the caller
+ * part of the backtrace ABI on the stack also zeroed (in copy_thread)
+ * so that backtraces will stop with this function.
+ * Note that we don't use r0, since copy_thread() clears it.
+ *
+ * "fn" and "arg" arrive in r1 and r2 as loaded by kernel_thread(), so
+ * their types here match kernel_thread()'s (a function taking a
+ * void *, and a void * argument) instead of narrowing the pointer
+ * argument to an int. The thread exits with fn's return value.
+ */
+static void start_kernel_thread(int dummy, int (*fn)(void *), void *arg)
+{
+ do_exit(fn(arg));
+}
+
+/*
+ * Create a kernel thread
+ *
+ * Builds a zeroed pt_regs that starts at start_kernel_thread() at
+ * kernel privilege, with "fn" in r1 and "arg" in r2, and forks with
+ * CLONE_VM | CLONE_UNTRACED added to "flags". Returns the do_fork()
+ * result.
+ */
+int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+ struct pt_regs regs;
+
+ memset(&regs, 0, sizeof(regs));
+ regs.ex1 = PL_ICS_EX1(KERNEL_PL, 0); /* run at kernel PL, no ICS */
+ regs.pc = (long) start_kernel_thread;
+ regs.flags = PT_FLAGS_CALLER_SAVES; /* need to restore r1 and r2 */
+ regs.regs[1] = (long) fn; /* function pointer */
+ regs.regs[2] = (long) arg; /* parameter register */
+
+ /* Ok, create the new process.. */
+ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs,
+ 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/* Flush thread state. Intentionally empty: this function does nothing. */
+void flush_thread(void)
+{
+ /* Nothing */
+}
+
+/*
+ * Free current thread data structures etc..
+ * Intentionally empty: this function does nothing.
+ */
+void exit_thread(void)
+{
+ /* Nothing */
+}
+
+/*
+ * Log the contents of "regs" at error level, preceded by the pid,
+ * command name and cpu of the (validated) current task, and followed
+ * by dump_stack_regs(). Registers are printed three per line on
+ * tilegx and four per line otherwise.
+ */
+void show_regs(struct pt_regs *regs)
+{
+ struct task_struct *tsk = validate_current();
+ int i;
+
+ pr_err("\n");
+ pr_err(" Pid: %d, comm: %20s, CPU: %d\n",
+ tsk->pid, tsk->comm, smp_processor_id());
+#ifdef __tilegx__
+ /* r0..r50 in rows of three, then r51, r52 and tp. */
+ for (i = 0; i < 51; i += 3)
+ pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
+ i, regs->regs[i], i+1, regs->regs[i+1],
+ i+2, regs->regs[i+2]);
+ pr_err(" r51: "REGFMT" r52: "REGFMT" tp : "REGFMT"\n",
+ regs->regs[51], regs->regs[52], regs->tp);
+ pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr);
+#else
+ /* r0..r51 in rows of four, then r52, tp, sp and lr. */
+ for (i = 0; i < 52; i += 4)
+ pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT
+ " r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
+ i, regs->regs[i], i+1, regs->regs[i+1],
+ i+2, regs->regs[i+2], i+3, regs->regs[i+3]);
+ pr_err(" r52: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n",
+ regs->regs[52], regs->tp, regs->sp, regs->lr);
+#endif
+ pr_err(" pc : "REGFMT" ex1: %ld faultnum: %ld\n",
+ regs->pc, regs->ex1, regs->faultnum);
+
+ dump_stack_regs(regs);
+}
diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c
new file mode 100644
index 00000000..e92e4052
--- /dev/null
+++ b/arch/tile/kernel/ptrace.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Copied from i386: Ross Biro 1/23/92
+ */
+
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/kprobes.h>
+#include <linux/compat.h>
+#include <linux/uaccess.h>
+#include <asm/traps.h>
+
+/* Turn on single-stepping for "child" by setting its TIF_SINGLESTEP flag. */
+void user_enable_single_step(struct task_struct *child)
+{
+ set_tsk_thread_flag(child, TIF_SINGLESTEP);
+}
+
+/* Turn off single-stepping for "child" by clearing its TIF_SINGLESTEP flag. */
+void user_disable_single_step(struct task_struct *child)
+{
+ clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ * Clears both tracing-related thread flags on "child".
+ */
+void ptrace_disable(struct task_struct *child)
+{
+ clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+
+ /*
+ * Also drop TIF_SYSCALL_TRACE, the flag do_syscall_trace()
+ * tests before reporting a syscall stop to the tracer.
+ */
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+}
+
+/*
+ * Architecture-specific ptrace requests on "child".
+ *
+ * Handles register peek/poke and bulk get/set against the child's
+ * pt_regs, plus the TILE-specific part of PTRACE_SETOPTIONS; any
+ * other request is forwarded to ptrace_request() (or
+ * compat_ptrace_request() for a compat caller). Returns 0 or the
+ * helper's result on success; requests that fail validation here
+ * return -EIO.
+ */
+long arch_ptrace(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
+{
+ unsigned long __user *datap = (long __user __force *)data;
+ unsigned long tmp;
+ long ret = -EIO;
+ char *childreg;
+ struct pt_regs copyregs;
+ int ex1_offset;
+
+ switch (request) {
+
+ case PTRACE_PEEKUSR: /* Read register from pt_regs. */
+ /* "addr" is a byte offset into pt_regs; it must be in range and aligned. */
+ if (addr >= PTREGS_SIZE)
+ break;
+ childreg = (char *)task_pt_regs(child) + addr;
+#ifdef CONFIG_COMPAT
+ if (is_compat_task()) {
+ if (addr & (sizeof(compat_long_t)-1))
+ break;
+ ret = put_user(*(compat_long_t *)childreg,
+ (compat_long_t __user *)datap);
+ } else
+#endif
+ {
+ if (addr & (sizeof(long)-1))
+ break;
+ ret = put_user(*(long *)childreg, datap);
+ }
+ break;
+
+ case PTRACE_POKEUSR: /* Write register in pt_regs. */
+ if (addr >= PTREGS_SIZE)
+ break;
+ childreg = (char *)task_pt_regs(child) + addr;
+
+ /* Guard against overwrites of the privilege level. */
+ ex1_offset = PTREGS_OFFSET_EX1;
+#if defined(CONFIG_COMPAT) && defined(__BIG_ENDIAN)
+ if (is_compat_task()) /* point at low word */
+ ex1_offset += sizeof(compat_long_t);
+#endif
+ /* A write to ex1 keeps only the ICS bit and forces USER_PL. */
+ if (addr == ex1_offset)
+ data = PL_ICS_EX1(USER_PL, EX1_ICS(data));
+
+#ifdef CONFIG_COMPAT
+ if (is_compat_task()) {
+ if (addr & (sizeof(compat_long_t)-1))
+ break;
+ *(compat_long_t *)childreg = data;
+ } else
+#endif
+ {
+ if (addr & (sizeof(long)-1))
+ break;
+ *(long *)childreg = data;
+ }
+ ret = 0;
+ break;
+
+ case PTRACE_GETREGS: /* Get all registers from the child. */
+ if (copy_to_user(datap, task_pt_regs(child),
+ sizeof(struct pt_regs)) == 0) {
+ ret = 0;
+ }
+ break;
+
+ case PTRACE_SETREGS: /* Set all registers in the child. */
+ if (copy_from_user(&copyregs, datap,
+ sizeof(struct pt_regs)) == 0) {
+ /* As for POKEUSR: never let the tracer raise the privilege level. */
+ copyregs.ex1 =
+ PL_ICS_EX1(USER_PL, EX1_ICS(copyregs.ex1));
+ *task_pt_regs(child) = copyregs;
+ ret = 0;
+ }
+ break;
+
+ case PTRACE_GETFPREGS: /* Get the child FPU state. */
+ case PTRACE_SETFPREGS: /* Set the child FPU state. */
+ /* Not implemented: falls out with the default -EIO. */
+ break;
+
+ case PTRACE_SETOPTIONS:
+ /* Support TILE-specific ptrace options. */
+ /* Strip the TILE bits before handing the rest to the generic code. */
+ child->ptrace &= ~PT_TRACE_MASK_TILE;
+ tmp = data & PTRACE_O_MASK_TILE;
+ data &= ~PTRACE_O_MASK_TILE;
+ ret = ptrace_request(child, request, addr, data);
+ if (tmp & PTRACE_O_TRACEMIGRATE)
+ child->ptrace |= PT_TRACE_MIGRATE;
+ break;
+
+ default:
+#ifdef CONFIG_COMPAT
+ if (task_thread_info(current)->status & TS_COMPAT) {
+ ret = compat_ptrace_request(child, request,
+ addr, data);
+ break;
+ }
+#endif
+ ret = ptrace_request(child, request, addr, data);
+ break;
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Not used; we handle compat issues in arch_ptrace() directly.
+ * Reaching this function is treated as a kernel bug (BUG()).
+ */
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+ compat_ulong_t addr, compat_ulong_t data)
+{
+ BUG();
+}
+#endif
+
+/*
+ * Report a syscall stop to the tracer of the current task.
+ * Does nothing unless TIF_SYSCALL_TRACE is set and the task is being
+ * ptraced. After the stop, a non-zero exit_code is delivered to the
+ * task as a signal and then cleared.
+ */
+void do_syscall_trace(void)
+{
+ if (!test_thread_flag(TIF_SYSCALL_TRACE))
+ return;
+
+ if (!(current->ptrace & PT_PTRACED))
+ return;
+
+ /*
+ * The 0x80 provides a way for the tracing parent to distinguish
+ * between a syscall stop and SIGTRAP delivery
+ */
+ ptrace_notify(SIGTRAP|((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
+
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+ * for normal use. strace only continues with a signal if the
+ * stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code) {
+ send_sig(current->exit_code, current, 1);
+ current->exit_code = 0;
+ }
+}
+
+/*
+ * Force a SIGTRAP (si_code TRAP_BRKPT) on "tsk", with si_addr set to
+ * the pc in "regs". "error_code" is not used.
+ */
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
+{
+ struct siginfo info;
+
+ memset(&info, 0, sizeof(info));
+ info.si_signo = SIGTRAP;
+ info.si_code = TRAP_BRKPT;
+ info.si_addr = (void __user *) regs->pc;
+
+ /* Send us the fakey SIGTRAP */
+ force_sig_info(SIGTRAP, &info, tsk);
+}
+
+/*
+ * Handle synthetic interrupt delivered only by the simulator.
+ * Turns it into a SIGTRAP for the current task via send_sigtrap().
+ */
+void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num)
+{
+ send_sigtrap(current, regs, fault_num);
+}
diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c
new file mode 100644
index 00000000..baa3d905
--- /dev/null
+++ b/arch/tile/kernel/reboot.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/stddef.h>
+#include <linux/reboot.h>
+#include <linux/smp.h>
+#include <linux/pm.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+#include <hv/hypervisor.h>
+
+#ifndef CONFIG_SMP
+#define smp_send_stop()
+#endif
+
+/*
+ * Common shutdown prologue: disable all interrupts locally, then call
+ * smp_send_stop().
+ */
+static void quiesce_for_shutdown(void)
+{
+	arch_local_irq_disable_all();
+	smp_send_stop();
+}
+
+/* Halt the machine via the hypervisor's hv_halt(). */
+void machine_halt(void)
+{
+	warn_early_printk();
+	quiesce_for_shutdown();
+	hv_halt();
+}
+
+/* Power the machine off via the hypervisor's hv_power_off(). */
+void machine_power_off(void)
+{
+	warn_early_printk();
+	quiesce_for_shutdown();
+	hv_power_off();
+}
+
+/*
+ * Restart via hv_restart(), passing the fixed string "vmlinux" and the
+ * caller-supplied "cmd" string.  Unlike the two functions above this
+ * does not call warn_early_printk().
+ */
+void machine_restart(char *cmd)
+{
+	quiesce_for_shutdown();
+	hv_restart((HV_VirtAddr) "vmlinux", (HV_VirtAddr) cmd);
+}
+
+/*
+ * No interesting distinction to be made here.
+ * The hook is defined but left NULL; nothing in this file installs a
+ * handler.
+ */
+void (*pm_power_off)(void) = NULL;
diff --git a/arch/tile/kernel/regs_32.S b/arch/tile/kernel/regs_32.S
new file mode 100644
index 00000000..c12280c2
--- /dev/null
+++ b/arch/tile/kernel/regs_32.S
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+#include <arch/spr_def.h>
+#include <asm/processor.h>
+#include <asm/switch_to.h>
+
+/*
+ * See <asm/switch_to.h>; called with prev and next task_struct pointers.
+ * "prev" is returned in r0 for __switch_to and also for ret_from_fork.
+ *
+ * We want to save pc/sp in "prev", and get the new pc/sp from "next".
+ * We also need to save all the callee-saved registers on the stack.
+ *
+ * Intel enables/disables access to the hardware cycle counter in
+ * seccomp (secure computing) environments if necessary, based on
+ * has_secure_computing(). We might want to do this at some point,
+ * though it would require virtualizing the other SPRs under WORLD_ACCESS.
+ *
+ * Since we're saving to the stack, we omit sp from this list.
+ * And for parallels with other architectures, we save lr separately,
+ * in the thread_struct itself (as the "pc" field).
+ *
+ * This code also needs to be aligned with process.c copy_thread()
+ */
+
+/*
+ * NOTE(review): the message below names <asm/system.h> and kernel/entry.S,
+ * but this file includes <asm/switch_to.h>; presumably that is where
+ * CALLEE_SAVED_REGS_COUNT now comes from -- confirm.
+ */
+#if CALLEE_SAVED_REGS_COUNT != 24
+# error Mismatch between <asm/system.h> and kernel/entry.S
+#endif
+/* Stack frame size in bytes: two extra 4-byte words plus the callee-saved slots. */
+#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 4)
+
+/* Store (SAVE) or load (LOAD) one register at the address in r12, then advance r12 by 4. */
+#define SAVE_REG(r) { sw r12, r; addi r12, r12, 4 }
+#define LOAD_REG(r) { lw r, r12; addi r12, r12, 4 }
+/* Apply f to each of r30..r52 in ascending order. */
+#define FOR_EACH_CALLEE_SAVED_REG(f) \
+ f(r30); f(r31); \
+ f(r32); f(r33); f(r34); f(r35); f(r36); f(r37); f(r38); f(r39); \
+ f(r40); f(r41); f(r42); f(r43); f(r44); f(r45); f(r46); f(r47); \
+ f(r48); f(r49); f(r50); f(r51); f(r52);
+
+/*
+ * __switch_to: r0 = prev task_struct, r1 = next task_struct; r2 is written
+ * to SPR_SYSTEM_SAVE_K_0.  r0 is not modified, so prev is returned in r0.
+ * Uses r3, r4 and r10-r13 as scratch.
+ */
+STD_ENTRY_SECTION(__switch_to, .sched.text)
+ {
+ move r10, sp /* keep the incoming sp */
+ sw sp, lr /* store lr at the incoming sp */
+ addi sp, sp, -FRAME_SIZE /* allocate the frame */
+ }
+ {
+ addi r11, sp, 4
+ addi r12, sp, 8 /* r12 walks the callee-saved slots */
+ }
+ {
+ sw r11, r10 /* store the incoming sp at new sp + 4 */
+ addli r4, r1, TASK_STRUCT_THREAD_KSP_OFFSET
+ }
+ {
+ lw r13, r4 /* Load new sp to a temp register early. */
+ addli r3, r0, TASK_STRUCT_THREAD_KSP_OFFSET
+ }
+ FOR_EACH_CALLEE_SAVED_REG(SAVE_REG)
+ {
+ sw r3, sp /* save our sp at prev + KSP offset */
+ addli r3, r0, TASK_STRUCT_THREAD_PC_OFFSET
+ }
+ {
+ sw r3, lr /* save lr at prev + PC offset */
+ addli r4, r1, TASK_STRUCT_THREAD_PC_OFFSET
+ }
+ {
+ lw lr, r4 /* lr = value saved at next + PC offset */
+ addi r12, r13, 8 /* r12 = start of next's callee-saved slots */
+ }
+ {
+ /* Update sp and ksp0 simultaneously to avoid backtracer warnings. */
+ move sp, r13
+ mtspr SPR_SYSTEM_SAVE_K_0, r2
+ }
+ FOR_EACH_CALLEE_SAVED_REG(LOAD_REG)
+.L__switch_to_pc:
+ {
+ addi sp, sp, FRAME_SIZE /* release the frame */
+ jrp lr /* r0 is still valid here, so return it */
+ }
+ STD_ENDPROC(__switch_to)
+
+/*
+ * Return a suitable address for the backtracer for suspended threads.
+ * The value returned in r0 is formed from the address obtained by "lnk"
+ * plus the assemble-time distance to .L__switch_to_pc, the label inside
+ * __switch_to just after the callee-saved registers are reloaded.
+ */
+STD_ENTRY_SECTION(get_switch_to_pc, .sched.text)
+ lnk r0
+ {
+ addli r0, r0, .L__switch_to_pc - .
+ jrp lr
+ }
+ STD_ENDPROC(get_switch_to_pc)
+
+/*
+ * get_pt_regs: r0 holds the address of a register-save area (laid out per
+ * the PTREGS_OFFSET_* constants).  Stores r0..r52, tp and sp in consecutive
+ * 4-byte slots, then lr, a pc value obtained with "lnk", and an ex1 value
+ * built from INTERRUPT_CRITICAL_SECTION and KERNEL_PL; faultnum and
+ * orig_r0 are cleared.  r0 is restored to the base address before
+ * returning; r1 is clobbered.
+ */
+STD_ENTRY(get_pt_regs)
+ .irp reg, r0, r1, r2, r3, r4, r5, r6, r7, \
+ r8, r9, r10, r11, r12, r13, r14, r15, \
+ r16, r17, r18, r19, r20, r21, r22, r23, \
+ r24, r25, r26, r27, r28, r29, r30, r31, \
+ r32, r33, r34, r35, r36, r37, r38, r39, \
+ r40, r41, r42, r43, r44, r45, r46, r47, \
+ r48, r49, r50, r51, r52, tp, sp
+ {
+ sw r0, \reg
+ addi r0, r0, 4
+ }
+ .endr
+ {
+ sw r0, lr
+ addi r0, r0, PTREGS_OFFSET_PC - PTREGS_OFFSET_LR
+ }
+ lnk r1
+ {
+ sw r0, r1 /* pc slot */
+ addi r0, r0, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
+ }
+ mfspr r1, INTERRUPT_CRITICAL_SECTION
+ shli r1, r1, SPR_EX_CONTEXT_1_1__ICS_SHIFT
+ ori r1, r1, KERNEL_PL
+ {
+ sw r0, r1 /* ex1 slot */
+ addi r0, r0, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1
+ }
+ {
+ sw r0, zero /* clear faultnum */
+ addi r0, r0, PTREGS_OFFSET_ORIG_R0 - PTREGS_OFFSET_FAULTNUM
+ }
+ {
+ sw r0, zero /* clear orig_r0 */
+ addli r0, r0, -PTREGS_OFFSET_ORIG_R0 /* restore r0 to base */
+ }
+ jrp lr
+ STD_ENDPROC(get_pt_regs)
diff --git a/arch/tile/kernel/regs_64.S b/arch/tile/kernel/regs_64.S
new file mode 100644
index 00000000..0829fd01
--- /dev/null
+++ b/arch/tile/kernel/regs_64.S
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+#include <arch/spr_def.h>
+#include <asm/processor.h>
+#include <asm/switch_to.h>
+
+/*
+ * See <asm/switch_to.h>; called with prev and next task_struct pointers.
+ * "prev" is returned in r0 for __switch_to and also for ret_from_fork.
+ *
+ * We want to save pc/sp in "prev", and get the new pc/sp from "next".
+ * We also need to save all the callee-saved registers on the stack.
+ *
+ * Intel enables/disables access to the hardware cycle counter in
+ * seccomp (secure computing) environments if necessary, based on
+ * has_secure_computing(). We might want to do this at some point,
+ * though it would require virtualizing the other SPRs under WORLD_ACCESS.
+ *
+ * Since we're saving to the stack, we omit sp from this list.
+ * And for parallels with other architectures, we save lr separately,
+ * in the thread_struct itself (as the "pc" field).
+ *
+ * This code also needs to be aligned with process.c copy_thread()
+ */
+
+/*
+ * NOTE(review): the message below names <asm/system.h> and kernel/entry.S,
+ * but this file includes <asm/switch_to.h>; presumably that is where
+ * CALLEE_SAVED_REGS_COUNT now comes from -- confirm.
+ */
+#if CALLEE_SAVED_REGS_COUNT != 24
+# error Mismatch between <asm/system.h> and kernel/entry.S
+#endif
+/* Stack frame size in bytes: two extra 8-byte words plus the callee-saved slots. */
+#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 8)
+
+/* Store (SAVE) or load (LOAD) one register at the address in r12, then advance r12 by 8. */
+#define SAVE_REG(r) { st r12, r; addi r12, r12, 8 }
+#define LOAD_REG(r) { ld r, r12; addi r12, r12, 8 }
+/* Apply f to each of r30..r52 in ascending order. */
+#define FOR_EACH_CALLEE_SAVED_REG(f) \
+ f(r30); f(r31); \
+ f(r32); f(r33); f(r34); f(r35); f(r36); f(r37); f(r38); f(r39); \
+ f(r40); f(r41); f(r42); f(r43); f(r44); f(r45); f(r46); f(r47); \
+ f(r48); f(r49); f(r50); f(r51); f(r52);
+
+/*
+ * __switch_to: r0 = prev task_struct, r1 = next task_struct; r2 is written
+ * to SPR_SYSTEM_SAVE_K_0.  r0 is not modified, so prev is returned in r0.
+ * Uses r3, r4 and r10-r13 as scratch.
+ */
+STD_ENTRY_SECTION(__switch_to, .sched.text)
+ {
+ move r10, sp /* keep the incoming sp */
+ st sp, lr /* store lr at the incoming sp */
+ }
+ {
+ addli r11, sp, -FRAME_SIZE + 8
+ addli sp, sp, -FRAME_SIZE /* allocate the frame */
+ }
+ {
+ st r11, r10 /* store the incoming sp via r11 */
+ addli r4, r1, TASK_STRUCT_THREAD_KSP_OFFSET
+ }
+ {
+ ld r13, r4 /* Load new sp to a temp register early. */
+ addi r12, sp, 16 /* r12 walks the callee-saved slots */
+ }
+ FOR_EACH_CALLEE_SAVED_REG(SAVE_REG)
+ addli r3, r0, TASK_STRUCT_THREAD_KSP_OFFSET
+ {
+ st r3, sp /* save our sp at prev + KSP offset */
+ addli r3, r0, TASK_STRUCT_THREAD_PC_OFFSET
+ }
+ {
+ st r3, lr /* save lr at prev + PC offset */
+ addli r4, r1, TASK_STRUCT_THREAD_PC_OFFSET
+ }
+ {
+ ld lr, r4 /* lr = value saved at next + PC offset */
+ addi r12, r13, 16 /* r12 = start of next's callee-saved slots */
+ }
+ {
+ /* Update sp and ksp0 simultaneously to avoid backtracer warnings. */
+ move sp, r13
+ mtspr SPR_SYSTEM_SAVE_K_0, r2
+ }
+ FOR_EACH_CALLEE_SAVED_REG(LOAD_REG)
+.L__switch_to_pc:
+ {
+ addli sp, sp, FRAME_SIZE /* release the frame */
+ jrp lr /* r0 is still valid here, so return it */
+ }
+ STD_ENDPROC(__switch_to)
+
+/*
+ * Return a suitable address for the backtracer for suspended threads.
+ * The value returned in r0 is formed from the address obtained by "lnk"
+ * plus the assemble-time distance to .L__switch_to_pc, the label inside
+ * __switch_to just after the callee-saved registers are reloaded.
+ */
+STD_ENTRY_SECTION(get_switch_to_pc, .sched.text)
+ lnk r0
+ {
+ addli r0, r0, .L__switch_to_pc - .
+ jrp lr
+ }
+ STD_ENDPROC(get_switch_to_pc)
+
+/*
+ * get_pt_regs: r0 holds the address of a register-save area (laid out per
+ * the PTREGS_OFFSET_* constants).  Stores r0..r52, tp and sp in consecutive
+ * 8-byte slots, then lr, a pc value obtained with "lnk", and an ex1 value
+ * built from INTERRUPT_CRITICAL_SECTION and KERNEL_PL; faultnum and
+ * orig_r0 are cleared.  r0 is restored to the base address before
+ * returning; r1 is clobbered.
+ */
+STD_ENTRY(get_pt_regs)
+ .irp reg, r0, r1, r2, r3, r4, r5, r6, r7, \
+ r8, r9, r10, r11, r12, r13, r14, r15, \
+ r16, r17, r18, r19, r20, r21, r22, r23, \
+ r24, r25, r26, r27, r28, r29, r30, r31, \
+ r32, r33, r34, r35, r36, r37, r38, r39, \
+ r40, r41, r42, r43, r44, r45, r46, r47, \
+ r48, r49, r50, r51, r52, tp, sp
+ {
+ st r0, \reg
+ addi r0, r0, 8
+ }
+ .endr
+ {
+ st r0, lr
+ addi r0, r0, PTREGS_OFFSET_PC - PTREGS_OFFSET_LR
+ }
+ lnk r1
+ {
+ st r0, r1 /* pc slot */
+ addi r0, r0, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
+ }
+ mfspr r1, INTERRUPT_CRITICAL_SECTION
+ shli r1, r1, SPR_EX_CONTEXT_1_1__ICS_SHIFT
+ ori r1, r1, KERNEL_PL
+ {
+ st r0, r1 /* ex1 slot */
+ addi r0, r0, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1
+ }
+ {
+ st r0, zero /* clear faultnum */
+ addi r0, r0, PTREGS_OFFSET_ORIG_R0 - PTREGS_OFFSET_FAULTNUM
+ }
+ {
+ st r0, zero /* clear orig_r0 */
+ addli r0, r0, -PTREGS_OFFSET_ORIG_R0 /* restore r0 to base */
+ }
+ jrp lr
+ STD_ENDPROC(get_pt_regs)
diff --git a/arch/tile/kernel/relocate_kernel.S b/arch/tile/kernel/relocate_kernel.S
new file mode 100644
index 00000000..010b4185
--- /dev/null
+++ b/arch/tile/kernel/relocate_kernel.S
@@ -0,0 +1,280 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * copy new kernel into place and then call hv_reexec
+ *
+ */
+
+#include <linux/linkage.h>
+#include <arch/chip.h>
+#include <asm/page.h>
+#include <hv/hypervisor.h>
+
+/*
+ * Addresses of hypervisor entry points, computed from the glue base and
+ * the per-entry dispatch size.  Both the base expression and the macro
+ * argument are parenthesized so the expansions stay correct whatever
+ * expression they are embedded in or passed.
+ */
+#define ___hvb (MEM_SV_INTRPT + HV_GLUE_START_CPA)
+
+#define ___hv_dispatch(f) (___hvb + (HV_DISPATCH_ENTRY_SIZE * (f)))
+
+#define ___hv_console_putc ___hv_dispatch(HV_DISPATCH_CONSOLE_PUTC)
+#define ___hv_halt ___hv_dispatch(HV_DISPATCH_HALT)
+#define ___hv_reexec ___hv_dispatch(HV_DISPATCH_REEXEC)
+#define ___hv_flush_remote ___hv_dispatch(HV_DISPATCH_FLUSH_REMOTE)
+
+/* Define RELOCATE_NEW_KERNEL_VERBOSE to emit progress characters via ___hv_console_putc. */
+#undef RELOCATE_NEW_KERNEL_VERBOSE
+
+/*
+ * relocate_new_kernel: walk a page list whose entries are tagged in their
+ * low 4 bits (0x1 destination, 0x2 indirection, 0x4 done, 0x8 source),
+ * copying each source page to the current destination, and on "done"
+ * call ___hv_reexec with the new kernel's start address.  An unknown
+ * tag prints "err" and halts via ___hv_halt.
+ *
+ * Register use: r30 = current page-list element, r31 = address of the
+ * page this code runs on, r32 = new kernel start address, r33 =
+ * destination pointer, r40 = ___hv_console_putc.
+ */
+STD_ENTRY(relocate_new_kernel)
+
+ move r30, r0 /* page list */
+ move r31, r1 /* address of page we are on */
+ move r32, r2 /* start address of new kernel */
+
+ shri r1, r1, PAGE_SHIFT /* round r1 down to its page number ... */
+ addi r1, r1, 1 /* ... step to the next page ... */
+ shli sp, r1, PAGE_SHIFT /* ... so sp is the end of the page we are on */
+ addi sp, sp, -8
+ /* we now have a stack (whether we need one or not) */
+
+ moveli r40, lo16(___hv_console_putc)
+ auli r40, r40, ha16(___hv_console_putc)
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r0, 'r'
+ jalr r40
+
+ moveli r0, '_'
+ jalr r40
+
+ moveli r0, 'n'
+ jalr r40
+
+ moveli r0, '_'
+ jalr r40
+
+ moveli r0, 'k'
+ jalr r40
+
+ moveli r0, '\n'
+ jalr r40
+#endif
+
+ /*
+ * Throughout this code r30 is pointer to the element of page
+ * list we are working on.
+ *
+ * Normally we get to the next element of the page list by
+ * incrementing r30 by four. The exception is if the element
+ * on the page list is an IND_INDIRECTION in which case we use
+ * the element with the low bits masked off as the new value
+ * of r30.
+ *
+ * To get this started, we need the value passed to us (which
+ * will always be an IND_INDIRECTION) in memory somewhere with
+ * r30 pointing at it. To do that, we push the value passed
+ * to us on the stack and make r30 point to it.
+ */
+
+ sw sp, r30
+ move r30, sp
+ addi sp, sp, -8
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+ /*
+ * On TILEPro, we need to flush all tiles' caches, since we may
+ * have been doing hash-for-home caching there. Note that we
+ * must do this _after_ we're completely done modifying any memory
+ * other than our output buffer (which we know is locally cached).
+ * We want the caches to be fully clean when we do the reexec,
+ * because the hypervisor is going to do this flush again at that
+ * point, and we don't want that second flush to overwrite any memory.
+ */
+ {
+ move r0, zero /* cache_pa */
+ move r1, zero
+ }
+ {
+ auli r2, zero, ha16(HV_FLUSH_EVICT_L2) /* cache_control */
+ movei r3, -1 /* cache_cpumask; -1 means all client tiles */
+ }
+ {
+ move r4, zero /* tlb_va */
+ move r5, zero /* tlb_length */
+ }
+ {
+ move r6, zero /* tlb_pgsize */
+ move r7, zero /* tlb_cpumask */
+ }
+ {
+ move r8, zero /* asids */
+ moveli r20, lo16(___hv_flush_remote)
+ }
+ {
+ move r9, zero /* asidcount */
+ auli r20, r20, ha16(___hv_flush_remote)
+ }
+
+ jalr r20
+#endif
+
+ /* r33 is destination pointer, default to zero */
+
+ moveli r33, 0
+
+.Lloop: lw r10, r30
+
+ andi r9, r10, 0xf /* low 4 bits tell us what type it is */
+ xor r10, r10, r9 /* r10 is now value with low 4 bits stripped */
+
+ seqi r0, r9, 0x1 /* IND_DESTINATION */
+ bzt r0, .Ltry2
+
+ move r33, r10 /* new destination pointer */
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r0, 'd'
+ jalr r40
+#endif
+
+ addi r30, r30, 4
+ j .Lloop
+
+.Ltry2:
+ seqi r0, r9, 0x2 /* IND_INDIRECTION */
+ bzt r0, .Ltry4
+
+ move r30, r10 /* continue the walk at the indirection target */
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r0, 'i'
+ jalr r40
+#endif
+
+ j .Lloop
+
+.Ltry4:
+ seqi r0, r9, 0x4 /* IND_DONE */
+ bzt r0, .Ltry8
+
+ mf /* presumably a memory fence before handing off -- confirm */
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r0, 'D'
+ jalr r40
+ moveli r0, '\n'
+ jalr r40
+#endif
+
+ move r0, r32
+ moveli r1, 0 /* arg to hv_reexec is 64 bits */
+
+ moveli r41, lo16(___hv_reexec)
+ auli r41, r41, ha16(___hv_reexec)
+
+ jalr r41
+
+ /* we should not get here */
+
+ moveli r0, '?'
+ jalr r40
+ moveli r0, '\n'
+ jalr r40
+
+ j .Lhalt
+
+.Ltry8: seqi r0, r9, 0x8 /* IND_SOURCE */
+ bz r0, .Lerr /* unknown type */
+
+ /* copy page at r10 to page at r33 */
+
+ move r11, r33
+
+ moveli r0, lo16(PAGE_SIZE)
+ auli r0, r0, ha16(PAGE_SIZE)
+ add r33, r33, r0 /* r33 now marks the end of the destination page */
+
+ /* copy word at r10 to word at r11 until r11 equals r33 */
+
+ /*
+ * Each pass of the loop below copies 16 words (64 bytes). The
+ * page size is a multiple of that, so we can unroll 16 times
+ * safely without any edge case checking.
+ *
+ * Issue a flush of the destination every 16 words to avoid
+ * incoherence when starting the new kernel. (Now this is
+ * just good paranoia because the hv_reexec call will also
+ * take care of this.)
+ */
+
+1:
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0; addi r11, r11, 4 }
+ { lw r0, r10; addi r10, r10, 4 }
+ { sw r11, r0 }
+ { flush r11 ; addi r11, r11, 4 }
+
+ seq r0, r33, r11
+ bzt r0, 1b /* loop until r11 reaches the end of the page */
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r0, 's'
+ jalr r40
+#endif
+
+ addi r30, r30, 4
+ j .Lloop
+
+
+.Lerr: moveli r0, 'e'
+ jalr r40
+ moveli r0, 'r'
+ jalr r40
+ moveli r0, 'r'
+ jalr r40
+ moveli r0, '\n'
+ jalr r40
+.Lhalt:
+ moveli r41, lo16(___hv_halt)
+ auli r41, r41, ha16(___hv_halt)
+
+ jalr r41
+ STD_ENDPROC(relocate_new_kernel)
+
+ .section .rodata,"a"
+
+ /*
+ * Exported word holding the distance from relocate_new_kernel to
+ * .Lend_relocate_new_kernel. NOTE(review): that label is not defined
+ * in the visible text; presumably STD_ENDPROC() emits it -- confirm.
+ */
+ .globl relocate_new_kernel_size
+relocate_new_kernel_size:
+ .long .Lend_relocate_new_kernel - relocate_new_kernel
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
new file mode 100644
index 00000000..bff23f47
--- /dev/null
+++ b/arch/tile/kernel/setup.c
@@ -0,0 +1,1532 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <linux/node.h>
+#include <linux/cpu.h>
+#include <linux/ioport.h>
+#include <linux/irq.h>
+#include <linux/kexec.h>
+#include <linux/pci.h>
+#include <linux/initrd.h>
+#include <linux/io.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/timex.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/cacheflush.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <hv/hypervisor.h>
+#include <arch/interrupts.h>
+
+/* <linux/smp.h> doesn't provide this definition. */
+#ifndef CONFIG_SMP
+#define setup_max_cpus 1
+#endif
+
+/* Absolute value of x. */
+static inline int ABS(int x)
+{
+	return x < 0 ? -x : x;
+}
+
+/* Chip information */
+char chip_model[64] __write_once;
+
+struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_data);
+
+/* We only create bootmem data on node 0. */
+static bootmem_data_t __initdata node0_bdata;
+
+/* Information on the NUMA nodes that we compute early */
+unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES];
+unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES];
+unsigned long __initdata node_memmap_pfn[MAX_NUMNODES];
+unsigned long __initdata node_percpu_pfn[MAX_NUMNODES];
+unsigned long __initdata node_free_pfn[MAX_NUMNODES];
+
+static unsigned long __initdata node_percpu[MAX_NUMNODES];
+
+#ifdef CONFIG_HIGHMEM
+/* Page frame index of end of lowmem on each controller. */
+unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES];
+
+/* Number of pages that can be mapped into lowmem. */
+static unsigned long __initdata mappable_physpages;
+#endif
+
+/* Data on which physical memory controller corresponds to which NUMA node */
+int node_controller[MAX_NUMNODES] = { [0 ... MAX_NUMNODES-1] = -1 };
+
+#ifdef CONFIG_HIGHMEM
+/* Map information from VAs to PAs */
+unsigned long pbase_map[1 << (32 - HPAGE_SHIFT)]
+ __write_once __attribute__((aligned(L2_CACHE_BYTES)));
+EXPORT_SYMBOL(pbase_map);
+
+/* Map information from PAs to VAs */
+void *vbase_map[NR_PA_HIGHBIT_VALUES]
+ __write_once __attribute__((aligned(L2_CACHE_BYTES)));
+EXPORT_SYMBOL(vbase_map);
+#endif
+
+/* Node number as a function of the high PA bits */
+int highbits_to_node[NR_PA_HIGHBIT_VALUES] __write_once;
+EXPORT_SYMBOL(highbits_to_node);
+
+static unsigned int __initdata maxmem_pfn = -1U;
+static unsigned int __initdata maxnodemem_pfn[MAX_NUMNODES] = {
+ [0 ... MAX_NUMNODES-1] = -1U
+};
+static nodemask_t __initdata isolnodes;
+
+#ifdef CONFIG_PCI
+enum { DEFAULT_PCI_RESERVE_MB = 64 };
+static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB;
+unsigned long __initdata pci_reserve_start_pfn = -1U;
+unsigned long __initdata pci_reserve_end_pfn = -1U;
+#endif
+
+/*
+ * Handle the "maxmem=" boot argument: record in maxmem_pfn the largest
+ * amount of RAM to use, rounded down to a whole number of huge pages.
+ * Returns -EINVAL for a missing argument or a size that parses to zero.
+ */
+static int __init setup_maxmem(char *str)
+{
+	unsigned long long limit_bytes;
+
+	if (str == NULL)
+		return -EINVAL;
+
+	limit_bytes = memparse(str, NULL);
+	if (limit_bytes == 0)
+		return -EINVAL;
+
+	maxmem_pfn = (limit_bytes >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT);
+	pr_info("Forcing RAM used to no more than %dMB\n",
+		maxmem_pfn >> (20 - PAGE_SHIFT));
+	return 0;
+}
+early_param("maxmem", setup_maxmem);
+
+/*
+ * Handle the "maxnodemem=<node>:<size>" boot argument: record in
+ * maxnodemem_pfn[node] the largest amount of RAM to use on that node,
+ * rounded down to a whole number of huge pages.
+ *
+ * Returns -EINVAL if the argument is missing, the node number is not
+ * followed by ':', or the node number is outside 0..MAX_NUMNODES-1.
+ */
+static int __init setup_maxnodemem(char *str)
+{
+	char *endp;
+	unsigned long long maxnodemem;
+	long node;
+
+	if (str == NULL)
+		return -EINVAL;
+
+	/*
+	 * simple_strtoul() returns an unsigned long; once stored in a
+	 * signed long a huge value can turn negative, so both bounds must
+	 * be checked before "node" is used as an array index.
+	 */
+	node = simple_strtoul(str, &endp, 0);
+	if (node < 0 || node >= MAX_NUMNODES || *endp != ':')
+		return -EINVAL;
+
+	maxnodemem = memparse(endp+1, NULL);
+	maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) <<
+		(HPAGE_SHIFT - PAGE_SHIFT);
+	pr_info("Forcing RAM used on node %ld to no more than %dMB\n",
+		node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT));
+	return 0;
+}
+early_param("maxnodemem", setup_maxnodemem);
+
+/*
+ * Handle the "isolnodes=" boot argument: parse a node list into the
+ * isolnodes mask and log the resulting value.  Returns -EINVAL if the
+ * argument is missing or does not parse.
+ */
+static int __init setup_isolnodes(char *str)
+{
+	char text[MAX_NUMNODES * 5];
+
+	if (str == NULL)
+		return -EINVAL;
+	if (nodelist_parse(str, isolnodes) != 0)
+		return -EINVAL;
+
+	nodelist_scnprintf(text, sizeof(text), isolnodes);
+	pr_info("Set isolnodes value to '%s'\n", text);
+	return 0;
+}
+early_param("isolnodes", setup_isolnodes);
+
+#ifdef CONFIG_PCI
+/*
+ * Handle the "pci_reserve=" boot argument: the number of megabytes to
+ * reserve for PCIE root complex mappings.  Returns -EINVAL if the
+ * argument is missing, is not a number, or exceeds 3 * 1024.
+ */
+static int __init setup_pci_reserve(char *str)
+{
+	unsigned long megabytes;
+
+	if (str == NULL)
+		return -EINVAL;
+	if (strict_strtoul(str, 0, &megabytes) != 0)
+		return -EINVAL;
+	if (megabytes > 3 * 1024)
+		return -EINVAL;
+
+	pci_reserve_mb = megabytes;
+	pr_info("Reserving %dMB for PCIE root complex mappings\n",
+		pci_reserve_mb);
+	return 0;
+}
+early_param("pci_reserve", setup_pci_reserve);
+#endif
+
+#ifndef __tilegx__
+/*
+ * vmalloc=size forces the vmalloc area to be exactly 'size' bytes.
+ * This can be used to increase (or decrease) the vmalloc area.
+ *
+ * The parsed size is rounded up to a multiple of PGDIR_SIZE before it is
+ * stored in VMALLOC_RESERVE. Returns -EINVAL if no argument was given;
+ * a value that is too large ends in early_panic() rather than an error
+ * return.
+ */
+static int __init parse_vmalloc(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ VMALLOC_RESERVE = (memparse(arg, &arg) + PGDIR_SIZE - 1) & PGDIR_MASK;
+
+ /* See validate_va() for more on this test. */
+ if ((long)_VMALLOC_START >= 0)
+ early_panic("\"vmalloc=%#lx\" value too large: maximum %#lx\n",
+ VMALLOC_RESERVE, _VMALLOC_END - 0x80000000UL);
+
+ return 0;
+}
+early_param("vmalloc", parse_vmalloc);
+#endif
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * Determine for each controller where its lowmem is mapped and how much of
+ * it is mapped there. On controller zero, the first few megabytes are
+ * already mapped in as code at MEM_SV_INTRPT, so in principle we could
+ * start our data mappings higher up, but for now we don't bother, to avoid
+ * additional confusion.
+ *
+ * One question is whether, on systems with more than 768 Mb and
+ * controllers of different sizes, to map in a proportionate amount of
+ * each one, or to try to map the same amount from each controller.
+ * (E.g. if we have three controllers with 256MB, 1GB, and 256MB
+ * respectively, do we map 256MB from each, or do we map 128 MB, 512
+ * MB, and 128 MB respectively?) For now we use a proportionate
+ * solution like the latter.
+ *
+ * The VA/PA mapping demands that we align our decisions at 16 MB
+ * boundaries so that we can rapidly convert VA to PA.
+ *
+ * Side effects: fills pbase_map[] (indexed by VA >> HPAGE_SHIFT, holding
+ * the backing PFN) and vbase_map[] (indexed by the PA high bits, holding
+ * the first VA mapped for them), sets node_lowmem_end_pfn[] for every
+ * online node, and sets mappable_physpages. Nodes listed in isolnodes,
+ * other than node 0, get no lowmem.
+ *
+ * Returns the virtual address just past the last huge page mapped.
+ */
+static void *__init setup_pa_va_mapping(void)
+{
+ unsigned long curr_pages = 0;
+ unsigned long vaddr = PAGE_OFFSET;
+ nodemask_t highonlynodes = isolnodes;
+ int i, j;
+
+ memset(pbase_map, -1, sizeof(pbase_map)); /* all-ones marks an unused slot */
+ memset(vbase_map, -1, sizeof(vbase_map));
+
+ /* Node zero cannot be isolated for LOWMEM purposes. */
+ node_clear(0, highonlynodes);
+
+ /* Count up the number of pages on non-highonlynodes controllers. */
+ mappable_physpages = 0;
+ for_each_online_node(i) {
+ if (!node_isset(i, highonlynodes))
+ mappable_physpages +=
+ node_end_pfn[i] - node_start_pfn[i];
+ }
+
+ for_each_online_node(i) {
+ unsigned long start = node_start_pfn[i];
+ unsigned long end = node_end_pfn[i];
+ unsigned long size = end - start;
+ unsigned long vaddr_end;
+
+ if (node_isset(i, highonlynodes)) {
+ /* Mark this controller as having no lowmem. */
+ node_lowmem_end_pfn[i] = start;
+ continue;
+ }
+
+ curr_pages += size;
+ if (mappable_physpages > MAXMEM_PFN) {
+ /* Scale the running total down so all nodes together fit in MAXMEM_PFN pages. */
+ vaddr_end = PAGE_OFFSET +
+ (((u64)curr_pages * MAXMEM_PFN /
+ mappable_physpages)
+ << PAGE_SHIFT);
+ } else {
+ vaddr_end = PAGE_OFFSET + (curr_pages << PAGE_SHIFT);
+ }
+ /* Map this node one huge page at a time; j counts the huge pages mapped. */
+ for (j = 0; vaddr < vaddr_end; vaddr += HPAGE_SIZE, ++j) {
+ unsigned long this_pfn =
+ start + (j << HUGETLB_PAGE_ORDER);
+ pbase_map[vaddr >> HPAGE_SHIFT] = this_pfn;
+ /* Only the first VA seen for a given set of PA high bits is recorded. */
+ if (vbase_map[__pfn_to_highbits(this_pfn)] ==
+ (void *)-1)
+ vbase_map[__pfn_to_highbits(this_pfn)] =
+ (void *)(vaddr & HPAGE_MASK);
+ }
+ node_lowmem_end_pfn[i] = start + (j << HUGETLB_PAGE_ORDER);
+ BUG_ON(node_lowmem_end_pfn[i] > end);
+ }
+
+ /* Return highest address of any mapped memory. */
+ return (void *)vaddr;
+}
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * Register our most important memory mappings with the debug stub.
+ *
+ * This is up to 4 mappings for lowmem, one mapping per memory
+ * controller, plus one for our text segment.
+ */
+static void __cpuinit store_permanent_mappings(void)
+{
+	int nid;
+
+	for_each_online_node(nid) {
+		unsigned long first_pfn = node_start_pfn[nid];
+		/* End of the directly-mapped part of this node, as a PFN. */
+#ifdef CONFIG_HIGHMEM
+		HV_PhysAddr mapped_end_pfn = node_lowmem_end_pfn[nid];
+#else
+		HV_PhysAddr mapped_end_pfn = node_end_pfn[nid];
+#endif
+		HV_PhysAddr pa = ((HV_PhysAddr)first_pfn) << PAGE_SHIFT;
+		unsigned long npages = mapped_end_pfn - first_pfn;
+
+		hv_store_mapping((HV_VirtAddr) __va(pa),
+				 npages << PAGE_SHIFT, pa);
+	}
+
+	/* The text mapping covers _stext up to _einittext. */
+	hv_store_mapping((HV_VirtAddr)_stext,
+			 (uint32_t)(_einittext - _stext), 0);
+}
+
+/*
+ * Use hv_inquire_physical() to populate node_{start,end}_pfn[]
+ * and node_online_map, doing suitable sanity-checking.
+ * Also set min_low_pfn, max_low_pfn, and max_pfn.
+ *
+ * Ranges that fail a sanity check are reported with pr_err() and
+ * skipped.  Ranges larger than the "maxnodemem=" / "maxmem=" limits are
+ * truncated, or skipped when the limit leaves nothing.  high_memory and
+ * num_physpages are updated as well.
+ */
+static void __init setup_memory(void)
+{
+	int i, j;
+	int highbits_seen[NR_PA_HIGHBIT_VALUES] = { 0 };
+#ifdef CONFIG_HIGHMEM
+	long highmem_pages;
+#endif
+#ifndef __tilegx__
+	int cap;
+#endif
+#if defined(CONFIG_HIGHMEM) || defined(__tilegx__)
+	long lowmem_pages;
+#endif
+
+	/* We are using a char to hold the cpu_2_node[] mapping */
+	BUILD_BUG_ON(MAX_NUMNODES > 127);
+
+	/* Discover the ranges of memory available to us */
+	for (i = 0; ; ++i) {
+		unsigned long start, size, end, highbits;
+		HV_PhysAddrRange range = hv_inquire_physical(i);
+		if (range.size == 0)
+			break;
+		/*
+		 * Reject out-of-range node numbers first, before "i" is
+		 * used to index any per-node array (maxnodemem_pfn[] etc).
+		 */
+		if (i >= MAX_NUMNODES) {
+			pr_err("Too many PA nodes (#%d): %#llx...%#llx\n",
+			       i, range.start, range.start + range.size);
+			continue;
+		}
+#ifdef CONFIG_FLATMEM
+		if (i > 0) {
+			pr_err("Can't use discontiguous PAs: %#llx..%#llx\n",
+			       range.start, range.start + range.size);
+			continue;
+		}
+#endif
+#ifndef __tilegx__
+		if ((unsigned long)range.start) {
+			pr_err("Range not at 4GB multiple: %#llx..%#llx\n",
+			       range.start, range.start + range.size);
+			continue;
+		}
+#endif
+		if ((range.start & (HPAGE_SIZE-1)) != 0 ||
+		    (range.size & (HPAGE_SIZE-1)) != 0) {
+			unsigned long long start_pa = range.start;
+			unsigned long long orig_size = range.size;
+			/* Shrink the range inward to huge-page boundaries. */
+			range.start = (start_pa + HPAGE_SIZE - 1) & HPAGE_MASK;
+			range.size -= (range.start - start_pa);
+			range.size &= HPAGE_MASK;
+			pr_err("Range not hugepage-aligned: %#llx..%#llx:"
+			       " now %#llx-%#llx\n",
+			       start_pa, start_pa + orig_size,
+			       range.start, range.start + range.size);
+		}
+		highbits = __pa_to_highbits(range.start);
+		if (highbits >= NR_PA_HIGHBIT_VALUES) {
+			pr_err("PA high bits too high: %#llx..%#llx\n",
+			       range.start, range.start + range.size);
+			continue;
+		}
+		if (highbits_seen[highbits]) {
+			pr_err("Range overlaps in high bits: %#llx..%#llx\n",
+			       range.start, range.start + range.size);
+			continue;
+		}
+		highbits_seen[highbits] = 1;
+		if (PFN_DOWN(range.size) > maxnodemem_pfn[i]) {
+			int max_size = maxnodemem_pfn[i];
+			if (max_size > 0) {
+				pr_err("Maxnodemem reduced node %d to"
+				       " %d pages\n", i, max_size);
+				range.size = PFN_PHYS(max_size);
+			} else {
+				pr_err("Maxnodemem disabled node %d\n", i);
+				continue;
+			}
+		}
+		if (num_physpages + PFN_DOWN(range.size) > maxmem_pfn) {
+			int max_size = maxmem_pfn - num_physpages;
+			if (max_size > 0) {
+				pr_err("Maxmem reduced node %d to %d pages\n",
+				       i, max_size);
+				range.size = PFN_PHYS(max_size);
+			} else {
+				pr_err("Maxmem disabled node %d\n", i);
+				continue;
+			}
+		}
+
+		start = range.start >> PAGE_SHIFT;
+		size = range.size >> PAGE_SHIFT;
+		end = start + size;
+
+#ifndef __tilegx__
+		if (((HV_PhysAddr)end << PAGE_SHIFT) !=
+		    (range.start + range.size)) {
+			pr_err("PAs too high to represent: %#llx..%#llx\n",
+			       range.start, range.start + range.size);
+			continue;
+		}
+#endif
+#ifdef CONFIG_PCI
+		/*
+		 * Blocks that overlap the pci reserved region must
+		 * have enough space to hold the maximum percpu data
+		 * region at the top of the range.  If there isn't
+		 * enough space above the reserved region, just
+		 * truncate the node.
+		 */
+		if (start <= pci_reserve_start_pfn &&
+		    end > pci_reserve_start_pfn) {
+			unsigned int per_cpu_size =
+				__per_cpu_end - __per_cpu_start;
+			/*
+			 * PFN_UP() already converts bytes to pages, so no
+			 * further shift is applied here.
+			 */
+			unsigned int percpu_pages =
+				NR_CPUS * PFN_UP(per_cpu_size);
+			if (end < pci_reserve_end_pfn + percpu_pages) {
+				end = pci_reserve_start_pfn;
+				pr_err("PCI mapping region reduced node %d to"
+				       " %ld pages\n", i, end - start);
+			}
+		}
+#endif
+
+		for (j = __pfn_to_highbits(start);
+		     j <= __pfn_to_highbits(end - 1); j++)
+			highbits_to_node[j] = i;
+
+		node_start_pfn[i] = start;
+		node_end_pfn[i] = end;
+		node_controller[i] = range.controller;
+		num_physpages += size;
+		max_pfn = end;
+
+		/* Mark node as online */
+		node_set(i, node_online_map);
+		node_set(i, node_possible_map);
+	}
+
+#ifndef __tilegx__
+	/*
+	 * For 4KB pages, mem_map "struct page" data is 1% of the size
+	 * of the physical memory, so can be quite big (640 MB for
+	 * four 16G zones).  These structures must be mapped in
+	 * lowmem, and since we currently cap out at about 768 MB,
+	 * it's impractical to try to use this much address space.
+	 * For now, arbitrarily cap the amount of physical memory
+	 * we're willing to use at 8 million pages (32GB of 4KB pages).
+	 */
+	cap = 8 * 1024 * 1024;  /* 8 million pages */
+	if (num_physpages > cap) {
+		int num_nodes = num_online_nodes();
+		int cap_each = cap / num_nodes;
+		unsigned long dropped_pages = 0;
+		/*
+		 * Walk the online nodes themselves: ranges skipped above
+		 * can leave gaps in the node numbering.
+		 */
+		for_each_online_node(i) {
+			int size = node_end_pfn[i] - node_start_pfn[i];
+			if (size > cap_each) {
+				dropped_pages += (size - cap_each);
+				node_end_pfn[i] = node_start_pfn[i] + cap_each;
+			}
+		}
+		num_physpages -= dropped_pages;
+		pr_warning("Only using %ldMB memory;"
+		       " ignoring %ldMB.\n",
+		       num_physpages >> (20 - PAGE_SHIFT),
+		       dropped_pages >> (20 - PAGE_SHIFT));
+		pr_warning("Consider using a larger page size.\n");
+	}
+#endif
+
+	/* Heap starts just above the last loaded address. */
+	min_low_pfn = PFN_UP((unsigned long)_end - PAGE_OFFSET);
+
+#ifdef CONFIG_HIGHMEM
+	/* Find where we map lowmem from each controller. */
+	high_memory = setup_pa_va_mapping();
+
+	/* Set max_low_pfn based on what node 0 can directly address. */
+	max_low_pfn = node_lowmem_end_pfn[0];
+
+	lowmem_pages = (mappable_physpages > MAXMEM_PFN) ?
+		MAXMEM_PFN : mappable_physpages;
+	highmem_pages = (long) (num_physpages - lowmem_pages);
+
+	pr_notice("%ldMB HIGHMEM available.\n",
+	       pages_to_mb(highmem_pages > 0 ? highmem_pages : 0));
+	pr_notice("%ldMB LOWMEM available.\n",
+			pages_to_mb(lowmem_pages));
+#else
+	/* Set max_low_pfn based on what node 0 can directly address. */
+	max_low_pfn = node_end_pfn[0];
+
+#ifndef __tilegx__
+	if (node_end_pfn[0] > MAXMEM_PFN) {
+		pr_warning("Only using %ldMB LOWMEM.\n",
+		       MAXMEM>>20);
+		pr_warning("Use a HIGHMEM enabled kernel.\n");
+		max_low_pfn = MAXMEM_PFN;
+		max_pfn = MAXMEM_PFN;
+		num_physpages = MAXMEM_PFN;
+		node_end_pfn[0] = MAXMEM_PFN;
+	} else {
+		pr_notice("%ldMB memory available.\n",
+		       pages_to_mb(node_end_pfn[0]));
+	}
+	for (i = 1; i < MAX_NUMNODES; ++i) {
+		node_start_pfn[i] = 0;
+		node_end_pfn[i] = 0;
+	}
+	/* node_end_pfn[] holds a PFN, so convert it with pfn_to_kaddr(). */
+	high_memory = pfn_to_kaddr(node_end_pfn[0]);
+#else
+	lowmem_pages = 0;
+	for (i = 0; i < MAX_NUMNODES; ++i) {
+		int pages = node_end_pfn[i] - node_start_pfn[i];
+		lowmem_pages += pages;
+		if (pages)
+			high_memory = pfn_to_kaddr(node_end_pfn[i]);
+	}
+	pr_notice("%ldMB memory available.\n",
+	       pages_to_mb(lowmem_pages));
+#endif
+#endif
+}
+
+/*
+ * Bring up the node 0 bootmem allocator over low memory, keeping it
+ * clear of the PCI window (if any) and of the kexec crash region.
+ */
+static void __init setup_bootmem_allocator(void)
+{
+	unsigned long map_bytes, lo_pfn;
+	unsigned long hi_pfn = max_low_pfn;
+
+	/* Node 0 supplies the bootmem descriptor. */
+	NODE_DATA(0)->bdata = &node0_bdata;
+
+#ifdef CONFIG_PCI
+	/* Boot memory must stop short of the PCI region. */
+	hi_pfn = min(hi_pfn, pci_reserve_start_pfn);
+#endif
+
+	/*
+	 * Start the boot-time allocator on low memory only.  The first
+	 * argument is where the bitmap goes; the second is the end of
+	 * allocatable memory.
+	 */
+	map_bytes = init_bootmem(min_low_pfn, hi_pfn);
+
+	/* Everything past the bitmap itself is handed to the allocator. */
+	lo_pfn = min_low_pfn + PFN_UP(map_bytes);
+	if (lo_pfn >= hi_pfn)
+		early_panic("Not enough memory on controller 0 for bootmem\n");
+
+	free_bootmem(PFN_PHYS(lo_pfn), PFN_PHYS(hi_pfn - lo_pfn));
+
+#ifdef CONFIG_KEXEC
+	/* A non-empty crash kernel resource is taken back out of the pool. */
+	if (crashk_res.start != crashk_res.end)
+		reserve_bootmem(crashk_res.start, resource_size(&crashk_res), 0);
+#endif
+}
+
+/*
+ * Return the zeroed mem_map storage recorded for node "nid" in
+ * node_memmap_pfn[].  "size" must equal the node's page count times
+ * sizeof(struct page), otherwise we BUG.
+ */
+void *__init alloc_remap(int nid, unsigned long size)
+{
+	int npages = node_end_pfn[nid] - node_start_pfn[nid];
+	void *memmap = pfn_to_kaddr(node_memmap_pfn[nid]);
+
+	BUG_ON(size != npages * sizeof(struct page));
+
+	memset(memmap, 0, size);
+	return memmap;
+}
+
+/* Bytes of per-cpu space needed by each cpu, rounded up to whole pages. */
+static int __init percpu_size(void)
+{
+	int bytes = __per_cpu_end - __per_cpu_start;
+
+	bytes += PERCPU_MODULE_RESERVE;
+	bytes += PERCPU_DYNAMIC_EARLY_SIZE;
+
+	/* Never go below the percpu allocator's minimum unit size. */
+	if (bytes < PCPU_MIN_UNIT_SIZE)
+		bytes = PCPU_MIN_UNIT_SIZE;
+
+	bytes = roundup(bytes, PAGE_SIZE);
+
+	/* Several places assume the per-cpu data fits on one huge page. */
+	BUG_ON(kdata_huge && bytes > HPAGE_SIZE);
+
+	return bytes;
+}
+
+/*
+ * Allocate "size" bytes of page-aligned bootmem and return its first pfn.
+ * A non-zero "goal" is mandatory: we BUG if the memory lands elsewhere.
+ */
+static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal)
+{
+	void *mem = __alloc_bootmem(size, PAGE_SIZE, goal);
+	unsigned long got_pfn = kaddr_to_pfn(mem);
+
+	BUG_ON(goal && PFN_PHYS(got_pfn) != goal);
+
+	return got_pfn;
+}
+
+/*
+ * Lay out each online node: set aside space for its mem_map and per-cpu
+ * data, compute its zone sizes, and pass the node to
+ * free_area_init_node().
+ */
+static void __init zone_sizes_init(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES] = { 0 };
+	int size = percpu_size();
+	int num_cpus = smp_height * smp_width;
+	int i;
+
+	/* Total up the per-cpu bytes each node must supply for its cpus. */
+	for (i = 0; i < num_cpus; ++i)
+		node_percpu[cpu_to_node(i)] += size;
+
+	for_each_online_node(i) {
+		unsigned long start = node_start_pfn[i];
+		unsigned long end = node_end_pfn[i];
+#ifdef CONFIG_HIGHMEM
+		unsigned long lowmem_end = node_lowmem_end_pfn[i];
+#else
+		unsigned long lowmem_end = end;
+#endif
+		int memmap_size = (end - start) * sizeof(struct page);
+		node_free_pfn[i] = start;
+
+		/*
+		 * Set aside pages for per-cpu data and the mem_map array.
+		 *
+		 * Since the per-cpu data requires special homecaching,
+		 * if we are in kdata_huge mode, we put it at the end of
+		 * the lowmem region.  If we're not in kdata_huge mode,
+		 * we take the per-cpu pages from the bottom of the
+		 * controller, since that avoids fragmenting a huge page
+		 * that users might want.  We always take the memmap
+		 * from the bottom of the controller, since with
+		 * kdata_huge that lets it be under a huge TLB entry.
+		 *
+		 * If the user has requested isolnodes for a controller,
+		 * though, there'll be no lowmem, so we just alloc_bootmem
+		 * the memmap.  There will be no percpu memory either.
+		 */
+		if (__pfn_to_highbits(start) == 0) {
+			/* In low PAs, allocate via bootmem. */
+			unsigned long goal = 0;
+			node_memmap_pfn[i] =
+				alloc_bootmem_pfn(memmap_size, goal);
+			if (kdata_huge)
+				goal = PFN_PHYS(lowmem_end) - node_percpu[i];
+			if (node_percpu[i])
+				node_percpu_pfn[i] =
+					alloc_bootmem_pfn(node_percpu[i], goal);
+		} else if (node_isset(i, isolnodes)) {
+			/* isolnodes is a node mask, so test it with node_isset(). */
+			node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0);
+			BUG_ON(node_percpu[i] != 0);
+		} else {
+			/* In high PAs, just reserve some pages. */
+			node_memmap_pfn[i] = node_free_pfn[i];
+			node_free_pfn[i] += PFN_UP(memmap_size);
+			if (!kdata_huge) {
+				node_percpu_pfn[i] = node_free_pfn[i];
+				node_free_pfn[i] += PFN_UP(node_percpu[i]);
+			} else {
+				node_percpu_pfn[i] =
+					lowmem_end - PFN_UP(node_percpu[i]);
+			}
+		}
+
+#ifdef CONFIG_HIGHMEM
+		if (start > lowmem_end) {
+			zones_size[ZONE_NORMAL] = 0;
+			zones_size[ZONE_HIGHMEM] = end - start;
+		} else {
+			zones_size[ZONE_NORMAL] = lowmem_end - start;
+			zones_size[ZONE_HIGHMEM] = end - lowmem_end;
+		}
+#else
+		zones_size[ZONE_NORMAL] = end - start;
+#endif
+
+		/*
+		 * Everyone shares node 0's bootmem allocator, but
+		 * we use alloc_remap(), above, to put the actual
+		 * struct page array on the individual controllers,
+		 * which is most of the data that we actually care about.
+		 * We can't place bootmem allocators on the other
+		 * controllers since the bootmem allocator can only
+		 * operate on 32-bit physical addresses.
+		 */
+		NODE_DATA(i)->bdata = NODE_DATA(0)->bdata;
+
+		free_area_init_node(i, zones_size, start, NULL);
+		printk(KERN_DEBUG "  Normal zone: %ld per-cpu pages\n",
+		       PFN_UP(node_percpu[i]));
+
+		/* Track the type of memory on each node */
+		if (zones_size[ZONE_NORMAL])
+			node_set_state(i, N_NORMAL_MEMORY);
+#ifdef CONFIG_HIGHMEM
+		if (end != start)
+			node_set_state(i, N_HIGH_MEMORY);
+#endif
+
+		node_set_online(i);
+	}
+}
+
+#ifdef CONFIG_NUMA
+
+/* which logical CPUs are on which nodes */
+struct cpumask node_2_cpu_mask[MAX_NUMNODES] __write_once;
+EXPORT_SYMBOL(node_2_cpu_mask);
+
+/* which node each logical CPU is on */
+char cpu_2_node[NR_CPUS] __write_once __attribute__((aligned(L2_CACHE_BYTES)));
+EXPORT_SYMBOL(cpu_2_node);
+
+/* Return cpu_to_node() except for cpus not yet assigned, which return -1 */
+static int __init cpu_to_bound_node(int cpu, struct cpumask* unbound_cpus)
+{
+	/* Impossible cpus and cpus still awaiting a node both map to -1. */
+	int bound = cpu_possible(cpu) && !cpumask_test_cpu(cpu, unbound_cpus);
+
+	return bound ? cpu_to_node(cpu) : -1;
+}
+
+/* Return number of immediately-adjacent tiles sharing the same NUMA node. */
+static int __init node_neighbors(int node, int cpu,
+				  struct cpumask *unbound_cpus)
+{
+	int width = smp_width;
+	int height = smp_height;
+	int col = cpu % width;
+	int row = cpu / width;
+	int count = 0;
+
+	/* Left and right neighbours, when they exist on the grid. */
+	if (col > 0)
+		count += (cpu_to_bound_node(cpu - 1, unbound_cpus) == node);
+	if (col < width - 1)
+		count += (cpu_to_bound_node(cpu + 1, unbound_cpus) == node);
+
+	/* Neighbours one row up and one row down, when they exist. */
+	if (row > 0)
+		count += (cpu_to_bound_node(cpu - width, unbound_cpus) == node);
+	if (row < height - 1)
+		count += (cpu_to_bound_node(cpu + width, unbound_cpus) == node);
+
+	return count;
+}
+
+/*
+ * Assign every possible cpu in the smp_width x smp_height grid to one of
+ * the default (online, non-isolated) NUMA nodes, filling in cpu_2_node[]
+ * and node_2_cpu_mask[].
+ *
+ * Cpus are handed out one at a time, cycling through the default nodes;
+ * each pick is the unbound cpu with the lowest metric for that node
+ * (its distance to this node's controller relative to the other nodes,
+ * adjusted slightly in favour of cpus with already-bound adjacent tiles).
+ * Cpus that are not possible end up pointing at the first default node.
+ */
+static void __init setup_numa_mapping(void)
+{
+ int distance[MAX_NUMNODES][NR_CPUS];
+ HV_Coord coord;
+ int cpu, node, cpus, i, x, y;
+ int num_nodes = num_online_nodes();
+ struct cpumask unbound_cpus;
+ nodemask_t default_nodes;
+
+ cpumask_clear(&unbound_cpus);
+
+ /* Get set of nodes we will use for defaults */
+ nodes_andnot(default_nodes, node_online_map, isolnodes);
+ if (nodes_empty(default_nodes)) {
+ BUG_ON(!node_isset(0, node_online_map));
+ pr_err("Forcing NUMA node zero available as a default node\n");
+ node_set(0, default_nodes);
+ }
+
+ /* Populate the distance[] array */
+ /* Every byte becomes 0xff, so each int entry starts out as -1. */
+ memset(distance, -1, sizeof(distance));
+ cpu = 0;
+ for (coord.y = 0; coord.y < smp_height; ++coord.y) {
+ for (coord.x = 0; coord.x < smp_width;
+ ++coord.x, ++cpu) {
+ BUG_ON(cpu >= nr_cpu_ids);
+ if (!cpu_possible(cpu)) {
+ /* Marked -1 for now; given a default node at the end. */
+ cpu_2_node[cpu] = -1;
+ continue;
+ }
+ for_each_node_mask(node, default_nodes) {
+ HV_MemoryControllerInfo info =
+ hv_inquire_memory_controller(
+ coord, node_controller[node]);
+ /* Sum of |x| and |y| of the coordinate the hypervisor returns. */
+ distance[node][cpu] =
+ ABS(info.coord.x) + ABS(info.coord.y);
+ }
+ cpumask_set_cpu(cpu, &unbound_cpus);
+ }
+ }
+ /* Number of grid positions walked above. */
+ cpus = cpu;
+
+ /*
+ * Round-robin through the NUMA nodes until all the cpus are
+ * assigned. We could be more clever here (e.g. create four
+ * sorted linked lists on the same set of cpu nodes, and pull
+ * off them in round-robin sequence, removing from all four
+ * lists each time) but given the relatively small numbers
+ * involved, O(n^2) seems OK for a one-time cost.
+ */
+ node = first_node(default_nodes);
+ while (!cpumask_empty(&unbound_cpus)) {
+ int best_cpu = -1;
+ int best_distance = INT_MAX;
+ for (cpu = 0; cpu < cpus; ++cpu) {
+ if (cpumask_test_cpu(cpu, &unbound_cpus)) {
+ /*
+ * Compute metric, which is how much
+ * closer the cpu is to this memory
+ * controller than the others, shifted
+ * up, and then the number of
+ * neighbors already in the node as an
+ * epsilon adjustment to try to keep
+ * the nodes compact.
+ */
+ int d = distance[node][cpu] * num_nodes;
+ for_each_node_mask(i, default_nodes) {
+ if (i != node)
+ d -= distance[i][cpu];
+ }
+ d *= 8; /* allow space for epsilon */
+ d -= node_neighbors(node, cpu, &unbound_cpus);
+ /* Strict '<' keeps the lowest-numbered cpu on ties. */
+ if (d < best_distance) {
+ best_cpu = cpu;
+ best_distance = d;
+ }
+ }
+ }
+ BUG_ON(best_cpu < 0);
+ cpumask_set_cpu(best_cpu, &node_2_cpu_mask[node]);
+ cpu_2_node[best_cpu] = node;
+ cpumask_clear_cpu(best_cpu, &unbound_cpus);
+ /* Advance to the next default node, wrapping at the end. */
+ node = next_node(node, default_nodes);
+ if (node == MAX_NUMNODES)
+ node = first_node(default_nodes);
+ }
+
+ /* Print out node assignments and set defaults for disabled cpus */
+ cpu = 0;
+ for (y = 0; y < smp_height; ++y) {
+ printk(KERN_DEBUG "NUMA cpu-to-node row %d:", y);
+ for (x = 0; x < smp_width; ++x, ++cpu) {
+ if (cpu_to_node(cpu) < 0) {
+ pr_cont(" -");
+ cpu_2_node[cpu] = first_node(default_nodes);
+ } else {
+ pr_cont(" %d", cpu_to_node(cpu));
+ }
+ }
+ pr_cont("\n");
+ }
+}
+
+static struct cpu cpu_devices[NR_CPUS];
+
+/* Register every online node, then every cpu slot of the tile grid. */
+static int __init topology_init(void)
+{
+	int nid;
+	int cpu;
+
+	for_each_online_node(nid)
+		register_one_node(nid);
+
+	cpu = 0;
+	while (cpu < smp_height * smp_width) {
+		register_cpu(&cpu_devices[cpu], cpu);
+		++cpu;
+	}
+
+	return 0;
+}
+
+subsys_initcall(topology_init);
+
+#else /* !CONFIG_NUMA */
+
+#define setup_numa_mapping() do { } while (0)
+
+#endif /* CONFIG_NUMA */
+
+/**
+ * setup_cpu() - Do all necessary per-cpu, tile-specific initialization.
+ * @boot: Is this the boot cpu?
+ *
+ * Called from setup_arch() on the boot cpu, or online_secondary().
+ */
+void __cpuinit setup_cpu(int boot)
+{
+ /* The boot cpu sets up its permanent mappings much earlier. */
+ if (!boot)
+ store_permanent_mappings();
+
+ /* Allow asynchronous TLB interrupts. */
+ /* Each unmask below is compiled in only if the chip has that feature. */
+#if CHIP_HAS_TILE_DMA()
+ arch_local_irq_unmask(INT_DMATLB_MISS);
+ arch_local_irq_unmask(INT_DMATLB_ACCESS);
+#endif
+#if CHIP_HAS_SN_PROC()
+ arch_local_irq_unmask(INT_SNITLB_MISS);
+#endif
+#ifdef __tilegx__
+ arch_local_irq_unmask(INT_SINGLE_STEP_K);
+#endif
+
+ /*
+ * Allow user access to many generic SPRs, like the cycle
+ * counter, PASS/FAIL/DONE, INTERRUPT_CRITICAL_SECTION, etc.
+ */
+ __insn_mtspr(SPR_MPL_WORLD_ACCESS_SET_0, 1);
+
+#if CHIP_HAS_SN()
+ /* Static network is not restricted. */
+ __insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1);
+#endif
+#if CHIP_HAS_SN_PROC()
+ __insn_mtspr(SPR_MPL_SN_NOTIFY_SET_0, 1);
+ __insn_mtspr(SPR_MPL_SN_CPL_SET_0, 1);
+#endif
+
+ /*
+ * Set the MPL for interrupt control 0 & 1 to the corresponding
+ * values. This includes access to the SYSTEM_SAVE and EX_CONTEXT
+ * SPRs, as well as the interrupt mask.
+ */
+ /* Note the differing suffixes: INTCTRL_0 uses SET_0, INTCTRL_1 uses SET_1. */
+ __insn_mtspr(SPR_MPL_INTCTRL_0_SET_0, 1);
+ __insn_mtspr(SPR_MPL_INTCTRL_1_SET_1, 1);
+
+ /* Initialize IRQ support for this cpu. */
+ setup_irq_regs();
+
+#ifdef CONFIG_HARDWALL
+ /* Reset the network state on this cpu. */
+ reset_network_state();
+#endif
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+/*
+ * Note that the kernel can potentially support other compression
+ * techniques than gz, though we don't do so by default. If we ever
+ * decide to do so we can either look for other filename extensions,
+ * or just allow a file with this name to be compressed with an
+ * arbitrary compressor (somewhat counterintuitively).
+ */
+static int __initdata set_initramfs_file;
+static char __initdata initramfs_file[128] = "initramfs.cpio.gz";
+
+/*
+ * Handle the "initramfs_file=" early parameter: record the hvfs file name
+ * to load instead of the default, and note that it was set explicitly so
+ * load_hv_initrd() can warn if the file is missing.
+ *
+ * Returns 0 on success, or -EINVAL if no value was supplied.
+ */
+static int __init setup_initramfs_file(char *str)
+{
+	if (str == NULL)
+		return -EINVAL;
+
+	/* strlcpy() truncates if needed and always NUL-terminates. */
+	strlcpy(initramfs_file, str, sizeof(initramfs_file));
+	set_initramfs_file = 1;
+
+	return 0;
+}
+early_param("initramfs_file", setup_initramfs_file);
+
+/*
+ * We look for an "initramfs.cpio.gz" file in the hvfs.
+ * If there is one, we allocate some memory for it and it will be
+ * unpacked to the initramfs.
+ */
+static void __init load_hv_initrd(void)
+{
+	HV_FS_StatInfo stat;
+	int fd, rc;
+	void *initrd;
+
+	fd = hv_fs_findfile((HV_VirtAddr) initramfs_file);
+	if (fd == HV_ENOENT) {
+		/* Only complain about a missing file if the user named one. */
+		if (set_initramfs_file)
+			pr_warning("No such hvfs initramfs file '%s'\n",
+				   initramfs_file);
+		return;
+	}
+	BUG_ON(fd < 0);
+	stat = hv_fs_fstat(fd);
+	BUG_ON(stat.size < 0);
+	if (stat.flags & HV_FS_ISDIR) {
+		pr_warning("Ignoring hvfs file '%s': it's a directory.\n",
+			   initramfs_file);
+		return;
+	}
+
+	/* Read the whole file into freshly allocated bootmem pages. */
+	initrd = alloc_bootmem_pages(stat.size);
+	rc = hv_fs_pread(fd, (HV_VirtAddr) initrd, stat.size, 0);
+	if (rc != stat.size) {
+		pr_err("Error reading %d bytes from hvfs file '%s': %d\n",
+		       stat.size, initramfs_file, rc);
+		/*
+		 * free_initrd_mem() takes a [begin, end) address pair,
+		 * not a start address and a length.
+		 */
+		free_initrd_mem((unsigned long) initrd,
+				(unsigned long) initrd + stat.size);
+		return;
+	}
+
+	/* Publish the image location. */
+	initrd_start = (unsigned long) initrd;
+	initrd_end = initrd_start + stat.size;
+}
+
+/* Give the bootmem covering addresses [begin, end) back to the allocator. */
+void __init free_initrd_mem(unsigned long begin, unsigned long end)
+{
+	unsigned long nbytes = end - begin;
+
+	free_bootmem(__pa(begin), nbytes);
+}
+
+#else
+static inline void load_hv_initrd(void) {}
+#endif /* CONFIG_BLK_DEV_INITRD */
+
+/*
+ * Cross-check the kernel's compile-time configuration against what the
+ * hypervisor reports (glue size, page sizes, grid size), panicking early
+ * on a mismatch, then record the ASID range and the chip model string.
+ */
+static void __init validate_hv(void)
+{
+ /*
+ * It may already be too late, but let's check our built-in
+ * configuration against what the hypervisor is providing.
+ */
+ unsigned long glue_size = hv_sysconf(HV_SYSCONF_GLUE_SIZE);
+ int hv_page_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL);
+ int hv_hpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE);
+ HV_ASIDRange asid_range;
+
+#ifndef CONFIG_SMP
+ /* A uniprocessor kernel must be running on the tile at (0,0). */
+ HV_Topology topology = hv_inquire_topology();
+ BUG_ON(topology.coord.x != 0 || topology.coord.y != 0);
+ if (topology.width != 1 || topology.height != 1) {
+ pr_warning("Warning: booting UP kernel on %dx%d grid;"
+ " will ignore all but first tile.\n",
+ topology.width, topology.height);
+ }
+#endif
+
+ /* The hypervisor glue must end at or below the start of kernel text. */
+ if (PAGE_OFFSET + HV_GLUE_START_CPA + glue_size > (unsigned long)_text)
+ early_panic("Hypervisor glue size %ld is too big!\n",
+ glue_size);
+ if (hv_page_size != PAGE_SIZE)
+ early_panic("Hypervisor page size %#x != our %#lx\n",
+ hv_page_size, PAGE_SIZE);
+ if (hv_hpage_size != HPAGE_SIZE)
+ early_panic("Hypervisor huge page size %#x != our %#lx\n",
+ hv_hpage_size, HPAGE_SIZE);
+
+#ifdef CONFIG_SMP
+ /*
+ * Some hypervisor APIs take a pointer to a bitmap array
+ * whose size is at least the number of cpus on the chip.
+ * We use a struct cpumask for this, so it must be big enough.
+ */
+ if ((smp_height * smp_width) > nr_cpu_ids)
+ early_panic("Hypervisor %d x %d grid too big for Linux"
+ " NR_CPUS %d\n", smp_height, smp_width,
+ nr_cpu_ids);
+#endif
+
+ /*
+ * Check that we're using allowed ASIDs, and initialize the
+ * various asid variables to their appropriate initial states.
+ */
+ asid_range = hv_inquire_asid(0);
+ __get_cpu_var(current_asid) = min_asid = asid_range.start;
+ max_asid = asid_range.start + asid_range.size - 1;
+
+ /* Fall back to "unknown" if the hypervisor has no model string. */
+ if (hv_confstr(HV_CONFSTR_CHIP_MODEL, (HV_VirtAddr)chip_model,
+ sizeof(chip_model)) < 0) {
+ pr_err("Warning: HV_CONFSTR_CHIP_MODEL not available\n");
+ strlcpy(chip_model, "unknown", sizeof(chip_model));
+ }
+}
+
+/*
+ * Check the hypervisor's virtual address ranges against the layout this
+ * kernel was built for, panicking early if something does not fit.
+ * All of the checks are compiled out on tilegx.
+ */
+static void __init validate_va(void)
+{
+#ifndef __tilegx__  /* FIXME: GX: probably some validation relevant here */
+	/*
+	 * As with the other hypervisor checks, confirm that only
+	 * permitted VAs are in use.  We rely on one contiguous range
+	 * covering MEM_USER_INTRPT .. MEM_HV_INTRPT and another covering
+	 * 0 .. KERNEL_HIGH_VADDR.  We also insist that the very end of
+	 * memory is NOT usable, because the last chunk of each pgd
+	 * holds the pgd_list.
+	 */
+	unsigned long pgd_list_va =
+		((PGD_LIST_OFFSET / sizeof(pgd_t)) << PGDIR_SHIFT);
+	unsigned long low_va_top = 0;
+	int found_user_kernel = 0;
+	int idx = 0;
+
+	/* Walk the hypervisor's ranges until it reports an empty one. */
+	while (1) {
+		HV_VirtAddrRange range = hv_inquire_virtual(idx);
+
+		if (range.size == 0)
+			break;
+		if (range.start <= MEM_USER_INTRPT &&
+		    range.start + range.size >= MEM_HV_INTRPT)
+			found_user_kernel = 1;
+		if (range.start == 0)
+			low_va_top = range.size;
+		BUG_ON(range.start + range.size > pgd_list_va);
+		++idx;
+	}
+
+	if (!found_user_kernel)
+		early_panic("Hypervisor not configured for user/kernel VAs\n");
+	if (low_va_top == 0)
+		early_panic("Hypervisor not configured for low VAs\n");
+	if (low_va_top < KERNEL_HIGH_VADDR)
+		early_panic("Hypervisor max VA %#lx smaller than %#lx\n",
+			    low_va_top, KERNEL_HIGH_VADDR);
+
+	/* Kernel PCs must have their high bit set; see intvec.S. */
+	if ((long)VMALLOC_START >= 0)
+		early_panic(
+			"Linux VMALLOC region below the 2GB line (%#lx)!\n"
+			"Reconfigure the kernel with fewer NR_HUGE_VMAPS\n"
+			"or smaller VMALLOC_RESERVE.\n",
+			VMALLOC_START);
+#endif
+}
+
+/*
+ * cpu_lotar_map lists all the cpus that are valid for the supervisor
+ * to cache data on at a page level, i.e. what cpus can be placed in
+ * the LOTAR field of a PTE. It is equivalent to the set of possible
+ * cpus plus any other cpus that are willing to share their cache.
+ * It is set by hv_inquire_tiles(HV_INQ_TILES_LOTAR).
+ */
+struct cpumask __write_once cpu_lotar_map;
+EXPORT_SYMBOL(cpu_lotar_map);
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+/*
+ * hash_for_home_map lists all the tiles that hash-for-home data
+ * will be cached on. Note that this may include tiles that are not
+ * valid for this supervisor to use otherwise (e.g. if a hypervisor
+ * device is being shared between multiple supervisors).
+ * It is set by hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE).
+ */
+struct cpumask hash_for_home_map;
+EXPORT_SYMBOL(hash_for_home_map);
+#endif
+
+/*
+ * cpu_cacheable_map lists all the cpus whose caches the hypervisor can
+ * flush on our behalf. It is set to cpu_possible_mask OR'ed with
+ * hash_for_home_map, and it is what should be passed to
+ * hv_flush_remote() to flush all caches. Note that if there are
+ * dedicated hypervisor driver tiles that have authorized use of their
+ * cache, those tiles will only appear in cpu_lotar_map, NOT in
+ * cpu_cacheable_map, as they are a special case.
+ */
+struct cpumask __write_once cpu_cacheable_map;
+EXPORT_SYMBOL(cpu_cacheable_map);
+
+static __initdata struct cpumask disabled_map;
+
+/*
+ * Early-param handler for "disabled_cpus": parse the cpu list into
+ * disabled_map.  Returns -EINVAL for a missing or unparsable list.
+ */
+static int __init disabled_cpus(char *str)
+{
+	int self = smp_processor_id();
+
+	if (str == NULL)
+		return -EINVAL;
+	if (cpulist_parse_crop(str, &disabled_map) != 0)
+		return -EINVAL;
+
+	/* The boot cpu can never be disabled; undo that and say so. */
+	if (cpumask_test_cpu(self, &disabled_map)) {
+		pr_err("disabled_cpus: can't disable boot cpu %d\n", self);
+		cpumask_clear_cpu(self, &disabled_map);
+	}
+
+	return 0;
+}
+
+early_param("disabled_cpus", disabled_cpus);
+
+/* Log the list of disabled cpus, if there are any. */
+void __init print_disabled_cpus(void)
+{
+	char buf[100];
+
+	if (cpumask_empty(&disabled_map))
+		return;
+
+	cpulist_scnprintf(buf, sizeof(buf), &disabled_map);
+	pr_info("CPUs not available for Linux: %s\n", buf);
+}
+
+/*
+ * Work out which cpus Linux may use and which caches it may target.
+ *
+ * Starts from the hypervisor's AVAIL set, removes cpus named by
+ * "disabled_cpus" and those beyond "setup_max_cpus", and installs the
+ * result as cpu_possible.  Also fills in cpu_lotar_map,
+ * hash_for_home_map (where the chip has one) and cpu_cacheable_map.
+ */
+static void __init setup_cpu_maps(void)
+{
+	struct cpumask hv_disabled_map, cpu_possible_init;
+	int boot_cpu = smp_processor_id();
+	int num_tiles = smp_height * smp_width;
+	int cpus, i, rc;
+
+	/* Learn which cpus are allowed by the hypervisor. */
+	rc = hv_inquire_tiles(HV_INQ_TILES_AVAIL,
+			      (HV_VirtAddr) cpumask_bits(&cpu_possible_init),
+			      sizeof(cpu_possible_init));
+	if (rc < 0)
+		early_panic("hv_inquire_tiles(AVAIL) failed: rc %d\n", rc);
+	if (!cpumask_test_cpu(boot_cpu, &cpu_possible_init))
+		early_panic("Boot CPU %d disabled by hypervisor!\n", boot_cpu);
+
+	/* Compute the cpus disabled by the hvconfig file. */
+	cpumask_complement(&hv_disabled_map, &cpu_possible_init);
+
+	/* Include them with the cpus disabled by "disabled_cpus". */
+	cpumask_or(&disabled_map, &disabled_map, &hv_disabled_map);
+
+	/*
+	 * Disable every cpu after "setup_max_cpus".  But don't mark
+	 * as disabled the cpus that are outside of our initial rectangle,
+	 * since that turns out to be confusing.
+	 *
+	 * The scan is bounded by the grid size: if fewer than
+	 * setup_max_cpus cpus are enabled, an unbounded scan would keep
+	 * testing bits beyond the end of disabled_map.
+	 */
+	cpus = 1;                          /* this cpu */
+	cpumask_set_cpu(boot_cpu, &disabled_map);   /* ignore this cpu */
+	for (i = 0; i < num_tiles && cpus < setup_max_cpus; ++i)
+		if (!cpumask_test_cpu(i, &disabled_map))
+			++cpus;
+	for (; i < num_tiles; ++i)
+		cpumask_set_cpu(i, &disabled_map);
+	cpumask_clear_cpu(boot_cpu, &disabled_map); /* reset this cpu */
+	for (i = num_tiles; i < NR_CPUS; ++i)
+		cpumask_clear_cpu(i, &disabled_map);
+
+	/*
+	 * Setup cpu_possible map as every cpu allocated to us, minus
+	 * the results of any "disabled_cpus" settings.
+	 */
+	cpumask_andnot(&cpu_possible_init, &cpu_possible_init, &disabled_map);
+	init_cpu_possible(&cpu_possible_init);
+
+	/* Learn which cpus are valid for LOTAR caching. */
+	rc = hv_inquire_tiles(HV_INQ_TILES_LOTAR,
+			      (HV_VirtAddr) cpumask_bits(&cpu_lotar_map),
+			      sizeof(cpu_lotar_map));
+	if (rc < 0) {
+		pr_err("warning: no HV_INQ_TILES_LOTAR; using AVAIL\n");
+		cpu_lotar_map = *cpu_possible_mask;
+	}
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+	/* Retrieve set of CPUs used for hash-for-home caching */
+	rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE,
+			      (HV_VirtAddr) cpumask_bits(&hash_for_home_map),
+			      sizeof(hash_for_home_map));
+	if (rc < 0)
+		early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc);
+	cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map);
+#else
+	cpu_cacheable_map = *cpu_possible_mask;
+#endif
+}
+
+
+/*
+ * Early-param handler for "dataplane": the option is accepted but only
+ * produces a warning, and 0 is returned whatever @str holds.
+ */
+static int __init dataplane(char *str)
+{
+ pr_warning("WARNING: dataplane support disabled in this kernel\n");
+ return 0;
+}
+
+early_param("dataplane", dataplane);
+
+#ifdef CONFIG_CMDLINE_BOOL
+static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
+#endif
+
+/*
+ * Architecture-specific boot-time setup.
+ *
+ * Builds boot_command_line from the built-in and/or hypervisor-supplied
+ * command line (depending on the CONFIG_CMDLINE_* options), hands it back
+ * through @cmdline_p, and then runs the early init steps below in order.
+ */
+void __init setup_arch(char **cmdline_p)
+{
+ int len;
+
+#if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE)
+ /*
+ * Override mode: the hypervisor's line is fetched only so it can be
+ * reported as ignored; the built-in line then replaces it.
+ */
+ len = hv_get_command_line((HV_VirtAddr) boot_command_line,
+ COMMAND_LINE_SIZE);
+ if (boot_command_line[0])
+ pr_warning("WARNING: ignoring dynamic command line \"%s\"\n",
+ boot_command_line);
+ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+#else
+ char *hv_cmdline;
+#if defined(CONFIG_CMDLINE_BOOL)
+ /* Built-in line goes first; the hypervisor's is appended after a space. */
+ if (builtin_cmdline[0]) {
+ int builtin_len = strlcpy(boot_command_line, builtin_cmdline,
+ COMMAND_LINE_SIZE);
+ if (builtin_len < COMMAND_LINE_SIZE-1)
+ boot_command_line[builtin_len++] = ' ';
+ hv_cmdline = &boot_command_line[builtin_len];
+ len = COMMAND_LINE_SIZE - builtin_len;
+ } else
+#endif
+ {
+ hv_cmdline = boot_command_line;
+ len = COMMAND_LINE_SIZE;
+ }
+ len = hv_get_command_line((HV_VirtAddr) hv_cmdline, len);
+ if (len < 0 || len > COMMAND_LINE_SIZE)
+ early_panic("hv_get_command_line failed: %d\n", len);
+#endif
+
+ *cmdline_p = boot_command_line;
+
+ /* Set disabled_map and setup_max_cpus very early */
+ parse_early_param();
+
+ /* Make sure the kernel is compatible with the hypervisor. */
+ validate_hv();
+ validate_va();
+
+ setup_cpu_maps();
+
+
+#ifdef CONFIG_PCI
+ /*
+ * Initialize the PCI structures. This is done before memory
+ * setup so that we know whether or not a pci_reserve region
+ * is necessary.
+ */
+ if (tile_pci_init() == 0)
+ pci_reserve_mb = 0;
+
+ /* PCI systems reserve a region just below 4GB for mapping iomem. */
+ pci_reserve_end_pfn = (1 << (32 - PAGE_SHIFT));
+ pci_reserve_start_pfn = pci_reserve_end_pfn -
+ (pci_reserve_mb << (20 - PAGE_SHIFT));
+#endif
+
+ /* Record the kernel image boundaries in init_mm. */
+ init_mm.start_code = (unsigned long) _text;
+ init_mm.end_code = (unsigned long) _etext;
+ init_mm.end_data = (unsigned long) _edata;
+ init_mm.brk = (unsigned long) _end;
+
+ setup_memory();
+ store_permanent_mappings();
+ setup_bootmem_allocator();
+
+ /*
+ * NOTE: before this point _nobody_ is allowed to allocate
+ * any memory using the bootmem allocator.
+ */
+
+ paging_init();
+ setup_numa_mapping();
+ zone_sizes_init();
+ set_page_homes();
+ /* Argument 1 marks this as the boot cpu. */
+ setup_cpu(1);
+ setup_clock();
+ load_hv_initrd();
+}
+
+
+/*
+ * Set up per-cpu memory.
+ */
+
+unsigned long __per_cpu_offset[NR_CPUS] __write_once;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static size_t __initdata pfn_offset[MAX_NUMNODES] = { 0 };
+static unsigned long __initdata percpu_pfn[NR_CPUS] = { 0 };
+
+/*
+ * As the percpu code allocates pages, we return the pages from the
+ * end of the node for the specified cpu.
+ */
+static void *__init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
+{
+	int node = cpu_to_node(cpu);
+	unsigned long first_pfn = node_percpu_pfn[node] + pfn_offset[node];
+
+	/* Only whole pages are handed out. */
+	BUG_ON(size % PAGE_SIZE != 0);
+	pfn_offset[node] += size / PAGE_SIZE;
+
+	/* Deduct this request from the node's remaining per-cpu budget. */
+	BUG_ON(node_percpu[node] < size);
+	node_percpu[node] -= size;
+
+	/* Remember where this cpu's first allocation landed. */
+	if (percpu_pfn[cpu] == 0)
+		percpu_pfn[cpu] = first_pfn;
+
+	return pfn_to_kaddr(first_pfn);
+}
+
+/*
+ * Pages reserved for percpu memory are not freeable, and in any case we are
+ * on a short path to panic() in setup_per_cpu_areas() at this point anyway.
+ */
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+ /* Deliberately a no-op: both arguments are ignored. */
+}
+
+/*
+ * Set up vmalloc page tables using bootmem for the percpu code.
+ */
+static void __init pcpu_fc_populate_pte(unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *new_table;
+
+	BUG_ON(pgd_addr_invalid(addr));
+	if (addr < VMALLOC_START || addr >= VMALLOC_END)
+		panic("PCPU addr %#lx outside vmalloc range %#lx..%#lx;"
+		      " try increasing CONFIG_VMALLOC_RESERVE\n",
+		      addr, VMALLOC_START, VMALLOC_END);
+
+	/* Walk down from the kernel's top-level table to the pmd slot. */
+	pgd = swapper_pg_dir + pgd_index(addr);
+	pud = pud_offset(pgd, addr);
+	BUG_ON(!pud_present(*pud));
+	pmd = pmd_offset(pud, addr);
+
+	if (pmd_present(*pmd)) {
+		/* Already populated; it must not be a huge-page mapping. */
+		BUG_ON(pmd_huge_page(*pmd));
+		return;
+	}
+
+	/* No page table here yet: take one from bootmem and install it. */
+	new_table = __alloc_bootmem(L2_KERNEL_PGTABLE_SIZE,
+				    HV_PAGE_TABLE_ALIGN, 0);
+	pmd_populate_kernel(&init_mm, pmd, new_table);
+}
+
+/*
+ * Create the per-cpu areas via pcpu_page_first_chunk(), then re-home each
+ * cpu's pages so they are cached on that cpu: both the vmalloc mapping
+ * and the lowmem alias of every page receive the same updated PTE.
+ */
+void __init setup_per_cpu_areas(void)
+{
+ struct page *pg;
+ unsigned long delta, pfn, lowmem_va;
+ unsigned long size = percpu_size();
+ char *ptr;
+ int rc, cpu, i;
+
+ rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_fc_alloc,
+ pcpu_fc_free, pcpu_fc_populate_pte);
+ if (rc < 0)
+ panic("Cannot initialize percpu area (err=%d)", rc);
+
+ /* Offset from the linked per-cpu section to the first chunk's base. */
+ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+ for_each_possible_cpu(cpu) {
+ __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+
+ /* finv the copy out of cache so we can change homecache */
+ ptr = pcpu_base_addr + pcpu_unit_offsets[cpu];
+ __finv_buffer(ptr, size);
+ /* First pfn recorded for this cpu by pcpu_fc_alloc(). */
+ pfn = percpu_pfn[cpu];
+
+ /* Rewrite the page tables to cache on that cpu */
+ pg = pfn_to_page(pfn);
+ for (i = 0; i < size; i += PAGE_SIZE, ++pfn, ++pg) {
+
+ /* Update the vmalloc mapping and page home. */
+ pte_t *ptep =
+ virt_to_pte(NULL, (unsigned long)ptr + i);
+ pte_t pte = *ptep;
+ BUG_ON(pfn != pte_pfn(pte));
+ pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3);
+ pte = set_remote_cache_cpu(pte, cpu);
+ set_pte(ptep, pte);
+
+ /* Update the lowmem mapping for consistency. */
+ lowmem_va = (unsigned long)pfn_to_kaddr(pfn);
+ ptep = virt_to_pte(NULL, lowmem_va);
+ /* A huge mapping is split first so one page can be changed. */
+ if (pte_huge(*ptep)) {
+ printk(KERN_DEBUG "early shatter of huge page"
+ " at %#lx\n", lowmem_va);
+ shatter_pmd((pmd_t *)ptep);
+ ptep = virt_to_pte(NULL, lowmem_va);
+ BUG_ON(pte_huge(*ptep));
+ }
+ BUG_ON(pfn != pte_pfn(*ptep));
+ set_pte(ptep, pte);
+ }
+ }
+
+ /* Set our thread pointer appropriately. */
+ set_my_cpu_offset(__per_cpu_offset[smp_processor_id()]);
+
+ /* Make sure the finv's have completed. */
+ mb_incoherent();
+
+ /* Flush the TLB so we reference it properly from here on out. */
+ local_flush_tlb_all();
+}
+
+static struct resource data_resource = {
+ .name = "Kernel data",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource code_resource = {
+ .name = "Kernel code",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+/*
+ * We reserve all resources above 4GB so that PCI won't try to put
+ * mappings above 4GB; the standard allows that for some devices but
+ * the probing code truncates values to 32 bits.
+ */
+#ifdef CONFIG_PCI
+/*
+ * Insert a busy resource covering all physical addresses from 4GB up.
+ *
+ * Returns the new resource, or NULL if it could not be allocated or
+ * inserted into iomem_resource.
+ */
+static struct resource* __init
+insert_non_bus_resource(void)
+{
+	struct resource *res =
+		kzalloc(sizeof(struct resource), GFP_ATOMIC);
+
+	/* Allocation can fail; don't dereference a NULL resource. */
+	if (!res)
+		return NULL;
+
+	res->name = "Non-Bus Physical Address Space";
+	res->start = (1ULL << 32);
+	res->end = -1LL;
+	res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+	if (insert_resource(&iomem_resource, res)) {
+		kfree(res);
+		return NULL;
+	}
+	return res;
+}
+#endif
+
+/*
+ * Insert a busy "System RAM" resource for page frames
+ * [start_pfn, end_pfn).
+ *
+ * Returns the new resource, or NULL if it could not be allocated or
+ * inserted into iomem_resource.
+ */
+static struct resource* __init
+insert_ram_resource(u64 start_pfn, u64 end_pfn)
+{
+	struct resource *res =
+		kzalloc(sizeof(struct resource), GFP_ATOMIC);
+
+	/* Allocation can fail; don't dereference a NULL resource. */
+	if (!res)
+		return NULL;
+
+	res->name = "System RAM";
+	res->start = start_pfn << PAGE_SHIFT;
+	/* Resource ends are inclusive, hence the -1. */
+	res->end = (end_pfn << PAGE_SHIFT) - 1;
+	res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+	if (insert_resource(&iomem_resource, res)) {
+		kfree(res);
+		return NULL;
+	}
+	return res;
+}
+
+/*
+ * Request address space for all standard resources
+ *
+ * If the system includes PCI root complex drivers, we need to create
+ * a window just below 4GB where PCI BARs can be mapped.
+ */
+static int __init request_standard_resources(void)
+{
+	enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };
+	int nid;
+
+	iomem_resource.end = -1LL;
+#ifdef CONFIG_PCI
+	insert_non_bus_resource();
+#endif
+
+	/* Describe each online node's RAM in the iomem resource tree. */
+	for_each_online_node(nid) {
+		u64 first_pfn = node_start_pfn[nid];
+		u64 last_pfn = node_end_pfn[nid];
+
+#ifdef CONFIG_PCI
+		/*
+		 * If this node's RAM runs into the PCI window, register
+		 * whatever lies above the window separately and stop the
+		 * main range at the window's start.
+		 */
+		if (first_pfn <= pci_reserve_start_pfn &&
+		    last_pfn > pci_reserve_start_pfn) {
+			if (last_pfn > pci_reserve_end_pfn)
+				insert_ram_resource(pci_reserve_end_pfn,
+						    last_pfn);
+			last_pfn = pci_reserve_start_pfn;
+		}
+#endif
+		insert_ram_resource(first_pfn, last_pfn);
+	}
+
+	/* Kernel text is addressed via CODE_DELTA; data is used directly. */
+	code_resource.start = __pa(_text - CODE_DELTA);
+	code_resource.end = __pa(_etext - CODE_DELTA)-1;
+	data_resource.start = __pa(_sdata);
+	data_resource.end = __pa(_end)-1;
+
+	insert_resource(&iomem_resource, &code_resource);
+	insert_resource(&iomem_resource, &data_resource);
+
+#ifdef CONFIG_KEXEC
+	insert_resource(&iomem_resource, &crashk_res);
+#endif
+
+	return 0;
+}
+
+subsys_initcall(request_standard_resources);
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
new file mode 100644
index 00000000..f79d4b88
--- /dev/null
+++ b/arch/tile/kernel/signal.c
@@ -0,0 +1,476 @@
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/suspend.h>
+#include <linux/ptrace.h>
+#include <linux/elf.h>
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <asm/processor.h>
+#include <asm/ucontext.h>
+#include <asm/sigframe.h>
+#include <asm/syscalls.h>
+#include <arch/interrupts.h>
+
+#define DEBUG_SIG 0
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+/* sigaltstack(2): hand off to do_sigaltstack() with the user stack pointer. */
+SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss,
+		stack_t __user *, uoss, struct pt_regs *, regs)
+{
+	unsigned long user_sp = regs->sp;
+
+	return do_sigaltstack(uss, uoss, user_sp);
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+
+/*
+ * Reload the register file in *regs from the user-space sigcontext *sc.
+ * Returns zero on success, or the OR of the __get_user() failures.
+ */
+int restore_sigcontext(struct pt_regs *regs,
+		       struct sigcontext __user *sc)
+{
+	int fault = 0;
+	int idx = 0;
+
+	/*
+	 * sigcontext must be laid out like pt_regs and must keep our
+	 * stack alignment rules intact.
+	 */
+	BUILD_BUG_ON(sizeof(struct sigcontext) != sizeof(struct pt_regs));
+	BUILD_BUG_ON(sizeof(struct sigcontext) % 8 != 0);
+
+	/* Any pending restarted system call must now return -EINTR. */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	/* Copy pt_regs back one long at a time, accumulating faults. */
+	while (idx < sizeof(struct pt_regs)/sizeof(long)) {
+		fault |= __get_user(regs->regs[idx], &sc->gregs[idx]);
+		++idx;
+	}
+
+	/* Whatever user space stored, force the PL back to USER_PL. */
+	regs->ex1 = PL_ICS_EX1(USER_PL, EX1_ICS(regs->ex1));
+
+	regs->faultnum = INT_SWINT_1_SIGRETURN;
+
+	return fault;
+}
+
+/*
+ * Report a bad signal frame (always traced as SIGSEGV) and then force
+ * a SIGSEGV on the current task on behalf of signal "sig".
+ */
+void signal_fault(const char *type, struct pt_regs *regs,
+		  void __user *frame, int sig)
+{
+	unsigned long frame_addr = (unsigned long)frame;
+
+	trace_unhandled_signal(type, regs, frame_addr, SIGSEGV);
+	force_sigsegv(sig, current);
+}
+
+/* The assembly shim for this function arranges to ignore the return value. */
+SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs)
+{
+	struct rt_sigframe __user *frame;
+	sigset_t mask;
+
+	/* The signal frame sits at the user stack pointer. */
+	frame = (struct rt_sigframe __user *)(regs->sp);
+
+	/* Fetch the signal mask that was saved in the frame. */
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)) ||
+	    __copy_from_user(&mask, &frame->uc.uc_sigmask, sizeof(mask)))
+		goto badframe;
+
+	/* SIGKILL and SIGSTOP can never be blocked. */
+	sigdelsetmask(&mask, ~_BLOCKABLE);
+	set_current_blocked(&mask);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext) != 0)
+		goto badframe;
+
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+		goto badframe;
+
+	return 0;
+
+badframe:
+	signal_fault("bad sigreturn frame", regs, frame, 0);
+	return 0;
+}
+
+/*
+ * Set up a signal frame.
+ */
+
+/*
+ * Copy the register file in *regs out to the user-space sigcontext *sc.
+ * Returns zero on success, or the OR of the __put_user() failures.
+ */
+int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
+{
+	int fault = 0;
+	int idx = 0;
+
+	while (idx < sizeof(struct pt_regs)/sizeof(long)) {
+		fault |= __put_user(regs->regs[idx], &sc->gregs[idx]);
+		++idx;
+	}
+
+	return fault;
+}
+
+/*
+ * Determine which stack to use..
+ */
+/*
+ * Pick the user address at which a signal frame of frame_size bytes
+ * should be built, honouring SA_ONSTACK and 16-byte stack alignment.
+ */
+static inline void __user *get_sigframe(struct k_sigaction *ka,
+					struct pt_regs *regs,
+					size_t frame_size)
+{
+	/* Start from the normal user stack. */
+	unsigned long top = regs->sp;
+
+	/*
+	 * Already on the alternate stack and the frame would run off
+	 * its end: hand back an address that can never be valid so the
+	 * task dies with SIGSEGV.
+	 */
+	if (on_sig_stack(top) && unlikely(!on_sig_stack(top - frame_size)))
+		return (void __user __force *)-1UL;
+
+	/* X/Open sanctioned switch to the alternate signal stack. */
+	if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags(top) == 0)
+		top = current->sas_ss_sp + current->sas_ss_size;
+
+	/*
+	 * Reserve the frame, then round down so that (sp & 15) == 0 on
+	 * function entry as the TILE ABI requires.
+	 */
+	top -= frame_size;
+	top &= -16UL;
+
+	return (void __user *) top;
+}
+
+/*
+ * Build an rt_sigframe on the user stack for signal "sig" and redirect
+ * the saved user registers in *regs so that the task resumes in the
+ * handler described by *ka.
+ *
+ * "info" is the siginfo to pass to the handler; "set" is the signal
+ * mask stored in the frame's ucontext.
+ *
+ * Returns 0 on success.  If the frame is not writable, or any copy to
+ * user space fails, signal_fault() is called and -EFAULT is returned.
+ */
+static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+{
+ unsigned long restorer;
+ struct rt_sigframe __user *frame;
+ int err = 0;
+ int usig;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame));
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ goto give_sigsegv;
+
+ /*
+  * Translate the signal number through the exec domain's inverse
+  * map when one exists and sig < 32; otherwise pass it unchanged.
+  */
+ usig = current_thread_info()->exec_domain
+ && current_thread_info()->exec_domain->signal_invmap
+ && sig < 32
+ ? current_thread_info()->exec_domain->signal_invmap[sig]
+ : sig;
+
+ /* Always write at least the signal number for the stack backtracer. */
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ /* At sigreturn time, restore the callee-save registers too. */
+ err |= copy_siginfo_to_user(&frame->info, info);
+ regs->flags |= PT_FLAGS_RESTORE_REGS;
+ } else {
+ err |= __put_user(info->si_signo, &frame->info.si_signo);
+ }
+
+ /* Create the ucontext. */
+ err |= __clear_user(&frame->save_area, sizeof(frame->save_area));
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(NULL, &frame->uc.uc_link);
+ err |= __put_user((void __user *)(current->sas_ss_sp),
+ &frame->uc.uc_stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->sp),
+ &frame->uc.uc_stack.ss_flags);
+ err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, regs);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ if (err)
+ goto give_sigsegv;
+
+ /* Return through VDSO_BASE unless the handler supplied a restorer. */
+ restorer = VDSO_BASE;
+ if (ka->sa.sa_flags & SA_RESTORER)
+ restorer = (unsigned long) ka->sa.sa_restorer;
+
+ /*
+ * Set up registers for signal handler.
+ * Registers that we don't modify keep the value they had from
+ * user-space at the time we took the signal.
+ * We always pass siginfo and mcontext, regardless of SA_SIGINFO,
+ * since some things rely on this (e.g. glibc's debug/segfault.c).
+ */
+ regs->pc = (unsigned long) ka->sa.sa_handler;
+ regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */
+ regs->sp = (unsigned long) frame;
+ regs->lr = restorer;
+ regs->regs[0] = (unsigned long) usig;
+ regs->regs[1] = (unsigned long) &frame->info;
+ regs->regs[2] = (unsigned long) &frame->uc;
+ regs->flags |= PT_FLAGS_CALLER_SAVES;
+
+ /*
+ * Notify any tracer that was single-stepping it.
+ * The tracer may want to single-step inside the
+ * handler too.
+ */
+ if (test_thread_flag(TIF_SINGLESTEP))
+ ptrace_notify(SIGTRAP);
+
+ return 0;
+
+give_sigsegv:
+ signal_fault("bad setup frame", regs, frame, sig);
+ return -EFAULT;
+}
+
+/*
+ * OK, we're invoking a handler
+ */
+
+/*
+ * Deliver signal "sig" to the handler described by *ka.
+ *
+ * If the task was interrupted inside a system call (faultnum is
+ * INT_SWINT_1), first decide whether the call fails with -EINTR or is
+ * restarted, then build the signal frame.  Returns whatever the frame
+ * setup routine returned (0 on success).
+ */
+static int handle_signal(unsigned long sig, siginfo_t *info,
+ struct k_sigaction *ka, sigset_t *oldset,
+ struct pt_regs *regs)
+{
+ int ret;
+
+ /* Are we from a system call? */
+ if (regs->faultnum == INT_SWINT_1) {
+ /* If so, check system call restarting.. */
+ switch (regs->regs[0]) {
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ regs->regs[0] = -EINTR;
+ break;
+
+ case -ERESTARTSYS:
+ /* Only restart if the handler asked for SA_RESTART. */
+ if (!(ka->sa.sa_flags & SA_RESTART)) {
+ regs->regs[0] = -EINTR;
+ break;
+ }
+ /* fallthrough */
+ case -ERESTARTNOINTR:
+ /* Reload caller-saves to restore r0..r5 and r10. */
+ regs->flags |= PT_FLAGS_CALLER_SAVES;
+ regs->regs[0] = regs->orig_r0;
+ /* Back the PC up by 8 bytes so the syscall is re-issued. */
+ regs->pc -= 8;
+ }
+ }
+
+ /* Set up the stack frame */
+#ifdef CONFIG_COMPAT
+ if (is_compat_task())
+ ret = compat_setup_rt_frame(sig, ka, info, oldset, regs);
+ else
+#endif
+ ret = setup_rt_frame(sig, ka, info, oldset, regs);
+ if (ret == 0) {
+ /* This code is only called from system calls or from
+ * the work_pending path in the return-to-user code, and
+ * either way we can re-enable interrupts unconditionally.
+ *
+ * NOTE(review): the text above talks about interrupts, but
+ * the only call made here is block_sigmask(); the comment
+ * looks stale -- confirm.
+ */
+ block_sigmask(ka, sig);
+ }
+
+ return ret;
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+/*
+ * Signal-delivery entry point: dequeue one pending signal for the
+ * current task and either deliver it via handle_signal() or, if there
+ * is none, arrange for an interrupted system call to be restarted and
+ * put back any saved signal mask.
+ */
+void do_signal(struct pt_regs *regs)
+{
+ siginfo_t info;
+ int signr;
+ struct k_sigaction ka;
+ sigset_t *oldset;
+
+ /*
+ * i386 will check if we're coming from kernel mode and bail out
+ * here. In my experience this just turns weird crashes into
+ * weird spin-hangs. But if we find a case where this seems
+ * helpful, we can reinstate the check on "!user_mode(regs)".
+ */
+
+ /* Pick the mask to record in the frame: the saved one if flagged. */
+ if (current_thread_info()->status & TS_RESTORE_SIGMASK)
+ oldset = &current->saved_sigmask;
+ else
+ oldset = &current->blocked;
+
+ signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+ if (signr > 0) {
+ /* Whee! Actually deliver the signal. */
+ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+ /*
+ * A signal was successfully delivered; the saved
+ * sigmask will have been stored in the signal frame,
+ * and will be restored by sigreturn, so we can simply
+ * clear the TS_RESTORE_SIGMASK flag.
+ */
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+ }
+
+ goto done;
+ }
+
+ /* Did we come from a system call? */
+ if (regs->faultnum == INT_SWINT_1) {
+ /* Restart the system call - no handlers present */
+ switch (regs->regs[0]) {
+ case -ERESTARTNOHAND:
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ /* Restore the original r0 and re-issue the same syscall. */
+ regs->flags |= PT_FLAGS_CALLER_SAVES;
+ regs->regs[0] = regs->orig_r0;
+ regs->pc -= 8;
+ break;
+
+ case -ERESTART_RESTARTBLOCK:
+ /* Re-enter through restart_syscall instead. */
+ regs->flags |= PT_FLAGS_CALLER_SAVES;
+ regs->regs[TREG_SYSCALL_NR] = __NR_restart_syscall;
+ regs->pc -= 8;
+ break;
+ }
+ }
+
+ /* If there's no signal to deliver, just put the saved sigmask back. */
+ if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+ }
+
+done:
+ /* Avoid double syscall restart if there are nested signals. */
+ regs->faultnum = INT_SWINT_1_SIGRETURN;
+}
+
+int show_unhandled_signals = 1;
+
+/*
+ * Parse the "crashinfo" boot argument.  A bare "crashinfo" selects
+ * level 2; "crashinfo=N" selects level N.  Returns 1 if the argument
+ * was consumed, 0 if it was malformed.
+ */
+static int __init crashinfo(char *str)
+{
+	unsigned long level;
+	const char *kind;
+
+	if (*str == '\0') {
+		level = 2;
+	} else {
+		if (*str != '=')
+			return 0;
+		if (strict_strtoul(str + 1, 0, &level) != 0)
+			return 0;
+	}
+	show_unhandled_signals = level;
+
+	if (show_unhandled_signals == 0)
+		kind = "No";
+	else if (show_unhandled_signals == 1)
+		kind = "One-line";
+	else
+		kind = "Detailed";
+
+	pr_info("%s crash reports will be generated on the console\n", kind);
+	return 1;
+}
+__setup("crashinfo", crashinfo);
+
+/*
+ * Hex-dump up to region_size bytes of user memory centred on
+ * "address", one bytes_per_line row at a time.  Rows that cannot be
+ * read are silently skipped.
+ */
+static void dump_mem(void __user *address)
+{
+	enum { region_size = 256, bytes_per_line = 16 };
+	void __user *cursor;
+	int done, pos, col;
+	int header_printed = 0;
+
+	pr_err("\n");
+	if (!access_ok(VERIFY_READ, address, 1)) {
+		pr_err("Not dumping at address 0x%lx (kernel address)\n",
+		       (unsigned long)address);
+		return;
+	}
+
+	/* Align down to a row boundary and step back half the region. */
+	cursor = (void __user *)
+		(((unsigned long)address & -bytes_per_line) - region_size/2);
+	/* If that wrapped below zero, start from address zero instead. */
+	if (cursor > address)
+		cursor = NULL;
+
+	for (done = 0; done < region_size;
+	     done += bytes_per_line, cursor += bytes_per_line) {
+		unsigned char row[bytes_per_line];
+		char text[100];
+
+		if (copy_from_user(row, cursor, bytes_per_line) == 0) {
+			if (!header_printed) {
+				pr_err("Dumping memory around address 0x%lx:\n",
+				       (unsigned long)address);
+				header_printed = 1;
+			}
+			pos = sprintf(text, REGFMT":", (unsigned long)cursor);
+			for (col = 0; col < bytes_per_line; ++col)
+				pos += sprintf(&text[pos], " %02x", row[col]);
+			pr_err("%s\n", text);
+		}
+	}
+
+	if (!header_printed)
+		pr_err("No readable memory around address 0x%lx\n",
+		       (unsigned long)address);
+}
+
+/*
+ * Log a signal that the current task does not handle.  The amount of
+ * output depends on show_unhandled_signals: 0 prints nothing, 1 prints
+ * a rate-limited one-line summary, and larger values add a detailed
+ * report.
+ */
+void trace_unhandled_signal(const char *type, struct pt_regs *regs,
+			    unsigned long address, int sig)
+{
+	struct task_struct *tsk = current;
+	int has_fault_address;
+
+	if (show_unhandled_signals == 0)
+		return;
+
+	/* A signal with a real handler installed is not reported here. */
+	if (!is_global_init(tsk)) {
+		void __user *handler =
+			tsk->sighand->action[sig-1].sa.sa_handler;
+		if (!(handler == SIG_IGN || handler == SIG_DFL))
+			return;
+	}
+
+	/* Only the one-line form is rate-limited. */
+	if (show_unhandled_signals <= 1 && !printk_ratelimit())
+		return;
+
+	printk("%s%s[%d]: %s at %lx pc "REGFMT" signal %d",
+	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+	       tsk->comm, task_pid_nr(tsk), type, address, regs->pc, sig);
+
+	print_vma_addr(KERN_CONT " in ", regs->pc);
+
+	printk(KERN_CONT "\n");
+
+	if (show_unhandled_signals <= 1)
+		return;
+
+	has_fault_address = (sig == SIGILL || sig == SIGFPE ||
+			     sig == SIGSEGV || sig == SIGBUS);
+	if (!has_fault_address) {
+		pr_err("User crash: signal %d, trap %ld\n",
+		       sig, regs->faultnum);
+		return;
+	}
+
+	pr_err("User crash: signal %d,"
+	       " trap %ld, address 0x%lx\n",
+	       sig, regs->faultnum, address);
+	show_regs(regs);
+	dump_mem((void __user *)address);
+}
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
new file mode 100644
index 00000000..89529c9f
--- /dev/null
+++ b/arch/tile/kernel/single_step.c
@@ -0,0 +1,768 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * A code-rewriter that enables instruction single-stepping.
+ * Derived from iLib's single-stepping code.
+ */
+
+#ifndef __tilegx__ /* Hardware support for single step unavailable. */
+
+/* These functions are only used on the TILE platform */
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+#include <linux/err.h>
+#include <asm/cacheflush.h>
+#include <asm/unaligned.h>
+#include <arch/abi.h>
+#include <arch/opcode.h>
+
+#define signExtend17(val) sign_extend((val), 17)
+#define TILE_X1_MASK (0xffffffffULL << 31)
+
+int unaligned_printk;
+
+/*
+ * Parse the "unaligned_printk=" boot argument into unaligned_printk.
+ * Returns 1 if the value parsed cleanly, 0 otherwise.
+ */
+static int __init setup_unaligned_printk(char *str)
+{
+	long parsed;
+	const char *state;
+
+	if (strict_strtol(str, 0, &parsed) != 0)
+		return 0;
+
+	unaligned_printk = parsed;
+	state = unaligned_printk ? "enabled" : "disabled";
+	pr_info("Printk for each unaligned data accesses is %s\n", state);
+	return 1;
+}
+__setup("unaligned_printk=", setup_unaligned_printk);
+
+unsigned int unaligned_fixup_count;
+
+enum mem_op {
+ MEMOP_NONE,
+ MEMOP_LOAD,
+ MEMOP_STORE,
+ MEMOP_LOAD_POSTINCR,
+ MEMOP_STORE_POSTINCR
+};
+
+/* Return bundle "n" with its X1 branch-offset field replaced by "offset". */
+static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset)
+{
+	/* All-ones in the BrOff field marks the bits to clear. */
+	tile_bundle_bits field = create_BrOff_X1(-1);
+
+	return (n & ~field) | create_BrOff_X1(offset);
+}
+
+/*
+ * Return bundle "n" with its X1 bits replaced by "or dest, src, zero",
+ * i.e. a register move from src to dest.
+ */
+static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
+{
+	tile_bundle_bits insn =
+		create_Opcode_X1(SPECIAL_0_OPCODE_X1) |
+		create_RRROpcodeExtension_X1(OR_SPECIAL_0_OPCODE_X1) |
+		create_Dest_X1(dest) |
+		create_SrcB_X1(TREG_ZERO) |
+		create_SrcA_X1(src);
+
+	return (n & ~TILE_X1_MASK) | insn;
+}
+
+/* Replace the X1 bits of "n" with a move of the zero register to itself. */
+static inline tile_bundle_bits nop_X1(tile_bundle_bits n)
+{
+	const int zero = TREG_ZERO;
+
+	return move_X1(n, zero, zero);
+}
+
+/* Return bundle "n" with its X1 bits replaced by "addi dest, src, imm". */
+static inline tile_bundle_bits addi_X1(
+	tile_bundle_bits n, int dest, int src, int imm)
+{
+	tile_bundle_bits insn =
+		create_Opcode_X1(IMM_0_OPCODE_X1) |
+		create_ImmOpcodeExtension_X1(ADDI_IMM_0_OPCODE_X1) |
+		create_S_X1(0) |
+		create_Imm8_X1(imm) |
+		create_Dest_X1(dest) |
+		create_SrcA_X1(src);
+
+	return (n & ~TILE_X1_MASK) | insn;
+}
+
+/*
+ * Emulate an unaligned 2- or 4-byte load/store found in "bundle".
+ *
+ * @state:    per-thread single-step state; for loads, the loaded value
+ *            is recorded in update_reg/update_value for later write-back
+ * @bundle:   the instruction bundle at regs->pc
+ * @regs:     user register state at the time of the trap
+ * @mem_op:   which kind of memory operation the bundle performs
+ * @size:     access size in bytes (2 or 4)
+ * @sign_ext: nonzero if a 2-byte load must be sign-extended
+ *
+ * Returns the bundle to execute in place of the original: unchanged if
+ * no fixup applies (aligned address, or non-GPR registers), otherwise
+ * with the memory operation replaced by a harmless instruction.
+ * Returns 0 if a signal (SIGBUS or SIGSEGV) has been forced on the
+ * task and nothing should be executed.
+ */
+static tile_bundle_bits rewrite_load_store_unaligned(
+	struct single_step_state *state,
+	tile_bundle_bits bundle,
+	struct pt_regs *regs,
+	enum mem_op mem_op,
+	int size, int sign_ext)
+{
+	unsigned char __user *addr;
+	int val_reg, addr_reg, err, val;
+
+	/* Get address and value registers */
+	if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) {
+		addr_reg = get_SrcA_Y2(bundle);
+		val_reg = get_SrcBDest_Y2(bundle);
+	} else if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) {
+		addr_reg = get_SrcA_X1(bundle);
+		val_reg = get_Dest_X1(bundle);
+	} else {
+		addr_reg = get_SrcA_X1(bundle);
+		val_reg = get_SrcB_X1(bundle);
+	}
+
+	/*
+	 * If registers are not GPRs, don't try to handle it.
+	 * (A store whose value register is TREG_ZERO is still handled.)
+	 *
+	 * FIXME: we could handle non-GPR loads by getting the real value
+	 * from memory, writing it to the single step buffer, using a
+	 * temp_reg to hold a pointer to that memory, then executing that
+	 * instruction and resetting temp_reg.  For non-GPR stores, it's a
+	 * little trickier; we could use the single step buffer for that
+	 * too, but we'd have to add some more state bits so that we could
+	 * call back in here to copy that value to the real target.  For
+	 * now, we just handle the simple case.
+	 */
+	if ((val_reg >= PTREGS_NR_GPRS &&
+	     (val_reg != TREG_ZERO ||
+	      mem_op == MEMOP_LOAD ||
+	      mem_op == MEMOP_LOAD_POSTINCR)) ||
+	    addr_reg >= PTREGS_NR_GPRS)
+		return bundle;
+
+	/* If it's aligned, don't handle it specially */
+	addr = (void __user *)regs->regs[addr_reg];
+	if (((unsigned long)addr % size) == 0)
+		return bundle;
+
+	/*
+	 * Return SIGBUS with the unaligned address, if requested.
+	 * Note that we return SIGBUS even for completely invalid addresses
+	 * as long as they are in fact unaligned; this matches what the
+	 * tilepro hardware would be doing, if it could provide us with the
+	 * actual bad address in an SPR, which it doesn't.
+	 */
+	if (unaligned_fixup == 0) {
+		siginfo_t info = {
+			.si_signo = SIGBUS,
+			.si_code = BUS_ADRALN,
+			.si_addr = addr
+		};
+		trace_unhandled_signal("unaligned trap", regs,
+				       (unsigned long)addr, SIGBUS);
+		force_sig_info(info.si_signo, &info, current);
+		/* Same cast as the SIGSEGV path and the return type. */
+		return (tile_bundle_bits) 0;
+	}
+
+#ifndef __LITTLE_ENDIAN
+# error We assume little-endian representation with copy_xx_user size 2 here
+#endif
+	/* Handle unaligned load/store */
+	if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) {
+		unsigned short val_16;
+		switch (size) {
+		case 2:
+			err = copy_from_user(&val_16, addr, sizeof(val_16));
+			val = sign_ext ? ((short)val_16) : val_16;
+			break;
+		case 4:
+			err = copy_from_user(&val, addr, sizeof(val));
+			break;
+		default:
+			BUG();
+		}
+		if (err == 0) {
+			/* Record the loaded value for later write-back. */
+			state->update_reg = val_reg;
+			state->update_value = val;
+			state->update = 1;
+		}
+	} else {
+		/* Stores copy the low "size" bytes of the value register. */
+		val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg];
+		err = copy_to_user(addr, &val, size);
+	}
+
+	if (err) {
+		siginfo_t info = {
+			.si_signo = SIGSEGV,
+			.si_code = SEGV_MAPERR,
+			.si_addr = addr
+		};
+		trace_unhandled_signal("segfault", regs,
+				       (unsigned long)addr, SIGSEGV);
+		force_sig_info(info.si_signo, &info, current);
+		return (tile_bundle_bits) 0;
+	}
+
+	/* Log every fixup if asked to, otherwise only the very first. */
+	if (unaligned_printk || unaligned_fixup_count == 0) {
+		pr_info("Process %d/%s: PC %#lx: Fixup of"
+			" unaligned %s at %#lx.\n",
+			current->pid, current->comm, regs->pc,
+			(mem_op == MEMOP_LOAD ||
+			 mem_op == MEMOP_LOAD_POSTINCR) ?
+			"load" : "store",
+			(unsigned long)addr);
+		if (!unaligned_printk) {
+#define P pr_info
+P("\n");
+P("Unaligned fixups in the kernel will slow your application considerably.\n");
+P("To find them, write a \"1\" to /proc/sys/tile/unaligned_fixup/printk,\n");
+P("which requests the kernel show all unaligned fixups, or write a \"0\"\n");
+P("to /proc/sys/tile/unaligned_fixup/enabled, in which case each unaligned\n");
+P("access will become a SIGBUS you can debug. No further warnings will be\n");
+P("shown so as to avoid additional slowdown, but you can track the number\n");
+P("of fixups performed via /proc/sys/tile/unaligned_fixup/count.\n");
+P("Use the tile-addr2line command (see \"info addr2line\") to decode PCs.\n");
+P("\n");
+#undef P
+		}
+	}
+	++unaligned_fixup_count;
+
+	/*
+	 * The access has been emulated above, so replace the memory
+	 * operation in the bundle with something harmless while leaving
+	 * the other slots of the bundle intact.
+	 */
+	if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) {
+		/* Convert the Y2 instruction to a prefetch. */
+		bundle &= ~(create_SrcBDest_Y2(-1) |
+			    create_Opcode_Y2(-1));
+		bundle |= (create_SrcBDest_Y2(TREG_ZERO) |
+			   create_Opcode_Y2(LW_OPCODE_Y2));
+	/* Replace the load postincr with an addi */
+	} else if (mem_op == MEMOP_LOAD_POSTINCR) {
+		bundle = addi_X1(bundle, addr_reg, addr_reg,
+				 get_Imm8_X1(bundle));
+	/* Replace the store postincr with an addi */
+	} else if (mem_op == MEMOP_STORE_POSTINCR) {
+		bundle = addi_X1(bundle, addr_reg, addr_reg,
+				 get_Dest_Imm8_X1(bundle));
+	} else {
+		/* Convert the X1 instruction to a nop. */
+		bundle &= ~(create_Opcode_X1(-1) |
+			    create_UnShOpcodeExtension_X1(-1) |
+			    create_UnOpcodeExtension_X1(-1));
+		bundle |= (create_Opcode_X1(SHUN_0_OPCODE_X1) |
+			   create_UnShOpcodeExtension_X1(
+				UN_0_SHUN_0_OPCODE_X1) |
+			   create_UnOpcodeExtension_X1(
+				NOP_UN_0_SHUN_0_OPCODE_X1));
+	}
+
+	return bundle;
+}
+
+/*
+ * Called after execve() has started the new image. This allows us
+ * to reset the info state. Note that the mmap'ed memory, if there
+ * was any, has already been unmapped by the exec.
+ */
+void single_step_execve(void)
+{
+	struct thread_info *ti = current_thread_info();
+	struct single_step_state *stale = ti->step_state;
+
+	/* Detach the old single-step state from the thread and free it. */
+	ti->step_state = NULL;
+	kfree(stale);
+}
+
+/**
+ * single_step_once() - entry point when single stepping has been triggered.
+ * @regs: The machine register state
+ *
+ * When we arrive at this routine via a trampoline, the single step
+ * engine copies the executing bundle to the single step buffer.
+ * If the instruction is a condition branch, then the target is
+ * reset to one past the next instruction. If the instruction
+ * sets the lr, then that is noted. If the instruction is a jump
+ * or call, then the new target pc is preserved and the current
+ * bundle instruction set to null.
+ *
+ * The necessary post-single-step rewriting information is stored in
+ * the thread's single_step_state (info->step_state), because the
+ * stack will be rewound when we run the rewritten single-stepped
+ * instruction.
+ *
+ * This routine is also used, with TIF_SINGLESTEP clear, to fix up
+ * unaligned loads and stores; in that case the buffer ends with a
+ * jump back to the instruction following the original bundle.
+ */
+void single_step_once(struct pt_regs *regs)
+{
+ extern tile_bundle_bits __single_step_ill_insn;
+ extern tile_bundle_bits __single_step_j_insn;
+ extern tile_bundle_bits __single_step_addli_insn;
+ extern tile_bundle_bits __single_step_auli_insn;
+ struct thread_info *info = (void *)current_thread_info();
+ struct single_step_state *state = info->step_state;
+ int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
+ tile_bundle_bits __user *buffer, *pc;
+ tile_bundle_bits bundle;
+ int temp_reg;
+ int target_reg = TREG_LR;
+ int err;
+ enum mem_op mem_op = MEMOP_NONE;
+ int size = 0, sign_ext = 0; /* happy compiler */
+
+ /*
+  * Template bundles, emitted into .rodata.single_step, that are
+  * patched and copied into the per-thread buffer below.
+  */
+ asm(
+" .pushsection .rodata.single_step\n"
+" .align 8\n"
+" .globl __single_step_ill_insn\n"
+"__single_step_ill_insn:\n"
+" ill\n"
+" .globl __single_step_addli_insn\n"
+"__single_step_addli_insn:\n"
+" { nop; addli r0, zero, 0 }\n"
+" .globl __single_step_auli_insn\n"
+"__single_step_auli_insn:\n"
+" { nop; auli r0, r0, 0 }\n"
+" .globl __single_step_j_insn\n"
+"__single_step_j_insn:\n"
+" j .\n"
+" .popsection\n"
+ );
+
+ /*
+ * Enable interrupts here to allow touching userspace and the like.
+ * The callers expect this: do_trap() already has interrupts
+ * enabled, and do_work_pending() handles functions that enable
+ * interrupts internally.
+ */
+ local_irq_enable();
+
+ /* First use by this thread: set up its single-step state. */
+ if (state == NULL) {
+ /* allocate the kernel-side bookkeeping structure */
+ state = kmalloc(sizeof(struct single_step_state), GFP_KERNEL);
+ if (state == NULL) {
+ pr_err("Out of kernel memory trying to single-step\n");
+ return;
+ }
+
+ /* allocate a cache line of writable, executable memory */
+ buffer = (void __user *) vm_mmap(NULL, 0, 64,
+ PROT_EXEC | PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ 0);
+
+ if (IS_ERR((void __force *)buffer)) {
+ kfree(state);
+ pr_err("Out of kernel pages trying to single-step\n");
+ return;
+ }
+
+ state->buffer = buffer;
+ state->is_enabled = 0;
+
+ info->step_state = state;
+
+ /* Validate our stored instruction patterns */
+ BUG_ON(get_Opcode_X1(__single_step_addli_insn) !=
+ ADDLI_OPCODE_X1);
+ BUG_ON(get_Opcode_X1(__single_step_auli_insn) !=
+ AULI_OPCODE_X1);
+ BUG_ON(get_SrcA_X1(__single_step_addli_insn) != TREG_ZERO);
+ BUG_ON(get_Dest_X1(__single_step_addli_insn) != 0);
+ BUG_ON(get_JOffLong_X1(__single_step_j_insn) != 0);
+ }
+
+ /*
+ * If we are returning from a syscall, we still haven't hit the
+ * "ill" for the swint1 instruction. So back the PC up to be
+ * pointing at the swint1, but we'll actually return directly
+ * back to the "ill" so we come back in via SIGILL as if we
+ * had "executed" the swint1 without ever being in kernel space.
+ */
+ if (regs->faultnum == INT_SWINT_1)
+ regs->pc -= 8;
+
+ /* Fetch the bundle that is about to be stepped. */
+ pc = (tile_bundle_bits __user *)(regs->pc);
+ if (get_user(bundle, pc) != 0) {
+ pr_err("Couldn't read instruction at %p trying to step\n", pc);
+ return;
+ }
+
+ /* We'll follow the instruction with 2 ill op bundles */
+ state->orig_pc = (unsigned long)pc;
+ state->next_pc = (unsigned long)(pc + 1);
+ state->branch_next_pc = 0;
+ state->update = 0;
+
+ if (!(bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK)) {
+ /* two wide, check for control flow */
+ int opcode = get_Opcode_X1(bundle);
+
+ switch (opcode) {
+ /* branches */
+ case BRANCH_OPCODE_X1:
+ {
+ s32 offset = signExtend17(get_BrOff_X1(bundle));
+
+ /*
+ * For branches, we use a rewriting trick to let the
+ * hardware evaluate whether the branch is taken or
+ * untaken. We record the target offset and then
+ * rewrite the branch instruction to target 1 insn
+ * ahead if the branch is taken. We then follow the
+ * rewritten branch with two bundles, each containing
+ * an "ill" instruction. The supervisor examines the
+ * pc after the single step code is executed, and if
+ * the pc is the first ill instruction, then the
+ * branch (if any) was not taken. If the pc is the
+ * second ill instruction, then the branch was
+ * taken. The new pc is computed for these cases, and
+ * inserted into the registers for the thread. If
+ * the pc is the start of the single step code, then
+ * an exception or interrupt was taken before the
+ * code started processing, and the same "original"
+ * pc is restored. This change, different from the
+ * original implementation, has the advantage of
+ * executing a single user instruction.
+ */
+ state->branch_next_pc = (unsigned long)(pc + offset);
+
+ /*
+  * rewrite the branch offset to 2, so that a taken branch
+  * lands on the second "ill" bundle (see above)
+  */
+ bundle = set_BrOff_X1(bundle, 2);
+ }
+ break;
+
+ /* jumps */
+ case JALB_OPCODE_X1:
+ case JALF_OPCODE_X1:
+ /* jump-and-link: the link register is set via "update" below */
+ state->update = 1;
+ state->next_pc =
+ (unsigned long) (pc + get_JOffLong_X1(bundle));
+ break;
+
+ case JB_OPCODE_X1:
+ case JF_OPCODE_X1:
+ state->next_pc =
+ (unsigned long) (pc + get_JOffLong_X1(bundle));
+ bundle = nop_X1(bundle);
+ break;
+
+ case SPECIAL_0_OPCODE_X1:
+ switch (get_RRROpcodeExtension_X1(bundle)) {
+ /* jump-register */
+ case JALRP_SPECIAL_0_OPCODE_X1:
+ case JALR_SPECIAL_0_OPCODE_X1:
+ state->update = 1;
+ state->next_pc =
+ regs->regs[get_SrcA_X1(bundle)];
+ break;
+
+ case JRP_SPECIAL_0_OPCODE_X1:
+ case JR_SPECIAL_0_OPCODE_X1:
+ state->next_pc =
+ regs->regs[get_SrcA_X1(bundle)];
+ bundle = nop_X1(bundle);
+ break;
+
+ case LNK_SPECIAL_0_OPCODE_X1:
+ /* lnk writes the next pc into its own destination register */
+ state->update = 1;
+ target_reg = get_Dest_X1(bundle);
+ break;
+
+ /* stores */
+ case SH_SPECIAL_0_OPCODE_X1:
+ mem_op = MEMOP_STORE;
+ size = 2;
+ break;
+
+ case SW_SPECIAL_0_OPCODE_X1:
+ mem_op = MEMOP_STORE;
+ size = 4;
+ break;
+ }
+ break;
+
+ /* loads and iret */
+ case SHUN_0_OPCODE_X1:
+ if (get_UnShOpcodeExtension_X1(bundle) ==
+ UN_0_SHUN_0_OPCODE_X1) {
+ switch (get_UnOpcodeExtension_X1(bundle)) {
+ case LH_UN_0_SHUN_0_OPCODE_X1:
+ mem_op = MEMOP_LOAD;
+ size = 2;
+ sign_ext = 1;
+ break;
+
+ case LH_U_UN_0_SHUN_0_OPCODE_X1:
+ mem_op = MEMOP_LOAD;
+ size = 2;
+ sign_ext = 0;
+ break;
+
+ case LW_UN_0_SHUN_0_OPCODE_X1:
+ mem_op = MEMOP_LOAD;
+ size = 4;
+ break;
+
+ case IRET_UN_0_SHUN_0_OPCODE_X1:
+ {
+ unsigned long ex0_0 = __insn_mfspr(
+ SPR_EX_CONTEXT_0_0);
+ unsigned long ex0_1 = __insn_mfspr(
+ SPR_EX_CONTEXT_0_1);
+ /*
+ * Special-case it if we're iret'ing
+ * to PL0 again. Otherwise just let
+ * it run and it will generate SIGILL.
+ */
+ if (EX1_PL(ex0_1) == USER_PL) {
+ state->next_pc = ex0_0;
+ regs->ex1 = ex0_1;
+ bundle = nop_X1(bundle);
+ }
+ }
+ }
+ }
+ break;
+
+#if CHIP_HAS_WH64()
+ /* postincrement operations */
+ case IMM_0_OPCODE_X1:
+ switch (get_ImmOpcodeExtension_X1(bundle)) {
+ case LWADD_IMM_0_OPCODE_X1:
+ mem_op = MEMOP_LOAD_POSTINCR;
+ size = 4;
+ break;
+
+ case LHADD_IMM_0_OPCODE_X1:
+ mem_op = MEMOP_LOAD_POSTINCR;
+ size = 2;
+ sign_ext = 1;
+ break;
+
+ case LHADD_U_IMM_0_OPCODE_X1:
+ mem_op = MEMOP_LOAD_POSTINCR;
+ size = 2;
+ sign_ext = 0;
+ break;
+
+ case SWADD_IMM_0_OPCODE_X1:
+ mem_op = MEMOP_STORE_POSTINCR;
+ size = 4;
+ break;
+
+ case SHADD_IMM_0_OPCODE_X1:
+ mem_op = MEMOP_STORE_POSTINCR;
+ size = 2;
+ break;
+
+ default:
+ break;
+ }
+ break;
+#endif /* CHIP_HAS_WH64() */
+ }
+
+ if (state->update) {
+ /*
+ * Get an available register. We start with a
+ * bitmask with 1's for available registers.
+ * We truncate to the low 32 registers since
+ * we are guaranteed to have set bits in the
+ * low 32 bits, then use ctz to pick the first.
+ */
+ u32 mask = (u32) ~((1ULL << get_Dest_X0(bundle)) |
+ (1ULL << get_SrcA_X0(bundle)) |
+ (1ULL << get_SrcB_X0(bundle)) |
+ (1ULL << target_reg));
+ temp_reg = __builtin_ctz(mask);
+ /*
+  * Borrow temp_reg to carry the return address (pc+1) into
+  * target_reg; its previous value is saved in update_value.
+  */
+ state->update_reg = temp_reg;
+ state->update_value = regs->regs[temp_reg];
+ regs->regs[temp_reg] = (unsigned long) (pc+1);
+ regs->flags |= PT_FLAGS_RESTORE_REGS;
+ bundle = move_X1(bundle, target_reg, temp_reg);
+ }
+ } else {
+ /* Y-mode bundle: only the Y2 memory slot matters here. */
+ int opcode = get_Opcode_Y2(bundle);
+
+ switch (opcode) {
+ /* loads */
+ case LH_OPCODE_Y2:
+ mem_op = MEMOP_LOAD;
+ size = 2;
+ sign_ext = 1;
+ break;
+
+ case LH_U_OPCODE_Y2:
+ mem_op = MEMOP_LOAD;
+ size = 2;
+ sign_ext = 0;
+ break;
+
+ case LW_OPCODE_Y2:
+ mem_op = MEMOP_LOAD;
+ size = 4;
+ break;
+
+ /* stores */
+ case SH_OPCODE_Y2:
+ mem_op = MEMOP_STORE;
+ size = 2;
+ break;
+
+ case SW_OPCODE_Y2:
+ mem_op = MEMOP_STORE;
+ size = 4;
+ break;
+ }
+ }
+
+ /*
+ * Check if we need to rewrite an unaligned load/store.
+ * Returning zero is a special value meaning a signal (SIGBUS or
+ * SIGSEGV) has already been forced and we should not step.
+ */
+ if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) {
+ bundle = rewrite_load_store_unaligned(state, bundle, regs,
+ mem_op, size, sign_ext);
+ if (bundle == 0)
+ return;
+ }
+
+ /* write the bundle to our execution area */
+ buffer = state->buffer;
+ err = __put_user(bundle, buffer++);
+
+ /*
+ * If we're really single-stepping, we take an INT_ILL after.
+ * If we're just handling an unaligned access, we can just
+ * jump directly back to where we were in user code.
+ */
+ if (is_single_step) {
+ err |= __put_user(__single_step_ill_insn, buffer++);
+ err |= __put_user(__single_step_ill_insn, buffer++);
+ } else {
+ long delta;
+
+ if (state->update) {
+ /* We have some state to update; do it inline */
+ int ha16;
+ /* addli supplies the low 16 bits of update_value ... */
+ bundle = __single_step_addli_insn;
+ bundle |= create_Dest_X1(state->update_reg);
+ bundle |= create_Imm16_X1(state->update_value);
+ err |= __put_user(bundle, buffer++);
+ /* ... and auli adds the (carry-adjusted) high 16 bits. */
+ bundle = __single_step_auli_insn;
+ bundle |= create_Dest_X1(state->update_reg);
+ bundle |= create_SrcA_X1(state->update_reg);
+ ha16 = (state->update_value + 0x8000) >> 16;
+ bundle |= create_Imm16_X1(ha16);
+ err |= __put_user(bundle, buffer++);
+ state->update = 0;
+ }
+
+ /* End with a jump back to the next instruction */
+ delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) -
+ (unsigned long)buffer) >>
+ TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
+ bundle = __single_step_j_insn;
+ bundle |= create_JOffLong_X1(delta);
+ err |= __put_user(bundle, buffer++);
+ }
+
+ if (err) {
+ pr_err("Fault when writing to single-step buffer\n");
+ return;
+ }
+
+ /*
+ * Flush the buffer.
+ * We do a local flush only, since this is a thread-specific buffer.
+ */
+ __flush_icache_range((unsigned long)state->buffer,
+ (unsigned long)buffer);
+
+ /* Indicate enabled */
+ state->is_enabled = is_single_step;
+ /* Resume user execution at the start of the rewritten buffer. */
+ regs->pc = (unsigned long)state->buffer;
+
+ /* Fault immediately if we are coming back from a syscall. */
+ if (regs->faultnum == INT_SWINT_1)
+ regs->pc += 8;
+}
+
+#else
+#include <linux/smp.h>
+#include <linux/ptrace.h>
+#include <arch/spr_def.h>
+
+static DEFINE_PER_CPU(unsigned long, ss_saved_pc);
+
+
+/*
+ * Called directly on the occasion of an interrupt.
+ *
+ * If the process doesn't have single step set, then we use this as an
+ * opportunity to turn single step off.
+ *
+ * It has been mentioned that we could conditionally turn off single stepping
+ * on each entry into the kernel and rely on single_step_once to turn it
+ * on for the processes that matter (as we already do), but this
+ * implementation is somewhat more efficient in that we muck with registers
+ * once on a bum interrupt rather than on every entry into the kernel.
+ *
+ * If SINGLE_STEP_CONTROL_K has CANCELED set, then an interrupt occurred,
+ * so we have to run through this process again before we can say that an
+ * instruction has executed.
+ *
+ * swint will set CANCELED, but it's a legitimate instruction. Fortunately
+ * it changes the PC. If it hasn't changed, then we know that the interrupt
+ * wasn't generated by swint and we'll need to run this process again before
+ * we can say an instruction has executed.
+ *
+ * If either CANCELED == 0 or the PC's changed, we send out SIGTRAPs and get
+ * on with our lives.
+ */
+
+void gx_singlestep_handle(struct pt_regs *regs, int fault_num)
+{
+	unsigned long *saved_pc = &__get_cpu_var(ss_saved_pc);
+	struct thread_info *ti = (void *)current_thread_info();
+	int stepping = test_ti_thread_flag(ti, TIF_SINGLESTEP);
+	unsigned long ctl = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K);
+
+	/* This task isn't being stepped: just turn the hardware off. */
+	if (!stepping) {
+		__insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 0);
+		return;
+	}
+
+	/*
+	 * Same PC and CANCELED set: the instruction has not executed
+	 * yet, so there is nothing to report this time around.
+	 */
+	if (*saved_pc == regs->pc &&
+	    (ctl & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))
+		return;
+
+	ptrace_notify(SIGTRAP);
+	ctl |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK;
+	ctl |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK;
+	__insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, ctl);
+}
+
+
+/*
+ * Called from need_singlestep. Set up the control registers and the enable
+ * register, then return back.
+ */
+
+void single_step_once(struct pt_regs *regs)
+{
+	unsigned long *saved_pc = &__get_cpu_var(ss_saved_pc);
+	unsigned long ctl = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K);
+
+	/* Remember where we are so the handler can tell if the PC moved. */
+	*saved_pc = regs->pc;
+
+	ctl |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK |
+	       SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK;
+	__insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, ctl);
+
+	/* Enable single-stepping for the user protection level. */
+	__insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 1 << USER_PL);
+}
+
+/* This variant keeps no per-thread single-step state, so execve has nothing to reset. */
+void single_step_execve(void)
+{
+}
+
+#endif /* !__tilegx__ */
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
new file mode 100644
index 00000000..91da0f72
--- /dev/null
+++ b/arch/tile/kernel/smp.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE SMP support routines.
+ */
+
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <asm/cacheflush.h>
+
+/* Exported so that code outside this file can reference it. */
+HV_Topology smp_topology __write_once;
+EXPORT_SYMBOL(smp_topology);
+
+#if CHIP_HAS_IPI()
+/*
+ * One MMIO mapping per cpu, filled in by ipi_init() and stored to by
+ * smp_send_reschedule() to raise IRQ_RESCHEDULE on the target cpu.
+ */
+static unsigned long __iomem *ipi_mappings[NR_CPUS];
+#endif
+
+
+/*
+ * Top-level send_IPI*() functions to send messages to other cpus.
+ */
+
+/* Set by smp_send_stop() to avoid recursive panics. */
+static int stopping_cpus;
+
+/*
+ * Keep calling hv_send_message() on the recipient array until the
+ * running total of its return values reaches nrecip.  A negative
+ * return is fatal unless we are already stopping the other cpus.
+ */
+static void __send_IPI_many(HV_Recipient *recip, int nrecip, int tag)
+{
+	int delivered;
+
+	for (delivered = 0; delivered < nrecip; ) {
+		int rc = hv_send_message(recip, nrecip,
+					 (HV_VirtAddr)&tag, sizeof(tag));
+		if (rc >= 0) {
+			WARN_ONCE(rc == 0, "hv_send_message() returned zero\n");
+			delivered += rc;
+			continue;
+		}
+
+		/* Error path: panic, except during stop (avoid recursion). */
+		if (!stopping_cpus)
+			panic("hv_send_message returned %d", rc);
+		return;
+	}
+}
+
+/* Send a tagged message to one cpu, addressed by its (x, y) tile position. */
+void send_IPI_single(int cpu, int tag)
+{
+	/* Linear cpu number -> column (x) and row (y) on an smp_width grid. */
+	HV_Recipient target = {
+		.x = cpu % smp_width,
+		.y = cpu / smp_width,
+		.state = HV_TO_BE_SENT
+	};
+
+	__send_IPI_many(&target, 1, tag);
+}
+
+/* Send a tagged message to every cpu in mask; the caller must not be in it. */
+void send_IPI_many(const struct cpumask *mask, int tag)
+{
+	HV_Recipient targets[NR_CPUS];
+	int self = smp_processor_id();
+	int count = 0;
+	int cpu;
+
+	for_each_cpu(cpu, mask) {
+		/* Sending to ourselves through this path is a bug. */
+		BUG_ON(cpu == self);
+		targets[count].y = cpu / smp_width;
+		targets[count].x = cpu % smp_width;
+		targets[count].state = HV_TO_BE_SENT;
+		count++;
+	}
+	__send_IPI_many(targets, count, tag);
+}
+
+/* Send a tagged message to every online cpu except the caller. */
+void send_IPI_allbutself(int tag)
+{
+	struct cpumask others;
+
+	/* Start from the online set and drop the calling cpu. */
+	cpumask_copy(&others, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &others);
+
+	send_IPI_many(&others, tag);
+}
+
+/*
+ * Functions related to starting/stopping cpus.
+ */
+
+/*
+ * Handler to start the current cpu: point the saved interrupt pc at
+ * start_cpu_function_addr.
+ */
+static void smp_start_cpu_interrupt(void)
+{
+	struct pt_regs *regs = get_irq_regs();
+
+	regs->pc = start_cpu_function_addr;
+}
+
+/*
+ * Handler to stop the current cpu: mark it offline, disable all
+ * interrupts, and nap forever.  Never returns.
+ */
+static void smp_stop_cpu_interrupt(void)
+{
+	set_cpu_online(smp_processor_id(), 0);
+	arch_local_irq_disable_all();
+	while (1)
+		asm("nap; nop");
+}
+
+/*
+ * This function calls the 'stop' function on all other CPUs in the system.
+ * stopping_cpus is set first so that a failure inside __send_IPI_many()
+ * breaks out of the send loop instead of panicking again.
+ */
+void smp_send_stop(void)
+{
+ stopping_cpus = 1;
+ send_IPI_allbutself(MSG_TAG_STOP_CPU);
+}
+
+/*
+ * On panic, just wait; we may get an smp_send_stop() later on.
+ * Naps in an endless loop and never returns.
+ */
+void panic_smp_self_stop(void)
+{
+	for (;;)
+		asm("nap; nop");
+}
+
+/*
+ * Dispatch code called from hv_message_intr() for HV_MSG_TILE hv messages.
+ * Each known tag is routed to its handler; an unknown tag is fatal.
+ */
+void evaluate_message(int tag)
+{
+	if (tag == MSG_TAG_START_CPU) {
+		/* Start up a cpu */
+		smp_start_cpu_interrupt();
+	} else if (tag == MSG_TAG_STOP_CPU) {
+		/* Sent to shut down slave CPU's */
+		smp_stop_cpu_interrupt();
+	} else if (tag == MSG_TAG_CALL_FUNCTION_MANY) {
+		/* Call function on cpumask */
+		generic_smp_call_function_interrupt();
+	} else if (tag == MSG_TAG_CALL_FUNCTION_SINGLE) {
+		/* Call function on one other CPU */
+		generic_smp_call_function_single_interrupt();
+	} else {
+		panic("Unknown IPI message tag %d", tag);
+	}
+}
+
+
+/*
+ * flush_icache_range() code uses smp_call_function().
+ */
+
+/* Argument bundle handed to ipi_flush_icache_range() on each cpu. */
+struct ipi_flush {
+ unsigned long start; /* first argument to __flush_icache_range() */
+ unsigned long end; /* second argument to __flush_icache_range() */
+};
+
+/* Per-cpu callback: unpack the struct ipi_flush and flush that range. */
+static void ipi_flush_icache_range(void *info)
+{
+	struct ipi_flush *range = info;
+
+	__flush_icache_range(range->start, range->end);
+}
+
+/*
+ * Run __flush_icache_range(start, end) on every cpu via on_each_cpu(),
+ * with preemption disabled around the call.
+ */
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+	struct ipi_flush range = {
+		.start = start,
+		.end = end,
+	};
+
+	preempt_disable();
+	on_each_cpu(ipi_flush_icache_range, &range, 1);
+	preempt_enable();
+}
+
+
+/*
+ * Called when smp_send_reschedule() triggers IRQ_RESCHEDULE.
+ * Bumps this cpu's resched counter and hands off to scheduler_ipi().
+ */
+static irqreturn_t handle_reschedule_ipi(int irq, void *token)
+{
+	++__get_cpu_var(irq_stat).irq_resched_count;
+	scheduler_ipi();
+	return IRQ_HANDLED;
+}
+
+/* irqaction registered for IRQ_RESCHEDULE by ipi_init(). */
+static struct irqaction resched_action = {
+ .handler = handle_reschedule_ipi,
+ .name = "resched",
+ .dev_id = handle_reschedule_ipi /* unique token */,
+};
+
+/*
+ * Boot-time IPI setup.  On chips with IPI hardware, map each possible
+ * cpu's trigger page into ipi_mappings[]; on all chips, register the
+ * IRQ_RESCHEDULE handler.  Any failure is fatal.
+ */
+void __init ipi_init(void)
+{
+#if CHIP_HAS_IPI()
+	int cpu;
+
+	/* Map IPI trigger MMIO addresses. */
+	for_each_possible_cpu(cpu) {
+		HV_Coord where;
+		HV_PTE ipi_pte;
+		unsigned long phys;
+
+		where.x = cpu_x(cpu);
+		where.y = cpu_y(cpu);
+		if (hv_get_ipi_pte(where, KERNEL_PL, &ipi_pte) != 0)
+			panic("Failed to initialize IPI for cpu %d\n", cpu);
+
+		/* The PTE's pfn gives the address of the trigger page. */
+		phys = hv_pte_get_pfn(ipi_pte) << PAGE_SHIFT;
+		ipi_mappings[cpu] = ioremap_prot(phys, PAGE_SIZE, ipi_pte);
+	}
+#endif
+
+	/* Bind handle_reschedule_ipi() to IRQ_RESCHEDULE. */
+	tile_irq_activate(IRQ_RESCHEDULE, TILE_IRQ_PERCPU);
+	BUG_ON(setup_irq(IRQ_RESCHEDULE, &resched_action));
+}
+
+#if CHIP_HAS_IPI()
+
+/* Raise IRQ_RESCHEDULE on cpu by storing to its mapped IPI trigger page. */
+void smp_send_reschedule(int cpu)
+{
+	unsigned long __force *trigger;
+
+	WARN_ON(cpu_is_offline(cpu));
+
+	/*
+	 * We just want to do an MMIO store. The traditional writeq()
+	 * functions aren't really correct here, since they're always
+	 * directed at the PCI shim. For now, just do a raw store,
+	 * casting away the __iomem attribute.
+	 */
+	trigger = (unsigned long __force *)ipi_mappings[cpu];
+	trigger[IRQ_RESCHEDULE] = 0;
+}
+
+#else
+
+/* Raise IRQ_RESCHEDULE on cpu by asking the hypervisor to trigger the IPI. */
+void smp_send_reschedule(int cpu)
+{
+	HV_Coord target;
+
+	WARN_ON(cpu_is_offline(cpu));
+
+	target.x = cpu_x(cpu);
+	target.y = cpu_y(cpu);
+	hv_trigger_ipi(target, IRQ_RESCHEDULE);
+}
+
+#endif /* CHIP_HAS_IPI() */
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
new file mode 100644
index 00000000..172aef7d
--- /dev/null
+++ b/arch/tile/kernel/smpboot.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/kernel_stat.h>
+#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+
+/*
+ * State of each CPU.  This file stores CPU_ONLINE and CPU_UP_PREPARE here.
+ */
+static DEFINE_PER_CPU(int, cpu_state) = { 0 };
+
+/*
+ * The messaging code jumps to this pointer during boot-up.
+ * smp_prepare_cpus() points it at online_secondary(); smp_cpus_done()
+ * later redirects it to panic_start_cpu().
+ */
+unsigned long start_cpu_function_addr;
+
+/*
+ * Called very early during startup to mark boot cpu as online
+ * and present, then initialize messaging on it.
+ */
+void __init smp_prepare_boot_cpu(void)
+{
+	int boot = smp_processor_id();
+
+	set_cpu_online(boot, 1);
+	set_cpu_present(boot, 1);
+	__get_cpu_var(cpu_state) = CPU_ONLINE;
+
+	init_messaging();
+}
+
+static void start_secondary(void);
+
+/*
+ * Called at the top of init() to launch all the other CPUs.
+ * They run free to complete their initialization and then wait
+ * until they get an IPI from the boot cpu to come online.
+ *
+ * For each possible non-boot cpu this forks an idle thread and records
+ * its stack and entry point in the per-cpu boot_sp/boot_pc slots;
+ * cpus that are not possible are pointed at smp_nap instead.
+ */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ long rc;
+ int cpu, cpu_count;
+ int boot_cpu = smp_processor_id();
+
+ current_thread_info()->cpu = boot_cpu;
+
+ /*
+ * Pin this task to the boot CPU while we bring up the others,
+ * just to make sure we don't uselessly migrate as they come up.
+ */
+ rc = sched_setaffinity(current->pid, cpumask_of(boot_cpu));
+ if (rc != 0)
+ pr_err("Couldn't set init affinity to boot cpu (%ld)\n", rc);
+
+ /* Print information about disabled and dataplane cpus. */
+ print_disabled_cpus();
+
+ /*
+ * Tell the messaging subsystem how to respond to the
+ * startup message. We use a level of indirection to avoid
+ * confusing the linker with the fact that the messaging
+ * subsystem is calling __init code.
+ */
+ start_cpu_function_addr = (unsigned long) &online_secondary;
+
+ /* Set up thread context for all new processors. */
+ /* The boot cpu is skipped in the loop, so the count starts at one. */
+ cpu_count = 1;
+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ struct task_struct *idle;
+
+ if (cpu == boot_cpu)
+ continue;
+
+ if (!cpu_possible(cpu)) {
+ /*
+ * Make this processor do nothing on boot.
+ * Note that we don't give the boot_pc function
+ * a stack, so it has to be assembly code.
+ */
+ per_cpu(boot_sp, cpu) = 0;
+ per_cpu(boot_pc, cpu) = (unsigned long) smp_nap;
+ continue;
+ }
+
+ /* Create a new idle thread to run start_secondary() */
+ idle = fork_idle(cpu);
+ if (IS_ERR(idle))
+ panic("failed fork for CPU %d", cpu);
+ idle->thread.pc = (unsigned long) start_secondary;
+
+ /* Make this thread the boot thread for this processor */
+ per_cpu(boot_sp, cpu) = task_ksp0(idle);
+ per_cpu(boot_pc, cpu) = idle->thread.pc;
+
+ ++cpu_count;
+ }
+ /* max_cpus == 0 is treated as a limit of one cpu. */
+ BUG_ON(cpu_count > (max_cpus ? max_cpus : 1));
+
+ /* Fire up the other tiles, if any */
+ init_cpu_present(cpu_possible_mask);
+ if (cpumask_weight(cpu_present_mask) > 1) {
+ mb(); /* make sure all data is visible to new processors */
+ hv_start_all_tiles();
+ }
+}
+
+/* Affinity mask saved by smp_cpus_done() and re-applied below. */
+static __initdata struct cpumask init_affinity;
+
+/*
+ * Late initcall: set the current task's affinity back to init_affinity.
+ * A failure is only warned about; the initcall always returns 0.
+ */
+static __init int reset_init_affinity(void)
+{
+	long err = sched_setaffinity(current->pid, &init_affinity);
+
+	if (err)
+		pr_warning("couldn't reset init affinity (%ld)\n", err);
+	return 0;
+}
+late_initcall(reset_init_affinity);
+
+/* Cpus that have reached the end of start_secondary(); polled by __cpu_up(). */
+static struct cpumask cpu_started __cpuinitdata;
+
+/*
+ * Activate a secondary processor. Very minimal; don't add anything
+ * to this path without knowing what you're doing, since SMP booting
+ * is pretty fragile.
+ *
+ * Installed as the idle thread's entry point by smp_prepare_cpus().
+ * Finishes by marking the cpu in cpu_started and calling smp_nap().
+ */
+static void __cpuinit start_secondary(void)
+{
+ int cpuid = smp_processor_id();
+
+ /* Set our thread pointer appropriately. */
+ set_my_cpu_offset(__per_cpu_offset[cpuid]);
+
+ preempt_disable();
+
+ /*
+ * In large machines even this will slow us down, since we
+ * will be contending for the printk spinlock.
+ */
+ /* printk(KERN_DEBUG "Initializing CPU#%d\n", cpuid); */
+
+ /* Initialize the current asid for our first page table. */
+ __get_cpu_var(current_asid) = min_asid;
+
+ /* Set up this thread as another owner of the init_mm */
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+ if (current->mm)
+ BUG();
+ enter_lazy_tlb(&init_mm, current);
+
+ /* Allow hypervisor messages to be received */
+ init_messaging();
+ local_irq_enable();
+
+ /* Indicate that we're ready to come up. */
+ /* Must not do this before we're ready to receive messages */
+ if (cpumask_test_and_set_cpu(cpuid, &cpu_started)) {
+ /* Bit was already set: warn and spin here forever. */
+ pr_warning("CPU#%d already started!\n", cpuid);
+ for (;;)
+ local_irq_enable();
+ }
+
+ smp_nap();
+}
+
+/*
+ * Bring a secondary processor online.
+ *
+ * smp_prepare_cpus() stores this function's address in
+ * start_cpu_function_addr.  It marks the cpu online, sets up per-cpu
+ * state and the timer, and then enters cpu_idle().
+ */
+void __cpuinit online_secondary(void)
+{
+ /*
+ * low-memory mappings have been cleared, flush them from
+ * the local TLBs too.
+ */
+ local_flush_tlb();
+
+ BUG_ON(in_interrupt());
+
+ /* This must be done before setting cpu_online_mask */
+ wmb();
+
+ notify_cpu_starting(smp_processor_id());
+
+ /*
+ * We need to hold call_lock, so there is no inconsistency
+ * between the time smp_call_function() determines number of
+ * IPI recipients, and the time when the determination is made
+ * for which cpus receive the IPI. Holding this
+ * lock helps us to not include this cpu in a currently in progress
+ * smp_call_function().
+ */
+ ipi_call_lock();
+ set_cpu_online(smp_processor_id(), 1);
+ ipi_call_unlock();
+ __get_cpu_var(cpu_state) = CPU_ONLINE;
+
+ /* Set up tile-specific state for this cpu. */
+ setup_cpu(0);
+
+ /* Set up tile-timer clock-event device on this cpu */
+ setup_tile_timer();
+
+ /* NOTE(review): presumably pairs with preempt_disable() in start_secondary() - confirm. */
+ preempt_enable();
+
+ cpu_idle();
+}
+
+/*
+ * Wait for the given cpu to appear in cpu_started, then send it
+ * MSG_TAG_START_CPU and spin until it shows up in cpu_online_mask.
+ *
+ * Returns 0 on success, or -EIO if the cpu did not report in before the
+ * shared timeout ran out.
+ */
+int __cpuinit __cpu_up(unsigned int cpu)
+{
+	/*
+	 * Wait 5s in total for all CPUs to come online.  The counter is
+	 * static, so the budget (50000 polls of 100us) is shared across
+	 * every call rather than reset per cpu.
+	 */
+	static int timeout;
+
+	while (!cpumask_test_cpu(cpu, &cpu_started)) {
+		if (timeout >= 50000) {
+			pr_info("skipping unresponsive cpu%d\n", cpu);
+			local_irq_enable();
+			return -EIO;
+		}
+		udelay(100);
+		timeout++;
+	}
+
+	local_irq_enable();
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+
+	/* Unleash the CPU! */
+	send_IPI_single(cpu, MSG_TAG_START_CPU);
+	while (!cpumask_test_cpu(cpu, cpu_online_mask))
+		cpu_relax();
+	return 0;
+}
+
+/*
+ * Installed in start_cpu_function_addr by smp_cpus_done(), replacing
+ * online_secondary(); panics if it is ever reached.
+ */
+static void panic_start_cpu(void)
+{
+ panic("Received a MSG_START_CPU IPI after boot finished.");
+}
+
+/*
+ * Called once secondary bring-up is finished: make any further start
+ * IPI fatal, remember the online set as init's eventual affinity, and
+ * pin the current task to the highest-numbered online cpu for now.
+ */
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+	int last, next, rc;
+
+	/* Reset the response to a (now illegal) MSG_START_CPU IPI. */
+	start_cpu_function_addr = (unsigned long) &panic_start_cpu;
+
+	cpumask_copy(&init_affinity, cpu_online_mask);
+
+	/*
+	 * Pin ourselves to a single cpu in the initial affinity set
+	 * so that kernel mappings for the rootfs are not in the dataplane,
+	 * if set, and to avoid unnecessary migrating during bringup.
+	 * Use the last cpu just in case the whole chip has been
+	 * isolated from the scheduler, to keep init away from likely
+	 * more useful user code. This also ensures that work scheduled
+	 * via schedule_delayed_work() in the init routines will land
+	 * on this cpu.
+	 */
+	last = cpumask_first(&init_affinity);
+	while ((next = cpumask_next(last, &init_affinity)) < nr_cpu_ids)
+		last = next;
+
+	rc = sched_setaffinity(current->pid, cpumask_of(last));
+	if (rc != 0)
+		pr_err("Couldn't set init affinity to cpu %d (%d)\n", last, rc);
+}
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
new file mode 100644
index 00000000..b2f44c28
--- /dev/null
+++ b/arch/tile/kernel/stack.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/pfn.h>
+#include <linux/kallsyms.h>
+#include <linux/stacktrace.h>
+#include <linux/uaccess.h>
+#include <linux/mmzone.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <asm/backtrace.h>
+#include <asm/page.h>
+#include <asm/ucontext.h>
+#include <asm/switch_to.h>
+#include <asm/sigframe.h>
+#include <asm/stack.h>
+#include <arch/abi.h>
+#include <arch/interrupts.h>
+
+/* Values stored in the 'end' field of struct KBacktraceIterator. */
+#define KBT_ONGOING 0 /* Backtrace still ongoing */
+#define KBT_DONE 1 /* Backtrace cleanly completed */
+#define KBT_RUNNING 2 /* Can't run backtrace on a running task */
+#define KBT_LOOP 3 /* Backtrace entered a loop */
+
+/*
+ * Is address on the specified kernel stack?
+ * With a zero stack base (corrupt task pointer) any address between
+ * PAGE_OFFSET and high_memory is accepted instead.
+ */
+static int in_kernel_stack(struct KBacktraceIterator *kbt, unsigned long sp)
+{
+	ulong base = (ulong) kbt->task->stack;
+	ulong lo, hi;
+
+	if (base == 0) {
+		/* corrupt task pointer; just follow stack... */
+		lo = PAGE_OFFSET;
+		hi = (unsigned long)high_memory;
+	} else {
+		lo = base;
+		hi = base + THREAD_SIZE;
+	}
+	return sp >= lo && sp < hi;
+}
+
+/*
+ * Callback for backtracer; basically a glorified memcpy.
+ * Copies 'size' bytes from 'address' into 'result' with page faults
+ * disabled.  Returns true only if the address is allowed and the whole
+ * copy succeeded.
+ */
+static bool read_memory_func(void *result, unsigned long address,
+			     unsigned int size, void *vkbt)
+{
+	struct KBacktraceIterator *kbt = vkbt;
+	int uncopied;
+
+	if (address == 0)
+		return 0;
+
+	/* Kernel text is always readable; anything else needs a check. */
+	if (!__kernel_text_address(address)) {
+		if (address >= PAGE_OFFSET) {
+			/* We only tolerate kernel-space reads of this task's stack */
+			if (!in_kernel_stack(kbt, address))
+				return 0;
+		} else if (!kbt->is_current) {
+			/* can't read from other user address spaces */
+			return 0;
+		}
+	}
+
+	pagefault_disable();
+	uncopied = __copy_from_user_inatomic(result,
+					     (void __user __force *)address,
+					     size);
+	pagefault_enable();
+	return (uncopied == 0);
+}
+
+/*
+ * Return a pt_regs pointer for a valid fault handler frame.
+ *
+ * The candidate pt_regs sits C_ABI_SAVE_AREA_SIZE bytes above the
+ * iterator's current sp.  It is accepted only if sp is word-aligned,
+ * the whole structure lies on this task's kernel stack, and the saved
+ * pc/sp/ex1 are consistent with either a kernel-mode or a user-mode
+ * fault.  Anything else is an "odd fault" and is rejected, whether or
+ * not verbose output is enabled.
+ *
+ * In profile mode, frames whose fault number is in QUEUED_INTERRUPTS
+ * are rejected as well.
+ *
+ * Returns the pt_regs pointer, or NULL if the frame is not usable.
+ */
+static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
+{
+	const char *fault = NULL; /* happy compiler */
+	char fault_buf[64];
+	unsigned long sp = kbt->it.sp;
+	struct pt_regs *p;
+
+	if (sp % sizeof(long) != 0)
+		return NULL;
+	if (!in_kernel_stack(kbt, sp))
+		return NULL;
+	if (!in_kernel_stack(kbt, sp + C_ABI_SAVE_AREA_SIZE + PTREGS_SIZE-1))
+		return NULL;
+	p = (struct pt_regs *)(sp + C_ABI_SAVE_AREA_SIZE);
+
+	/* The description string is only needed for verbose output. */
+	if (p->faultnum == INT_SWINT_1 || p->faultnum == INT_SWINT_1_SIGRETURN)
+		fault = "syscall";
+	else if (kbt->verbose) {
+		snprintf(fault_buf, sizeof(fault_buf),
+			 "interrupt %ld", p->faultnum);
+		fault = fault_buf;
+	}
+
+	if (EX1_PL(p->ex1) == KERNEL_PL &&
+	    __kernel_text_address(p->pc) &&
+	    in_kernel_stack(kbt, p->sp) &&
+	    p->sp >= sp) {
+		if (kbt->verbose)
+			pr_err(" <%s while in kernel mode>\n", fault);
+	} else if (EX1_PL(p->ex1) == USER_PL &&
+		   p->pc < PAGE_OFFSET &&
+		   p->sp < PAGE_OFFSET) {
+		if (kbt->verbose)
+			pr_err(" <%s while in user mode>\n", fault);
+	} else {
+		/*
+		 * Inconsistent frame: always reject it, and return right
+		 * away so that p is never dereferenced after being
+		 * discarded.
+		 */
+		if (kbt->verbose)
+			pr_err(" (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n",
+			       p->pc, p->sp, p->ex1);
+		return NULL;
+	}
+
+	if (kbt->profile && (INT_MASK(p->faultnum) & QUEUED_INTERRUPTS) != 0)
+		return NULL;
+	return p;
+}
+
+/* Is the pc pointing to a sigreturn trampoline (i.e. equal to VDSO_BASE)? */
+static int is_sigreturn(unsigned long pc)
+{
+	return pc == VDSO_BASE;
+}
+
+/*
+ * Return a pt_regs pointer for a valid signal handler frame.
+ *
+ * The frame is copied from the user stack into *kframe, so the returned
+ * pointer refers to storage inside kframe.  Returns NULL if the iterator
+ * is not at VDSO_BASE, the sp is not a word-aligned address below
+ * PAGE_OFFSET, the copy fails, or the signal number is out of range.
+ */
+static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
+				      struct rt_sigframe* kframe)
+{
+	BacktraceIterator *it = &kbt->it;
+	int err;
+
+	if (it->pc != VDSO_BASE || it->sp >= PAGE_OFFSET ||
+	    it->sp % sizeof(long) != 0)
+		return NULL;
+
+	pagefault_disable();
+	err = __copy_from_user_inatomic(
+		kframe, (void __user __force *)it->sp,
+		sizeof(*kframe));
+	pagefault_enable();
+
+	if (err != 0)
+		return NULL;
+	if ((unsigned int)(kframe->info.si_signo) >= _NSIG)
+		return NULL;
+
+	if (kbt->verbose)
+		pr_err(" <received signal %d>\n", kframe->info.si_signo);
+	return (struct pt_regs *)&kframe->uc.uc_mcontext;
+}
+
+/* Does the iterator's current pc point at the sigreturn trampoline? */
+static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt)
+{
+	unsigned long pc = kbt->it.pc;
+
+	return is_sigreturn(pc);
+}
+
+/*
+ * Try to restart the backtrace from saved registers: first from a fault
+ * handler frame, then from a signal frame.  On success the inner
+ * iterator is re-initialized, new_context is set, and 1 is returned;
+ * otherwise 0 is returned and the iterator is left untouched.
+ */
+static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt)
+{
+	struct rt_sigframe kframe;
+	struct pt_regs *regs = valid_fault_handler(kbt);
+
+	if (!regs)
+		regs = valid_sigframe(kbt, &kframe);
+	if (!regs)
+		return 0;
+
+	/* regs may point into kframe, which is still live here. */
+	backtrace_init(&kbt->it, read_memory_func, kbt,
+		       regs->pc, regs->lr, regs->sp, regs->regs[52]);
+	kbt->new_context = 1;
+	return 1;
+}
+
+/*
+ * Find a frame that isn't a sigreturn, if there is one.
+ * The current frame is examined first ("inclusive").  Returns
+ * KBT_ONGOING when positioned on a non-sigreturn frame, or KBT_DONE
+ * when the backtrace runs out and cannot be restarted.
+ */
+static int KBacktraceIterator_next_item_inclusive(
+ struct KBacktraceIterator *kbt)
+{
+ for (;;) {
+ /* Step past sigreturn frames within the current context. */
+ do {
+ if (!KBacktraceIterator_is_sigreturn(kbt))
+ return KBT_ONGOING;
+ } while (backtrace_next(&kbt->it));
+
+ /* Out of frames here; try restarting from saved registers. */
+ if (!KBacktraceIterator_restart(kbt))
+ return KBT_DONE;
+ }
+}
+
+/*
+ * If the current sp is on a page different than what we recorded
+ * as the top-of-kernel-stack last time we context switched, we have
+ * probably blown the stack, and nothing is going to work out well.
+ * If we can at least get out a warning, that may help the debug,
+ * though we probably won't be able to backtrace into the code that
+ * actually did the recursive damage.
+ *
+ * Only prints warnings; regs must be non-NULL.
+ */
+static void validate_stack(struct pt_regs *regs)
+{
+	unsigned long top = get_current_ksp0();
+	unsigned long base = top - THREAD_SIZE;
+	unsigned long cur_sp = stack_pointer;
+	int cpu = smp_processor_id();
+
+	/* A kernel-mode caller whose sp is at or above the stack top. */
+	if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= top) {
+		pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n"
+		       " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
+		       cpu, base, cur_sp, regs->sp, regs->pc, regs->lr);
+		return;
+	}
+
+	/* Our own sp has dropped into the thread_info at the stack base. */
+	if (cur_sp < base + sizeof(struct thread_info)) {
+		pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n"
+		       " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
+		       cpu, base, cur_sp, regs->sp, regs->pc, regs->lr);
+	}
+}
+
+/*
+ * Initialize a kernel backtrace iterator.
+ *
+ * @kbt:  iterator to set up
+ * @t:    task to trace; NULL means the current task
+ * @regs: starting registers, or NULL to start from the task's saved
+ *        context-switch state
+ *
+ * With NULL regs the task must be neither current nor running; otherwise
+ * kbt->end is set to KBT_RUNNING and no frames are produced.  On return
+ * the iterator is positioned on the first non-sigreturn frame, or
+ * kbt->end says why there is none.  verbose and profile are cleared;
+ * callers override them afterwards if desired.
+ */
+void KBacktraceIterator_init(struct KBacktraceIterator *kbt,
+			     struct task_struct *t, struct pt_regs *regs)
+{
+	unsigned long pc, lr, sp, r52;
+	int is_current;
+
+	/*
+	 * Set up callback information. We grab the kernel stack base
+	 * so we will allow reads of that address range.
+	 */
+	is_current = (t == NULL || t == current);
+	kbt->is_current = is_current;
+	if (is_current)
+		t = validate_current();
+	kbt->task = t;
+	kbt->verbose = 0; /* override in caller if desired */
+	kbt->profile = 0; /* override in caller if desired */
+	kbt->end = KBT_ONGOING;
+	kbt->new_context = 1;
+
+	/*
+	 * validate_stack() dereferences regs, so skip it when there are
+	 * none; that case is reported as KBT_RUNNING just below.
+	 */
+	if (is_current && regs != NULL)
+		validate_stack(regs);
+
+	if (regs == NULL) {
+		if (is_current || t->state == TASK_RUNNING) {
+			/* Can't do this; we need registers */
+			kbt->end = KBT_RUNNING;
+			return;
+		}
+		/* Start from the state saved at the last context switch. */
+		pc = get_switch_to_pc();
+		lr = t->thread.pc;
+		sp = t->thread.ksp;
+		r52 = 0;
+	} else {
+		pc = regs->pc;
+		lr = regs->lr;
+		sp = regs->sp;
+		r52 = regs->regs[52];
+	}
+
+	backtrace_init(&kbt->it, read_memory_func, kbt, pc, lr, sp, r52);
+	kbt->end = KBacktraceIterator_next_item_inclusive(kbt);
+}
+EXPORT_SYMBOL(KBacktraceIterator_init);
+
+/* Returns 1 once the iterator has stopped for any reason, else 0. */
+int KBacktraceIterator_end(struct KBacktraceIterator *kbt)
+{
+	if (kbt->end == KBT_ONGOING)
+		return 0;
+	return 1;
+}
+EXPORT_SYMBOL(KBacktraceIterator_end);
+
+/*
+ * Advance the iterator to the next frame that is not a sigreturn.
+ * Sets kbt->end to KBT_DONE when no further frame can be found, or to
+ * KBT_LOOP when the new frame has the same pc and sp as the old one.
+ */
+void KBacktraceIterator_next(struct KBacktraceIterator *kbt)
+{
+ /* Remember where we were so a non-advancing step can be detected. */
+ unsigned long old_pc = kbt->it.pc, old_sp = kbt->it.sp;
+ kbt->new_context = 0;
+ if (!backtrace_next(&kbt->it) && !KBacktraceIterator_restart(kbt)) {
+ kbt->end = KBT_DONE;
+ return;
+ }
+ kbt->end = KBacktraceIterator_next_item_inclusive(kbt);
+ if (old_pc == kbt->it.pc && old_sp == kbt->it.sp) {
+ /* Trapped in a loop; give up. */
+ kbt->end = KBT_LOOP;
+ }
+}
+EXPORT_SYMBOL(KBacktraceIterator_next);
+
+/*
+ * Write a human-readable description of 'address' into buf.
+ *
+ * Addresses at or above PAGE_OFFSET are resolved with kallsyms as
+ * "symbol+offset/size [module] ".  Lower addresses are described by
+ * their vma as "file[start+len] " or "anon[start+len] ", but only when
+ * the caller holds mmap_sem (have_mmap_sem != 0).  buf is left empty
+ * when nothing can be said.
+ *
+ * bufsize must be at least KSYM_NAME_LEN.
+ */
+static void describe_addr(struct KBacktraceIterator *kbt,
+ unsigned long address,
+ int have_mmap_sem, char *buf, size_t bufsize)
+{
+ struct vm_area_struct *vma;
+ size_t namelen, remaining;
+ unsigned long size, offset, adjust;
+ char *p, *modname;
+ const char *name;
+ int rc;
+
+ /*
+ * Look one byte back for every caller frame (i.e. those that
+ * aren't a new context) so we look up symbol data for the
+ * call itself, not the following instruction, which may be on
+ * a different line (or in a different function).
+ */
+ adjust = !kbt->new_context;
+ address -= adjust;
+
+ if (address >= PAGE_OFFSET) {
+ /* Handle kernel symbols. */
+ BUG_ON(bufsize < KSYM_NAME_LEN);
+ name = kallsyms_lookup(address, &size, &offset,
+ &modname, buf);
+ if (name == NULL) {
+ buf[0] = '\0';
+ return;
+ }
+ namelen = strlen(buf);
+ remaining = (bufsize - 1) - namelen;
+ p = buf + namelen;
+ /* Add 'adjust' back so the printed offset matches the real pc. */
+ rc = snprintf(p, remaining, "+%#lx/%#lx ",
+ offset + adjust, size);
+ if (modname && rc < remaining)
+ snprintf(p + rc, remaining - rc, "[%s] ", modname);
+ buf[bufsize-1] = '\0';
+ return;
+ }
+
+ /* If we don't have the mmap_sem, we can't show any more info. */
+ buf[0] = '\0';
+ if (!have_mmap_sem)
+ return;
+
+ /* Find vma info. */
+ vma = find_vma(kbt->task->mm, address);
+ if (vma == NULL || address < vma->vm_start) {
+ snprintf(buf, bufsize, "[unmapped address] ");
+ return;
+ }
+
+ if (vma->vm_file) {
+ char *s;
+ p = d_path(&vma->vm_file->f_path, buf, bufsize);
+ if (IS_ERR(p))
+ p = "?";
+ /* Keep only the final path component. */
+ s = strrchr(p, '/');
+ if (s)
+ p = s+1;
+ } else {
+ p = "anon";
+ }
+
+ /* Generate a string description of the vma info. */
+ namelen = strlen(p);
+ remaining = (bufsize - 1) - namelen;
+ /* p may point inside buf, hence memmove rather than memcpy. */
+ memmove(buf, p, namelen);
+ snprintf(buf + namelen, remaining, "[%lx+%lx] ",
+ vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
+/*
+ * This method wraps the backtracer's more generic support.
+ * It is only invoked from the architecture-specific code; show_stack()
+ * and dump_stack() (in entry.S) are architecture-independent entry points.
+ *
+ * Prints one line per frame from an already-initialized iterator, up to
+ * 100 frames.  When 'headers' is nonzero, banner lines are printed
+ * before and after the frames.  Sets kbt->verbose as a side effect.
+ */
+void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
+{
+ int i;
+ int have_mmap_sem = 0;
+
+ if (headers) {
+ /*
+ * Add a blank line since if we are called from panic(),
+ * then bust_spinlocks() spit out a space in front of us
+ * and it will mess up our KERN_ERR.
+ */
+ pr_err("\n");
+ pr_err("Starting stack dump of tid %d, pid %d (%s)"
+ " on cpu %d at cycle %lld\n",
+ kbt->task->pid, kbt->task->tgid, kbt->task->comm,
+ smp_processor_id(), get_cycles());
+ }
+ kbt->verbose = 1;
+ i = 0;
+ for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) {
+ char namebuf[KSYM_NAME_LEN+100];
+ unsigned long address = kbt->it.pc;
+
+ /* Try to acquire the mmap_sem as we pass into userspace. */
+ /* trylock only: if it is contended, user frames get no vma info. */
+ if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm)
+ have_mmap_sem =
+ down_read_trylock(&kbt->task->mm->mmap_sem);
+
+ describe_addr(kbt, address, have_mmap_sem,
+ namebuf, sizeof(namebuf));
+
+ pr_err(" frame %d: 0x%lx %s(sp 0x%lx)\n",
+ i++, address, namebuf, (unsigned long)(kbt->it.sp));
+
+ /* Hard cap on the number of frames printed. */
+ if (i >= 100) {
+ pr_err("Stack dump truncated"
+ " (%d frames)\n", i);
+ break;
+ }
+ }
+ if (kbt->end == KBT_LOOP)
+ pr_err("Stack dump stopped; next frame identical to this one\n");
+ if (headers)
+ pr_err("Stack dump complete\n");
+ if (have_mmap_sem)
+ up_read(&kbt->task->mm->mmap_sem);
+}
+EXPORT_SYMBOL(tile_show_stack);
+
+
+/*
+ * This is called from show_regs() and _dump_stack().
+ * Dumps the current task's stack, with headers, starting at 'regs'.
+ */
+void dump_stack_regs(struct pt_regs *regs)
+{
+	struct KBacktraceIterator iter;
+
+	KBacktraceIterator_init(&iter, NULL, regs);
+	tile_show_stack(&iter, 1);
+}
+EXPORT_SYMBOL(dump_stack_regs);
+
+/*
+ * Fill a caller-supplied pt_regs with just pc, lr, sp and r52 (all other
+ * fields zeroed) and return it, so the call can be nested as an argument.
+ */
+static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs,
+				       ulong pc, ulong lr, ulong sp, ulong r52)
+{
+	memset(regs, 0, sizeof(*regs));
+
+	regs->regs[52] = r52;
+	regs->sp = sp;
+	regs->lr = lr;
+	regs->pc = pc;
+
+	return regs;
+}
+
+/*
+ * This is called from dump_stack() and just converts to pt_regs.
+ * The first argument is unused.
+ */
+void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
+{
+	struct pt_regs regs;
+
+	regs_to_pt_regs(&regs, pc, lr, sp, r52);
+	dump_stack_regs(&regs);
+}
+
+/*
+ * This is called from KBacktraceIterator_init_current().
+ * Initializes kbt for the current task from the four register values.
+ */
+void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc,
+				      ulong lr, ulong sp, ulong r52)
+{
+	struct pt_regs regs;
+
+	regs_to_pt_regs(&regs, pc, lr, sp, r52);
+	KBacktraceIterator_init(kbt, NULL, &regs);
+}
+
+/*
+ * This is called only from kernel/sched.c, with esp == NULL.
+ * Prints the stack of 'task' (or of the current task) without headers.
+ */
+void show_stack(struct task_struct *task, unsigned long *esp)
+{
+	struct KBacktraceIterator kbt;
+
+	if (task != NULL && task != current)
+		KBacktraceIterator_init(&kbt, task, NULL);
+	else
+		KBacktraceIterator_init_current(&kbt);
+
+	tile_show_stack(&kbt, 0);
+}
+
+#ifdef CONFIG_STACKTRACE
+
+/* Support generic Linux stack API too */
+
+/*
+ * Record the kernel pcs of 'task' (or of the current task when NULL)
+ * into trace->entries.  The first trace->skip frames are dropped;
+ * recording stops at the first pc below PAGE_OFFSET or once
+ * trace->max_entries have been stored.  trace->nr_entries is set to the
+ * number of entries written.
+ */
+void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+{
+	struct KBacktraceIterator kbt;
+	int to_skip = trace->skip;
+	int stored = 0;
+
+	if (task != NULL && task != current)
+		KBacktraceIterator_init(&kbt, task, NULL);
+	else
+		KBacktraceIterator_init_current(&kbt);
+
+	while (!KBacktraceIterator_end(&kbt)) {
+		if (to_skip) {
+			--to_skip;
+		} else {
+			if (stored >= trace->max_entries ||
+			    kbt.it.pc < PAGE_OFFSET)
+				break;
+			trace->entries[stored++] = kbt.it.pc;
+		}
+		KBacktraceIterator_next(&kbt);
+	}
+	trace->nr_entries = stored;
+}
+EXPORT_SYMBOL(save_stack_trace_tsk);
+
+/* Record the current task's stack: save_stack_trace_tsk() with a NULL task. */
+void save_stack_trace(struct stack_trace *trace)
+{
+ save_stack_trace_tsk(NULL, trace);
+}
+
+#endif
+
+/* In entry.S */
+EXPORT_SYMBOL(KBacktraceIterator_init_current);
diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c
new file mode 100644
index 00000000..cb44ba7c
--- /dev/null
+++ b/arch/tile/kernel/sys.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/TILE
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/mempolicy.h>
+#include <linux/binfmts.h>
+#include <linux/fs.h>
+#include <linux/compat.h>
+#include <linux/uaccess.h>
+#include <linux/signal.h>
+#include <asm/syscalls.h>
+#include <asm/pgtable.h>
+#include <asm/homecache.h>
+#include <arch/chip.h>
+
+/* flush_cache(): call homecache_evict() on the calling cpu's mask; returns 0. */
+SYSCALL_DEFINE0(flush_cache)
+{
+	const struct cpumask *self = cpumask_of(smp_processor_id());
+
+	homecache_evict(self);
+	return 0;
+}
+
+/*
+ * Syscalls that pass 64-bit values on 32-bit systems normally
+ * pass them as (low,high) word packed into the immediately adjacent
+ * registers. If the low word naturally falls on an even register,
+ * our ABI makes it work correctly; if not, we adjust it here.
+ * Handling it here means we don't have to fix uclibc AND glibc AND
+ * any other standard libcs we want to support.
+ */
+
+#if !defined(__tilegx__) || defined(CONFIG_COMPAT)
+
+/* readahead() wrapper: rebuild the 64-bit offset from its (lo, hi) halves. */
+ssize_t sys32_readahead(int fd, u32 offset_lo, u32 offset_hi, u32 count)
+{
+	loff_t offset = ((loff_t)offset_hi << 32) | offset_lo;
+
+	return sys_readahead(fd, offset, count);
+}
+
+/*
+ * fadvise64_64() wrapper: rebuild the 64-bit offset and length from
+ * their (lo, hi) halves.
+ */
+int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi,
+		       u32 len_lo, u32 len_hi, int advice)
+{
+	loff_t offset = ((loff_t)offset_hi << 32) | offset_lo;
+	loff_t len = ((loff_t)len_hi << 32) | len_lo;
+
+	return sys_fadvise64_64(fd, offset, len, advice);
+}
+
+#endif /* 32-bit syscall wrappers */
+
+/*
+ * Note: used by the compat code even in 64-bit Linux.
+ *
+ * mmap2() takes its file offset in 4KB units.  The offset must be a
+ * whole number of pages (-EINVAL otherwise) and is converted to a page
+ * offset for sys_mmap_pgoff().
+ */
+SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, off_4k)
+{
+#define PAGE_ADJUST (PAGE_SHIFT - 12)
+	unsigned long pgoff;
+
+	/* Low bits set means the offset falls inside a page. */
+	if (off_4k & ((1 << PAGE_ADJUST) - 1))
+		return -EINVAL;
+
+	pgoff = off_4k >> PAGE_ADJUST;
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
+}
+
+#ifdef __tilegx__
+/*
+ * mmap() with a byte offset: the offset must be page-aligned (-EINVAL
+ * otherwise) and is converted to a page offset for sys_mmap_pgoff().
+ */
+SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, off_t, offset)
+{
+	if ((offset & ((1 << PAGE_SHIFT) - 1)) != 0)
+		return -EINVAL;
+
+	return sys_mmap_pgoff(addr, len, prot, flags, fd,
+			      offset >> PAGE_SHIFT);
+}
+#endif
+
+
+/* Provide the actual syscall number to call mapping. */
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = (call),
+
+#ifndef __tilegx__
+/* See comments at the top of the file. */
+#define sys_fadvise64_64 sys32_fadvise64_64
+#define sys_readahead sys32_readahead
+#endif
+
+/* Call the trampolines to manage pt_regs where necessary. */
+#define sys_execve _sys_execve
+#define sys_sigaltstack _sys_sigaltstack
+#define sys_rt_sigreturn _sys_rt_sigreturn
+#define sys_clone _sys_clone
+#ifndef __tilegx__
+#define sys_cmpxchg_badaddr _sys_cmpxchg_badaddr
+#endif
+
+/*
+ * Note that we can't include <linux/unistd.h> here since the header
+ * guard will defeat us; <asm/unistd.h> checks for __SYSCALL as well.
+ *
+ * Every slot is first defaulted to sys_ni_syscall; the __SYSCALL(nr, call)
+ * entries expanded from <asm/unistd.h> then override individual slots
+ * through designated initializers.
+ */
+void *sys_call_table[__NR_syscalls] = {
+ [0 ... __NR_syscalls-1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c
new file mode 100644
index 00000000..71ae728e
--- /dev/null
+++ b/arch/tile/kernel/sysfs.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * /sys entry support.
+ */
+
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/stat.h>
+#include <hv/hypervisor.h>
+
+/*
+ * Return a string queried from the hypervisor, truncated to page size.
+ *
+ * @page:  output buffer, PAGE_SIZE bytes long.
+ * @query: hypervisor confstr selector passed through to hv_confstr().
+ *
+ * A non-empty result is terminated with a newline followed by a NUL.
+ * Returns the number of bytes placed in @page (including the newline,
+ * excluding the NUL), or 0 with an empty string in @page when the
+ * hypervisor reports an error or no data.
+ */
+static ssize_t get_hv_confstr(char *page, int query)
+{
+	ssize_t n = hv_confstr(query, (unsigned long)page, PAGE_SIZE - 1);
+
+	/*
+	 * The reported length is treated as counting the trailing NUL, so
+	 * one is subtracted to get the text length.  A result of zero must
+	 * be handled like an error: subtracting one from it would give -1
+	 * and the newline below would be stored at page[-1].
+	 */
+	if (n <= 0)
+		n = 0;
+	else
+		n = min(n, (ssize_t)PAGE_SIZE - 1) - 1;
+
+	if (n)
+		page[n++] = '\n';
+	page[n] = '\0';
+	return n;
+}
+
+/* sysfs show handler: emit smp_width as an unsigned decimal line. */
+static ssize_t chip_width_show(struct device *dev,
+			       struct device_attribute *attr, char *page)
+{
+	const ssize_t written = sprintf(page, "%u\n", smp_width);
+
+	return written;
+}
+static DEVICE_ATTR(chip_width, 0444, chip_width_show, NULL);
+
+/* sysfs show handler: emit smp_height as an unsigned decimal line. */
+static ssize_t chip_height_show(struct device *dev,
+				struct device_attribute *attr, char *page)
+{
+	const ssize_t written = sprintf(page, "%u\n", smp_height);
+
+	return written;
+}
+static DEVICE_ATTR(chip_height, 0444, chip_height_show, NULL);
+
+/*
+ * sysfs show handler: fill the page with the hypervisor string selected
+ * by HV_CONFSTR_CHIP_SERIAL_NUM.
+ */
+static ssize_t chip_serial_show(struct device *dev,
+				struct device_attribute *attr, char *page)
+{
+	const ssize_t written =
+		get_hv_confstr(page, HV_CONFSTR_CHIP_SERIAL_NUM);
+
+	return written;
+}
+static DEVICE_ATTR(chip_serial, 0444, chip_serial_show, NULL);
+
+/*
+ * sysfs show handler: fill the page with the hypervisor string selected
+ * by HV_CONFSTR_CHIP_REV.
+ */
+static ssize_t chip_revision_show(struct device *dev,
+				  struct device_attribute *attr, char *page)
+{
+	const ssize_t written = get_hv_confstr(page, HV_CONFSTR_CHIP_REV);
+
+	return written;
+}
+static DEVICE_ATTR(chip_revision, 0444, chip_revision_show, NULL);
+
+
+/* sysfs show handler: always reports the fixed string "tilera". */
+static ssize_t type_show(struct device *dev,
+			 struct device_attribute *attr, char *page)
+{
+	const ssize_t written = sprintf(page, "tilera\n");
+
+	return written;
+}
+static DEVICE_ATTR(type, 0444, type_show, NULL);
+
+/*
+ * HV_CONF_ATTR(name, conf) defines a show handler called <name>_show()
+ * that returns get_hv_confstr(page, conf), together with a DEVICE_ATTR
+ * of mode 0444 and no store handler that exposes it as dev_attr_<name>.
+ * The expansion ends with its own semicolon, so invocations are written
+ * without one.
+ */
+#define HV_CONF_ATTR(name, conf) \
+ static ssize_t name ## _show(struct device *dev, \
+ struct device_attribute *attr, \
+ char *page) \
+ { \
+ return get_hv_confstr(page, conf); \
+ } \
+ static DEVICE_ATTR(name, 0444, name ## _show, NULL);
+
+/* Attributes created directly on hypervisor_kobj by create_sysfs_entries(). */
+HV_CONF_ATTR(version, HV_CONFSTR_HV_SW_VER)
+HV_CONF_ATTR(config_version, HV_CONFSTR_HV_CONFIG_VER)
+
+/* Attributes collected into board_attrs[] below. */
+HV_CONF_ATTR(board_part, HV_CONFSTR_BOARD_PART_NUM)
+HV_CONF_ATTR(board_serial, HV_CONFSTR_BOARD_SERIAL_NUM)
+HV_CONF_ATTR(board_revision, HV_CONFSTR_BOARD_REV)
+HV_CONF_ATTR(board_description, HV_CONFSTR_BOARD_DESC)
+HV_CONF_ATTR(mezz_part, HV_CONFSTR_MEZZ_PART_NUM)
+HV_CONF_ATTR(mezz_serial, HV_CONFSTR_MEZZ_SERIAL_NUM)
+HV_CONF_ATTR(mezz_revision, HV_CONFSTR_MEZZ_REV)
+HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC)
+HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL)
+
+/*
+ * NULL-terminated list of the board, mezzanine and switch attributes
+ * generated by HV_CONF_ATTR() above.
+ */
+static struct attribute *board_attrs[] = {
+ &dev_attr_board_part.attr,
+ &dev_attr_board_serial.attr,
+ &dev_attr_board_revision.attr,
+ &dev_attr_board_description.attr,
+ &dev_attr_mezz_part.attr,
+ &dev_attr_mezz_serial.attr,
+ &dev_attr_mezz_revision.attr,
+ &dev_attr_mezz_description.attr,
+ &dev_attr_switch_control.attr,
+ NULL
+};
+
+/*
+ * Attribute group named "board" holding board_attrs[]; it is registered
+ * on hypervisor_kobj by create_sysfs_entries().
+ */
+static struct attribute_group board_attr_group = {
+ .name = "board",
+ .attrs = board_attrs,
+};
+
+
+static struct bin_attribute hvconfig_bin;
+
+/*
+ * Read handler for the binary "hvconfig" sysfs file.
+ *
+ * Copies up to @count bytes of the hypervisor configuration string,
+ * starting at byte offset @off, into @buf.
+ *
+ * Returns the number of bytes copied (0 once @off reaches the end of the
+ * data), -EINVAL if @off lies beyond the data, -EIO if the hypervisor
+ * does not report a usable length, or -ENOMEM if the temporary buffer
+ * cannot be allocated.
+ */
+static ssize_t
+hvconfig_bin_read(struct file *filp, struct kobject *kobj,
+		  struct bin_attribute *bin_attr,
+		  char *buf, loff_t off, size_t count)
+{
+	/* Cached length of the configuration, excluding the trailing NUL. */
+	static size_t size;
+
+	/* Lazily learn the true size (minus the trailing NUL). */
+	if (size == 0) {
+		ssize_t total = hv_confstr(HV_CONFSTR_HV_CONFIG, 0, 0);
+
+		/* Keep an error code from wrapping into a huge unsigned size. */
+		if (total < 1)
+			return -EIO;
+		size = total - 1;
+	}
+
+	/* Check and adjust input parameters. */
+	if (off > size)
+		return -EINVAL;
+	if (count > size - off)
+		count = size - off;
+
+	if (count) {
+		/*
+		 * Get a copy of the leading off + count bytes of the hvc and
+		 * copy out the relevant portion.  The fetch length lives in a
+		 * local: storing it in "size" would shrink the cached length
+		 * and truncate the file for every later reader.
+		 */
+		size_t fetch_len = off + count;
+		char *hvc = kmalloc(fetch_len, GFP_KERNEL);
+
+		if (hvc == NULL)
+			return -ENOMEM;
+		hv_confstr(HV_CONFSTR_HV_CONFIG, (unsigned long)hvc, fetch_len);
+		memcpy(buf, hvc + off, count);
+		kfree(hvc);
+	}
+
+	return count;
+}
+
+/*
+ * Boot-time registration of the sysfs entries defined in this file:
+ * the chip_* files on the cpu subsystem root device, then the type,
+ * version and config_version files, the "board" group and the binary
+ * "hvconfig" file on hypervisor_kobj.  Registration happens in that
+ * order and stops at the first failure, whose error code is returned;
+ * 0 is returned when everything was created.
+ */
+static int __init create_sysfs_entries(void)
+{
+	struct device_attribute *const cpu_attrs[] = {
+		&dev_attr_chip_width,
+		&dev_attr_chip_height,
+		&dev_attr_chip_serial,
+		&dev_attr_chip_revision,
+		NULL
+	};
+	struct attribute *const hv_attrs[] = {
+		&dev_attr_type.attr,
+		&dev_attr_version.attr,
+		&dev_attr_config_version.attr,
+		NULL
+	};
+	int rc = 0;
+	int i;
+
+	for (i = 0; rc == 0 && cpu_attrs[i] != NULL; i++)
+		rc = device_create_file(cpu_subsys.dev_root, cpu_attrs[i]);
+
+	for (i = 0; rc == 0 && hv_attrs[i] != NULL; i++)
+		rc = sysfs_create_file(hypervisor_kobj, hv_attrs[i]);
+
+	if (rc != 0)
+		return rc;
+
+	rc = sysfs_create_group(hypervisor_kobj, &board_attr_group);
+	if (rc != 0)
+		return rc;
+
+	/* The binary attribute is filled in at run time, just before use. */
+	sysfs_bin_attr_init(&hvconfig_bin);
+	hvconfig_bin.attr.name = "hvconfig";
+	hvconfig_bin.attr.mode = S_IRUGO;
+	hvconfig_bin.read = hvconfig_bin_read;
+	hvconfig_bin.size = PAGE_SIZE;
+
+	return sysfs_create_bin_file(hypervisor_kobj, &hvconfig_bin);
+}
+subsys_initcall(create_sysfs_entries);
diff --git a/arch/tile/kernel/tile-desc_32.c b/arch/tile/kernel/tile-desc_32.c
new file mode 100644
index 00000000..dd7bd1d8
--- /dev/null
+++ b/arch/tile/kernel/tile-desc_32.c
@@ -0,0 +1,2605 @@
+/* TILEPro opcode information.
+ *
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ *
+ *
+ *
+ *
+ */
+
+/*
+ * This define is BFD_RELOC_##x for real bfd, or -1 for everyone else.
+ * As defined here the argument is ignored and every use yields -1.
+ */
+#define BFD_RELOC(x) -1
+
+/*
+ * Special registers.
+ * These register numbers are referenced by name in the entries of the
+ * opcode table below.
+ */
+#define TREG_LR 55
+#define TREG_SN 56
+#define TREG_ZERO 63
+
+#include <linux/stddef.h>
+#include <asm/tile-desc.h>
+
+const struct tilepro_opcode tilepro_opcodes[395] =
+{
+ { "bpt", TILEPRO_OPC_BPT, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "info", TILEPRO_OPC_INFO, 0xf, 1, TREG_ZERO, 1,
+ { { 0 }, { 1 }, { 2 }, { 3 }, { 0, } },
+ },
+ { "infol", TILEPRO_OPC_INFOL, 0x3, 1, TREG_ZERO, 1,
+ { { 4 }, { 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "j", TILEPRO_OPC_J, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 6 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "jal", TILEPRO_OPC_JAL, 0x2, 1, TREG_LR, 1,
+ { { 0, }, { 6 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "move", TILEPRO_OPC_MOVE, 0xf, 2, TREG_ZERO, 1,
+ { { 7, 8 }, { 9, 10 }, { 11, 12 }, { 13, 14 }, { 0, } },
+ },
+ { "move.sn", TILEPRO_OPC_MOVE_SN, 0x3, 2, TREG_SN, 1,
+ { { 7, 8 }, { 9, 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "movei", TILEPRO_OPC_MOVEI, 0xf, 2, TREG_ZERO, 1,
+ { { 7, 0 }, { 9, 1 }, { 11, 2 }, { 13, 3 }, { 0, } },
+ },
+ { "movei.sn", TILEPRO_OPC_MOVEI_SN, 0x3, 2, TREG_SN, 1,
+ { { 7, 0 }, { 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "moveli", TILEPRO_OPC_MOVELI, 0x3, 2, TREG_ZERO, 1,
+ { { 7, 4 }, { 9, 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "moveli.sn", TILEPRO_OPC_MOVELI_SN, 0x3, 2, TREG_SN, 1,
+ { { 7, 4 }, { 9, 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "movelis", TILEPRO_OPC_MOVELIS, 0x3, 2, TREG_SN, 1,
+ { { 7, 4 }, { 9, 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "prefetch", TILEPRO_OPC_PREFETCH, 0x12, 1, TREG_ZERO, 1,
+ { { 0, }, { 10 }, { 0, }, { 0, }, { 15 } },
+ },
+ { "raise", TILEPRO_OPC_RAISE, 0x2, 0, TREG_ZERO, 1,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "add", TILEPRO_OPC_ADD, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "add.sn", TILEPRO_OPC_ADD_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addb", TILEPRO_OPC_ADDB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addb.sn", TILEPRO_OPC_ADDB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addbs_u", TILEPRO_OPC_ADDBS_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addbs_u.sn", TILEPRO_OPC_ADDBS_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addh", TILEPRO_OPC_ADDH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addh.sn", TILEPRO_OPC_ADDH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addhs", TILEPRO_OPC_ADDHS, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addhs.sn", TILEPRO_OPC_ADDHS_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addi", TILEPRO_OPC_ADDI, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } },
+ },
+ { "addi.sn", TILEPRO_OPC_ADDI_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addib", TILEPRO_OPC_ADDIB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addib.sn", TILEPRO_OPC_ADDIB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addih", TILEPRO_OPC_ADDIH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addih.sn", TILEPRO_OPC_ADDIH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addli", TILEPRO_OPC_ADDLI, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addli.sn", TILEPRO_OPC_ADDLI_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addlis", TILEPRO_OPC_ADDLIS, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "adds", TILEPRO_OPC_ADDS, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "adds.sn", TILEPRO_OPC_ADDS_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "adiffb_u", TILEPRO_OPC_ADIFFB_U, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "adiffb_u.sn", TILEPRO_OPC_ADIFFB_U_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "adiffh", TILEPRO_OPC_ADIFFH, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "adiffh.sn", TILEPRO_OPC_ADIFFH_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "and", TILEPRO_OPC_AND, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "and.sn", TILEPRO_OPC_AND_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "andi", TILEPRO_OPC_ANDI, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } },
+ },
+ { "andi.sn", TILEPRO_OPC_ANDI_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "auli", TILEPRO_OPC_AULI, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "avgb_u", TILEPRO_OPC_AVGB_U, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "avgb_u.sn", TILEPRO_OPC_AVGB_U_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "avgh", TILEPRO_OPC_AVGH, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "avgh.sn", TILEPRO_OPC_AVGH_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bbns", TILEPRO_OPC_BBNS, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bbns.sn", TILEPRO_OPC_BBNS_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bbnst", TILEPRO_OPC_BBNST, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bbnst.sn", TILEPRO_OPC_BBNST_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bbs", TILEPRO_OPC_BBS, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bbs.sn", TILEPRO_OPC_BBS_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bbst", TILEPRO_OPC_BBST, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bbst.sn", TILEPRO_OPC_BBST_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bgez", TILEPRO_OPC_BGEZ, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bgez.sn", TILEPRO_OPC_BGEZ_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bgezt", TILEPRO_OPC_BGEZT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bgezt.sn", TILEPRO_OPC_BGEZT_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bgz", TILEPRO_OPC_BGZ, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bgz.sn", TILEPRO_OPC_BGZ_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bgzt", TILEPRO_OPC_BGZT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bgzt.sn", TILEPRO_OPC_BGZT_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bitx", TILEPRO_OPC_BITX, 0x5, 2, TREG_ZERO, 1,
+ { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } },
+ },
+ { "bitx.sn", TILEPRO_OPC_BITX_SN, 0x1, 2, TREG_SN, 1,
+ { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blez", TILEPRO_OPC_BLEZ, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blez.sn", TILEPRO_OPC_BLEZ_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blezt", TILEPRO_OPC_BLEZT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blezt.sn", TILEPRO_OPC_BLEZT_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blz", TILEPRO_OPC_BLZ, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blz.sn", TILEPRO_OPC_BLZ_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blzt", TILEPRO_OPC_BLZT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blzt.sn", TILEPRO_OPC_BLZT_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bnz", TILEPRO_OPC_BNZ, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bnz.sn", TILEPRO_OPC_BNZ_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bnzt", TILEPRO_OPC_BNZT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bnzt.sn", TILEPRO_OPC_BNZT_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bytex", TILEPRO_OPC_BYTEX, 0x5, 2, TREG_ZERO, 1,
+ { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } },
+ },
+ { "bytex.sn", TILEPRO_OPC_BYTEX_SN, 0x1, 2, TREG_SN, 1,
+ { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bz", TILEPRO_OPC_BZ, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bz.sn", TILEPRO_OPC_BZ_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bzt", TILEPRO_OPC_BZT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bzt.sn", TILEPRO_OPC_BZT_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "clz", TILEPRO_OPC_CLZ, 0x5, 2, TREG_ZERO, 1,
+ { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } },
+ },
+ { "clz.sn", TILEPRO_OPC_CLZ_SN, 0x1, 2, TREG_SN, 1,
+ { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "crc32_32", TILEPRO_OPC_CRC32_32, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "crc32_32.sn", TILEPRO_OPC_CRC32_32_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "crc32_8", TILEPRO_OPC_CRC32_8, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "crc32_8.sn", TILEPRO_OPC_CRC32_8_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ctz", TILEPRO_OPC_CTZ, 0x5, 2, TREG_ZERO, 1,
+ { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } },
+ },
+ { "ctz.sn", TILEPRO_OPC_CTZ_SN, 0x1, 2, TREG_SN, 1,
+ { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "drain", TILEPRO_OPC_DRAIN, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "dtlbpr", TILEPRO_OPC_DTLBPR, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "dword_align", TILEPRO_OPC_DWORD_ALIGN, 0x1, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "dword_align.sn", TILEPRO_OPC_DWORD_ALIGN_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "finv", TILEPRO_OPC_FINV, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "flush", TILEPRO_OPC_FLUSH, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fnop", TILEPRO_OPC_FNOP, 0xf, 0, TREG_ZERO, 1,
+ { { }, { }, { }, { }, { 0, } },
+ },
+ { "icoh", TILEPRO_OPC_ICOH, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ill", TILEPRO_OPC_ILL, 0xa, 0, TREG_ZERO, 1,
+ { { 0, }, { }, { 0, }, { }, { 0, } },
+ },
+ { "inthb", TILEPRO_OPC_INTHB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "inthb.sn", TILEPRO_OPC_INTHB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "inthh", TILEPRO_OPC_INTHH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "inthh.sn", TILEPRO_OPC_INTHH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "intlb", TILEPRO_OPC_INTLB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "intlb.sn", TILEPRO_OPC_INTLB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "intlh", TILEPRO_OPC_INTLH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "intlh.sn", TILEPRO_OPC_INTLH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "inv", TILEPRO_OPC_INV, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "iret", TILEPRO_OPC_IRET, 0x2, 0, TREG_ZERO, 1,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "jalb", TILEPRO_OPC_JALB, 0x2, 1, TREG_LR, 1,
+ { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "jalf", TILEPRO_OPC_JALF, 0x2, 1, TREG_LR, 1,
+ { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "jalr", TILEPRO_OPC_JALR, 0x2, 1, TREG_LR, 1,
+ { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "jalrp", TILEPRO_OPC_JALRP, 0x2, 1, TREG_LR, 1,
+ { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "jb", TILEPRO_OPC_JB, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "jf", TILEPRO_OPC_JF, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "jr", TILEPRO_OPC_JR, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "jrp", TILEPRO_OPC_JRP, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lb", TILEPRO_OPC_LB, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } },
+ },
+ { "lb.sn", TILEPRO_OPC_LB_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lb_u", TILEPRO_OPC_LB_U, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } },
+ },
+ { "lb_u.sn", TILEPRO_OPC_LB_U_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lbadd", TILEPRO_OPC_LBADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lbadd.sn", TILEPRO_OPC_LBADD_SN, 0x2, 3, TREG_SN, 1,
+ { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lbadd_u", TILEPRO_OPC_LBADD_U, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lbadd_u.sn", TILEPRO_OPC_LBADD_U_SN, 0x2, 3, TREG_SN, 1,
+ { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lh", TILEPRO_OPC_LH, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } },
+ },
+ { "lh.sn", TILEPRO_OPC_LH_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lh_u", TILEPRO_OPC_LH_U, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } },
+ },
+ { "lh_u.sn", TILEPRO_OPC_LH_U_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lhadd", TILEPRO_OPC_LHADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lhadd.sn", TILEPRO_OPC_LHADD_SN, 0x2, 3, TREG_SN, 1,
+ { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lhadd_u", TILEPRO_OPC_LHADD_U, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lhadd_u.sn", TILEPRO_OPC_LHADD_U_SN, 0x2, 3, TREG_SN, 1,
+ { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lnk", TILEPRO_OPC_LNK, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lnk.sn", TILEPRO_OPC_LNK_SN, 0x2, 1, TREG_SN, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lw", TILEPRO_OPC_LW, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } },
+ },
+ { "lw.sn", TILEPRO_OPC_LW_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lw_na", TILEPRO_OPC_LW_NA, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lw_na.sn", TILEPRO_OPC_LW_NA_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lwadd", TILEPRO_OPC_LWADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lwadd.sn", TILEPRO_OPC_LWADD_SN, 0x2, 3, TREG_SN, 1,
+ { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lwadd_na", TILEPRO_OPC_LWADD_NA, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lwadd_na.sn", TILEPRO_OPC_LWADD_NA_SN, 0x2, 3, TREG_SN, 1,
+ { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "maxb_u", TILEPRO_OPC_MAXB_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "maxb_u.sn", TILEPRO_OPC_MAXB_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "maxh", TILEPRO_OPC_MAXH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "maxh.sn", TILEPRO_OPC_MAXH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "maxib_u", TILEPRO_OPC_MAXIB_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "maxib_u.sn", TILEPRO_OPC_MAXIB_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "maxih", TILEPRO_OPC_MAXIH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "maxih.sn", TILEPRO_OPC_MAXIH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mf", TILEPRO_OPC_MF, 0x2, 0, TREG_ZERO, 1,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mfspr", TILEPRO_OPC_MFSPR, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 25 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "minb_u", TILEPRO_OPC_MINB_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "minb_u.sn", TILEPRO_OPC_MINB_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "minh", TILEPRO_OPC_MINH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "minh.sn", TILEPRO_OPC_MINH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "minib_u", TILEPRO_OPC_MINIB_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "minib_u.sn", TILEPRO_OPC_MINIB_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "minih", TILEPRO_OPC_MINIH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "minih.sn", TILEPRO_OPC_MINIH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mm", TILEPRO_OPC_MM, 0x3, 5, TREG_ZERO, 1,
+ { { 7, 8, 16, 26, 27 }, { 9, 10, 17, 28, 29 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mnz", TILEPRO_OPC_MNZ, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "mnz.sn", TILEPRO_OPC_MNZ_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mnzb", TILEPRO_OPC_MNZB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mnzb.sn", TILEPRO_OPC_MNZB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mnzh", TILEPRO_OPC_MNZH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mnzh.sn", TILEPRO_OPC_MNZH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mtspr", TILEPRO_OPC_MTSPR, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 30, 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhh_ss", TILEPRO_OPC_MULHH_SS, 0x5, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } },
+ },
+ { "mulhh_ss.sn", TILEPRO_OPC_MULHH_SS_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhh_su", TILEPRO_OPC_MULHH_SU, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhh_su.sn", TILEPRO_OPC_MULHH_SU_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhh_uu", TILEPRO_OPC_MULHH_UU, 0x5, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } },
+ },
+ { "mulhh_uu.sn", TILEPRO_OPC_MULHH_UU_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhha_ss", TILEPRO_OPC_MULHHA_SS, 0x5, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } },
+ },
+ { "mulhha_ss.sn", TILEPRO_OPC_MULHHA_SS_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhha_su", TILEPRO_OPC_MULHHA_SU, 0x1, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhha_su.sn", TILEPRO_OPC_MULHHA_SU_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhha_uu", TILEPRO_OPC_MULHHA_UU, 0x5, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } },
+ },
+ { "mulhha_uu.sn", TILEPRO_OPC_MULHHA_UU_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhhsa_uu", TILEPRO_OPC_MULHHSA_UU, 0x1, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhhsa_uu.sn", TILEPRO_OPC_MULHHSA_UU_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhl_ss", TILEPRO_OPC_MULHL_SS, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhl_ss.sn", TILEPRO_OPC_MULHL_SS_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhl_su", TILEPRO_OPC_MULHL_SU, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhl_su.sn", TILEPRO_OPC_MULHL_SU_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhl_us", TILEPRO_OPC_MULHL_US, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhl_us.sn", TILEPRO_OPC_MULHL_US_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhl_uu", TILEPRO_OPC_MULHL_UU, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhl_uu.sn", TILEPRO_OPC_MULHL_UU_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhla_ss", TILEPRO_OPC_MULHLA_SS, 0x1, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhla_ss.sn", TILEPRO_OPC_MULHLA_SS_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhla_su", TILEPRO_OPC_MULHLA_SU, 0x1, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhla_su.sn", TILEPRO_OPC_MULHLA_SU_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhla_us", TILEPRO_OPC_MULHLA_US, 0x1, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhla_us.sn", TILEPRO_OPC_MULHLA_US_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhla_uu", TILEPRO_OPC_MULHLA_UU, 0x1, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhla_uu.sn", TILEPRO_OPC_MULHLA_UU_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulhlsa_uu", TILEPRO_OPC_MULHLSA_UU, 0x5, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } },
+ },
+ { "mulhlsa_uu.sn", TILEPRO_OPC_MULHLSA_UU_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulll_ss", TILEPRO_OPC_MULLL_SS, 0x5, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } },
+ },
+ { "mulll_ss.sn", TILEPRO_OPC_MULLL_SS_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulll_su", TILEPRO_OPC_MULLL_SU, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulll_su.sn", TILEPRO_OPC_MULLL_SU_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulll_uu", TILEPRO_OPC_MULLL_UU, 0x5, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } },
+ },
+ { "mulll_uu.sn", TILEPRO_OPC_MULLL_UU_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mullla_ss", TILEPRO_OPC_MULLLA_SS, 0x5, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } },
+ },
+ { "mullla_ss.sn", TILEPRO_OPC_MULLLA_SS_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mullla_su", TILEPRO_OPC_MULLLA_SU, 0x1, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mullla_su.sn", TILEPRO_OPC_MULLLA_SU_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mullla_uu", TILEPRO_OPC_MULLLA_UU, 0x5, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } },
+ },
+ { "mullla_uu.sn", TILEPRO_OPC_MULLLA_UU_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulllsa_uu", TILEPRO_OPC_MULLLSA_UU, 0x1, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mulllsa_uu.sn", TILEPRO_OPC_MULLLSA_UU_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mvnz", TILEPRO_OPC_MVNZ, 0x5, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } },
+ },
+ { "mvnz.sn", TILEPRO_OPC_MVNZ_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mvz", TILEPRO_OPC_MVZ, 0x5, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } },
+ },
+ { "mvz.sn", TILEPRO_OPC_MVZ_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mz", TILEPRO_OPC_MZ, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "mz.sn", TILEPRO_OPC_MZ_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mzb", TILEPRO_OPC_MZB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mzb.sn", TILEPRO_OPC_MZB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mzh", TILEPRO_OPC_MZH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mzh.sn", TILEPRO_OPC_MZH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "nap", TILEPRO_OPC_NAP, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "nop", TILEPRO_OPC_NOP, 0xf, 0, TREG_ZERO, 1,
+ { { }, { }, { }, { }, { 0, } },
+ },
+ { "nor", TILEPRO_OPC_NOR, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "nor.sn", TILEPRO_OPC_NOR_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "or", TILEPRO_OPC_OR, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "or.sn", TILEPRO_OPC_OR_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ori", TILEPRO_OPC_ORI, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } },
+ },
+ { "ori.sn", TILEPRO_OPC_ORI_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "packbs_u", TILEPRO_OPC_PACKBS_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "packbs_u.sn", TILEPRO_OPC_PACKBS_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "packhb", TILEPRO_OPC_PACKHB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "packhb.sn", TILEPRO_OPC_PACKHB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "packhs", TILEPRO_OPC_PACKHS, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "packhs.sn", TILEPRO_OPC_PACKHS_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "packlb", TILEPRO_OPC_PACKLB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "packlb.sn", TILEPRO_OPC_PACKLB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "pcnt", TILEPRO_OPC_PCNT, 0x5, 2, TREG_ZERO, 1,
+ { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } },
+ },
+ { "pcnt.sn", TILEPRO_OPC_PCNT_SN, 0x1, 2, TREG_SN, 1,
+ { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "rl", TILEPRO_OPC_RL, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "rl.sn", TILEPRO_OPC_RL_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "rli", TILEPRO_OPC_RLI, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } },
+ },
+ { "rli.sn", TILEPRO_OPC_RLI_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "s1a", TILEPRO_OPC_S1A, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "s1a.sn", TILEPRO_OPC_S1A_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "s2a", TILEPRO_OPC_S2A, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "s2a.sn", TILEPRO_OPC_S2A_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "s3a", TILEPRO_OPC_S3A, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "s3a.sn", TILEPRO_OPC_S3A_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sadab_u", TILEPRO_OPC_SADAB_U, 0x1, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sadab_u.sn", TILEPRO_OPC_SADAB_U_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sadah", TILEPRO_OPC_SADAH, 0x1, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sadah.sn", TILEPRO_OPC_SADAH_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sadah_u", TILEPRO_OPC_SADAH_U, 0x1, 3, TREG_ZERO, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sadah_u.sn", TILEPRO_OPC_SADAH_U_SN, 0x1, 3, TREG_SN, 1,
+ { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sadb_u", TILEPRO_OPC_SADB_U, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sadb_u.sn", TILEPRO_OPC_SADB_U_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sadh", TILEPRO_OPC_SADH, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sadh.sn", TILEPRO_OPC_SADH_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sadh_u", TILEPRO_OPC_SADH_U, 0x1, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sadh_u.sn", TILEPRO_OPC_SADH_U_SN, 0x1, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sb", TILEPRO_OPC_SB, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 17 }, { 0, }, { 0, }, { 15, 36 } },
+ },
+ { "sbadd", TILEPRO_OPC_SBADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 24, 17, 37 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "seq", TILEPRO_OPC_SEQ, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "seq.sn", TILEPRO_OPC_SEQ_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "seqb", TILEPRO_OPC_SEQB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "seqb.sn", TILEPRO_OPC_SEQB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "seqh", TILEPRO_OPC_SEQH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "seqh.sn", TILEPRO_OPC_SEQH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "seqi", TILEPRO_OPC_SEQI, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } },
+ },
+ { "seqi.sn", TILEPRO_OPC_SEQI_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "seqib", TILEPRO_OPC_SEQIB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "seqib.sn", TILEPRO_OPC_SEQIB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "seqih", TILEPRO_OPC_SEQIH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "seqih.sn", TILEPRO_OPC_SEQIH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sh", TILEPRO_OPC_SH, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 17 }, { 0, }, { 0, }, { 15, 36 } },
+ },
+ { "shadd", TILEPRO_OPC_SHADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 24, 17, 37 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shl", TILEPRO_OPC_SHL, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "shl.sn", TILEPRO_OPC_SHL_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shlb", TILEPRO_OPC_SHLB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shlb.sn", TILEPRO_OPC_SHLB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shlh", TILEPRO_OPC_SHLH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shlh.sn", TILEPRO_OPC_SHLH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shli", TILEPRO_OPC_SHLI, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } },
+ },
+ { "shli.sn", TILEPRO_OPC_SHLI_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shlib", TILEPRO_OPC_SHLIB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shlib.sn", TILEPRO_OPC_SHLIB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shlih", TILEPRO_OPC_SHLIH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shlih.sn", TILEPRO_OPC_SHLIH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shr", TILEPRO_OPC_SHR, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "shr.sn", TILEPRO_OPC_SHR_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shrb", TILEPRO_OPC_SHRB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shrb.sn", TILEPRO_OPC_SHRB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shrh", TILEPRO_OPC_SHRH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shrh.sn", TILEPRO_OPC_SHRH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shri", TILEPRO_OPC_SHRI, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } },
+ },
+ { "shri.sn", TILEPRO_OPC_SHRI_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shrib", TILEPRO_OPC_SHRIB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shrib.sn", TILEPRO_OPC_SHRIB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shrih", TILEPRO_OPC_SHRIH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shrih.sn", TILEPRO_OPC_SHRIH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slt", TILEPRO_OPC_SLT, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "slt.sn", TILEPRO_OPC_SLT_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slt_u", TILEPRO_OPC_SLT_U, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "slt_u.sn", TILEPRO_OPC_SLT_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sltb", TILEPRO_OPC_SLTB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sltb.sn", TILEPRO_OPC_SLTB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sltb_u", TILEPRO_OPC_SLTB_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sltb_u.sn", TILEPRO_OPC_SLTB_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slte", TILEPRO_OPC_SLTE, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "slte.sn", TILEPRO_OPC_SLTE_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slte_u", TILEPRO_OPC_SLTE_U, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "slte_u.sn", TILEPRO_OPC_SLTE_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slteb", TILEPRO_OPC_SLTEB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slteb.sn", TILEPRO_OPC_SLTEB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slteb_u", TILEPRO_OPC_SLTEB_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slteb_u.sn", TILEPRO_OPC_SLTEB_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slteh", TILEPRO_OPC_SLTEH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slteh.sn", TILEPRO_OPC_SLTEH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slteh_u", TILEPRO_OPC_SLTEH_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slteh_u.sn", TILEPRO_OPC_SLTEH_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slth", TILEPRO_OPC_SLTH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slth.sn", TILEPRO_OPC_SLTH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slth_u", TILEPRO_OPC_SLTH_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slth_u.sn", TILEPRO_OPC_SLTH_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slti", TILEPRO_OPC_SLTI, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } },
+ },
+ { "slti.sn", TILEPRO_OPC_SLTI_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "slti_u", TILEPRO_OPC_SLTI_U, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } },
+ },
+ { "slti_u.sn", TILEPRO_OPC_SLTI_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sltib", TILEPRO_OPC_SLTIB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sltib.sn", TILEPRO_OPC_SLTIB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sltib_u", TILEPRO_OPC_SLTIB_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sltib_u.sn", TILEPRO_OPC_SLTIB_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sltih", TILEPRO_OPC_SLTIH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sltih.sn", TILEPRO_OPC_SLTIH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sltih_u", TILEPRO_OPC_SLTIH_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sltih_u.sn", TILEPRO_OPC_SLTIH_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sne", TILEPRO_OPC_SNE, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "sne.sn", TILEPRO_OPC_SNE_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sneb", TILEPRO_OPC_SNEB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sneb.sn", TILEPRO_OPC_SNEB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sneh", TILEPRO_OPC_SNEH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sneh.sn", TILEPRO_OPC_SNEH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sra", TILEPRO_OPC_SRA, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "sra.sn", TILEPRO_OPC_SRA_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "srab", TILEPRO_OPC_SRAB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "srab.sn", TILEPRO_OPC_SRAB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "srah", TILEPRO_OPC_SRAH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "srah.sn", TILEPRO_OPC_SRAH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "srai", TILEPRO_OPC_SRAI, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } },
+ },
+ { "srai.sn", TILEPRO_OPC_SRAI_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sraib", TILEPRO_OPC_SRAIB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sraib.sn", TILEPRO_OPC_SRAIB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sraih", TILEPRO_OPC_SRAIH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sraih.sn", TILEPRO_OPC_SRAIH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sub", TILEPRO_OPC_SUB, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "sub.sn", TILEPRO_OPC_SUB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "subb", TILEPRO_OPC_SUBB, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "subb.sn", TILEPRO_OPC_SUBB_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "subbs_u", TILEPRO_OPC_SUBBS_U, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "subbs_u.sn", TILEPRO_OPC_SUBBS_U_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "subh", TILEPRO_OPC_SUBH, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "subh.sn", TILEPRO_OPC_SUBH_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "subhs", TILEPRO_OPC_SUBHS, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "subhs.sn", TILEPRO_OPC_SUBHS_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "subs", TILEPRO_OPC_SUBS, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "subs.sn", TILEPRO_OPC_SUBS_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sw", TILEPRO_OPC_SW, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 10, 17 }, { 0, }, { 0, }, { 15, 36 } },
+ },
+ { "swadd", TILEPRO_OPC_SWADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 24, 17, 37 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "swint0", TILEPRO_OPC_SWINT0, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "swint1", TILEPRO_OPC_SWINT1, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "swint2", TILEPRO_OPC_SWINT2, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "swint3", TILEPRO_OPC_SWINT3, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "tblidxb0", TILEPRO_OPC_TBLIDXB0, 0x5, 2, TREG_ZERO, 1,
+ { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } },
+ },
+ { "tblidxb0.sn", TILEPRO_OPC_TBLIDXB0_SN, 0x1, 2, TREG_SN, 1,
+ { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "tblidxb1", TILEPRO_OPC_TBLIDXB1, 0x5, 2, TREG_ZERO, 1,
+ { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } },
+ },
+ { "tblidxb1.sn", TILEPRO_OPC_TBLIDXB1_SN, 0x1, 2, TREG_SN, 1,
+ { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "tblidxb2", TILEPRO_OPC_TBLIDXB2, 0x5, 2, TREG_ZERO, 1,
+ { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } },
+ },
+ { "tblidxb2.sn", TILEPRO_OPC_TBLIDXB2_SN, 0x1, 2, TREG_SN, 1,
+ { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "tblidxb3", TILEPRO_OPC_TBLIDXB3, 0x5, 2, TREG_ZERO, 1,
+ { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } },
+ },
+ { "tblidxb3.sn", TILEPRO_OPC_TBLIDXB3_SN, 0x1, 2, TREG_SN, 1,
+ { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "tns", TILEPRO_OPC_TNS, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "tns.sn", TILEPRO_OPC_TNS_SN, 0x2, 2, TREG_SN, 1,
+ { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "wh64", TILEPRO_OPC_WH64, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "xor", TILEPRO_OPC_XOR, 0xf, 3, TREG_ZERO, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } },
+ },
+ { "xor.sn", TILEPRO_OPC_XOR_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "xori", TILEPRO_OPC_XORI, 0x3, 3, TREG_ZERO, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "xori.sn", TILEPRO_OPC_XORI_SN, 0x3, 3, TREG_SN, 1,
+ { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { NULL, TILEPRO_OPC_NONE, 0, 0, TREG_ZERO, 0, { { 0, } },
+ }
+};
+#define BITFIELD(start, size) ((start) | (((1 << (size)) - 1) << 6)) /*
+ * Header word of one node in the decode tables below.  `start` is OR-ed
+ * into the low bits and a mask of `size` one-bits is placed from bit 6
+ * upward; every use visible in decode_X0_fsm passes start < 64, so the
+ * two fields do not overlap.  Going by the "index N" markers in that
+ * table, a header is followed by 2^size entries: BITFIELD(22, 9) sits
+ * at index 0 and the next node starts at index 513 (1 + 512).
+ * NOTE(review): presumably `start` is the bit offset inside the
+ * instruction word and the mask selects which following entry to take;
+ * the decoder that consumes these words is not visible here - confirm.
+ */
+#define CHILD(array_index) (TILEPRO_OPC_NONE + (array_index)) /*
+ * Table entry that links to another node instead of naming an opcode:
+ * the value is TILEPRO_OPC_NONE plus the array index of the target
+ * node's BITFIELD header (e.g. CHILD(513) refers to the header marked
+ * "index 513").
+ * NOTE(review): presumably TILEPRO_OPC_NONE is the largest opcode value,
+ * so any entry above it can be told apart from a plain opcode - confirm
+ * against the opcode enum and the decoder.
+ */
+
+static const unsigned short decode_X0_fsm[1153] =
+{
+ BITFIELD(22, 9) /* index 0 */,
+ CHILD(513), CHILD(530), CHILD(547), CHILD(564), CHILD(596), CHILD(613),
+ CHILD(630), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(663), CHILD(680), CHILD(697),
+ CHILD(714), CHILD(746), CHILD(763), CHILD(780), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+ CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+ CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+ CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+ CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+ CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+ CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+ CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+ CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+ CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813),
+ CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(828), CHILD(828),
+ CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+ CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+ CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+ CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+ CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+ CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+ CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+ CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+ CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+ CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828),
+ CHILD(828), CHILD(828), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(873), CHILD(878), CHILD(883), CHILD(903), CHILD(908),
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(913),
+ CHILD(918), CHILD(923), CHILD(943), CHILD(948), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(953), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(988), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, CHILD(993), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(1076), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(18, 4) /* index 513 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB, TILEPRO_OPC_ADDH, TILEPRO_OPC_ADD,
+ TILEPRO_OPC_ADIFFB_U, TILEPRO_OPC_ADIFFH, TILEPRO_OPC_AND,
+ TILEPRO_OPC_AVGB_U, TILEPRO_OPC_AVGH, TILEPRO_OPC_CRC32_32,
+ TILEPRO_OPC_CRC32_8, TILEPRO_OPC_INTHB, TILEPRO_OPC_INTHH,
+ TILEPRO_OPC_INTLB, TILEPRO_OPC_INTLH, TILEPRO_OPC_MAXB_U,
+ BITFIELD(18, 4) /* index 530 */,
+ TILEPRO_OPC_MAXH, TILEPRO_OPC_MINB_U, TILEPRO_OPC_MINH, TILEPRO_OPC_MNZB,
+ TILEPRO_OPC_MNZH, TILEPRO_OPC_MNZ, TILEPRO_OPC_MULHHA_SS,
+ TILEPRO_OPC_MULHHA_SU, TILEPRO_OPC_MULHHA_UU, TILEPRO_OPC_MULHHSA_UU,
+ TILEPRO_OPC_MULHH_SS, TILEPRO_OPC_MULHH_SU, TILEPRO_OPC_MULHH_UU,
+ TILEPRO_OPC_MULHLA_SS, TILEPRO_OPC_MULHLA_SU, TILEPRO_OPC_MULHLA_US,
+ BITFIELD(18, 4) /* index 547 */,
+ TILEPRO_OPC_MULHLA_UU, TILEPRO_OPC_MULHLSA_UU, TILEPRO_OPC_MULHL_SS,
+ TILEPRO_OPC_MULHL_SU, TILEPRO_OPC_MULHL_US, TILEPRO_OPC_MULHL_UU,
+ TILEPRO_OPC_MULLLA_SS, TILEPRO_OPC_MULLLA_SU, TILEPRO_OPC_MULLLA_UU,
+ TILEPRO_OPC_MULLLSA_UU, TILEPRO_OPC_MULLL_SS, TILEPRO_OPC_MULLL_SU,
+ TILEPRO_OPC_MULLL_UU, TILEPRO_OPC_MVNZ, TILEPRO_OPC_MVZ, TILEPRO_OPC_MZB,
+ BITFIELD(18, 4) /* index 564 */,
+ TILEPRO_OPC_MZH, TILEPRO_OPC_MZ, TILEPRO_OPC_NOR, CHILD(581),
+ TILEPRO_OPC_PACKHB, TILEPRO_OPC_PACKLB, TILEPRO_OPC_RL, TILEPRO_OPC_S1A,
+ TILEPRO_OPC_S2A, TILEPRO_OPC_S3A, TILEPRO_OPC_SADAB_U, TILEPRO_OPC_SADAH,
+ TILEPRO_OPC_SADAH_U, TILEPRO_OPC_SADB_U, TILEPRO_OPC_SADH,
+ TILEPRO_OPC_SADH_U,
+ BITFIELD(12, 2) /* index 581 */,
+ TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(586),
+ BITFIELD(14, 2) /* index 586 */,
+ TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(591),
+ BITFIELD(16, 2) /* index 591 */,
+ TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE,
+ BITFIELD(18, 4) /* index 596 */,
+ TILEPRO_OPC_SEQB, TILEPRO_OPC_SEQH, TILEPRO_OPC_SEQ, TILEPRO_OPC_SHLB,
+ TILEPRO_OPC_SHLH, TILEPRO_OPC_SHL, TILEPRO_OPC_SHRB, TILEPRO_OPC_SHRH,
+ TILEPRO_OPC_SHR, TILEPRO_OPC_SLTB, TILEPRO_OPC_SLTB_U, TILEPRO_OPC_SLTEB,
+ TILEPRO_OPC_SLTEB_U, TILEPRO_OPC_SLTEH, TILEPRO_OPC_SLTEH_U,
+ TILEPRO_OPC_SLTE,
+ BITFIELD(18, 4) /* index 613 */,
+ TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLTH, TILEPRO_OPC_SLTH_U, TILEPRO_OPC_SLT,
+ TILEPRO_OPC_SLT_U, TILEPRO_OPC_SNEB, TILEPRO_OPC_SNEH, TILEPRO_OPC_SNE,
+ TILEPRO_OPC_SRAB, TILEPRO_OPC_SRAH, TILEPRO_OPC_SRA, TILEPRO_OPC_SUBB,
+ TILEPRO_OPC_SUBH, TILEPRO_OPC_SUB, TILEPRO_OPC_XOR, TILEPRO_OPC_DWORD_ALIGN,
+ BITFIELD(18, 3) /* index 630 */,
+ CHILD(639), CHILD(642), CHILD(645), CHILD(648), CHILD(651), CHILD(654),
+ CHILD(657), CHILD(660),
+ BITFIELD(21, 1) /* index 639 */,
+ TILEPRO_OPC_ADDS, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 642 */,
+ TILEPRO_OPC_SUBS, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 645 */,
+ TILEPRO_OPC_ADDBS_U, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 648 */,
+ TILEPRO_OPC_ADDHS, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 651 */,
+ TILEPRO_OPC_SUBBS_U, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 654 */,
+ TILEPRO_OPC_SUBHS, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 657 */,
+ TILEPRO_OPC_PACKHS, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 660 */,
+ TILEPRO_OPC_PACKBS_U, TILEPRO_OPC_NONE,
+ BITFIELD(18, 4) /* index 663 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB_SN, TILEPRO_OPC_ADDH_SN,
+ TILEPRO_OPC_ADD_SN, TILEPRO_OPC_ADIFFB_U_SN, TILEPRO_OPC_ADIFFH_SN,
+ TILEPRO_OPC_AND_SN, TILEPRO_OPC_AVGB_U_SN, TILEPRO_OPC_AVGH_SN,
+ TILEPRO_OPC_CRC32_32_SN, TILEPRO_OPC_CRC32_8_SN, TILEPRO_OPC_INTHB_SN,
+ TILEPRO_OPC_INTHH_SN, TILEPRO_OPC_INTLB_SN, TILEPRO_OPC_INTLH_SN,
+ TILEPRO_OPC_MAXB_U_SN,
+ BITFIELD(18, 4) /* index 680 */,
+ TILEPRO_OPC_MAXH_SN, TILEPRO_OPC_MINB_U_SN, TILEPRO_OPC_MINH_SN,
+ TILEPRO_OPC_MNZB_SN, TILEPRO_OPC_MNZH_SN, TILEPRO_OPC_MNZ_SN,
+ TILEPRO_OPC_MULHHA_SS_SN, TILEPRO_OPC_MULHHA_SU_SN,
+ TILEPRO_OPC_MULHHA_UU_SN, TILEPRO_OPC_MULHHSA_UU_SN,
+ TILEPRO_OPC_MULHH_SS_SN, TILEPRO_OPC_MULHH_SU_SN, TILEPRO_OPC_MULHH_UU_SN,
+ TILEPRO_OPC_MULHLA_SS_SN, TILEPRO_OPC_MULHLA_SU_SN,
+ TILEPRO_OPC_MULHLA_US_SN,
+ BITFIELD(18, 4) /* index 697 */,
+ TILEPRO_OPC_MULHLA_UU_SN, TILEPRO_OPC_MULHLSA_UU_SN,
+ TILEPRO_OPC_MULHL_SS_SN, TILEPRO_OPC_MULHL_SU_SN, TILEPRO_OPC_MULHL_US_SN,
+ TILEPRO_OPC_MULHL_UU_SN, TILEPRO_OPC_MULLLA_SS_SN, TILEPRO_OPC_MULLLA_SU_SN,
+ TILEPRO_OPC_MULLLA_UU_SN, TILEPRO_OPC_MULLLSA_UU_SN,
+ TILEPRO_OPC_MULLL_SS_SN, TILEPRO_OPC_MULLL_SU_SN, TILEPRO_OPC_MULLL_UU_SN,
+ TILEPRO_OPC_MVNZ_SN, TILEPRO_OPC_MVZ_SN, TILEPRO_OPC_MZB_SN,
+ BITFIELD(18, 4) /* index 714 */,
+ TILEPRO_OPC_MZH_SN, TILEPRO_OPC_MZ_SN, TILEPRO_OPC_NOR_SN, CHILD(731),
+ TILEPRO_OPC_PACKHB_SN, TILEPRO_OPC_PACKLB_SN, TILEPRO_OPC_RL_SN,
+ TILEPRO_OPC_S1A_SN, TILEPRO_OPC_S2A_SN, TILEPRO_OPC_S3A_SN,
+ TILEPRO_OPC_SADAB_U_SN, TILEPRO_OPC_SADAH_SN, TILEPRO_OPC_SADAH_U_SN,
+ TILEPRO_OPC_SADB_U_SN, TILEPRO_OPC_SADH_SN, TILEPRO_OPC_SADH_U_SN,
+ BITFIELD(12, 2) /* index 731 */,
+ TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(736),
+ BITFIELD(14, 2) /* index 736 */,
+ TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(741),
+ BITFIELD(16, 2) /* index 741 */,
+ TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN,
+ TILEPRO_OPC_MOVE_SN,
+ BITFIELD(18, 4) /* index 746 */,
+ TILEPRO_OPC_SEQB_SN, TILEPRO_OPC_SEQH_SN, TILEPRO_OPC_SEQ_SN,
+ TILEPRO_OPC_SHLB_SN, TILEPRO_OPC_SHLH_SN, TILEPRO_OPC_SHL_SN,
+ TILEPRO_OPC_SHRB_SN, TILEPRO_OPC_SHRH_SN, TILEPRO_OPC_SHR_SN,
+ TILEPRO_OPC_SLTB_SN, TILEPRO_OPC_SLTB_U_SN, TILEPRO_OPC_SLTEB_SN,
+ TILEPRO_OPC_SLTEB_U_SN, TILEPRO_OPC_SLTEH_SN, TILEPRO_OPC_SLTEH_U_SN,
+ TILEPRO_OPC_SLTE_SN,
+ BITFIELD(18, 4) /* index 763 */,
+ TILEPRO_OPC_SLTE_U_SN, TILEPRO_OPC_SLTH_SN, TILEPRO_OPC_SLTH_U_SN,
+ TILEPRO_OPC_SLT_SN, TILEPRO_OPC_SLT_U_SN, TILEPRO_OPC_SNEB_SN,
+ TILEPRO_OPC_SNEH_SN, TILEPRO_OPC_SNE_SN, TILEPRO_OPC_SRAB_SN,
+ TILEPRO_OPC_SRAH_SN, TILEPRO_OPC_SRA_SN, TILEPRO_OPC_SUBB_SN,
+ TILEPRO_OPC_SUBH_SN, TILEPRO_OPC_SUB_SN, TILEPRO_OPC_XOR_SN,
+ TILEPRO_OPC_DWORD_ALIGN_SN,
+ BITFIELD(18, 3) /* index 780 */,
+ CHILD(789), CHILD(792), CHILD(795), CHILD(798), CHILD(801), CHILD(804),
+ CHILD(807), CHILD(810),
+ BITFIELD(21, 1) /* index 789 */,
+ TILEPRO_OPC_ADDS_SN, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 792 */,
+ TILEPRO_OPC_SUBS_SN, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 795 */,
+ TILEPRO_OPC_ADDBS_U_SN, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 798 */,
+ TILEPRO_OPC_ADDHS_SN, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 801 */,
+ TILEPRO_OPC_SUBBS_U_SN, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 804 */,
+ TILEPRO_OPC_SUBHS_SN, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 807 */,
+ TILEPRO_OPC_PACKHS_SN, TILEPRO_OPC_NONE,
+ BITFIELD(21, 1) /* index 810 */,
+ TILEPRO_OPC_PACKBS_U_SN, TILEPRO_OPC_NONE,
+ BITFIELD(6, 2) /* index 813 */,
+ TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN,
+ CHILD(818),
+ BITFIELD(8, 2) /* index 818 */,
+ TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN,
+ CHILD(823),
+ BITFIELD(10, 2) /* index 823 */,
+ TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN,
+ TILEPRO_OPC_MOVELI_SN,
+ BITFIELD(6, 2) /* index 828 */,
+ TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(833),
+ BITFIELD(8, 2) /* index 833 */,
+ TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(838),
+ BITFIELD(10, 2) /* index 838 */,
+ TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_MOVELI,
+ BITFIELD(0, 2) /* index 843 */,
+ TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(848),
+ BITFIELD(2, 2) /* index 848 */,
+ TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(853),
+ BITFIELD(4, 2) /* index 853 */,
+ TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(858),
+ BITFIELD(6, 2) /* index 858 */,
+ TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(863),
+ BITFIELD(8, 2) /* index 863 */,
+ TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(868),
+ BITFIELD(10, 2) /* index 868 */,
+ TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_INFOL,
+ BITFIELD(20, 2) /* index 873 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB, TILEPRO_OPC_ADDIH, TILEPRO_OPC_ADDI,
+ BITFIELD(20, 2) /* index 878 */,
+ TILEPRO_OPC_MAXIB_U, TILEPRO_OPC_MAXIH, TILEPRO_OPC_MINIB_U,
+ TILEPRO_OPC_MINIH,
+ BITFIELD(20, 2) /* index 883 */,
+ CHILD(888), TILEPRO_OPC_SEQIB, TILEPRO_OPC_SEQIH, TILEPRO_OPC_SEQI,
+ BITFIELD(6, 2) /* index 888 */,
+ TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(893),
+ BITFIELD(8, 2) /* index 893 */,
+ TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(898),
+ BITFIELD(10, 2) /* index 898 */,
+ TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI,
+ BITFIELD(20, 2) /* index 903 */,
+ TILEPRO_OPC_SLTIB, TILEPRO_OPC_SLTIB_U, TILEPRO_OPC_SLTIH,
+ TILEPRO_OPC_SLTIH_U,
+ BITFIELD(20, 2) /* index 908 */,
+ TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(20, 2) /* index 913 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB_SN, TILEPRO_OPC_ADDIH_SN,
+ TILEPRO_OPC_ADDI_SN,
+ BITFIELD(20, 2) /* index 918 */,
+ TILEPRO_OPC_MAXIB_U_SN, TILEPRO_OPC_MAXIH_SN, TILEPRO_OPC_MINIB_U_SN,
+ TILEPRO_OPC_MINIH_SN,
+ BITFIELD(20, 2) /* index 923 */,
+ CHILD(928), TILEPRO_OPC_SEQIB_SN, TILEPRO_OPC_SEQIH_SN, TILEPRO_OPC_SEQI_SN,
+ BITFIELD(6, 2) /* index 928 */,
+ TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(933),
+ BITFIELD(8, 2) /* index 933 */,
+ TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(938),
+ BITFIELD(10, 2) /* index 938 */,
+ TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN,
+ TILEPRO_OPC_MOVEI_SN,
+ BITFIELD(20, 2) /* index 943 */,
+ TILEPRO_OPC_SLTIB_SN, TILEPRO_OPC_SLTIB_U_SN, TILEPRO_OPC_SLTIH_SN,
+ TILEPRO_OPC_SLTIH_U_SN,
+ BITFIELD(20, 2) /* index 948 */,
+ TILEPRO_OPC_SLTI_SN, TILEPRO_OPC_SLTI_U_SN, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE,
+ BITFIELD(20, 2) /* index 953 */,
+ TILEPRO_OPC_NONE, CHILD(958), TILEPRO_OPC_XORI, TILEPRO_OPC_NONE,
+ BITFIELD(0, 2) /* index 958 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(963),
+ BITFIELD(2, 2) /* index 963 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(968),
+ BITFIELD(4, 2) /* index 968 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(973),
+ BITFIELD(6, 2) /* index 973 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(978),
+ BITFIELD(8, 2) /* index 978 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(983),
+ BITFIELD(10, 2) /* index 983 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO,
+ BITFIELD(20, 2) /* index 988 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_ANDI_SN, TILEPRO_OPC_XORI_SN,
+ TILEPRO_OPC_NONE,
+ BITFIELD(17, 5) /* index 993 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_RLI, TILEPRO_OPC_SHLIB, TILEPRO_OPC_SHLIH,
+ TILEPRO_OPC_SHLI, TILEPRO_OPC_SHRIB, TILEPRO_OPC_SHRIH, TILEPRO_OPC_SHRI,
+ TILEPRO_OPC_SRAIB, TILEPRO_OPC_SRAIH, TILEPRO_OPC_SRAI, CHILD(1026),
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(12, 4) /* index 1026 */,
+ TILEPRO_OPC_NONE, CHILD(1043), CHILD(1046), CHILD(1049), CHILD(1052),
+ CHILD(1055), CHILD(1058), CHILD(1061), CHILD(1064), CHILD(1067),
+ CHILD(1070), CHILD(1073), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1043 */,
+ TILEPRO_OPC_BITX, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1046 */,
+ TILEPRO_OPC_BYTEX, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1049 */,
+ TILEPRO_OPC_CLZ, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1052 */,
+ TILEPRO_OPC_CTZ, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1055 */,
+ TILEPRO_OPC_FNOP, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1058 */,
+ TILEPRO_OPC_NOP, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1061 */,
+ TILEPRO_OPC_PCNT, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1064 */,
+ TILEPRO_OPC_TBLIDXB0, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1067 */,
+ TILEPRO_OPC_TBLIDXB1, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1070 */,
+ TILEPRO_OPC_TBLIDXB2, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1073 */,
+ TILEPRO_OPC_TBLIDXB3, TILEPRO_OPC_NONE,
+ BITFIELD(17, 5) /* index 1076 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_RLI_SN, TILEPRO_OPC_SHLIB_SN,
+ TILEPRO_OPC_SHLIH_SN, TILEPRO_OPC_SHLI_SN, TILEPRO_OPC_SHRIB_SN,
+ TILEPRO_OPC_SHRIH_SN, TILEPRO_OPC_SHRI_SN, TILEPRO_OPC_SRAIB_SN,
+ TILEPRO_OPC_SRAIH_SN, TILEPRO_OPC_SRAI_SN, CHILD(1109), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(12, 4) /* index 1109 */,
+ TILEPRO_OPC_NONE, CHILD(1126), CHILD(1129), CHILD(1132), CHILD(1135),
+ CHILD(1055), CHILD(1058), CHILD(1138), CHILD(1141), CHILD(1144),
+ CHILD(1147), CHILD(1150), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1126 */,
+ TILEPRO_OPC_BITX_SN, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1129 */,
+ TILEPRO_OPC_BYTEX_SN, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1132 */,
+ TILEPRO_OPC_CLZ_SN, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1135 */,
+ TILEPRO_OPC_CTZ_SN, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1138 */,
+ TILEPRO_OPC_PCNT_SN, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1141 */,
+ TILEPRO_OPC_TBLIDXB0_SN, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1144 */,
+ TILEPRO_OPC_TBLIDXB1_SN, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1147 */,
+ TILEPRO_OPC_TBLIDXB2_SN, TILEPRO_OPC_NONE,
+ BITFIELD(16, 1) /* index 1150 */,
+ TILEPRO_OPC_TBLIDXB3_SN, TILEPRO_OPC_NONE,
+};
+
+static const unsigned short decode_X1_fsm[1540] =
+{
+ BITFIELD(54, 9) /* index 0 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ CHILD(513), CHILD(561), CHILD(594), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(641),
+ CHILD(689), CHILD(722), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(766),
+ CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766),
+ CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766),
+ CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766),
+ CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766),
+ CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766),
+ CHILD(766), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781),
+ CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781),
+ CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781),
+ CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781),
+ CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781),
+ CHILD(781), CHILD(781), CHILD(781), CHILD(796), CHILD(796), CHILD(796),
+ CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796),
+ CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796),
+ CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796),
+ CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796),
+ CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(826),
+ CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826),
+ CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826),
+ CHILD(826), CHILD(826), CHILD(826), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843),
+ CHILD(843), CHILD(860), CHILD(899), CHILD(923), CHILD(932),
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ CHILD(941), CHILD(950), CHILD(974), CHILD(983), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM,
+ TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, CHILD(992),
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(1334),
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J,
+ TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL,
+ TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(49, 5) /* index 513 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB, TILEPRO_OPC_ADDH, TILEPRO_OPC_ADD,
+ TILEPRO_OPC_AND, TILEPRO_OPC_INTHB, TILEPRO_OPC_INTHH, TILEPRO_OPC_INTLB,
+ TILEPRO_OPC_INTLH, TILEPRO_OPC_JALRP, TILEPRO_OPC_JALR, TILEPRO_OPC_JRP,
+ TILEPRO_OPC_JR, TILEPRO_OPC_LNK, TILEPRO_OPC_MAXB_U, TILEPRO_OPC_MAXH,
+ TILEPRO_OPC_MINB_U, TILEPRO_OPC_MINH, TILEPRO_OPC_MNZB, TILEPRO_OPC_MNZH,
+ TILEPRO_OPC_MNZ, TILEPRO_OPC_MZB, TILEPRO_OPC_MZH, TILEPRO_OPC_MZ,
+ TILEPRO_OPC_NOR, CHILD(546), TILEPRO_OPC_PACKHB, TILEPRO_OPC_PACKLB,
+ TILEPRO_OPC_RL, TILEPRO_OPC_S1A, TILEPRO_OPC_S2A, TILEPRO_OPC_S3A,
+ BITFIELD(43, 2) /* index 546 */,
+ TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(551),
+ BITFIELD(45, 2) /* index 551 */,
+ TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(556),
+ BITFIELD(47, 2) /* index 556 */,
+ TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE,
+ BITFIELD(49, 5) /* index 561 */,
+ TILEPRO_OPC_SB, TILEPRO_OPC_SEQB, TILEPRO_OPC_SEQH, TILEPRO_OPC_SEQ,
+ TILEPRO_OPC_SHLB, TILEPRO_OPC_SHLH, TILEPRO_OPC_SHL, TILEPRO_OPC_SHRB,
+ TILEPRO_OPC_SHRH, TILEPRO_OPC_SHR, TILEPRO_OPC_SH, TILEPRO_OPC_SLTB,
+ TILEPRO_OPC_SLTB_U, TILEPRO_OPC_SLTEB, TILEPRO_OPC_SLTEB_U,
+ TILEPRO_OPC_SLTEH, TILEPRO_OPC_SLTEH_U, TILEPRO_OPC_SLTE,
+ TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLTH, TILEPRO_OPC_SLTH_U, TILEPRO_OPC_SLT,
+ TILEPRO_OPC_SLT_U, TILEPRO_OPC_SNEB, TILEPRO_OPC_SNEH, TILEPRO_OPC_SNE,
+ TILEPRO_OPC_SRAB, TILEPRO_OPC_SRAH, TILEPRO_OPC_SRA, TILEPRO_OPC_SUBB,
+ TILEPRO_OPC_SUBH, TILEPRO_OPC_SUB,
+ BITFIELD(49, 4) /* index 594 */,
+ CHILD(611), CHILD(614), CHILD(617), CHILD(620), CHILD(623), CHILD(626),
+ CHILD(629), CHILD(632), CHILD(635), CHILD(638), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 611 */,
+ TILEPRO_OPC_SW, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 614 */,
+ TILEPRO_OPC_XOR, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 617 */,
+ TILEPRO_OPC_ADDS, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 620 */,
+ TILEPRO_OPC_SUBS, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 623 */,
+ TILEPRO_OPC_ADDBS_U, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 626 */,
+ TILEPRO_OPC_ADDHS, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 629 */,
+ TILEPRO_OPC_SUBBS_U, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 632 */,
+ TILEPRO_OPC_SUBHS, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 635 */,
+ TILEPRO_OPC_PACKHS, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 638 */,
+ TILEPRO_OPC_PACKBS_U, TILEPRO_OPC_NONE,
+ BITFIELD(49, 5) /* index 641 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB_SN, TILEPRO_OPC_ADDH_SN,
+ TILEPRO_OPC_ADD_SN, TILEPRO_OPC_AND_SN, TILEPRO_OPC_INTHB_SN,
+ TILEPRO_OPC_INTHH_SN, TILEPRO_OPC_INTLB_SN, TILEPRO_OPC_INTLH_SN,
+ TILEPRO_OPC_JALRP, TILEPRO_OPC_JALR, TILEPRO_OPC_JRP, TILEPRO_OPC_JR,
+ TILEPRO_OPC_LNK_SN, TILEPRO_OPC_MAXB_U_SN, TILEPRO_OPC_MAXH_SN,
+ TILEPRO_OPC_MINB_U_SN, TILEPRO_OPC_MINH_SN, TILEPRO_OPC_MNZB_SN,
+ TILEPRO_OPC_MNZH_SN, TILEPRO_OPC_MNZ_SN, TILEPRO_OPC_MZB_SN,
+ TILEPRO_OPC_MZH_SN, TILEPRO_OPC_MZ_SN, TILEPRO_OPC_NOR_SN, CHILD(674),
+ TILEPRO_OPC_PACKHB_SN, TILEPRO_OPC_PACKLB_SN, TILEPRO_OPC_RL_SN,
+ TILEPRO_OPC_S1A_SN, TILEPRO_OPC_S2A_SN, TILEPRO_OPC_S3A_SN,
+ BITFIELD(43, 2) /* index 674 */,
+ TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(679),
+ BITFIELD(45, 2) /* index 679 */,
+ TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(684),
+ BITFIELD(47, 2) /* index 684 */,
+ TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN,
+ TILEPRO_OPC_MOVE_SN,
+ BITFIELD(49, 5) /* index 689 */,
+ TILEPRO_OPC_SB, TILEPRO_OPC_SEQB_SN, TILEPRO_OPC_SEQH_SN,
+ TILEPRO_OPC_SEQ_SN, TILEPRO_OPC_SHLB_SN, TILEPRO_OPC_SHLH_SN,
+ TILEPRO_OPC_SHL_SN, TILEPRO_OPC_SHRB_SN, TILEPRO_OPC_SHRH_SN,
+ TILEPRO_OPC_SHR_SN, TILEPRO_OPC_SH, TILEPRO_OPC_SLTB_SN,
+ TILEPRO_OPC_SLTB_U_SN, TILEPRO_OPC_SLTEB_SN, TILEPRO_OPC_SLTEB_U_SN,
+ TILEPRO_OPC_SLTEH_SN, TILEPRO_OPC_SLTEH_U_SN, TILEPRO_OPC_SLTE_SN,
+ TILEPRO_OPC_SLTE_U_SN, TILEPRO_OPC_SLTH_SN, TILEPRO_OPC_SLTH_U_SN,
+ TILEPRO_OPC_SLT_SN, TILEPRO_OPC_SLT_U_SN, TILEPRO_OPC_SNEB_SN,
+ TILEPRO_OPC_SNEH_SN, TILEPRO_OPC_SNE_SN, TILEPRO_OPC_SRAB_SN,
+ TILEPRO_OPC_SRAH_SN, TILEPRO_OPC_SRA_SN, TILEPRO_OPC_SUBB_SN,
+ TILEPRO_OPC_SUBH_SN, TILEPRO_OPC_SUB_SN,
+ BITFIELD(49, 4) /* index 722 */,
+ CHILD(611), CHILD(739), CHILD(742), CHILD(745), CHILD(748), CHILD(751),
+ CHILD(754), CHILD(757), CHILD(760), CHILD(763), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 739 */,
+ TILEPRO_OPC_XOR_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 742 */,
+ TILEPRO_OPC_ADDS_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 745 */,
+ TILEPRO_OPC_SUBS_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 748 */,
+ TILEPRO_OPC_ADDBS_U_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 751 */,
+ TILEPRO_OPC_ADDHS_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 754 */,
+ TILEPRO_OPC_SUBBS_U_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 757 */,
+ TILEPRO_OPC_SUBHS_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 760 */,
+ TILEPRO_OPC_PACKHS_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 763 */,
+ TILEPRO_OPC_PACKBS_U_SN, TILEPRO_OPC_NONE,
+ BITFIELD(37, 2) /* index 766 */,
+ TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN,
+ CHILD(771),
+ BITFIELD(39, 2) /* index 771 */,
+ TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN,
+ CHILD(776),
+ BITFIELD(41, 2) /* index 776 */,
+ TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN,
+ TILEPRO_OPC_MOVELI_SN,
+ BITFIELD(37, 2) /* index 781 */,
+ TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(786),
+ BITFIELD(39, 2) /* index 786 */,
+ TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(791),
+ BITFIELD(41, 2) /* index 791 */,
+ TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_MOVELI,
+ BITFIELD(31, 2) /* index 796 */,
+ TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(801),
+ BITFIELD(33, 2) /* index 801 */,
+ TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(806),
+ BITFIELD(35, 2) /* index 806 */,
+ TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(811),
+ BITFIELD(37, 2) /* index 811 */,
+ TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(816),
+ BITFIELD(39, 2) /* index 816 */,
+ TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(821),
+ BITFIELD(41, 2) /* index 821 */,
+ TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_INFOL,
+ BITFIELD(31, 4) /* index 826 */,
+ TILEPRO_OPC_BZ, TILEPRO_OPC_BZT, TILEPRO_OPC_BNZ, TILEPRO_OPC_BNZT,
+ TILEPRO_OPC_BGZ, TILEPRO_OPC_BGZT, TILEPRO_OPC_BGEZ, TILEPRO_OPC_BGEZT,
+ TILEPRO_OPC_BLZ, TILEPRO_OPC_BLZT, TILEPRO_OPC_BLEZ, TILEPRO_OPC_BLEZT,
+ TILEPRO_OPC_BBS, TILEPRO_OPC_BBST, TILEPRO_OPC_BBNS, TILEPRO_OPC_BBNST,
+ BITFIELD(31, 4) /* index 843 */,
+ TILEPRO_OPC_BZ_SN, TILEPRO_OPC_BZT_SN, TILEPRO_OPC_BNZ_SN,
+ TILEPRO_OPC_BNZT_SN, TILEPRO_OPC_BGZ_SN, TILEPRO_OPC_BGZT_SN,
+ TILEPRO_OPC_BGEZ_SN, TILEPRO_OPC_BGEZT_SN, TILEPRO_OPC_BLZ_SN,
+ TILEPRO_OPC_BLZT_SN, TILEPRO_OPC_BLEZ_SN, TILEPRO_OPC_BLEZT_SN,
+ TILEPRO_OPC_BBS_SN, TILEPRO_OPC_BBST_SN, TILEPRO_OPC_BBNS_SN,
+ TILEPRO_OPC_BBNST_SN,
+ BITFIELD(51, 3) /* index 860 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB, TILEPRO_OPC_ADDIH, TILEPRO_OPC_ADDI,
+ CHILD(869), TILEPRO_OPC_MAXIB_U, TILEPRO_OPC_MAXIH, TILEPRO_OPC_MFSPR,
+ BITFIELD(31, 2) /* index 869 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(874),
+ BITFIELD(33, 2) /* index 874 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(879),
+ BITFIELD(35, 2) /* index 879 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(884),
+ BITFIELD(37, 2) /* index 884 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(889),
+ BITFIELD(39, 2) /* index 889 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(894),
+ BITFIELD(41, 2) /* index 894 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO,
+ BITFIELD(51, 3) /* index 899 */,
+ TILEPRO_OPC_MINIB_U, TILEPRO_OPC_MINIH, TILEPRO_OPC_MTSPR, CHILD(908),
+ TILEPRO_OPC_SEQIB, TILEPRO_OPC_SEQIH, TILEPRO_OPC_SEQI, TILEPRO_OPC_SLTIB,
+ BITFIELD(37, 2) /* index 908 */,
+ TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(913),
+ BITFIELD(39, 2) /* index 913 */,
+ TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(918),
+ BITFIELD(41, 2) /* index 918 */,
+ TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI,
+ BITFIELD(51, 3) /* index 923 */,
+ TILEPRO_OPC_SLTIB_U, TILEPRO_OPC_SLTIH, TILEPRO_OPC_SLTIH_U,
+ TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, TILEPRO_OPC_XORI, TILEPRO_OPC_LBADD,
+ TILEPRO_OPC_LBADD_U,
+ BITFIELD(51, 3) /* index 932 */,
+ TILEPRO_OPC_LHADD, TILEPRO_OPC_LHADD_U, TILEPRO_OPC_LWADD,
+ TILEPRO_OPC_LWADD_NA, TILEPRO_OPC_SBADD, TILEPRO_OPC_SHADD,
+ TILEPRO_OPC_SWADD, TILEPRO_OPC_NONE,
+ BITFIELD(51, 3) /* index 941 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB_SN, TILEPRO_OPC_ADDIH_SN,
+ TILEPRO_OPC_ADDI_SN, TILEPRO_OPC_ANDI_SN, TILEPRO_OPC_MAXIB_U_SN,
+ TILEPRO_OPC_MAXIH_SN, TILEPRO_OPC_MFSPR,
+ BITFIELD(51, 3) /* index 950 */,
+ TILEPRO_OPC_MINIB_U_SN, TILEPRO_OPC_MINIH_SN, TILEPRO_OPC_MTSPR, CHILD(959),
+ TILEPRO_OPC_SEQIB_SN, TILEPRO_OPC_SEQIH_SN, TILEPRO_OPC_SEQI_SN,
+ TILEPRO_OPC_SLTIB_SN,
+ BITFIELD(37, 2) /* index 959 */,
+ TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(964),
+ BITFIELD(39, 2) /* index 964 */,
+ TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(969),
+ BITFIELD(41, 2) /* index 969 */,
+ TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN,
+ TILEPRO_OPC_MOVEI_SN,
+ BITFIELD(51, 3) /* index 974 */,
+ TILEPRO_OPC_SLTIB_U_SN, TILEPRO_OPC_SLTIH_SN, TILEPRO_OPC_SLTIH_U_SN,
+ TILEPRO_OPC_SLTI_SN, TILEPRO_OPC_SLTI_U_SN, TILEPRO_OPC_XORI_SN,
+ TILEPRO_OPC_LBADD_SN, TILEPRO_OPC_LBADD_U_SN,
+ BITFIELD(51, 3) /* index 983 */,
+ TILEPRO_OPC_LHADD_SN, TILEPRO_OPC_LHADD_U_SN, TILEPRO_OPC_LWADD_SN,
+ TILEPRO_OPC_LWADD_NA_SN, TILEPRO_OPC_SBADD, TILEPRO_OPC_SHADD,
+ TILEPRO_OPC_SWADD, TILEPRO_OPC_NONE,
+ BITFIELD(46, 7) /* index 992 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ CHILD(1121), CHILD(1121), CHILD(1121), CHILD(1121), CHILD(1124),
+ CHILD(1124), CHILD(1124), CHILD(1124), CHILD(1127), CHILD(1127),
+ CHILD(1127), CHILD(1127), CHILD(1130), CHILD(1130), CHILD(1130),
+ CHILD(1130), CHILD(1133), CHILD(1133), CHILD(1133), CHILD(1133),
+ CHILD(1136), CHILD(1136), CHILD(1136), CHILD(1136), CHILD(1139),
+ CHILD(1139), CHILD(1139), CHILD(1139), CHILD(1142), CHILD(1142),
+ CHILD(1142), CHILD(1142), CHILD(1145), CHILD(1145), CHILD(1145),
+ CHILD(1145), CHILD(1148), CHILD(1148), CHILD(1148), CHILD(1148),
+ CHILD(1151), CHILD(1242), CHILD(1290), CHILD(1323), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1121 */,
+ TILEPRO_OPC_RLI, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1124 */,
+ TILEPRO_OPC_SHLIB, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1127 */,
+ TILEPRO_OPC_SHLIH, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1130 */,
+ TILEPRO_OPC_SHLI, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1133 */,
+ TILEPRO_OPC_SHRIB, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1136 */,
+ TILEPRO_OPC_SHRIH, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1139 */,
+ TILEPRO_OPC_SHRI, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1142 */,
+ TILEPRO_OPC_SRAIB, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1145 */,
+ TILEPRO_OPC_SRAIH, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1148 */,
+ TILEPRO_OPC_SRAI, TILEPRO_OPC_NONE,
+ BITFIELD(43, 3) /* index 1151 */,
+ TILEPRO_OPC_NONE, CHILD(1160), CHILD(1163), CHILD(1166), CHILD(1169),
+ CHILD(1172), CHILD(1175), CHILD(1178),
+ BITFIELD(53, 1) /* index 1160 */,
+ TILEPRO_OPC_DRAIN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1163 */,
+ TILEPRO_OPC_DTLBPR, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1166 */,
+ TILEPRO_OPC_FINV, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1169 */,
+ TILEPRO_OPC_FLUSH, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1172 */,
+ TILEPRO_OPC_FNOP, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1175 */,
+ TILEPRO_OPC_ICOH, TILEPRO_OPC_NONE,
+ BITFIELD(31, 2) /* index 1178 */,
+ CHILD(1183), CHILD(1211), CHILD(1239), CHILD(1239),
+ BITFIELD(53, 1) /* index 1183 */,
+ CHILD(1186), TILEPRO_OPC_NONE,
+ BITFIELD(33, 2) /* index 1186 */,
+ TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, CHILD(1191),
+ BITFIELD(35, 2) /* index 1191 */,
+ TILEPRO_OPC_ILL, CHILD(1196), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL,
+ BITFIELD(37, 2) /* index 1196 */,
+ TILEPRO_OPC_ILL, CHILD(1201), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL,
+ BITFIELD(39, 2) /* index 1201 */,
+ TILEPRO_OPC_ILL, CHILD(1206), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL,
+ BITFIELD(41, 2) /* index 1206 */,
+ TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_BPT, TILEPRO_OPC_ILL,
+ BITFIELD(53, 1) /* index 1211 */,
+ CHILD(1214), TILEPRO_OPC_NONE,
+ BITFIELD(33, 2) /* index 1214 */,
+ TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, CHILD(1219),
+ BITFIELD(35, 2) /* index 1219 */,
+ TILEPRO_OPC_ILL, CHILD(1224), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL,
+ BITFIELD(37, 2) /* index 1224 */,
+ TILEPRO_OPC_ILL, CHILD(1229), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL,
+ BITFIELD(39, 2) /* index 1229 */,
+ TILEPRO_OPC_ILL, CHILD(1234), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL,
+ BITFIELD(41, 2) /* index 1234 */,
+ TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_RAISE, TILEPRO_OPC_ILL,
+ BITFIELD(53, 1) /* index 1239 */,
+ TILEPRO_OPC_ILL, TILEPRO_OPC_NONE,
+ BITFIELD(43, 3) /* index 1242 */,
+ CHILD(1251), CHILD(1254), CHILD(1257), CHILD(1275), CHILD(1278),
+ CHILD(1281), CHILD(1284), CHILD(1287),
+ BITFIELD(53, 1) /* index 1251 */,
+ TILEPRO_OPC_INV, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1254 */,
+ TILEPRO_OPC_IRET, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1257 */,
+ CHILD(1260), TILEPRO_OPC_NONE,
+ BITFIELD(31, 2) /* index 1260 */,
+ TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(1265),
+ BITFIELD(33, 2) /* index 1265 */,
+ TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(1270),
+ BITFIELD(35, 2) /* index 1270 */,
+ TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_PREFETCH,
+ BITFIELD(53, 1) /* index 1275 */,
+ TILEPRO_OPC_LB_U, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1278 */,
+ TILEPRO_OPC_LH, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1281 */,
+ TILEPRO_OPC_LH_U, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1284 */,
+ TILEPRO_OPC_LW, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1287 */,
+ TILEPRO_OPC_MF, TILEPRO_OPC_NONE,
+ BITFIELD(43, 3) /* index 1290 */,
+ CHILD(1299), CHILD(1302), CHILD(1305), CHILD(1308), CHILD(1311),
+ CHILD(1314), CHILD(1317), CHILD(1320),
+ BITFIELD(53, 1) /* index 1299 */,
+ TILEPRO_OPC_NAP, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1302 */,
+ TILEPRO_OPC_NOP, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1305 */,
+ TILEPRO_OPC_SWINT0, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1308 */,
+ TILEPRO_OPC_SWINT1, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1311 */,
+ TILEPRO_OPC_SWINT2, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1314 */,
+ TILEPRO_OPC_SWINT3, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1317 */,
+ TILEPRO_OPC_TNS, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1320 */,
+ TILEPRO_OPC_WH64, TILEPRO_OPC_NONE,
+ BITFIELD(43, 2) /* index 1323 */,
+ CHILD(1328), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(45, 1) /* index 1328 */,
+ CHILD(1331), TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1331 */,
+ TILEPRO_OPC_LW_NA, TILEPRO_OPC_NONE,
+ BITFIELD(46, 7) /* index 1334 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ CHILD(1463), CHILD(1463), CHILD(1463), CHILD(1463), CHILD(1466),
+ CHILD(1466), CHILD(1466), CHILD(1466), CHILD(1469), CHILD(1469),
+ CHILD(1469), CHILD(1469), CHILD(1472), CHILD(1472), CHILD(1472),
+ CHILD(1472), CHILD(1475), CHILD(1475), CHILD(1475), CHILD(1475),
+ CHILD(1478), CHILD(1478), CHILD(1478), CHILD(1478), CHILD(1481),
+ CHILD(1481), CHILD(1481), CHILD(1481), CHILD(1484), CHILD(1484),
+ CHILD(1484), CHILD(1484), CHILD(1487), CHILD(1487), CHILD(1487),
+ CHILD(1487), CHILD(1490), CHILD(1490), CHILD(1490), CHILD(1490),
+ CHILD(1151), CHILD(1493), CHILD(1517), CHILD(1529), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1463 */,
+ TILEPRO_OPC_RLI_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1466 */,
+ TILEPRO_OPC_SHLIB_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1469 */,
+ TILEPRO_OPC_SHLIH_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1472 */,
+ TILEPRO_OPC_SHLI_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1475 */,
+ TILEPRO_OPC_SHRIB_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1478 */,
+ TILEPRO_OPC_SHRIH_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1481 */,
+ TILEPRO_OPC_SHRI_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1484 */,
+ TILEPRO_OPC_SRAIB_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1487 */,
+ TILEPRO_OPC_SRAIH_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1490 */,
+ TILEPRO_OPC_SRAI_SN, TILEPRO_OPC_NONE,
+ BITFIELD(43, 3) /* index 1493 */,
+ CHILD(1251), CHILD(1254), CHILD(1502), CHILD(1505), CHILD(1508),
+ CHILD(1511), CHILD(1514), CHILD(1287),
+ BITFIELD(53, 1) /* index 1502 */,
+ TILEPRO_OPC_LB_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1505 */,
+ TILEPRO_OPC_LB_U_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1508 */,
+ TILEPRO_OPC_LH_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1511 */,
+ TILEPRO_OPC_LH_U_SN, TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1514 */,
+ TILEPRO_OPC_LW_SN, TILEPRO_OPC_NONE,
+ BITFIELD(43, 3) /* index 1517 */,
+ CHILD(1299), CHILD(1302), CHILD(1305), CHILD(1308), CHILD(1311),
+ CHILD(1314), CHILD(1526), CHILD(1320),
+ BITFIELD(53, 1) /* index 1526 */,
+ TILEPRO_OPC_TNS_SN, TILEPRO_OPC_NONE,
+ BITFIELD(43, 2) /* index 1529 */,
+ CHILD(1534), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(45, 1) /* index 1534 */,
+ CHILD(1537), TILEPRO_OPC_NONE,
+ BITFIELD(53, 1) /* index 1537 */,
+ TILEPRO_OPC_LW_NA_SN, TILEPRO_OPC_NONE,
+};
+
+static const unsigned short decode_Y0_fsm[168] =
+{ /* Decode tree for the Y0 table, listed third in
+   * tilepro_bundle_decoder_fsms[] and walked by find_opcode().
+   *
+   * Layout, as read by find_opcode(): a node starts with one BITFIELD
+   * entry whose low 6 bits give a right-shift count and whose remaining
+   * bits give a mask; the masked field selects one of the slots that
+   * follow.  A slot no greater than TILEPRO_OPC_NONE is returned as an
+   * index into tilepro_opcodes[]; any other slot, less TILEPRO_OPC_NONE,
+   * is the array index of the next node.  BITFIELD and CHILD are defined
+   * outside this view; presumably BITFIELD(start, width) packs the shift
+   * and mask and CHILD(n) yields TILEPRO_OPC_NONE + n -- TODO confirm
+   * against the macro definitions.
+   *
+   * The "index N" comments record each node's array position and are what
+   * the CHILD(N) arguments refer to, so entries cannot be added or removed
+   * without renumbering both.
+   */
+ BITFIELD(27, 4) /* index 0 */,
+ TILEPRO_OPC_NONE, CHILD(17), CHILD(22), CHILD(27), CHILD(47), CHILD(52),
+ CHILD(57), CHILD(62), CHILD(67), TILEPRO_OPC_ADDI, CHILD(72), CHILD(102),
+ TILEPRO_OPC_SEQI, CHILD(117), TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U,
+ BITFIELD(18, 2) /* index 17 */,
+ TILEPRO_OPC_ADD, TILEPRO_OPC_S1A, TILEPRO_OPC_S2A, TILEPRO_OPC_SUB,
+ BITFIELD(18, 2) /* index 22 */,
+ TILEPRO_OPC_MNZ, TILEPRO_OPC_MVNZ, TILEPRO_OPC_MVZ, TILEPRO_OPC_MZ,
+ BITFIELD(18, 2) /* index 27 */,
+ TILEPRO_OPC_AND, TILEPRO_OPC_NOR, CHILD(32), TILEPRO_OPC_XOR,
+ BITFIELD(12, 2) /* index 32 */, /* nodes 32/37/42: bits 12..17 all set => MOVE, otherwise OR */
+ TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(37),
+ BITFIELD(14, 2) /* index 37 */,
+ TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(42),
+ BITFIELD(16, 2) /* index 42 */,
+ TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE,
+ BITFIELD(18, 2) /* index 47 */,
+ TILEPRO_OPC_RL, TILEPRO_OPC_SHL, TILEPRO_OPC_SHR, TILEPRO_OPC_SRA,
+ BITFIELD(18, 2) /* index 52 */,
+ TILEPRO_OPC_SLTE, TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLT, TILEPRO_OPC_SLT_U,
+ BITFIELD(18, 2) /* index 57 */,
+ TILEPRO_OPC_MULHLSA_UU, TILEPRO_OPC_S3A, TILEPRO_OPC_SEQ, TILEPRO_OPC_SNE,
+ BITFIELD(18, 2) /* index 62 */,
+ TILEPRO_OPC_MULHH_SS, TILEPRO_OPC_MULHH_UU, TILEPRO_OPC_MULLL_SS,
+ TILEPRO_OPC_MULLL_UU,
+ BITFIELD(18, 2) /* index 67 */,
+ TILEPRO_OPC_MULHHA_SS, TILEPRO_OPC_MULHHA_UU, TILEPRO_OPC_MULLLA_SS,
+ TILEPRO_OPC_MULLLA_UU,
+ BITFIELD(0, 2) /* index 72 */, /* nodes 72..97: bits 0..11 all set => INFO, otherwise ANDI */
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(77),
+ BITFIELD(2, 2) /* index 77 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(82),
+ BITFIELD(4, 2) /* index 82 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(87),
+ BITFIELD(6, 2) /* index 87 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(92),
+ BITFIELD(8, 2) /* index 92 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(97),
+ BITFIELD(10, 2) /* index 97 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO,
+ BITFIELD(6, 2) /* index 102 */, /* nodes 102/107/112: bits 6..11 all set => MOVEI, otherwise ORI */
+ TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(107),
+ BITFIELD(8, 2) /* index 107 */,
+ TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(112),
+ BITFIELD(10, 2) /* index 112 */,
+ TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI,
+ BITFIELD(15, 5) /* index 117 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_RLI, TILEPRO_OPC_RLI, TILEPRO_OPC_RLI, TILEPRO_OPC_RLI,
+ TILEPRO_OPC_SHLI, TILEPRO_OPC_SHLI, TILEPRO_OPC_SHLI, TILEPRO_OPC_SHLI,
+ TILEPRO_OPC_SHRI, TILEPRO_OPC_SHRI, TILEPRO_OPC_SHRI, TILEPRO_OPC_SHRI,
+ TILEPRO_OPC_SRAI, TILEPRO_OPC_SRAI, TILEPRO_OPC_SRAI, TILEPRO_OPC_SRAI,
+ CHILD(150), CHILD(159), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(12, 3) /* index 150 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_BITX, TILEPRO_OPC_BYTEX, TILEPRO_OPC_CLZ,
+ TILEPRO_OPC_CTZ, TILEPRO_OPC_FNOP, TILEPRO_OPC_NOP, TILEPRO_OPC_PCNT,
+ BITFIELD(12, 3) /* index 159 */,
+ TILEPRO_OPC_TBLIDXB0, TILEPRO_OPC_TBLIDXB1, TILEPRO_OPC_TBLIDXB2,
+ TILEPRO_OPC_TBLIDXB3, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE,
+};
+
+static const unsigned short decode_Y1_fsm[140] =
+{ /* Decode tree for the Y1 table, listed fourth in
+   * tilepro_bundle_decoder_fsms[] and walked by find_opcode().
+   *
+   * As find_opcode() reads it, every node is one BITFIELD entry (low 6
+   * bits: right-shift count, remaining bits: mask) followed by the slots
+   * the masked field chooses among.  A slot no greater than
+   * TILEPRO_OPC_NONE is a final index into tilepro_opcodes[]; any other
+   * slot, less TILEPRO_OPC_NONE, is the array index of the next node.
+   * The "index N" comments give each node's array position, which is what
+   * the CHILD(N) arguments name (CHILD itself is defined outside this
+   * view -- presumably TILEPRO_OPC_NONE + N, TODO confirm).
+   */
+ BITFIELD(59, 4) /* index 0 */,
+ TILEPRO_OPC_NONE, CHILD(17), CHILD(22), CHILD(27), CHILD(47), CHILD(52),
+ CHILD(57), TILEPRO_OPC_ADDI, CHILD(62), CHILD(92), TILEPRO_OPC_SEQI,
+ CHILD(107), TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE,
+ BITFIELD(49, 2) /* index 17 */,
+ TILEPRO_OPC_ADD, TILEPRO_OPC_S1A, TILEPRO_OPC_S2A, TILEPRO_OPC_SUB,
+ BITFIELD(49, 2) /* index 22 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_MNZ, TILEPRO_OPC_MZ, TILEPRO_OPC_NONE,
+ BITFIELD(49, 2) /* index 27 */,
+ TILEPRO_OPC_AND, TILEPRO_OPC_NOR, CHILD(32), TILEPRO_OPC_XOR,
+ BITFIELD(43, 2) /* index 32 */, /* nodes 32/37/42: bits 43..48 all set => MOVE, otherwise OR */
+ TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(37),
+ BITFIELD(45, 2) /* index 37 */,
+ TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(42),
+ BITFIELD(47, 2) /* index 42 */,
+ TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE,
+ BITFIELD(49, 2) /* index 47 */,
+ TILEPRO_OPC_RL, TILEPRO_OPC_SHL, TILEPRO_OPC_SHR, TILEPRO_OPC_SRA,
+ BITFIELD(49, 2) /* index 52 */,
+ TILEPRO_OPC_SLTE, TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLT, TILEPRO_OPC_SLT_U,
+ BITFIELD(49, 2) /* index 57 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_S3A, TILEPRO_OPC_SEQ, TILEPRO_OPC_SNE,
+ BITFIELD(31, 2) /* index 62 */, /* nodes 62..87: bits 31..42 all set => INFO, otherwise ANDI */
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(67),
+ BITFIELD(33, 2) /* index 67 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(72),
+ BITFIELD(35, 2) /* index 72 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(77),
+ BITFIELD(37, 2) /* index 77 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(82),
+ BITFIELD(39, 2) /* index 82 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(87),
+ BITFIELD(41, 2) /* index 87 */,
+ TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO,
+ BITFIELD(37, 2) /* index 92 */, /* nodes 92/97/102: bits 37..42 all set => MOVEI, otherwise ORI */
+ TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(97),
+ BITFIELD(39, 2) /* index 97 */,
+ TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(102),
+ BITFIELD(41, 2) /* index 102 */,
+ TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI,
+ BITFIELD(48, 3) /* index 107 */,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_RLI, TILEPRO_OPC_SHLI, TILEPRO_OPC_SHRI,
+ TILEPRO_OPC_SRAI, CHILD(116), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(43, 3) /* index 116 */,
+ TILEPRO_OPC_NONE, CHILD(125), CHILD(130), CHILD(135), TILEPRO_OPC_NONE,
+ TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(46, 2) /* index 125 */,
+ TILEPRO_OPC_FNOP, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(46, 2) /* index 130 */,
+ TILEPRO_OPC_ILL, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+ BITFIELD(46, 2) /* index 135 */,
+ TILEPRO_OPC_NOP, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE,
+};
+
+static const unsigned short decode_Y2_fsm[24] =
+{ /* Decode tree for the Y2 table, listed last in
+   * tilepro_bundle_decoder_fsms[] and walked by find_opcode().
+   *
+   * Each node is a BITFIELD entry followed by the slots it selects among;
+   * find_opcode() treats a slot no greater than TILEPRO_OPC_NONE as a
+   * final index into tilepro_opcodes[] and any other slot, less
+   * TILEPRO_OPC_NONE, as the array index of the next node.  The "index N"
+   * comments are the node positions that the CHILD(N) arguments name.
+   */
+ BITFIELD(56, 3) /* index 0 */,
+ CHILD(9), TILEPRO_OPC_LB_U, TILEPRO_OPC_LH, TILEPRO_OPC_LH_U,
+ TILEPRO_OPC_LW, TILEPRO_OPC_SB, TILEPRO_OPC_SH, TILEPRO_OPC_SW,
+ BITFIELD(20, 2) /* index 9 */, /* nodes 9/14/19: bits 20..25 all set => PREFETCH, otherwise LB */
+ TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(14),
+ BITFIELD(22, 2) /* index 14 */,
+ TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(19),
+ BITFIELD(24, 2) /* index 19 */,
+ TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_PREFETCH,
+};
+
+/* The table-building helpers are not needed past the decoder tables. */
+#undef CHILD
+#undef BITFIELD
+
+/* One decoder table per pipeline encoding; find_opcode() indexes this
+ * array with the pipe it is asked to decode.
+ */
+const unsigned short * const tilepro_bundle_decoder_fsms[TILEPRO_NUM_PIPELINE_ENCODINGS] = {
+  /* Tables tried by parse_insn_tilepro() for X-encoded bundles. */
+  decode_X0_fsm, decode_X1_fsm,
+  /* Tables tried by parse_insn_tilepro() for Y-encoded bundles. */
+  decode_Y0_fsm, decode_Y1_fsm, decode_Y2_fsm,
+};
+const struct tilepro_operand tilepro_operands[43] =
+{ /* Operand descriptors, selected by index: parse_insn_tilepro() looks
+   * one up as tilepro_operands[opc->operands[pipe][i]].  The numbers in
+   * the comments on the entries below are those indices.
+   *
+   * The initializers are positional, so their meaning comes from struct
+   * tilepro_operand (declared elsewhere).  Judging from the values and
+   * from the members parse_insn_tilepro() reads (type, num_bits,
+   * is_signed, extract), each entry appears to start with the operand
+   * type and relocation, then num_bits and is_signed, and to end with an
+   * insert/extract callback pair -- TODO confirm the exact member order
+   * against the structure declaration.
+   */
+ { /* 0 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_X0),
+ 8, 1, 0, 0, 0, 0,
+ create_Imm8_X0, get_Imm8_X0
+ },
+ { /* 1 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_X1),
+ 8, 1, 0, 0, 0, 0,
+ create_Imm8_X1, get_Imm8_X1
+ },
+ { /* 2 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_Y0),
+ 8, 1, 0, 0, 0, 0,
+ create_Imm8_Y0, get_Imm8_Y0
+ },
+ { /* 3 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_Y1),
+ 8, 1, 0, 0, 0, 0,
+ create_Imm8_Y1, get_Imm8_Y1
+ },
+ { /* 4 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM16_X0),
+ 16, 1, 0, 0, 0, 0,
+ create_Imm16_X0, get_Imm16_X0
+ },
+ { /* 5 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM16_X1),
+ 16, 1, 0, 0, 0, 0,
+ create_Imm16_X1, get_Imm16_X1
+ },
+ { /* 6 */
+ TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(TILEPRO_JOFFLONG_X1),
+ 29, 1, 0, 0, 1, TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES,
+ create_JOffLong_X1, get_JOffLong_X1
+ },
+ { /* 7 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 0, 1, 0, 0,
+ create_Dest_X0, get_Dest_X0
+ },
+ { /* 8 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcA_X0, get_SrcA_X0
+ },
+ { /* 9 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 0, 1, 0, 0,
+ create_Dest_X1, get_Dest_X1
+ },
+ { /* 10 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcA_X1, get_SrcA_X1
+ },
+ { /* 11 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 0, 1, 0, 0,
+ create_Dest_Y0, get_Dest_Y0
+ },
+ { /* 12 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcA_Y0, get_SrcA_Y0
+ },
+ { /* 13 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 0, 1, 0, 0,
+ create_Dest_Y1, get_Dest_Y1
+ },
+ { /* 14 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcA_Y1, get_SrcA_Y1
+ },
+ { /* 15 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcA_Y2, get_SrcA_Y2
+ },
+ { /* 16 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcB_X0, get_SrcB_X0
+ },
+ { /* 17 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcB_X1, get_SrcB_X1
+ },
+ { /* 18 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcB_Y0, get_SrcB_Y0
+ },
+ { /* 19 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcB_Y1, get_SrcB_Y1
+ },
+ { /* 20 */
+ TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(TILEPRO_BROFF_X1),
+ 17, 1, 0, 0, 1, TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES,
+ create_BrOff_X1, get_BrOff_X1
+ },
+ { /* 21: same accessors as entry 7, different flag values */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 1, 0, 0,
+ create_Dest_X0, get_Dest_X0
+ },
+ { /* 22 */
+ TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(NONE),
+ 28, 1, 0, 0, 1, TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES,
+ create_JOff_X1, get_JOff_X1
+ },
+ { /* 23 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 0, 1, 0, 0,
+ create_SrcBDest_Y2, get_SrcBDest_Y2
+ },
+ { /* 24: same accessors as entry 10, different flag values */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 1, 0, 0,
+ create_SrcA_X1, get_SrcA_X1
+ },
+ { /* 25 */
+ TILEPRO_OP_TYPE_SPR, BFD_RELOC(TILEPRO_MF_IMM15_X1),
+ 15, 0, 0, 0, 0, 0,
+ create_MF_Imm15_X1, get_MF_Imm15_X1
+ },
+ { /* 26 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMSTART_X0),
+ 5, 0, 0, 0, 0, 0,
+ create_MMStart_X0, get_MMStart_X0
+ },
+ { /* 27 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMEND_X0),
+ 5, 0, 0, 0, 0, 0,
+ create_MMEnd_X0, get_MMEnd_X0
+ },
+ { /* 28 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMSTART_X1),
+ 5, 0, 0, 0, 0, 0,
+ create_MMStart_X1, get_MMStart_X1
+ },
+ { /* 29 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMEND_X1),
+ 5, 0, 0, 0, 0, 0,
+ create_MMEnd_X1, get_MMEnd_X1
+ },
+ { /* 30 */
+ TILEPRO_OP_TYPE_SPR, BFD_RELOC(TILEPRO_MT_IMM15_X1),
+ 15, 0, 0, 0, 0, 0,
+ create_MT_Imm15_X1, get_MT_Imm15_X1
+ },
+ { /* 31: same accessors as entry 11, different flag values */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 1, 0, 0,
+ create_Dest_Y0, get_Dest_Y0
+ },
+ { /* 32 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_X0),
+ 5, 0, 0, 0, 0, 0,
+ create_ShAmt_X0, get_ShAmt_X0
+ },
+ { /* 33 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_X1),
+ 5, 0, 0, 0, 0, 0,
+ create_ShAmt_X1, get_ShAmt_X1
+ },
+ { /* 34 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_Y0),
+ 5, 0, 0, 0, 0, 0,
+ create_ShAmt_Y0, get_ShAmt_Y0
+ },
+ { /* 35 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_Y1),
+ 5, 0, 0, 0, 0, 0,
+ create_ShAmt_Y1, get_ShAmt_Y1
+ },
+ { /* 36: same accessors as entry 23, different flag values */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcBDest_Y2, get_SrcBDest_Y2
+ },
+ { /* 37 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_DEST_IMM8_X1),
+ 8, 1, 0, 0, 0, 0,
+ create_Dest_Imm8_X1, get_Dest_Imm8_X1
+ },
+ { /* 38 */
+ TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(NONE),
+ 10, 1, 0, 0, 1, TILEPRO_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES,
+ create_BrOff_SN, get_BrOff_SN
+ },
+ { /* 39 */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(NONE),
+ 8, 0, 0, 0, 0, 0,
+ create_Imm8_SN, get_Imm8_SN
+ },
+ { /* 40: same accessors as entry 39, different flag values */
+ TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(NONE),
+ 8, 1, 0, 0, 0, 0,
+ create_Imm8_SN, get_Imm8_SN
+ },
+ { /* 41 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 2, 0, 0, 1, 0, 0,
+ create_Dest_SN, get_Dest_SN
+ },
+ { /* 42 */
+ TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 2, 0, 1, 0, 0, 0,
+ create_Src_SN, get_Src_SN
+ }
+};
+
+
+
+
+/* Look up the instruction that a bundle carries in one pipe.
+ *
+ * The pipe's decoder table is walked from its root node.  At each node a
+ * bit field is pulled out of BITS and used to pick one of the node's
+ * slots; the walk ends when the slot names an opcode rather than another
+ * node, and the matching tilepro_opcodes[] entry is returned.
+ */
+const struct tilepro_opcode *
+find_opcode(tilepro_bundle_bits bits, tilepro_pipeline pipe)
+{
+  const unsigned short *fsm = tilepro_bundle_decoder_fsms[pipe];
+  int node = 0;
+
+  for (;;)
+    {
+      /* Node header: low 6 bits are the shift count, the rest the mask. */
+      const unsigned short spec = fsm[node];
+      const unsigned int shift = spec & 63;
+      const unsigned int mask = spec >> 6;
+      const unsigned int selector = (unsigned int)(bits >> shift) & mask;
+      const unsigned short entry = fsm[node + 1 + selector];
+
+      if (entry > TILEPRO_OPC_NONE)
+        {
+          /* Interior link: values above TILEPRO_OPC_NONE encode the
+             table index of the next node. */
+          node = entry - TILEPRO_OPC_NONE;
+          continue;
+        }
+
+      return &tilepro_opcodes[entry];
+    }
+}
+
+
+/* Decode every instruction held in one bundle.
+ *
+ * BITS is the raw bundle.  PC is added to address-type operands after
+ * they have been scaled by the bundle size.  DECODED receives one entry
+ * per pipe decoded: pipes X0 through X1 when none of the
+ * TILEPRO_BUNDLE_Y_ENCODING_MASK bits are set in BITS, pipes Y0 through
+ * Y2 otherwise.
+ *
+ * Returns the number of entries stored in DECODED.
+ */
+int
+parse_insn_tilepro(tilepro_bundle_bits bits,
+                   unsigned int pc,
+                   struct tilepro_decoded_instruction
+                   decoded[TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE])
+{
+  int num_instructions = 0;
+  int pipe;
+
+  int min_pipe, max_pipe;
+  if ((bits & TILEPRO_BUNDLE_Y_ENCODING_MASK) == 0)
+    {
+      min_pipe = TILEPRO_PIPELINE_X0;
+      max_pipe = TILEPRO_PIPELINE_X1;
+    }
+  else
+    {
+      min_pipe = TILEPRO_PIPELINE_Y0;
+      max_pipe = TILEPRO_PIPELINE_Y2;
+    }
+
+  /* For each pipe, find an instruction that fits. */
+  for (pipe = min_pipe; pipe <= max_pipe; pipe++)
+    {
+      const struct tilepro_opcode *opc;
+      struct tilepro_decoded_instruction *d;
+      int i;
+
+      d = &decoded[num_instructions++];
+      opc = find_opcode (bits, (tilepro_pipeline)pipe);
+      d->opcode = opc;
+
+      /* Decode each operand, sign extending, etc. as appropriate. */
+      for (i = 0; i < opc->num_operands; i++)
+        {
+          const struct tilepro_operand *op =
+            &tilepro_operands[opc->operands[pipe][i]];
+          int opval = op->extract (bits);
+          if (op->is_signed)
+            {
+              /* Sign-extend the operand.  The left shift is carried out
+                 on an unsigned value: moving the field's top bit into the
+                 sign bit of a signed int is undefined behavior in ISO C.
+                 The arithmetic right shift then replicates that bit. */
+              int shift = (int)((sizeof(int) * 8) - op->num_bits);
+              opval = (int)((unsigned int)opval << shift) >> shift;
+            }
+
+          /* Adjust PC-relative scaled branch offsets. */
+          if (op->type == TILEPRO_OP_TYPE_ADDRESS)
+            {
+              /* Scale and rebase in unsigned arithmetic so that a large
+                 offset or a high PC wraps modulo 2^32 instead of
+                 overflowing a signed int. */
+              opval = (int)((unsigned int)opval
+                            * (unsigned int)TILEPRO_BUNDLE_SIZE_IN_BYTES
+                            + pc);
+            }
+
+          /* Record the final value. */
+          d->operands[i] = op;
+          d->operand_values[i] = opval;
+        }
+    }
+
+  return num_instructions;
+}
diff --git a/arch/tile/kernel/tile-desc_64.c b/arch/tile/kernel/tile-desc_64.c
new file mode 100644
index 00000000..65b5f8ac
--- /dev/null
+++ b/arch/tile/kernel/tile-desc_64.c
@@ -0,0 +1,2218 @@
+/* TILE-Gx opcode information.
+ *
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ *
+ *
+ *
+ *
+ */
+
+/* This define is BFD_RELOC_##x for real bfd, or -1 for everyone else.
+ * This file takes the "everyone else" form: the argument is discarded
+ * and every use expands to the unparenthesized tokens -1.
+ */
+#define BFD_RELOC(x) -1
+
+/* Special registers.  TREG_ZERO is used as a field value in the
+ * tilegx_opcodes[] entries below; what that field means is set by
+ * struct tilegx_opcode, declared elsewhere.
+ */
+#define TREG_LR 55
+#define TREG_SN 56
+#define TREG_ZERO 63
+
+#include <linux/stddef.h>
+#include <asm/tile-desc.h>
+
+const struct tilegx_opcode tilegx_opcodes[334] =
+{
+ { "bpt", TILEGX_OPC_BPT, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "info", TILEGX_OPC_INFO, 0xf, 1, TREG_ZERO, 1,
+ { { 0 }, { 1 }, { 2 }, { 3 }, { 0, } },
+ },
+ { "infol", TILEGX_OPC_INFOL, 0x3, 1, TREG_ZERO, 1,
+ { { 4 }, { 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "move", TILEGX_OPC_MOVE, 0xf, 2, TREG_ZERO, 1,
+ { { 6, 7 }, { 8, 9 }, { 10, 11 }, { 12, 13 }, { 0, } },
+ },
+ { "movei", TILEGX_OPC_MOVEI, 0xf, 2, TREG_ZERO, 1,
+ { { 6, 0 }, { 8, 1 }, { 10, 2 }, { 12, 3 }, { 0, } },
+ },
+ { "moveli", TILEGX_OPC_MOVELI, 0x3, 2, TREG_ZERO, 1,
+ { { 6, 4 }, { 8, 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "prefetch", TILEGX_OPC_PREFETCH, 0x12, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } },
+ },
+ { "prefetch_add_l1", TILEGX_OPC_PREFETCH_ADD_L1, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "prefetch_add_l1_fault", TILEGX_OPC_PREFETCH_ADD_L1_FAULT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "prefetch_add_l2", TILEGX_OPC_PREFETCH_ADD_L2, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "prefetch_add_l2_fault", TILEGX_OPC_PREFETCH_ADD_L2_FAULT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "prefetch_add_l3", TILEGX_OPC_PREFETCH_ADD_L3, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "prefetch_add_l3_fault", TILEGX_OPC_PREFETCH_ADD_L3_FAULT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "prefetch_l1", TILEGX_OPC_PREFETCH_L1, 0x12, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } },
+ },
+ { "prefetch_l1_fault", TILEGX_OPC_PREFETCH_L1_FAULT, 0x12, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } },
+ },
+ { "prefetch_l2", TILEGX_OPC_PREFETCH_L2, 0x12, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } },
+ },
+ { "prefetch_l2_fault", TILEGX_OPC_PREFETCH_L2_FAULT, 0x12, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } },
+ },
+ { "prefetch_l3", TILEGX_OPC_PREFETCH_L3, 0x12, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } },
+ },
+ { "prefetch_l3_fault", TILEGX_OPC_PREFETCH_L3_FAULT, 0x12, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } },
+ },
+ { "raise", TILEGX_OPC_RAISE, 0x2, 0, TREG_ZERO, 1,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "add", TILEGX_OPC_ADD, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "addi", TILEGX_OPC_ADDI, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+ },
+ { "addli", TILEGX_OPC_ADDLI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 4 }, { 8, 9, 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addx", TILEGX_OPC_ADDX, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "addxi", TILEGX_OPC_ADDXI, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+ },
+ { "addxli", TILEGX_OPC_ADDXLI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 4 }, { 8, 9, 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "addxsc", TILEGX_OPC_ADDXSC, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "and", TILEGX_OPC_AND, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "andi", TILEGX_OPC_ANDI, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+ },
+ { "beqz", TILEGX_OPC_BEQZ, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "beqzt", TILEGX_OPC_BEQZT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bfexts", TILEGX_OPC_BFEXTS, 0x1, 4, TREG_ZERO, 1,
+ { { 6, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bfextu", TILEGX_OPC_BFEXTU, 0x1, 4, TREG_ZERO, 1,
+ { { 6, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bfins", TILEGX_OPC_BFINS, 0x1, 4, TREG_ZERO, 1,
+ { { 23, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bgez", TILEGX_OPC_BGEZ, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bgezt", TILEGX_OPC_BGEZT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bgtz", TILEGX_OPC_BGTZ, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bgtzt", TILEGX_OPC_BGTZT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blbc", TILEGX_OPC_BLBC, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blbct", TILEGX_OPC_BLBCT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blbs", TILEGX_OPC_BLBS, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blbst", TILEGX_OPC_BLBST, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blez", TILEGX_OPC_BLEZ, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "blezt", TILEGX_OPC_BLEZT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bltz", TILEGX_OPC_BLTZ, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bltzt", TILEGX_OPC_BLTZT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bnez", TILEGX_OPC_BNEZ, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "bnezt", TILEGX_OPC_BNEZT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "clz", TILEGX_OPC_CLZ, 0x5, 2, TREG_ZERO, 1,
+ { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+ },
+ { "cmoveqz", TILEGX_OPC_CMOVEQZ, 0x5, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+ },
+ { "cmovnez", TILEGX_OPC_CMOVNEZ, 0x5, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+ },
+ { "cmpeq", TILEGX_OPC_CMPEQ, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "cmpeqi", TILEGX_OPC_CMPEQI, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+ },
+ { "cmpexch", TILEGX_OPC_CMPEXCH, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "cmpexch4", TILEGX_OPC_CMPEXCH4, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "cmples", TILEGX_OPC_CMPLES, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "cmpleu", TILEGX_OPC_CMPLEU, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "cmplts", TILEGX_OPC_CMPLTS, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "cmpltsi", TILEGX_OPC_CMPLTSI, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+ },
+ { "cmpltu", TILEGX_OPC_CMPLTU, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "cmpltui", TILEGX_OPC_CMPLTUI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "cmpne", TILEGX_OPC_CMPNE, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "cmul", TILEGX_OPC_CMUL, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "cmula", TILEGX_OPC_CMULA, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "cmulaf", TILEGX_OPC_CMULAF, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "cmulf", TILEGX_OPC_CMULF, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "cmulfr", TILEGX_OPC_CMULFR, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "cmulh", TILEGX_OPC_CMULH, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "cmulhr", TILEGX_OPC_CMULHR, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "crc32_32", TILEGX_OPC_CRC32_32, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "crc32_8", TILEGX_OPC_CRC32_8, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ctz", TILEGX_OPC_CTZ, 0x5, 2, TREG_ZERO, 1,
+ { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+ },
+ { "dblalign", TILEGX_OPC_DBLALIGN, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "dblalign2", TILEGX_OPC_DBLALIGN2, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "dblalign4", TILEGX_OPC_DBLALIGN4, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "dblalign6", TILEGX_OPC_DBLALIGN6, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "drain", TILEGX_OPC_DRAIN, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "dtlbpr", TILEGX_OPC_DTLBPR, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "exch", TILEGX_OPC_EXCH, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "exch4", TILEGX_OPC_EXCH4, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fdouble_add_flags", TILEGX_OPC_FDOUBLE_ADD_FLAGS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fdouble_addsub", TILEGX_OPC_FDOUBLE_ADDSUB, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fdouble_mul_flags", TILEGX_OPC_FDOUBLE_MUL_FLAGS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fdouble_pack1", TILEGX_OPC_FDOUBLE_PACK1, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fdouble_pack2", TILEGX_OPC_FDOUBLE_PACK2, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fdouble_sub_flags", TILEGX_OPC_FDOUBLE_SUB_FLAGS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fdouble_unpack_max", TILEGX_OPC_FDOUBLE_UNPACK_MAX, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fdouble_unpack_min", TILEGX_OPC_FDOUBLE_UNPACK_MIN, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fetchadd", TILEGX_OPC_FETCHADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fetchadd4", TILEGX_OPC_FETCHADD4, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fetchaddgez", TILEGX_OPC_FETCHADDGEZ, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fetchaddgez4", TILEGX_OPC_FETCHADDGEZ4, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fetchand", TILEGX_OPC_FETCHAND, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fetchand4", TILEGX_OPC_FETCHAND4, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fetchor", TILEGX_OPC_FETCHOR, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fetchor4", TILEGX_OPC_FETCHOR4, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "finv", TILEGX_OPC_FINV, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "flush", TILEGX_OPC_FLUSH, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "flushwb", TILEGX_OPC_FLUSHWB, 0x2, 0, TREG_ZERO, 1,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fnop", TILEGX_OPC_FNOP, 0xf, 0, TREG_ZERO, 1,
+ { { }, { }, { }, { }, { 0, } },
+ },
+ { "fsingle_add1", TILEGX_OPC_FSINGLE_ADD1, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fsingle_addsub2", TILEGX_OPC_FSINGLE_ADDSUB2, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fsingle_mul1", TILEGX_OPC_FSINGLE_MUL1, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fsingle_mul2", TILEGX_OPC_FSINGLE_MUL2, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fsingle_pack1", TILEGX_OPC_FSINGLE_PACK1, 0x5, 2, TREG_ZERO, 1,
+ { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+ },
+ { "fsingle_pack2", TILEGX_OPC_FSINGLE_PACK2, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "fsingle_sub1", TILEGX_OPC_FSINGLE_SUB1, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "icoh", TILEGX_OPC_ICOH, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ill", TILEGX_OPC_ILL, 0xa, 0, TREG_ZERO, 1,
+ { { 0, }, { }, { 0, }, { }, { 0, } },
+ },
+ { "inv", TILEGX_OPC_INV, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "iret", TILEGX_OPC_IRET, 0x2, 0, TREG_ZERO, 1,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "j", TILEGX_OPC_J, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "jal", TILEGX_OPC_JAL, 0x2, 1, TREG_LR, 1,
+ { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "jalr", TILEGX_OPC_JALR, 0xa, 1, TREG_LR, 1,
+ { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } },
+ },
+ { "jalrp", TILEGX_OPC_JALRP, 0xa, 1, TREG_LR, 1,
+ { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } },
+ },
+ { "jr", TILEGX_OPC_JR, 0xa, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } },
+ },
+ { "jrp", TILEGX_OPC_JRP, 0xa, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } },
+ },
+ { "ld", TILEGX_OPC_LD, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } },
+ },
+ { "ld1s", TILEGX_OPC_LD1S, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } },
+ },
+ { "ld1s_add", TILEGX_OPC_LD1S_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ld1u", TILEGX_OPC_LD1U, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } },
+ },
+ { "ld1u_add", TILEGX_OPC_LD1U_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ld2s", TILEGX_OPC_LD2S, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } },
+ },
+ { "ld2s_add", TILEGX_OPC_LD2S_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ld2u", TILEGX_OPC_LD2U, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } },
+ },
+ { "ld2u_add", TILEGX_OPC_LD2U_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ld4s", TILEGX_OPC_LD4S, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } },
+ },
+ { "ld4s_add", TILEGX_OPC_LD4S_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ld4u", TILEGX_OPC_LD4U, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } },
+ },
+ { "ld4u_add", TILEGX_OPC_LD4U_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ld_add", TILEGX_OPC_LD_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldna", TILEGX_OPC_LDNA, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldna_add", TILEGX_OPC_LDNA_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt", TILEGX_OPC_LDNT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt1s", TILEGX_OPC_LDNT1S, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt1s_add", TILEGX_OPC_LDNT1S_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt1u", TILEGX_OPC_LDNT1U, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt1u_add", TILEGX_OPC_LDNT1U_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt2s", TILEGX_OPC_LDNT2S, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt2s_add", TILEGX_OPC_LDNT2S_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt2u", TILEGX_OPC_LDNT2U, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt2u_add", TILEGX_OPC_LDNT2U_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt4s", TILEGX_OPC_LDNT4S, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt4s_add", TILEGX_OPC_LDNT4S_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt4u", TILEGX_OPC_LDNT4U, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt4u_add", TILEGX_OPC_LDNT4U_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "ldnt_add", TILEGX_OPC_LDNT_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "lnk", TILEGX_OPC_LNK, 0xa, 1, TREG_ZERO, 1,
+ { { 0, }, { 8 }, { 0, }, { 12 }, { 0, } },
+ },
+ { "mf", TILEGX_OPC_MF, 0x2, 0, TREG_ZERO, 1,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mfspr", TILEGX_OPC_MFSPR, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 8, 27 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mm", TILEGX_OPC_MM, 0x1, 4, TREG_ZERO, 1,
+ { { 23, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mnz", TILEGX_OPC_MNZ, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "mtspr", TILEGX_OPC_MTSPR, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 28, 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mul_hs_hs", TILEGX_OPC_MUL_HS_HS, 0x5, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+ },
+ { "mul_hs_hu", TILEGX_OPC_MUL_HS_HU, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mul_hs_ls", TILEGX_OPC_MUL_HS_LS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mul_hs_lu", TILEGX_OPC_MUL_HS_LU, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mul_hu_hu", TILEGX_OPC_MUL_HU_HU, 0x5, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+ },
+ { "mul_hu_ls", TILEGX_OPC_MUL_HU_LS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mul_hu_lu", TILEGX_OPC_MUL_HU_LU, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mul_ls_ls", TILEGX_OPC_MUL_LS_LS, 0x5, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+ },
+ { "mul_ls_lu", TILEGX_OPC_MUL_LS_LU, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mul_lu_lu", TILEGX_OPC_MUL_LU_LU, 0x5, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+ },
+ { "mula_hs_hs", TILEGX_OPC_MULA_HS_HS, 0x5, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+ },
+ { "mula_hs_hu", TILEGX_OPC_MULA_HS_HU, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mula_hs_ls", TILEGX_OPC_MULA_HS_LS, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mula_hs_lu", TILEGX_OPC_MULA_HS_LU, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mula_hu_hu", TILEGX_OPC_MULA_HU_HU, 0x5, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+ },
+ { "mula_hu_ls", TILEGX_OPC_MULA_HU_LS, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mula_hu_lu", TILEGX_OPC_MULA_HU_LU, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mula_ls_ls", TILEGX_OPC_MULA_LS_LS, 0x5, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+ },
+ { "mula_ls_lu", TILEGX_OPC_MULA_LS_LU, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "mula_lu_lu", TILEGX_OPC_MULA_LU_LU, 0x5, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+ },
+ { "mulax", TILEGX_OPC_MULAX, 0x5, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+ },
+ { "mulx", TILEGX_OPC_MULX, 0x5, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+ },
+ { "mz", TILEGX_OPC_MZ, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "nap", TILEGX_OPC_NAP, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "nop", TILEGX_OPC_NOP, 0xf, 0, TREG_ZERO, 1,
+ { { }, { }, { }, { }, { 0, } },
+ },
+ { "nor", TILEGX_OPC_NOR, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "or", TILEGX_OPC_OR, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "ori", TILEGX_OPC_ORI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "pcnt", TILEGX_OPC_PCNT, 0x5, 2, TREG_ZERO, 1,
+ { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+ },
+ { "revbits", TILEGX_OPC_REVBITS, 0x5, 2, TREG_ZERO, 1,
+ { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+ },
+ { "revbytes", TILEGX_OPC_REVBYTES, 0x5, 2, TREG_ZERO, 1,
+ { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+ },
+ { "rotl", TILEGX_OPC_ROTL, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "rotli", TILEGX_OPC_ROTLI, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } },
+ },
+ { "shl", TILEGX_OPC_SHL, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "shl16insli", TILEGX_OPC_SHL16INSLI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 4 }, { 8, 9, 5 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shl1add", TILEGX_OPC_SHL1ADD, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "shl1addx", TILEGX_OPC_SHL1ADDX, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "shl2add", TILEGX_OPC_SHL2ADD, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "shl2addx", TILEGX_OPC_SHL2ADDX, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "shl3add", TILEGX_OPC_SHL3ADD, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "shl3addx", TILEGX_OPC_SHL3ADDX, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "shli", TILEGX_OPC_SHLI, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } },
+ },
+ { "shlx", TILEGX_OPC_SHLX, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shlxi", TILEGX_OPC_SHLXI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shrs", TILEGX_OPC_SHRS, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "shrsi", TILEGX_OPC_SHRSI, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } },
+ },
+ { "shru", TILEGX_OPC_SHRU, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "shrui", TILEGX_OPC_SHRUI, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } },
+ },
+ { "shrux", TILEGX_OPC_SHRUX, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shruxi", TILEGX_OPC_SHRUXI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "shufflebytes", TILEGX_OPC_SHUFFLEBYTES, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "st", TILEGX_OPC_ST, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } },
+ },
+ { "st1", TILEGX_OPC_ST1, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } },
+ },
+ { "st1_add", TILEGX_OPC_ST1_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "st2", TILEGX_OPC_ST2, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } },
+ },
+ { "st2_add", TILEGX_OPC_ST2_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "st4", TILEGX_OPC_ST4, 0x12, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } },
+ },
+ { "st4_add", TILEGX_OPC_ST4_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "st_add", TILEGX_OPC_ST_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "stnt", TILEGX_OPC_STNT, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "stnt1", TILEGX_OPC_STNT1, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "stnt1_add", TILEGX_OPC_STNT1_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "stnt2", TILEGX_OPC_STNT2, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "stnt2_add", TILEGX_OPC_STNT2_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "stnt4", TILEGX_OPC_STNT4, 0x2, 2, TREG_ZERO, 1,
+ { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "stnt4_add", TILEGX_OPC_STNT4_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "stnt_add", TILEGX_OPC_STNT_ADD, 0x2, 3, TREG_ZERO, 1,
+ { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "sub", TILEGX_OPC_SUB, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "subx", TILEGX_OPC_SUBX, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "subxsc", TILEGX_OPC_SUBXSC, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "swint0", TILEGX_OPC_SWINT0, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "swint1", TILEGX_OPC_SWINT1, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "swint2", TILEGX_OPC_SWINT2, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "swint3", TILEGX_OPC_SWINT3, 0x2, 0, TREG_ZERO, 0,
+ { { 0, }, { }, { 0, }, { 0, }, { 0, } },
+ },
+ { "tblidxb0", TILEGX_OPC_TBLIDXB0, 0x5, 2, TREG_ZERO, 1,
+ { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } },
+ },
+ { "tblidxb1", TILEGX_OPC_TBLIDXB1, 0x5, 2, TREG_ZERO, 1,
+ { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } },
+ },
+ { "tblidxb2", TILEGX_OPC_TBLIDXB2, 0x5, 2, TREG_ZERO, 1,
+ { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } },
+ },
+ { "tblidxb3", TILEGX_OPC_TBLIDXB3, 0x5, 2, TREG_ZERO, 1,
+ { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } },
+ },
+ { "v1add", TILEGX_OPC_V1ADD, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1addi", TILEGX_OPC_V1ADDI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1adduc", TILEGX_OPC_V1ADDUC, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1adiffu", TILEGX_OPC_V1ADIFFU, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1avgu", TILEGX_OPC_V1AVGU, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1cmpeq", TILEGX_OPC_V1CMPEQ, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1cmpeqi", TILEGX_OPC_V1CMPEQI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1cmples", TILEGX_OPC_V1CMPLES, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1cmpleu", TILEGX_OPC_V1CMPLEU, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1cmplts", TILEGX_OPC_V1CMPLTS, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1cmpltsi", TILEGX_OPC_V1CMPLTSI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1cmpltu", TILEGX_OPC_V1CMPLTU, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1cmpltui", TILEGX_OPC_V1CMPLTUI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1cmpne", TILEGX_OPC_V1CMPNE, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1ddotpu", TILEGX_OPC_V1DDOTPU, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1ddotpua", TILEGX_OPC_V1DDOTPUA, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1ddotpus", TILEGX_OPC_V1DDOTPUS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1ddotpusa", TILEGX_OPC_V1DDOTPUSA, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1dotp", TILEGX_OPC_V1DOTP, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1dotpa", TILEGX_OPC_V1DOTPA, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1dotpu", TILEGX_OPC_V1DOTPU, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1dotpua", TILEGX_OPC_V1DOTPUA, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1dotpus", TILEGX_OPC_V1DOTPUS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1dotpusa", TILEGX_OPC_V1DOTPUSA, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1int_h", TILEGX_OPC_V1INT_H, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1int_l", TILEGX_OPC_V1INT_L, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1maxu", TILEGX_OPC_V1MAXU, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1maxui", TILEGX_OPC_V1MAXUI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1minu", TILEGX_OPC_V1MINU, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1minui", TILEGX_OPC_V1MINUI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1mnz", TILEGX_OPC_V1MNZ, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1multu", TILEGX_OPC_V1MULTU, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1mulu", TILEGX_OPC_V1MULU, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1mulus", TILEGX_OPC_V1MULUS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1mz", TILEGX_OPC_V1MZ, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1sadau", TILEGX_OPC_V1SADAU, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1sadu", TILEGX_OPC_V1SADU, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1shl", TILEGX_OPC_V1SHL, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1shli", TILEGX_OPC_V1SHLI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1shrs", TILEGX_OPC_V1SHRS, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1shrsi", TILEGX_OPC_V1SHRSI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1shru", TILEGX_OPC_V1SHRU, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1shrui", TILEGX_OPC_V1SHRUI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1sub", TILEGX_OPC_V1SUB, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v1subuc", TILEGX_OPC_V1SUBUC, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2add", TILEGX_OPC_V2ADD, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2addi", TILEGX_OPC_V2ADDI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2addsc", TILEGX_OPC_V2ADDSC, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2adiffs", TILEGX_OPC_V2ADIFFS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2avgs", TILEGX_OPC_V2AVGS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2cmpeq", TILEGX_OPC_V2CMPEQ, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2cmpeqi", TILEGX_OPC_V2CMPEQI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2cmples", TILEGX_OPC_V2CMPLES, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2cmpleu", TILEGX_OPC_V2CMPLEU, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2cmplts", TILEGX_OPC_V2CMPLTS, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2cmpltsi", TILEGX_OPC_V2CMPLTSI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2cmpltu", TILEGX_OPC_V2CMPLTU, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2cmpltui", TILEGX_OPC_V2CMPLTUI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2cmpne", TILEGX_OPC_V2CMPNE, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2dotp", TILEGX_OPC_V2DOTP, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2dotpa", TILEGX_OPC_V2DOTPA, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2int_h", TILEGX_OPC_V2INT_H, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2int_l", TILEGX_OPC_V2INT_L, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2maxs", TILEGX_OPC_V2MAXS, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2maxsi", TILEGX_OPC_V2MAXSI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2mins", TILEGX_OPC_V2MINS, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2minsi", TILEGX_OPC_V2MINSI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2mnz", TILEGX_OPC_V2MNZ, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2mulfsc", TILEGX_OPC_V2MULFSC, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2muls", TILEGX_OPC_V2MULS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2mults", TILEGX_OPC_V2MULTS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2mz", TILEGX_OPC_V2MZ, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2packh", TILEGX_OPC_V2PACKH, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2packl", TILEGX_OPC_V2PACKL, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2packuc", TILEGX_OPC_V2PACKUC, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2sadas", TILEGX_OPC_V2SADAS, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2sadau", TILEGX_OPC_V2SADAU, 0x1, 3, TREG_ZERO, 1,
+ { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2sads", TILEGX_OPC_V2SADS, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2sadu", TILEGX_OPC_V2SADU, 0x1, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2shl", TILEGX_OPC_V2SHL, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2shli", TILEGX_OPC_V2SHLI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2shlsc", TILEGX_OPC_V2SHLSC, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2shrs", TILEGX_OPC_V2SHRS, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2shrsi", TILEGX_OPC_V2SHRSI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2shru", TILEGX_OPC_V2SHRU, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2shrui", TILEGX_OPC_V2SHRUI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2sub", TILEGX_OPC_V2SUB, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v2subsc", TILEGX_OPC_V2SUBSC, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v4add", TILEGX_OPC_V4ADD, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v4addsc", TILEGX_OPC_V4ADDSC, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v4int_h", TILEGX_OPC_V4INT_H, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v4int_l", TILEGX_OPC_V4INT_L, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v4packsc", TILEGX_OPC_V4PACKSC, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v4shl", TILEGX_OPC_V4SHL, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v4shlsc", TILEGX_OPC_V4SHLSC, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v4shrs", TILEGX_OPC_V4SHRS, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v4shru", TILEGX_OPC_V4SHRU, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v4sub", TILEGX_OPC_V4SUB, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "v4subsc", TILEGX_OPC_V4SUBSC, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "wh64", TILEGX_OPC_WH64, 0x2, 1, TREG_ZERO, 1,
+ { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } },
+ },
+ { "xor", TILEGX_OPC_XOR, 0xf, 3, TREG_ZERO, 1,
+ { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+ },
+ { "xori", TILEGX_OPC_XORI, 0x3, 3, TREG_ZERO, 1,
+ { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } },
+ },
+ { NULL, TILEGX_OPC_NONE, 0, 0, TREG_ZERO, 0, { { 0, } },
+ }
+};
+#define BITFIELD(start, size) ((start) | (((1 << (size)) - 1) << 6)) /* ORs `start` with a mask of `size` one-bits shifted left by 6; presumably bits 0-5 carry the field's start bit and the upper bits its mask -- confirm against the decoder that reads these tables */
+#define CHILD(array_index) (TILEGX_OPC_NONE + (array_index)) /* biases a table index by TILEGX_OPC_NONE; presumably lets a table entry above TILEGX_OPC_NONE be told apart from a plain opcode value -- confirm against the decoder */
+
+static const unsigned short decode_X0_fsm[936] = /* X0 decode table: a tree of lookup nodes flattened into one array */
+{ /* node = BITFIELD(a, n) header + 2^n entries; an entry is an opcode or CHILD(array index of another node) */
+ BITFIELD(22, 9) /* index 0: root node, 512 entries at indices 1..512 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, /* NOTE(review): BITFIELD/CHILD are defined outside this chunk; presumably (a, n) = (first bit, bit count) of the field used to pick an entry -- confirm */
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BFEXTS,
+ TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTU,
+ TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFINS,
+ TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_MM,
+ TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(528), CHILD(578),
+ CHILD(583), CHILD(588), CHILD(593), CHILD(598), TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, CHILD(603), CHILD(620), CHILD(637), CHILD(654), CHILD(671),
+ CHILD(703), CHILD(797), CHILD(814), CHILD(831), CHILD(848), CHILD(865),
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, CHILD(889), TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+ CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+ CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+ CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+ CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+ CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+ CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+ CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+ CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+ CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+ CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+ BITFIELD(6, 2) /* index 513: first of three chained 2-bit nodes (513 -> 518 -> 523); only the last entry at every level reaches MOVELI, every other path is ADDLI */,
+ TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518),
+ BITFIELD(8, 2) /* index 518 */,
+ TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523),
+ BITFIELD(10, 2) /* index 523 */,
+ TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI,
+ BITFIELD(20, 2) /* index 528 */,
+ TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548),
+ BITFIELD(6, 2) /* index 533 */,
+ TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538),
+ BITFIELD(8, 2) /* index 538 */,
+ TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543),
+ BITFIELD(10, 2) /* index 543 */,
+ TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI,
+ BITFIELD(0, 2) /* index 548 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553),
+ BITFIELD(2, 2) /* index 553 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558),
+ BITFIELD(4, 2) /* index 558 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563),
+ BITFIELD(6, 2) /* index 563 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568),
+ BITFIELD(8, 2) /* index 568 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573),
+ BITFIELD(10, 2) /* index 573 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO,
+ BITFIELD(20, 2) /* index 578 */,
+ TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, TILEGX_OPC_ORI,
+ BITFIELD(20, 2) /* index 583 */,
+ TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI, TILEGX_OPC_V1CMPLTSI,
+ TILEGX_OPC_V1CMPLTUI,
+ BITFIELD(20, 2) /* index 588 */,
+ TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI, TILEGX_OPC_V2ADDI,
+ TILEGX_OPC_V2CMPEQI,
+ BITFIELD(20, 2) /* index 593 */,
+ TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI, TILEGX_OPC_V2MAXSI,
+ TILEGX_OPC_V2MINSI,
+ BITFIELD(20, 2) /* index 598 */,
+ TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(18, 4) /* index 603 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD,
+ TILEGX_OPC_AND, TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_CMPEQ,
+ TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU,
+ TILEGX_OPC_CMPNE, TILEGX_OPC_CMULAF, TILEGX_OPC_CMULA, TILEGX_OPC_CMULFR,
+ BITFIELD(18, 4) /* index 620 */,
+ TILEGX_OPC_CMULF, TILEGX_OPC_CMULHR, TILEGX_OPC_CMULH, TILEGX_OPC_CMUL,
+ TILEGX_OPC_CRC32_32, TILEGX_OPC_CRC32_8, TILEGX_OPC_DBLALIGN2,
+ TILEGX_OPC_DBLALIGN4, TILEGX_OPC_DBLALIGN6, TILEGX_OPC_DBLALIGN,
+ TILEGX_OPC_FDOUBLE_ADDSUB, TILEGX_OPC_FDOUBLE_ADD_FLAGS,
+ TILEGX_OPC_FDOUBLE_MUL_FLAGS, TILEGX_OPC_FDOUBLE_PACK1,
+ TILEGX_OPC_FDOUBLE_PACK2, TILEGX_OPC_FDOUBLE_SUB_FLAGS,
+ BITFIELD(18, 4) /* index 637 */,
+ TILEGX_OPC_FDOUBLE_UNPACK_MAX, TILEGX_OPC_FDOUBLE_UNPACK_MIN,
+ TILEGX_OPC_FSINGLE_ADD1, TILEGX_OPC_FSINGLE_ADDSUB2,
+ TILEGX_OPC_FSINGLE_MUL1, TILEGX_OPC_FSINGLE_MUL2, TILEGX_OPC_FSINGLE_PACK2,
+ TILEGX_OPC_FSINGLE_SUB1, TILEGX_OPC_MNZ, TILEGX_OPC_MULAX,
+ TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HS_HU, TILEGX_OPC_MULA_HS_LS,
+ TILEGX_OPC_MULA_HS_LU, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_HU_LS,
+ BITFIELD(18, 4) /* index 654 */,
+ TILEGX_OPC_MULA_HU_LU, TILEGX_OPC_MULA_LS_LS, TILEGX_OPC_MULA_LS_LU,
+ TILEGX_OPC_MULA_LU_LU, TILEGX_OPC_MULX, TILEGX_OPC_MUL_HS_HS,
+ TILEGX_OPC_MUL_HS_HU, TILEGX_OPC_MUL_HS_LS, TILEGX_OPC_MUL_HS_LU,
+ TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_HU_LS, TILEGX_OPC_MUL_HU_LU,
+ TILEGX_OPC_MUL_LS_LS, TILEGX_OPC_MUL_LS_LU, TILEGX_OPC_MUL_LU_LU,
+ TILEGX_OPC_MZ,
+ BITFIELD(18, 4) /* index 671 */,
+ TILEGX_OPC_NOR, CHILD(688), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX,
+ TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD,
+ TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL,
+ TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_SHUFFLEBYTES,
+ TILEGX_OPC_SUBXSC,
+ BITFIELD(12, 2) /* index 688 */,
+ TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(693),
+ BITFIELD(14, 2) /* index 693 */,
+ TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(698),
+ BITFIELD(16, 2) /* index 698 */,
+ TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE,
+ BITFIELD(18, 4) /* index 703 */,
+ TILEGX_OPC_SUBX, TILEGX_OPC_SUB, CHILD(720), TILEGX_OPC_V1ADDUC,
+ TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADIFFU, TILEGX_OPC_V1AVGU,
+ TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU,
+ TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE,
+ TILEGX_OPC_V1DDOTPUSA, TILEGX_OPC_V1DDOTPUS, TILEGX_OPC_V1DOTPA,
+ BITFIELD(12, 4) /* index 720 */,
+ TILEGX_OPC_NONE, CHILD(737), CHILD(742), CHILD(747), CHILD(752), CHILD(757),
+ CHILD(762), CHILD(767), CHILD(772), CHILD(777), CHILD(782), CHILD(787),
+ CHILD(792), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(16, 2) /* index 737 */,
+ TILEGX_OPC_CLZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(16, 2) /* index 742 */,
+ TILEGX_OPC_CTZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(16, 2) /* index 747 */,
+ TILEGX_OPC_FNOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(16, 2) /* index 752 */,
+ TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(16, 2) /* index 757 */,
+ TILEGX_OPC_NOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(16, 2) /* index 762 */,
+ TILEGX_OPC_PCNT, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(16, 2) /* index 767 */,
+ TILEGX_OPC_REVBITS, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(16, 2) /* index 772 */,
+ TILEGX_OPC_REVBYTES, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(16, 2) /* index 777 */,
+ TILEGX_OPC_TBLIDXB0, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(16, 2) /* index 782 */,
+ TILEGX_OPC_TBLIDXB1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(16, 2) /* index 787 */,
+ TILEGX_OPC_TBLIDXB2, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(16, 2) /* index 792 */,
+ TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(18, 4) /* index 797 */,
+ TILEGX_OPC_V1DOTPUSA, TILEGX_OPC_V1DOTPUS, TILEGX_OPC_V1DOTP,
+ TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1MAXU,
+ TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MULTU, TILEGX_OPC_V1MULUS,
+ TILEGX_OPC_V1MULU, TILEGX_OPC_V1MZ, TILEGX_OPC_V1SADAU, TILEGX_OPC_V1SADU,
+ TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS,
+ BITFIELD(18, 4) /* index 814 */,
+ TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC, TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC,
+ TILEGX_OPC_V2ADD, TILEGX_OPC_V2ADIFFS, TILEGX_OPC_V2AVGS,
+ TILEGX_OPC_V2CMPEQ, TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU,
+ TILEGX_OPC_V2CMPLTS, TILEGX_OPC_V2CMPLTU, TILEGX_OPC_V2CMPNE,
+ TILEGX_OPC_V2DOTPA, TILEGX_OPC_V2DOTP, TILEGX_OPC_V2INT_H,
+ BITFIELD(18, 4) /* index 831 */,
+ TILEGX_OPC_V2INT_L, TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ,
+ TILEGX_OPC_V2MULFSC, TILEGX_OPC_V2MULS, TILEGX_OPC_V2MULTS, TILEGX_OPC_V2MZ,
+ TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC,
+ TILEGX_OPC_V2SADAS, TILEGX_OPC_V2SADAU, TILEGX_OPC_V2SADS,
+ TILEGX_OPC_V2SADU, TILEGX_OPC_V2SHLSC,
+ BITFIELD(18, 4) /* index 848 */,
+ TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU, TILEGX_OPC_V2SUBSC,
+ TILEGX_OPC_V2SUB, TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H,
+ TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC,
+ TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC,
+ TILEGX_OPC_V4SUB,
+ BITFIELD(18, 3) /* index 865 */,
+ CHILD(874), CHILD(877), CHILD(880), CHILD(883), CHILD(886), TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(21, 1) /* index 874 */,
+ TILEGX_OPC_XOR, TILEGX_OPC_NONE,
+ BITFIELD(21, 1) /* index 877 */,
+ TILEGX_OPC_V1DDOTPUA, TILEGX_OPC_NONE,
+ BITFIELD(21, 1) /* index 880 */,
+ TILEGX_OPC_V1DDOTPU, TILEGX_OPC_NONE,
+ BITFIELD(21, 1) /* index 883 */,
+ TILEGX_OPC_V1DOTPUA, TILEGX_OPC_NONE,
+ BITFIELD(21, 1) /* index 886 */,
+ TILEGX_OPC_V1DOTPU, TILEGX_OPC_NONE,
+ BITFIELD(18, 4) /* index 889 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI,
+ TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI,
+ TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI,
+ TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE,
+ BITFIELD(0, 2) /* index 906: first of six chained 2-bit nodes (906 .. 931); only the last entry at every level reaches INFOL, every other path is SHL16INSLI */,
+ TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+ CHILD(911),
+ BITFIELD(2, 2) /* index 911 */,
+ TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+ CHILD(916),
+ BITFIELD(4, 2) /* index 916 */,
+ TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+ CHILD(921),
+ BITFIELD(6, 2) /* index 921 */,
+ TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+ CHILD(926),
+ BITFIELD(8, 2) /* index 926 */,
+ TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+ CHILD(931),
+ BITFIELD(10, 2) /* index 931: last node; header + 4 entries end at index 935, matching the declared length of 936 */,
+ TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+ TILEGX_OPC_INFOL,
+};
+
+static const unsigned short decode_X1_fsm[1206] =
+{
+ BITFIELD(53, 9) /* index 0 */,
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+ CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+ TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BEQZT,
+ TILEGX_OPC_BEQZT, TILEGX_OPC_BEQZ, TILEGX_OPC_BEQZ, TILEGX_OPC_BGEZT,
+ TILEGX_OPC_BGEZT, TILEGX_OPC_BGEZ, TILEGX_OPC_BGEZ, TILEGX_OPC_BGTZT,
+ TILEGX_OPC_BGTZT, TILEGX_OPC_BGTZ, TILEGX_OPC_BGTZ, TILEGX_OPC_BLBCT,
+ TILEGX_OPC_BLBCT, TILEGX_OPC_BLBC, TILEGX_OPC_BLBC, TILEGX_OPC_BLBST,
+ TILEGX_OPC_BLBST, TILEGX_OPC_BLBS, TILEGX_OPC_BLBS, TILEGX_OPC_BLEZT,
+ TILEGX_OPC_BLEZT, TILEGX_OPC_BLEZ, TILEGX_OPC_BLEZ, TILEGX_OPC_BLTZT,
+ TILEGX_OPC_BLTZT, TILEGX_OPC_BLTZ, TILEGX_OPC_BLTZ, TILEGX_OPC_BNEZT,
+ TILEGX_OPC_BNEZT, TILEGX_OPC_BNEZ, TILEGX_OPC_BNEZ, CHILD(528), CHILD(578),
+ CHILD(598), CHILD(663), CHILD(683), CHILD(688), CHILD(693), CHILD(698),
+ CHILD(703), CHILD(708), CHILD(713), CHILD(718), TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_JAL,
+ TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+ TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+ TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+ TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+ TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+ TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+ TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+ TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_J, TILEGX_OPC_J,
+ TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+ TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+ TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+ TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+ TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+ TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+ CHILD(723), CHILD(740), CHILD(772), CHILD(789), CHILD(1108), CHILD(1125),
+ CHILD(1142), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1159), TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176),
+ CHILD(1176),
+ BITFIELD(37, 2) /* index 513 */,
+ TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518),
+ BITFIELD(39, 2) /* index 518 */,
+ TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523),
+ BITFIELD(41, 2) /* index 523 */,
+ TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI,
+ BITFIELD(51, 2) /* index 528 */,
+ TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548),
+ BITFIELD(37, 2) /* index 533 */,
+ TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538),
+ BITFIELD(39, 2) /* index 538 */,
+ TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543),
+ BITFIELD(41, 2) /* index 543 */,
+ TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI,
+ BITFIELD(31, 2) /* index 548 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553),
+ BITFIELD(33, 2) /* index 553 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558),
+ BITFIELD(35, 2) /* index 558 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563),
+ BITFIELD(37, 2) /* index 563 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568),
+ BITFIELD(39, 2) /* index 568 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573),
+ BITFIELD(41, 2) /* index 573 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO,
+ BITFIELD(51, 2) /* index 578 */,
+ TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, CHILD(583),
+ BITFIELD(31, 2) /* index 583 */,
+ TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(588),
+ BITFIELD(33, 2) /* index 588 */,
+ TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(593),
+ BITFIELD(35, 2) /* index 593 */,
+ TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD,
+ TILEGX_OPC_PREFETCH_ADD_L1_FAULT,
+ BITFIELD(51, 2) /* index 598 */,
+ CHILD(603), CHILD(618), CHILD(633), CHILD(648),
+ BITFIELD(31, 2) /* index 603 */,
+ TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(608),
+ BITFIELD(33, 2) /* index 608 */,
+ TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(613),
+ BITFIELD(35, 2) /* index 613 */,
+ TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD,
+ TILEGX_OPC_PREFETCH_ADD_L1,
+ BITFIELD(31, 2) /* index 618 */,
+ TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(623),
+ BITFIELD(33, 2) /* index 623 */,
+ TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(628),
+ BITFIELD(35, 2) /* index 628 */,
+ TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD,
+ TILEGX_OPC_PREFETCH_ADD_L2_FAULT,
+ BITFIELD(31, 2) /* index 633 */,
+ TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(638),
+ BITFIELD(33, 2) /* index 638 */,
+ TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(643),
+ BITFIELD(35, 2) /* index 643 */,
+ TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD,
+ TILEGX_OPC_PREFETCH_ADD_L2,
+ BITFIELD(31, 2) /* index 648 */,
+ TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, CHILD(653),
+ BITFIELD(33, 2) /* index 653 */,
+ TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, CHILD(658),
+ BITFIELD(35, 2) /* index 658 */,
+ TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD,
+ TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+ BITFIELD(51, 2) /* index 663 */,
+ CHILD(668), TILEGX_OPC_LDNT1S_ADD, TILEGX_OPC_LDNT1U_ADD,
+ TILEGX_OPC_LDNT2S_ADD,
+ BITFIELD(31, 2) /* index 668 */,
+ TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(673),
+ BITFIELD(33, 2) /* index 673 */,
+ TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(678),
+ BITFIELD(35, 2) /* index 678 */,
+ TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD,
+ TILEGX_OPC_PREFETCH_ADD_L3,
+ BITFIELD(51, 2) /* index 683 */,
+ TILEGX_OPC_LDNT2U_ADD, TILEGX_OPC_LDNT4S_ADD, TILEGX_OPC_LDNT4U_ADD,
+ TILEGX_OPC_LDNT_ADD,
+ BITFIELD(51, 2) /* index 688 */,
+ TILEGX_OPC_LD_ADD, TILEGX_OPC_LDNA_ADD, TILEGX_OPC_MFSPR, TILEGX_OPC_MTSPR,
+ BITFIELD(51, 2) /* index 693 */,
+ TILEGX_OPC_ORI, TILEGX_OPC_ST1_ADD, TILEGX_OPC_ST2_ADD, TILEGX_OPC_ST4_ADD,
+ BITFIELD(51, 2) /* index 698 */,
+ TILEGX_OPC_STNT1_ADD, TILEGX_OPC_STNT2_ADD, TILEGX_OPC_STNT4_ADD,
+ TILEGX_OPC_STNT_ADD,
+ BITFIELD(51, 2) /* index 703 */,
+ TILEGX_OPC_ST_ADD, TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI,
+ TILEGX_OPC_V1CMPLTSI,
+ BITFIELD(51, 2) /* index 708 */,
+ TILEGX_OPC_V1CMPLTUI, TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI,
+ TILEGX_OPC_V2ADDI,
+ BITFIELD(51, 2) /* index 713 */,
+ TILEGX_OPC_V2CMPEQI, TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI,
+ TILEGX_OPC_V2MAXSI,
+ BITFIELD(51, 2) /* index 718 */,
+ TILEGX_OPC_V2MINSI, TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(49, 4) /* index 723 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD,
+ TILEGX_OPC_AND, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPEXCH4, TILEGX_OPC_CMPEXCH,
+ TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU,
+ TILEGX_OPC_CMPNE, TILEGX_OPC_DBLALIGN2, TILEGX_OPC_DBLALIGN4,
+ TILEGX_OPC_DBLALIGN6,
+ BITFIELD(49, 4) /* index 740 */,
+ TILEGX_OPC_EXCH4, TILEGX_OPC_EXCH, TILEGX_OPC_FETCHADD4,
+ TILEGX_OPC_FETCHADDGEZ4, TILEGX_OPC_FETCHADDGEZ, TILEGX_OPC_FETCHADD,
+ TILEGX_OPC_FETCHAND4, TILEGX_OPC_FETCHAND, TILEGX_OPC_FETCHOR4,
+ TILEGX_OPC_FETCHOR, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, TILEGX_OPC_NOR,
+ CHILD(757), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX,
+ BITFIELD(43, 2) /* index 757 */,
+ TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(762),
+ BITFIELD(45, 2) /* index 762 */,
+ TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(767),
+ BITFIELD(47, 2) /* index 767 */,
+ TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE,
+ BITFIELD(49, 4) /* index 772 */,
+ TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD,
+ TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL,
+ TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_ST1,
+ TILEGX_OPC_ST2, TILEGX_OPC_ST4, TILEGX_OPC_STNT1, TILEGX_OPC_STNT2,
+ TILEGX_OPC_STNT4,
+ BITFIELD(46, 7) /* index 789 */,
+ TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT,
+ TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT,
+ TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST,
+ TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_SUBXSC,
+ TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC,
+ TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBX,
+ TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX,
+ TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUB,
+ TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB,
+ TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, CHILD(918), CHILD(927),
+ CHILD(1006), CHILD(1090), CHILD(1099), TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC,
+ TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC,
+ TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD,
+ TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD,
+ TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ,
+ TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ,
+ TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ,
+ TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES,
+ TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES,
+ TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU,
+ TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU,
+ TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU,
+ TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS,
+ TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS,
+ TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS,
+ TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU,
+ TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU,
+ TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE,
+ TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE,
+ TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE,
+ TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H,
+ TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H,
+ TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H,
+ TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L,
+ TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L,
+ TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L,
+ BITFIELD(43, 3) /* index 918 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_DRAIN, TILEGX_OPC_DTLBPR, TILEGX_OPC_FINV,
+ TILEGX_OPC_FLUSHWB, TILEGX_OPC_FLUSH, TILEGX_OPC_FNOP, TILEGX_OPC_ICOH,
+ BITFIELD(43, 3) /* index 927 */,
+ CHILD(936), TILEGX_OPC_INV, TILEGX_OPC_IRET, TILEGX_OPC_JALRP,
+ TILEGX_OPC_JALR, TILEGX_OPC_JRP, TILEGX_OPC_JR, CHILD(991),
+ BITFIELD(31, 2) /* index 936 */,
+ CHILD(941), CHILD(966), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+ BITFIELD(33, 2) /* index 941 */,
+ TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(946),
+ BITFIELD(35, 2) /* index 946 */,
+ TILEGX_OPC_ILL, CHILD(951), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+ BITFIELD(37, 2) /* index 951 */,
+ TILEGX_OPC_ILL, CHILD(956), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+ BITFIELD(39, 2) /* index 956 */,
+ TILEGX_OPC_ILL, CHILD(961), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+ BITFIELD(41, 2) /* index 961 */,
+ TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_BPT, TILEGX_OPC_ILL,
+ BITFIELD(33, 2) /* index 966 */,
+ TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(971),
+ BITFIELD(35, 2) /* index 971 */,
+ TILEGX_OPC_ILL, CHILD(976), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+ BITFIELD(37, 2) /* index 976 */,
+ TILEGX_OPC_ILL, CHILD(981), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+ BITFIELD(39, 2) /* index 981 */,
+ TILEGX_OPC_ILL, CHILD(986), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+ BITFIELD(41, 2) /* index 986 */,
+ TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_RAISE, TILEGX_OPC_ILL,
+ BITFIELD(31, 2) /* index 991 */,
+ TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(996),
+ BITFIELD(33, 2) /* index 996 */,
+ TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(1001),
+ BITFIELD(35, 2) /* index 1001 */,
+ TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S,
+ TILEGX_OPC_PREFETCH_L1_FAULT,
+ BITFIELD(43, 3) /* index 1006 */,
+ CHILD(1015), CHILD(1030), CHILD(1045), CHILD(1060), CHILD(1075),
+ TILEGX_OPC_LDNA, TILEGX_OPC_LDNT1S, TILEGX_OPC_LDNT1U,
+ BITFIELD(31, 2) /* index 1015 */,
+ TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1020),
+ BITFIELD(33, 2) /* index 1020 */,
+ TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1025),
+ BITFIELD(35, 2) /* index 1025 */,
+ TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH,
+ BITFIELD(31, 2) /* index 1030 */,
+ TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1035),
+ BITFIELD(33, 2) /* index 1035 */,
+ TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1040),
+ BITFIELD(35, 2) /* index 1040 */,
+ TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S,
+ TILEGX_OPC_PREFETCH_L2_FAULT,
+ BITFIELD(31, 2) /* index 1045 */,
+ TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1050),
+ BITFIELD(33, 2) /* index 1050 */,
+ TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1055),
+ BITFIELD(35, 2) /* index 1055 */,
+ TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2,
+ BITFIELD(31, 2) /* index 1060 */,
+ TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1065),
+ BITFIELD(33, 2) /* index 1065 */,
+ TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1070),
+ BITFIELD(35, 2) /* index 1070 */,
+ TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S,
+ TILEGX_OPC_PREFETCH_L3_FAULT,
+ BITFIELD(31, 2) /* index 1075 */,
+ TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1080),
+ BITFIELD(33, 2) /* index 1080 */,
+ TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1085),
+ BITFIELD(35, 2) /* index 1085 */,
+ TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3,
+ BITFIELD(43, 3) /* index 1090 */,
+ TILEGX_OPC_LDNT2S, TILEGX_OPC_LDNT2U, TILEGX_OPC_LDNT4S, TILEGX_OPC_LDNT4U,
+ TILEGX_OPC_LDNT, TILEGX_OPC_LD, TILEGX_OPC_LNK, TILEGX_OPC_MF,
+ BITFIELD(43, 3) /* index 1099 */,
+ TILEGX_OPC_NAP, TILEGX_OPC_NOP, TILEGX_OPC_SWINT0, TILEGX_OPC_SWINT1,
+ TILEGX_OPC_SWINT2, TILEGX_OPC_SWINT3, TILEGX_OPC_WH64, TILEGX_OPC_NONE,
+ BITFIELD(49, 4) /* index 1108 */,
+ TILEGX_OPC_V1MAXU, TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MZ,
+ TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS, TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC,
+ TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC, TILEGX_OPC_V2ADD, TILEGX_OPC_V2CMPEQ,
+ TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU, TILEGX_OPC_V2CMPLTS,
+ TILEGX_OPC_V2CMPLTU,
+ BITFIELD(49, 4) /* index 1125 */,
+ TILEGX_OPC_V2CMPNE, TILEGX_OPC_V2INT_H, TILEGX_OPC_V2INT_L,
+ TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ, TILEGX_OPC_V2MZ,
+ TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC,
+ TILEGX_OPC_V2SHLSC, TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU,
+ TILEGX_OPC_V2SUBSC, TILEGX_OPC_V2SUB,
+ BITFIELD(49, 4) /* index 1142 */,
+ TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H,
+ TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC,
+ TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC,
+ TILEGX_OPC_V4SUB, TILEGX_OPC_XOR, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(49, 4) /* index 1159 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI,
+ TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI,
+ TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI,
+ TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE,
+ BITFIELD(31, 2) /* index 1176 */,
+ TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+ CHILD(1181),
+ BITFIELD(33, 2) /* index 1181 */,
+ TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+ CHILD(1186),
+ BITFIELD(35, 2) /* index 1186 */,
+ TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+ CHILD(1191),
+ BITFIELD(37, 2) /* index 1191 */,
+ TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+ CHILD(1196),
+ BITFIELD(39, 2) /* index 1196 */,
+ TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+ CHILD(1201),
+ BITFIELD(41, 2) /* index 1201 */,
+ TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+ TILEGX_OPC_INFOL,
+};
+
+static const unsigned short decode_Y0_fsm[178] = /* Decode table handed to find_opcode(); third entry of tilegx_bundle_decoder_fsms. */
+{ /* Each BITFIELD(...) header is followed by its entries; "index N" is the header's array offset. CHILD(N) presumably refers to the header at index N (macros defined outside this excerpt). */
+ BITFIELD(27, 4) /* index 0 */,
+ CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI,
+ TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(118), CHILD(123),
+ CHILD(128), CHILD(133), CHILD(153), CHILD(158), CHILD(163), CHILD(168),
+ CHILD(173),
+ BITFIELD(6, 2) /* index 17 */,
+ TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22),
+ BITFIELD(8, 2) /* index 22 */,
+ TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27),
+ BITFIELD(10, 2) /* index 27 */,
+ TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI,
+ BITFIELD(0, 2) /* index 32 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37),
+ BITFIELD(2, 2) /* index 37 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42),
+ BITFIELD(4, 2) /* index 42 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47),
+ BITFIELD(6, 2) /* index 47 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52),
+ BITFIELD(8, 2) /* index 52 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57),
+ BITFIELD(10, 2) /* index 57 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO,
+ BITFIELD(18, 2) /* index 62 */,
+ TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB,
+ BITFIELD(15, 5) /* index 67 */,
+ TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD,
+ TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD,
+ TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD,
+ TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD,
+ TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD,
+ TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD,
+ TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD,
+ TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(100),
+ CHILD(109), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(12, 3) /* index 100 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_CLZ, TILEGX_OPC_CTZ, TILEGX_OPC_FNOP,
+ TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NOP, TILEGX_OPC_PCNT,
+ TILEGX_OPC_REVBITS,
+ BITFIELD(12, 3) /* index 109 */,
+ TILEGX_OPC_REVBYTES, TILEGX_OPC_TBLIDXB0, TILEGX_OPC_TBLIDXB1,
+ TILEGX_OPC_TBLIDXB2, TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ TILEGX_OPC_NONE,
+ BITFIELD(18, 2) /* index 118 */,
+ TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU,
+ BITFIELD(18, 2) /* index 123 */,
+ TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE, TILEGX_OPC_MULAX, TILEGX_OPC_MULX,
+ BITFIELD(18, 2) /* index 128 */,
+ TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_MNZ, TILEGX_OPC_MZ,
+ BITFIELD(18, 2) /* index 133 */,
+ TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(138), TILEGX_OPC_XOR,
+ BITFIELD(12, 2) /* index 138 */,
+ TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(143),
+ BITFIELD(14, 2) /* index 143 */,
+ TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(148),
+ BITFIELD(16, 2) /* index 148 */,
+ TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE,
+ BITFIELD(18, 2) /* index 153 */,
+ TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU,
+ BITFIELD(18, 2) /* index 158 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX,
+ TILEGX_OPC_SHL3ADDX,
+ BITFIELD(18, 2) /* index 163 */,
+ TILEGX_OPC_MUL_HS_HS, TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_LS_LS,
+ TILEGX_OPC_MUL_LU_LU,
+ BITFIELD(18, 2) /* index 168 */,
+ TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_LS_LS,
+ TILEGX_OPC_MULA_LU_LU,
+ BITFIELD(18, 2) /* index 173 */,
+ TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI,
+};
+
+static const unsigned short decode_Y1_fsm[167] = /* Decode table handed to find_opcode(); fourth entry of tilegx_bundle_decoder_fsms. */
+{ /* Each BITFIELD(...) header is followed by its entries; "index N" is the header's array offset. CHILD(N) presumably refers to the header at index N (macros defined outside this excerpt). */
+ BITFIELD(58, 4) /* index 0 */,
+ TILEGX_OPC_NONE, CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI,
+ TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(117), CHILD(122),
+ CHILD(127), CHILD(132), CHILD(152), CHILD(157), CHILD(162), TILEGX_OPC_NONE,
+ BITFIELD(37, 2) /* index 17 */,
+ TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22),
+ BITFIELD(39, 2) /* index 22 */,
+ TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27),
+ BITFIELD(41, 2) /* index 27 */,
+ TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI,
+ BITFIELD(31, 2) /* index 32 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37),
+ BITFIELD(33, 2) /* index 37 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42),
+ BITFIELD(35, 2) /* index 42 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47),
+ BITFIELD(37, 2) /* index 47 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52),
+ BITFIELD(39, 2) /* index 52 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57),
+ BITFIELD(41, 2) /* index 57 */,
+ TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO,
+ BITFIELD(49, 2) /* index 62 */,
+ TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB,
+ BITFIELD(47, 4) /* index 67 */,
+ TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD,
+ TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD,
+ TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD,
+ TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(84),
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+ BITFIELD(43, 3) /* index 84 */,
+ CHILD(93), CHILD(96), CHILD(99), CHILD(102), CHILD(105), CHILD(108),
+ CHILD(111), CHILD(114),
+ BITFIELD(46, 1) /* index 93 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_FNOP,
+ BITFIELD(46, 1) /* index 96 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_ILL,
+ BITFIELD(46, 1) /* index 99 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_JALRP,
+ BITFIELD(46, 1) /* index 102 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_JALR,
+ BITFIELD(46, 1) /* index 105 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_JRP,
+ BITFIELD(46, 1) /* index 108 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_JR,
+ BITFIELD(46, 1) /* index 111 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_LNK,
+ BITFIELD(46, 1) /* index 114 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_NOP,
+ BITFIELD(49, 2) /* index 117 */,
+ TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU,
+ BITFIELD(49, 2) /* index 122 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE,
+ BITFIELD(49, 2) /* index 127 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_MNZ, TILEGX_OPC_MZ,
+ BITFIELD(49, 2) /* index 132 */,
+ TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(137), TILEGX_OPC_XOR,
+ BITFIELD(43, 2) /* index 137 */,
+ TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(142),
+ BITFIELD(45, 2) /* index 142 */,
+ TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(147),
+ BITFIELD(47, 2) /* index 147 */,
+ TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE,
+ BITFIELD(49, 2) /* index 152 */,
+ TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU,
+ BITFIELD(49, 2) /* index 157 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX,
+ TILEGX_OPC_SHL3ADDX,
+ BITFIELD(49, 2) /* index 162 */,
+ TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI,
+};
+
+static const unsigned short decode_Y2_fsm[118] = /* Decode table handed to find_opcode(); fifth entry of tilegx_bundle_decoder_fsms. */
+{ /* Each BITFIELD(...) header is followed by its entries; "index N" is the header's array offset. CHILD(N) presumably refers to the header at index N (macros defined outside this excerpt). */
+ BITFIELD(62, 2) /* index 0 */,
+ TILEGX_OPC_NONE, CHILD(5), CHILD(66), CHILD(109),
+ BITFIELD(55, 3) /* index 5 */,
+ CHILD(14), CHILD(14), CHILD(14), CHILD(17), CHILD(40), CHILD(40), CHILD(40),
+ CHILD(43),
+ BITFIELD(26, 1) /* index 14 */,
+ TILEGX_OPC_LD1S, TILEGX_OPC_LD1U,
+ BITFIELD(26, 1) /* index 17 */,
+ CHILD(20), CHILD(30),
+ BITFIELD(51, 2) /* index 20 */,
+ TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(25),
+ BITFIELD(53, 2) /* index 25 */,
+ TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S,
+ TILEGX_OPC_PREFETCH_L1_FAULT,
+ BITFIELD(51, 2) /* index 30 */,
+ TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(35),
+ BITFIELD(53, 2) /* index 35 */,
+ TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH,
+ BITFIELD(26, 1) /* index 40 */,
+ TILEGX_OPC_LD2S, TILEGX_OPC_LD2U,
+ BITFIELD(26, 1) /* index 43 */,
+ CHILD(46), CHILD(56),
+ BITFIELD(51, 2) /* index 46 */,
+ TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(51),
+ BITFIELD(53, 2) /* index 51 */,
+ TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S,
+ TILEGX_OPC_PREFETCH_L2_FAULT,
+ BITFIELD(51, 2) /* index 56 */,
+ TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(61),
+ BITFIELD(53, 2) /* index 61 */,
+ TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2,
+ BITFIELD(56, 2) /* index 66 */,
+ CHILD(71), CHILD(74), CHILD(90), CHILD(93),
+ BITFIELD(26, 1) /* index 71 */,
+ TILEGX_OPC_NONE, TILEGX_OPC_LD4S,
+ BITFIELD(26, 1) /* index 74 */,
+ TILEGX_OPC_NONE, CHILD(77),
+ BITFIELD(51, 2) /* index 77 */,
+ TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(82),
+ BITFIELD(53, 2) /* index 82 */,
+ TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(87),
+ BITFIELD(55, 1) /* index 87 */,
+ TILEGX_OPC_LD4S, TILEGX_OPC_PREFETCH_L3_FAULT,
+ BITFIELD(26, 1) /* index 90 */,
+ TILEGX_OPC_LD4U, TILEGX_OPC_LD,
+ BITFIELD(26, 1) /* index 93 */,
+ CHILD(96), TILEGX_OPC_LD,
+ BITFIELD(51, 2) /* index 96 */,
+ TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(101),
+ BITFIELD(53, 2) /* index 101 */,
+ TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(106),
+ BITFIELD(55, 1) /* index 106 */,
+ TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3,
+ BITFIELD(26, 1) /* index 109 */,
+ CHILD(112), CHILD(115),
+ BITFIELD(57, 1) /* index 112 */,
+ TILEGX_OPC_ST1, TILEGX_OPC_ST4,
+ BITFIELD(57, 1) /* index 115 */,
+ TILEGX_OPC_ST2, TILEGX_OPC_ST,
+};
+
+#undef BITFIELD
+#undef CHILD
+const unsigned short * const
+tilegx_bundle_decoder_fsms[TILEGX_NUM_PIPELINE_ENCODINGS] = /* Per-pipe decode tables; parse_insn_tilegx() indexes this array by pipe number. */
+{ /* NOTE(review): order must agree with the TILEGX_PIPELINE_* values used as indices (enum declared elsewhere) - confirm. */
+ decode_X0_fsm,
+ decode_X1_fsm,
+ decode_Y0_fsm,
+ decode_Y1_fsm,
+ decode_Y2_fsm
+};
+const struct tilegx_operand tilegx_operands[35] = /* Operand descriptors; parse_insn_tilegx() selects one via opc->operands[pipe][i]. */
+{ /* NOTE(review): positional initializers follow struct tilegx_operand (declared elsewhere); the decoder reads type, num_bits, is_signed and extract - confirm field order against the header. */
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X0),
+ 8, 1, 0, 0, 0, 0,
+ create_Imm8_X0, get_Imm8_X0
+ },
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X1),
+ 8, 1, 0, 0, 0, 0,
+ create_Imm8_X1, get_Imm8_X1
+ },
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y0),
+ 8, 1, 0, 0, 0, 0,
+ create_Imm8_Y0, get_Imm8_Y0
+ },
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y1),
+ 8, 1, 0, 0, 0, 0,
+ create_Imm8_Y1, get_Imm8_Y1
+ },
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X0_HW0_LAST),
+ 16, 1, 0, 0, 0, 0,
+ create_Imm16_X0, get_Imm16_X0
+ },
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X1_HW0_LAST),
+ 16, 1, 0, 0, 0, 0,
+ create_Imm16_X1, get_Imm16_X1
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 0, 1, 0, 0,
+ create_Dest_X0, get_Dest_X0
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcA_X0, get_SrcA_X0
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 0, 1, 0, 0,
+ create_Dest_X1, get_Dest_X1
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcA_X1, get_SrcA_X1
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 0, 1, 0, 0,
+ create_Dest_Y0, get_Dest_Y0
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcA_Y0, get_SrcA_Y0
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 0, 1, 0, 0,
+ create_Dest_Y1, get_Dest_Y1
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcA_Y1, get_SrcA_Y1
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcA_Y2, get_SrcA_Y2
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 1, 0, 0,
+ create_SrcA_X1, get_SrcA_X1
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcB_X0, get_SrcB_X0
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcB_X1, get_SrcB_X1
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcB_Y0, get_SrcB_Y0
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcB_Y1, get_SrcB_Y1
+ },
+ {
+ TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_BROFF_X1),
+ 17, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES,
+ create_BrOff_X1, get_BrOff_X1
+ },
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMSTART_X0),
+ 6, 0, 0, 0, 0, 0,
+ create_BFStart_X0, get_BFStart_X0
+ },
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMEND_X0),
+ 6, 0, 0, 0, 0, 0,
+ create_BFEnd_X0, get_BFEnd_X0
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 1, 0, 0,
+ create_Dest_X0, get_Dest_X0
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 1, 0, 0,
+ create_Dest_Y0, get_Dest_Y0
+ },
+ {
+ TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_JUMPOFF_X1),
+ 27, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES,
+ create_JumpOff_X1, get_JumpOff_X1
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 0, 1, 0, 0,
+ create_SrcBDest_Y2, get_SrcBDest_Y2
+ },
+ {
+ TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MF_IMM14_X1),
+ 14, 0, 0, 0, 0, 0,
+ create_MF_Imm14_X1, get_MF_Imm14_X1
+ },
+ {
+ TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MT_IMM14_X1),
+ 14, 0, 0, 0, 0, 0,
+ create_MT_Imm14_X1, get_MT_Imm14_X1
+ },
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X0),
+ 6, 0, 0, 0, 0, 0,
+ create_ShAmt_X0, get_ShAmt_X0
+ },
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X1),
+ 6, 0, 0, 0, 0, 0,
+ create_ShAmt_X1, get_ShAmt_X1
+ },
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y0),
+ 6, 0, 0, 0, 0, 0,
+ create_ShAmt_Y0, get_ShAmt_Y0
+ },
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y1),
+ 6, 0, 0, 0, 0, 0,
+ create_ShAmt_Y1, get_ShAmt_Y1
+ },
+ {
+ TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+ 6, 0, 1, 0, 0, 0,
+ create_SrcBDest_Y2, get_SrcBDest_Y2
+ },
+ {
+ TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_DEST_IMM8_X1),
+ 8, 1, 0, 0, 0, 0,
+ create_Dest_Imm8_X1, get_Dest_Imm8_X1
+ }
+};
+
+
+
+
+/* Given a set of bundle bits and the lookup FSM for a specific pipe,
+ * returns which instruction the bundle contains in that pipe.  Each step reads
+ * a node header, extracts the bitfield it names, and follows the chosen entry. */
+static const struct tilegx_opcode *
+find_opcode(tilegx_bundle_bits bits, const unsigned short *table)
+{
+ int index = 0;
+ /* index is the array offset of the current node header; the walk starts at 0. */
+ while (1)
+ {
+ unsigned short bitspec = table[index];
+ unsigned int bitfield =
+ ((unsigned int)(bits >> (bitspec & 63))) & (bitspec >> 6);
+ /* Low 6 bits of bitspec are the shift count; the remaining high bits are the mask. */
+ unsigned short next = table[index + 1 + bitfield];
+ if (next <= TILEGX_OPC_NONE)
+ return &tilegx_opcodes[next];
+ /* Entries above TILEGX_OPC_NONE are not opcodes: they carry the offset of another node. */
+ index = next - TILEGX_OPC_NONE;
+ }
+}
+
+
+int /* Decodes one bundle; returns how many entries of decoded[] were filled in. */
+parse_insn_tilegx(tilegx_bundle_bits bits, /* raw bundle to decode */
+ unsigned long long pc, /* added to scaled TILEGX_OP_TYPE_ADDRESS operands */
+ struct tilegx_decoded_instruction
+ decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]) /* output, one entry per pipe */
+{
+ int num_instructions = 0;
+ int pipe;
+ /* The bundle's mode bits select the pipe range: X0..X1 when clear, Y0..Y2 otherwise. */
+ int min_pipe, max_pipe;
+ if ((bits & TILEGX_BUNDLE_MODE_MASK) == 0)
+ {
+ min_pipe = TILEGX_PIPELINE_X0;
+ max_pipe = TILEGX_PIPELINE_X1;
+ }
+ else
+ {
+ min_pipe = TILEGX_PIPELINE_Y0;
+ max_pipe = TILEGX_PIPELINE_Y2;
+ }
+
+ /* For each pipe, find an instruction that fits. */
+ for (pipe = min_pipe; pipe <= max_pipe; pipe++)
+ {
+ const struct tilegx_opcode *opc;
+ struct tilegx_decoded_instruction *d;
+ int i;
+
+ d = &decoded[num_instructions++];
+ opc = find_opcode (bits, tilegx_bundle_decoder_fsms[pipe]);
+ d->opcode = opc;
+
+ /* Decode each operand, sign extending, etc. as appropriate. */
+ for (i = 0; i < opc->num_operands; i++)
+ {
+ const struct tilegx_operand *op =
+ &tilegx_operands[opc->operands[pipe][i]];
+ int raw_opval = op->extract (bits);
+ long long opval;
+
+ if (op->is_signed)
+ {
+ /* Sign-extend: shift left as unsigned (signed << into the sign bit is undefined), then shift right as int. */
+ int shift = (int)((sizeof(int) * 8) - op->num_bits);
+ raw_opval = (int)((unsigned int)raw_opval << shift) >> shift;
+ }
+
+ /* Adjust PC-relative scaled branch offsets. */
+ if (op->type == TILEGX_OP_TYPE_ADDRESS)
+ opval = (raw_opval * TILEGX_BUNDLE_SIZE_IN_BYTES) + pc;
+ else
+ opval = raw_opval;
+
+ /* Record the final value. */
+ d->operands[i] = op;
+ d->operand_values[i] = opval;
+ }
+ }
+
+ return num_instructions;
+}
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
new file mode 100644
index 00000000..f6f50f2a
--- /dev/null
+++ b/arch/tile/kernel/time.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Support the cycle counter clocksource and tile timer clock event device.
+ */
+
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/hardirq.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <asm/irq_regs.h>
+#include <asm/traps.h>
+#include <hv/hypervisor.h>
+#include <arch/interrupts.h>
+#include <arch/spr_def.h>
+
+
+/*
+ * Define the cycle counter clock source.
+ */
+
+/* How many cycles per second we are running at. */
+static cycles_t cycles_per_sec __write_once;
+
+cycles_t get_clock_rate(void) /* Returns cycles_per_sec, the value setup_clock() obtained from hv_sysconf(). */
+{
+ return cycles_per_sec;
+}
+
+#if CHIP_HAS_SPLIT_CYCLE()
+cycles_t get_cycles(void) /* Combine SPR_CYCLE_HIGH and SPR_CYCLE_LOW into one cycles_t value. */
+{
+ unsigned int high = __insn_mfspr(SPR_CYCLE_HIGH);
+ unsigned int low = __insn_mfspr(SPR_CYCLE_LOW);
+ unsigned int high2 = __insn_mfspr(SPR_CYCLE_HIGH);
+ /* If the high word changed between its two reads, "low" cannot be trusted; re-read until it is stable. */
+ while (unlikely(high != high2)) {
+ low = __insn_mfspr(SPR_CYCLE_LOW);
+ high = high2;
+ high2 = __insn_mfspr(SPR_CYCLE_HIGH);
+ }
+
+ return (((cycles_t)high) << 32) | low;
+}
+EXPORT_SYMBOL(get_cycles);
+#endif
+
+/*
+ * We use a relatively small shift value so that sched_clock()
+ * won't wrap around very often.
+ */
+#define SCHED_CLOCK_SHIFT 10
+
+static unsigned long sched_clock_mult __write_once;
+
+static cycles_t clocksource_get_cycles(struct clocksource *cs) /* .read hook of cycle_counter_cs; "cs" is unused. */
+{
+ return get_cycles();
+}
+
+static struct clocksource cycle_counter_cs = { /* Clocksource backed by get_cycles(); registered in time_init(). */
+ .name = "cycle counter",
+ .rating = 300,
+ .read = clocksource_get_cycles,
+ .mask = CLOCKSOURCE_MASK(64), /* all 64 bits of the counter value are significant */
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+/*
+ * Called very early from setup_arch() to set cycles_per_sec.
+ * We initialize it early so we can use it to set up loops_per_jiffy.
+ */
+void __init setup_clock(void)
+{
+ cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED); /* clock rate as reported by the hypervisor */
+ sched_clock_mult = /* multiplier later passed to clocksource_cyc2ns() by sched_clock() */
+ clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT);
+}
+
+void __init calibrate_delay(void) /* Set loops_per_jiffy straight from the clock rate and log the BogoMIPS figure. */
+{
+ loops_per_jiffy = get_clock_rate() / HZ; /* cycles per jiffy */
+ pr_info("Clock rate yields %lu.%02lu BogoMIPS (lpj=%lu)\n",
+ loops_per_jiffy/(500000/HZ), /* integer part */
+ (loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy); /* two-digit fraction, then the raw lpj value */
+}
+
+/* Called fairly late in init/main.c, but before we go smp; registers the clocksource and the boot cpu's timer. */
+void __init time_init(void)
+{
+ /* Initialize and register the clock source. */
+ clocksource_register_hz(&cycle_counter_cs, cycles_per_sec);
+
+ /* Start up the tile-timer interrupt source on the boot cpu. */
+ setup_tile_timer();
+}
+
+
+/*
+ * Define the tile timer clock event device. The timer is driven by
+ * the TILE_TIMER_CONTROL register, which consists of a 31-bit down
+ * counter, plus bit 31, which signifies that the counter has wrapped
+ * from zero to (2**31) - 1. The INT_TILE_TIMER interrupt will be
+ * raised as long as bit 31 is set.
+ *
+ * The TILE_MINSEC value represents the largest range of real-time
+ * we can possibly cover with the timer, based on MAX_TICK combined
+ * with the slowest reasonable clock rate we might run at.
+ */
+
+#define MAX_TICK 0x7fffffff /* we have 31 bits of countdown timer */
+#define TILE_MINSEC 5 /* timer covers no more than 5 seconds */
+
+static int tile_timer_set_next_event(unsigned long ticks, /* value written to the timer; must not exceed MAX_TICK */
+ struct clock_event_device *evt) /* unused */
+{
+ BUG_ON(ticks > MAX_TICK);
+ __insn_mtspr(SPR_TILE_TIMER_CONTROL, ticks); /* program the timer control register */
+ arch_local_irq_unmask_now(INT_TILE_TIMER); /* the interrupt is left masked by set_mode and by the handler */
+ return 0; /* always reports success */
+}
+
+/*
+ * Whenever anyone tries to change modes, we just mask interrupts
+ * and wait for the next event to get set.
+ */
+static void tile_timer_set_mode(enum clock_event_mode mode, /* ignored: every mode is handled the same way */
+ struct clock_event_device *evt) /* unused */
+{
+ arch_local_irq_mask_now(INT_TILE_TIMER);
+}
+
+/*
+ * Set min_delta_ns to 1 microsecond, since it takes about that long to fire
+ * the interrupt.  setup_tile_timer() fills in the speed- and cpu-specific fields.
+ */
+static DEFINE_PER_CPU(struct clock_event_device, tile_timer) = {
+ .name = "tile timer",
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ .min_delta_ns = 1000, /* 1000 ns = 1 microsecond */
+ .rating = 100,
+ .irq = -1,
+ .set_next_event = tile_timer_set_next_event,
+ .set_mode = tile_timer_set_mode,
+};
+
+void __cpuinit setup_tile_timer(void) /* Configure and register this cpu's tile_timer clock event device. */
+{
+ struct clock_event_device *evt = &__get_cpu_var(tile_timer);
+
+ /* Fill in fields that are speed-specific. */
+ clockevents_calc_mult_shift(evt, cycles_per_sec, TILE_MINSEC);
+ evt->max_delta_ns = clockevent_delta2ns(MAX_TICK, evt); /* nanoseconds covered by the largest programmable tick count */
+
+ /* Mark as being for this cpu only. */
+ evt->cpumask = cpumask_of(smp_processor_id());
+
+ /* Start out with timer not firing. */
+ arch_local_irq_mask_now(INT_TILE_TIMER);
+
+ /* Register tile timer. */
+ clockevents_register_device(evt);
+}
+
+/* Called from the interrupt vector.  fault_num is not used in this handler. */
+void do_timer_interrupt(struct pt_regs *regs, int fault_num)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs); /* saved so it can be restored on exit */
+ struct clock_event_device *evt = &__get_cpu_var(tile_timer);
+
+ /*
+ * Mask the timer interrupt here, since we are a oneshot timer
+ * and there are now by definition no events pending.
+ */
+ arch_local_irq_mask(INT_TILE_TIMER);
+
+ /* Track time spent here in an interrupt context */
+ irq_enter();
+
+ /* Track interrupt count. */
+ __get_cpu_var(irq_stat).irq_timer_count++;
+
+ /* Call the generic timer handler */
+ evt->event_handler(evt);
+
+ /*
+ * Track time spent against the current process again and
+ * process any softirqs if they are waiting.
+ */
+ irq_exit();
+
+ set_irq_regs(old_regs);
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ * Note that with LOCKDEP, this is called during lockdep_init(), and
+ * we will claim that sched_clock() is zero for a little while, until
+ * we run setup_clock(), above.
+ */
+unsigned long long sched_clock(void)
+{
+ return clocksource_cyc2ns(get_cycles(), /* sched_clock_mult is computed in setup_clock() */
+ sched_clock_mult, SCHED_CLOCK_SHIFT);
+}
+
+int setup_profiling_timer(unsigned int multiplier) /* Always fails with -EINVAL; "multiplier" is ignored. */
+{
+ return -EINVAL;
+}
+
+/*
+ * Use the tile timer to convert nsecs to core clock cycles, relying
+ * on it having the same frequency as SPR_CYCLE.
+ */
+cycles_t ns2cycles(unsigned long nsecs)
+{
+ struct clock_event_device *dev = &__get_cpu_var(tile_timer); /* this cpu's device, for its mult/shift pair */
+ return ((u64)nsecs * dev->mult) >> dev->shift; /* widened to u64 before the multiply */
+}
diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c
new file mode 100644
index 00000000..a5f241c2
--- /dev/null
+++ b/arch/tile/kernel/tlb.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
+#include <hv/hypervisor.h>
+
+/* From tlbflush.h */
+DEFINE_PER_CPU(int, current_asid);
+int min_asid, max_asid;
+
+/*
+ * Note that we flush the L1I (for VM_EXEC pages) as well as the TLB
+ * so that when we are unmapping an executable page, we also flush it.
+ * Combined with flushing the L1I at context switch time, this means
+ * we don't have to do any other icache flushes.
+ */
+
+void flush_tlb_mm(struct mm_struct *mm) /* Flush by ASID on every cpu in mm_cpumask(mm), requesting L1I eviction too. */
+{
+ HV_Remote_ASID asids[NR_CPUS];
+ int i = 0, cpu;
+ for_each_cpu(cpu, mm_cpumask(mm)) { /* build one (x, y, asid) record per cpu in the mask */
+ HV_Remote_ASID *asid = &asids[i++];
+ asid->y = cpu / smp_topology.width; /* cpu number split into y ... */
+ asid->x = cpu % smp_topology.width; /* ... and x using the topology width */
+ asid->asid = per_cpu(current_asid, cpu);
+ }
+ flush_remote(0, HV_FLUSH_EVICT_L1I, mm_cpumask(mm), /* no address range is given; only the ASID list of length i */
+ 0, 0, 0, NULL, asids, i);
+}
+
+void flush_tlb_current_task(void) /* Wrapper: flush_tlb_mm() applied to current->mm. */
+{
+ flush_tlb_mm(current->mm);
+}
+
+void flush_tlb_page_mm(const struct vm_area_struct *vma, struct mm_struct *mm, /* Flush one page at "va" on the cpus in mm_cpumask(mm). */
+ unsigned long va)
+{
+ unsigned long size = hv_page_size(vma); /* page size of this vma; used as both length and page size below */
+ int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; /* evict L1I only for executable mappings */
+ flush_remote(0, cache, mm_cpumask(mm),
+ va, size, size, mm_cpumask(mm), NULL, 0);
+}
+
+void flush_tlb_page(const struct vm_area_struct *vma, unsigned long va) /* Wrapper: flush_tlb_page_mm() with the vma's own mm. */
+{
+ flush_tlb_page_mm(vma, vma->vm_mm, va);
+}
+EXPORT_SYMBOL(flush_tlb_page);
+
+void flush_tlb_range(const struct vm_area_struct *vma, /* Flush [start, end) on the cpus in the vma's mm_cpumask. */
+ unsigned long start, unsigned long end)
+{
+ unsigned long size = hv_page_size(vma); /* page size of this vma */
+ struct mm_struct *mm = vma->vm_mm;
+ int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; /* evict L1I only for executable mappings */
+ flush_remote(0, cache, mm_cpumask(mm), start, end - start, size,
+ mm_cpumask(mm), NULL, 0);
+}
+
+void flush_tlb_all(void) /* Flush every range reported by hv_inquire_virtual() on all online cpus. */
+{
+ int i;
+ for (i = 0; ; ++i) { /* ranges are enumerated by index until one of size 0 is returned */
+ HV_VirtAddrRange r = hv_inquire_virtual(i);
+ if (r.size == 0)
+ break;
+ flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask, /* first pass: PAGE_SIZE, with L1I eviction */
+ r.start, r.size, PAGE_SIZE, cpu_online_mask,
+ NULL, 0);
+ flush_remote(0, 0, NULL, /* second pass: HPAGE_SIZE, no cache flush requested */
+ r.start, r.size, HPAGE_SIZE, cpu_online_mask,
+ NULL, 0);
+ }
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end) /* Flush [start, end) at PAGE_SIZE on all online cpus, with L1I eviction. */
+{
+ flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask,
+ start, end - start, PAGE_SIZE, cpu_online_mask, NULL, 0);
+}
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
new file mode 100644
index 00000000..73cff814
--- /dev/null
+++ b/arch/tile/kernel/traps.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <asm/stack.h>
+#include <asm/traps.h>
+#include <asm/setup.h>
+
+#include <arch/interrupts.h>
+#include <arch/spr_def.h>
+#include <arch/opcode.h>
+
+/* Boot-time trap initialization hook; deliberately left empty here. */
+void __init trap_init(void)
+{
+ /* Nothing needed here since we link code at .intrpt1 */
+}
+
+/*
+ * Policy for unaligned data accesses, settable with "unaligned_fixup=":
+ * positive means fixups are enabled, zero disabled, negative completely
+ * disabled.
+ */
+int unaligned_fixup = 1;
+
+/*
+ * Parse the "unaligned_fixup=" boot argument.  Returns 1 once the value
+ * has been stored and reported, or 0 if it does not parse as a number.
+ */
+static int __init setup_unaligned_fixup(char *str)
+{
+	const char *state;
+	long val;
+
+	/*
+	 * Say "=-1" to completely disable it.  If you just do "=0", we
+	 * will still parse the instruction, then fire a SIGBUS with
+	 * the correct address from inside the single_step code.
+	 */
+	if (strict_strtol(str, 0, &val) != 0)
+		return 0;
+	unaligned_fixup = val;
+
+	if (unaligned_fixup < 0)
+		state = "completely disabled";
+	else if (unaligned_fixup != 0)
+		state = "enabled";
+	else
+		state = "disabled";
+
+	pr_info("Fixups for unaligned data accesses are %s\n", state);
+	return 1;
+}
+__setup("unaligned_fixup=", setup_unaligned_fixup);
+
+#if CHIP_HAS_TILE_DMA()
+
+/* Set to 1 by the "nodma" boot argument; checked in retry_gpv(). */
+static int dma_disabled;
+
+/*
+ * Handler for the "nodma" boot argument: logs the fact and sets
+ * dma_disabled.  The argument string itself is not examined.
+ * Always returns 1.
+ */
+static int __init nodma(char *str)
+{
+ pr_info("User-space DMA is disabled\n");
+ dma_disabled = 1;
+ return 1;
+}
+__setup("nodma", nodma);
+
+/* How to decode SPR_GPV_REASON */
+#define IRET_ERROR (1U << 31)
+#define MT_ERROR (1U << 30)
+#define MF_ERROR (1U << 29)
+#define SPR_INDEX ((1U << 15) - 1)
+#define SPR_MPL_SHIFT 9 /* starting bit position for MPL encoded in SPR */
+
+/*
+ * Decide whether a GPV is merely the hardware telling the kernel that
+ * user space touched an SPR, in which case the relevant MPLs can be
+ * adjusted and the user instruction retried.
+ *
+ * Returns 1 if the instruction should be retried, 0 if the GPV must be
+ * handled as a real fault.
+ */
+static int retry_gpv(unsigned int gpv_reason)
+{
+	int mpl;
+
+	/* An IRET error is never retried. */
+	if (gpv_reason & IRET_ERROR)
+		return 0;
+
+	/* Anything else must be flagged as an mtspr or mfspr error. */
+	BUG_ON((gpv_reason & (MT_ERROR|MF_ERROR)) == 0);
+
+	mpl = (gpv_reason & SPR_INDEX) >> SPR_MPL_SHIFT;
+	if (mpl != INT_DMA_NOTIFY || dma_disabled)
+		return 0;
+
+	/* User is turning on DMA.  Allow it and retry. */
+	printk(KERN_DEBUG "Process %d/%s is now enabled for DMA\n",
+	       current->pid, current->comm);
+	BUG_ON(current->thread.tile_dma_state.enabled);
+	current->thread.tile_dma_state.enabled = 1;
+	grant_dma_mpls();
+	return 1;
+}
+
+#endif /* CHIP_HAS_TILE_DMA() */
+
+/* Pick the instruction-bundle integer type that matches the target chip. */
+#ifdef __tilegx__
+#define bundle_bits tilegx_bundle_bits
+#else
+#define bundle_bits tile_bundle_bits
+#endif
+
+/*
+ * bpt_code is defined by the asm() statement below: a single "bpt"
+ * instruction assembled into the read-only section .rodata.bpt_code.
+ * special_ill() compares faulting bundles against it.
+ */
+extern bundle_bits bpt_code;
+
+asm(".pushsection .rodata.bpt_code,\"a\";"
+ ".align 8;"
+ "bpt_code: bpt;"
+ ".size bpt_code,.-bpt_code;"
+ ".popsection");
+
+/*
+ * Recognize the illegal-instruction bundles that carry a request for a
+ * specific signal.
+ *
+ * Returns 1 and fills in *sigp / *codep when "bundle" is either the
+ * breakpoint bundle (SIGTRAP, TRAP_BRKPT) or a well-formed "raise"
+ * bundle.  Returns 0, leaving both outputs untouched, otherwise.
+ */
+static int special_ill(bundle_bits bundle, int *sigp, int *codep)
+{
+	int sig, code, maxcode;
+
+	if (bundle == bpt_code) {
+		*sigp = SIGTRAP;
+		*codep = TRAP_BRKPT;
+		return 1;
+	}
+
+	/* If it's a "raise" bundle, then "ill" must be in pipe X1. */
+#ifdef __tilegx__
+	if ((bundle & TILEGX_BUNDLE_MODE_MASK) != 0 ||
+	    get_Opcode_X1(bundle) != RRR_0_OPCODE_X1 ||
+	    get_RRROpcodeExtension_X1(bundle) != UNARY_RRR_0_OPCODE_X1 ||
+	    get_UnaryOpcodeExtension_X1(bundle) != ILL_UNARY_OPCODE_X1)
+		return 0;
+#else
+	if ((bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) ||
+	    get_Opcode_X1(bundle) != SHUN_0_OPCODE_X1 ||
+	    get_UnShOpcodeExtension_X1(bundle) != UN_0_SHUN_0_OPCODE_X1 ||
+	    get_UnOpcodeExtension_X1(bundle) != ILL_UN_0_SHUN_0_OPCODE_X1)
+		return 0;
+#endif
+
+	/* Check that the magic distinguishers are set to mean "raise". */
+	if (get_Dest_X1(bundle) != 29)
+		return 0;
+	if (get_SrcA_X1(bundle) != 37)
+		return 0;
+
+	/* There must be an "addli zero, zero, VAL" in X0. */
+	if (get_Opcode_X0(bundle) != ADDLI_OPCODE_X0 ||
+	    get_Dest_X0(bundle) != TREG_ZERO ||
+	    get_SrcA_X0(bundle) != TREG_ZERO)
+		return 0;
+
+	/*
+	 * Validate the proposed signal number and si_code value.
+	 * Note that we embed these in the static instruction itself
+	 * so that we perturb the register state as little as possible
+	 * at the time of the actual fault; it's unlikely you'd ever
+	 * need to dynamically choose which kind of fault to raise
+	 * from user space.
+	 */
+	sig = get_Imm16_X0(bundle) & 0x3f;
+	if (sig == SIGILL)
+		maxcode = NSIGILL;
+	else if (sig == SIGFPE)
+		maxcode = NSIGFPE;
+	else if (sig == SIGSEGV)
+		maxcode = NSIGSEGV;
+	else if (sig == SIGBUS)
+		maxcode = NSIGBUS;
+	else if (sig == SIGTRAP)
+		maxcode = NSIGTRAP;
+	else
+		return 0;
+
+	/* The si_code lives in the four bits above the signal number. */
+	code = (get_Imm16_X0(bundle) >> 6) & 0xf;
+	if (code < 1 || code > maxcode)
+		return 0;
+
+	/* Make it the requested signal. */
+	*sigp = sig;
+	*codep = code | __SI_FAULT;
+	return 1;
+}
+
+/*
+ * Common C-level handler for synchronous traps.
+ *
+ * regs:      register state saved when the trap was taken
+ * fault_num: the INT_xxx number of the trap
+ * reason:    fault-specific word; it is printed as GPV_REASON and passed
+ *            to retry_gpv() for INT_GPV, replaces fault_num for
+ *            INT_DOUBLE_FAULT, and is tested against the I_STREAM_VA
+ *            mask for INT_ILL_TRANS
+ *
+ * A kernel-mode trap is either resolved by fixup_exception() or ends
+ * the current task with do_exit(SIGKILL).  A user-mode trap is mapped
+ * to a signal number, si_code and address and delivered with
+ * force_sig_info().  Two cases return early without a signal: a GPV
+ * that retry_gpv() says to retry, and an unaligned access handed to
+ * single_step_once().
+ */
+void __kprobes do_trap(struct pt_regs *regs, int fault_num,
+ unsigned long reason)
+{
+ siginfo_t info = { 0 };
+ int signo, code;
+ unsigned long address = 0;
+ bundle_bits instr;
+
+ /* Re-enable interrupts. */
+ local_irq_enable();
+
+ /*
+ * If it hits in kernel mode and we can't fix it up, just exit the
+ * current process and hope for the best.
+ */
+ if (!user_mode(regs)) {
+ if (fixup_exception(regs)) /* only UNALIGN_DATA in practice */
+ return;
+ pr_alert("Kernel took bad trap %d at PC %#lx\n",
+ fault_num, regs->pc);
+ if (fault_num == INT_GPV)
+ pr_alert("GPV_REASON is %#lx\n", reason);
+ show_regs(regs);
+ do_exit(SIGKILL); /* FIXME: implement i386 die() */
+ return;
+ }
+
+ /* User mode: choose signo/code/address according to the trap type. */
+ switch (fault_num) {
+ case INT_MEM_ERROR:
+ signo = SIGBUS;
+ code = BUS_OBJERR;
+ break;
+ case INT_ILL:
+ /* Fetch the faulting bundle so special_ill() can inspect it. */
+ if (copy_from_user(&instr, (void __user *)regs->pc,
+ sizeof(instr))) {
+ pr_err("Unreadable instruction for INT_ILL:"
+ " %#lx\n", regs->pc);
+ do_exit(SIGKILL);
+ return;
+ }
+ /* Anything that is not a bpt/"raise" bundle is a plain SIGILL. */
+ if (!special_ill(instr, &signo, &code)) {
+ signo = SIGILL;
+ code = ILL_ILLOPC;
+ }
+ address = regs->pc;
+ break;
+ case INT_GPV:
+#if CHIP_HAS_TILE_DMA()
+ if (retry_gpv(reason))
+ return;
+#endif
+ /*FALLTHROUGH*/
+ case INT_UDN_ACCESS:
+ case INT_IDN_ACCESS:
+#if CHIP_HAS_SN()
+ case INT_SN_ACCESS:
+#endif
+ signo = SIGILL;
+ code = ILL_PRVREG;
+ address = regs->pc;
+ break;
+ case INT_SWINT_3:
+ case INT_SWINT_2:
+ case INT_SWINT_0:
+ signo = SIGILL;
+ code = ILL_ILLTRP;
+ address = regs->pc;
+ break;
+ case INT_UNALIGN_DATA:
+#ifndef __tilegx__ /* Emulated support for single step debugging */
+ if (unaligned_fixup >= 0) {
+ struct single_step_state *state =
+ current_thread_info()->step_state;
+ /*
+ * Hand off to single_step_once() unless the fault PC is
+ * already the thread's single-step buffer.
+ */
+ if (!state ||
+ (void __user *)(regs->pc) != state->buffer) {
+ single_step_once(regs);
+ return;
+ }
+ }
+#endif
+ signo = SIGBUS;
+ code = BUS_ADRALN;
+ address = 0;
+ break;
+ case INT_DOUBLE_FAULT:
+ /*
+ * For double fault, "reason" is actually passed as
+ * SYSTEM_SAVE_K_2, the hypervisor's double-fault info, so
+ * we can provide the original fault number rather than
+ * the uninteresting "INT_DOUBLE_FAULT" so the user can
+ * learn what actually struck while PL0 ICS was set.
+ */
+ fault_num = reason;
+ signo = SIGILL;
+ code = ILL_DBLFLT;
+ address = regs->pc;
+ break;
+#ifdef __tilegx__
+ case INT_ILL_TRANS: {
+ /* Avoid a hardware erratum with the return address stack. */
+ fill_ra_stack();
+
+ signo = SIGSEGV;
+ code = SEGV_MAPERR;
+ if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK)
+ address = regs->pc;
+ else
+ address = 0; /* FIXME: GX: single-step for address */
+ break;
+ }
+#endif
+ default:
+ panic("Unexpected do_trap interrupt number %d", fault_num);
+ return;
+ }
+
+ /* Build the siginfo and deliver it to the current task. */
+ info.si_signo = signo;
+ info.si_code = code;
+ info.si_addr = (void __user *)address;
+ /* si_trapno is only filled in for SIGILL. */
+ if (signo == SIGILL)
+ info.si_trapno = fault_num;
+ /* SIGTRAP is the one signal not reported via trace_unhandled_signal(). */
+ if (signo != SIGTRAP)
+ trace_unhandled_signal("trap", regs, address, signo);
+ force_sig_info(signo, &info, current);
+}
+
+/*
+ * Double-fault handler: dump the stack using the supplied pc/lr/sp/r52
+ * values, log an emergency message, and halt the machine.  "dummy" is
+ * simply forwarded as the first argument of _dump_stack().
+ */
+void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
+{
+ _dump_stack(dummy, pc, lr, sp, r52);
+ pr_emerg("Double fault: exiting\n");
+ machine_halt();
+}
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S
new file mode 100644
index 00000000..631f10de
--- /dev/null
+++ b/arch/tile/kernel/vmlinux.lds.S
@@ -0,0 +1,95 @@
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <hv/hypervisor.h>
+
+/* Text loads starting from the supervisor interrupt vector address. */
+#define TEXT_OFFSET MEM_SV_INTRPT
+
+OUTPUT_ARCH(tile)
+ENTRY(_start)
+/* Give the symbol "jiffies" the same value as "jiffies_64". */
+jiffies = jiffies_64;
+
+/* Three loadable segments; output sections pick one with ":name" below. */
+PHDRS
+{
+ intrpt1 PT_LOAD ;
+ text PT_LOAD ;
+ data PT_LOAD ;
+}
+SECTIONS
+{
+ /* Text is loaded with a different VA than data; start with text. */
+ #undef LOAD_OFFSET
+ #define LOAD_OFFSET TEXT_OFFSET
+
+ /* Interrupt vectors */
+ .intrpt1 (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */
+ {
+ _text = .;
+ _stext = .;
+ *(.intrpt1)
+ } :intrpt1 =0
+
+ /* Hypervisor call vectors */
+ #include "hvglue.lds"
+
+ /* Now the real code */
+ . = ALIGN(0x20000);
+ .text : AT (ADDR(.text) - LOAD_OFFSET) {
+ HEAD_TEXT
+ SCHED_TEXT
+ LOCK_TEXT
+ __fix_text_end = .; /* tile-cpack won't rearrange before this */
+ TEXT_TEXT
+ *(.text.*)
+ *(.coldtext*)
+ *(.fixup)
+ *(.gnu.warning)
+ } :text =0
+ _etext = .;
+
+ /* "Init" is divided into two areas with very different virtual addresses. */
+ INIT_TEXT_SECTION(PAGE_SIZE)
+
+ /* Now we skip back to PAGE_OFFSET for the data. */
+ . = (. - TEXT_OFFSET + PAGE_OFFSET);
+ #undef LOAD_OFFSET
+ #define LOAD_OFFSET PAGE_OFFSET
+
+ /* Init data and per-cpu data sit between _sinitdata and _einitdata. */
+ . = ALIGN(PAGE_SIZE);
+ VMLINUX_SYMBOL(_sinitdata) = .;
+ INIT_DATA_SECTION(16) :data =0
+ PERCPU_SECTION(L2_CACHE_BYTES)
+ . = ALIGN(PAGE_SIZE);
+ VMLINUX_SYMBOL(_einitdata) = .;
+
+ _sdata = .; /* Start of data section */
+
+ RO_DATA_SECTION(PAGE_SIZE)
+
+ /* initially writeable, then read-only */
+ /*
+ * NOTE(review): __w1data_begin is assigned here and again inside the
+ * .w1data output section below -- confirm whether both are needed.
+ */
+ . = ALIGN(PAGE_SIZE);
+ __w1data_begin = .;
+ .w1data : AT(ADDR(.w1data) - LOAD_OFFSET) {
+ VMLINUX_SYMBOL(__w1data_begin) = .;
+ *(.w1data)
+ VMLINUX_SYMBOL(__w1data_end) = .;
+ }
+
+ RW_DATA_SECTION(L2_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+
+ _edata = .;
+
+ EXCEPTION_TABLE(L2_CACHE_BYTES)
+ NOTES
+
+
+ BSS_SECTION(8, PAGE_SIZE, 1)
+ _end = . ;
+
+ STABS_DEBUG
+ DWARF_DEBUG
+
+ DISCARDS
+}
diff --git a/arch/tile/kvm/Kconfig b/arch/tile/kvm/Kconfig
new file mode 100644
index 00000000..669fcdba
--- /dev/null
+++ b/arch/tile/kvm/Kconfig
@@ -0,0 +1,37 @@
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ ---help---
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ tristate "Kernel-based Virtual Machine (KVM) support"
+ depends on HAVE_KVM && MODULES && EXPERIMENTAL
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+ ---help---
+ Support hosting paravirtualized guest machines.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ To compile this as a module, choose M here: the module
+ will be called kvm.
+
+ If unsure, say N.
+
+source drivers/vhost/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
new file mode 100644
index 00000000..985f5985
--- /dev/null
+++ b/arch/tile/lib/Makefile
@@ -0,0 +1,18 @@
+#
+# Makefile for TILE-specific library files.
+#
+
+lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
+ memmove.o memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \
+ strchr_$(BITS).o strlen_$(BITS).o
+
+ifeq ($(CONFIG_TILEGX),y)
+CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer
+lib-y += memcpy_user_64.o
+else
+lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
+endif
+
+lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o
+
+obj-$(CONFIG_MODULES) += exports.o
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
new file mode 100644
index 00000000..771b251b
--- /dev/null
+++ b/arch/tile/lib/atomic_32.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/cache.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/atomic.h>
+#include <asm/futex.h>
+#include <arch/chip.h>
+
+/* See <asm/atomic_32.h> */
+/*
+ * Two alternative layouts for the lock words that guard atomic ops,
+ * selected at compile time by ATOMIC_LOCKS_FOUND_VIA_TABLE(): a
+ * two-level table of per-cpu lock blocks, or one page-sized array.
+ */
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+
+/*
+ * A block of memory containing locks for atomic ops. Each instance of this
+ * struct will be homed on a different CPU.
+ */
+struct atomic_locks_on_cpu {
+ int lock[ATOMIC_HASH_L2_SIZE];
+} __attribute__((aligned(ATOMIC_HASH_L2_SIZE * 4)));
+
+/* One block of locks per cpu; installed by __init_atomic_per_cpu(). */
+static DEFINE_PER_CPU(struct atomic_locks_on_cpu, atomic_lock_pool);
+
+/* The locks we'll use until __init_atomic_per_cpu is called. */
+static struct atomic_locks_on_cpu __initdata initial_atomic_locks;
+
+/* Hash into this vector to get a pointer to lock for the given atomic. */
+/* Every slot initially points at the single initial_atomic_locks block. */
+struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE]
+ __write_once = {
+ [0 ... ATOMIC_HASH_L1_SIZE-1] (&initial_atomic_locks)
+};
+
+#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+/* This page is remapped on startup to be hash-for-home. */
+int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss;
+
+#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+/*
+ * Map the address of an atomic object to the lock word guarding it.
+ * Only the address is used; "v" is never dereferenced here.
+ */
+static inline int *__atomic_hashed_lock(volatile void *v)
+{
+ /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+ /* Offset of v within its page, rounded down to sizeof(long long). */
+ unsigned long i =
+ (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long));
+ /* Hash that offset with the crc32 instruction. */
+ unsigned long n = __insn_crc32_32(0, i);
+
+ /* Grab high bits for L1 index. */
+ unsigned long l1_index = n >> ((sizeof(n) * 8) - ATOMIC_HASH_L1_SHIFT);
+ /* Grab low bits for L2 index. */
+ unsigned long l2_index = n & (ATOMIC_HASH_L2_SIZE - 1);
+
+ return &atomic_lock_ptr[l1_index]->lock[l2_index];
+#else
+ /*
+ * Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index.
+ * Using mm works here because atomic_locks is page aligned.
+ */
+ unsigned long ptr = __insn_mm((unsigned long)v >> 1,
+ (unsigned long)atomic_locks,
+ 2, (ATOMIC_HASH_SHIFT + 2) - 1);
+ return (int *)ptr;
+#endif
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Report whether "p" points at one of the atomic lock words.
+ * Returns 1 if so, 0 otherwise.
+ */
+static int is_atomic_lock(int *p)
+{
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+	int slot;
+
+	/* Accept p if it lies inside the lock[] array of any table entry. */
+	for (slot = 0; slot < ATOMIC_HASH_L1_SIZE; ++slot) {
+		struct atomic_locks_on_cpu *bank = atomic_lock_ptr[slot];
+
+		if (p < &bank->lock[0])
+			continue;
+		if (p < &bank->lock[ATOMIC_HASH_L2_SIZE])
+			return 1;
+	}
+	return 0;
+#else
+	if (p < &atomic_locks[0])
+		return 0;
+	return p < &atomic_locks[ATOMIC_HASH_SIZE];
+#endif
+}
+
+/*
+ * Release the atomic lock word "irqlock_word".  The word must be a real
+ * atomic lock and must currently hold 1; either check failing is a BUG.
+ * NOTE(review): presumably called from the page-fault path when an
+ * atomic op faults while holding its lock -- confirm against callers.
+ */
+void __atomic_fault_unlock(int *irqlock_word)
+{
+ BUG_ON(!is_atomic_lock(irqlock_word));
+ BUG_ON(*irqlock_word != 1);
+ *irqlock_word = 0;
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Prepare for an atomic op on "v": touch the target with a volatile
+ * load, then return the lock word that __atomic_hashed_lock() picks
+ * for that address.
+ */
+static inline int *__atomic_setup(volatile void *v)
+{
+ /* Issue a load to the target to bring it into cache. */
+ *(volatile int *)v;
+ return __atomic_hashed_lock(v);
+}
+
+/*
+ * 32-bit atomic entry points.  Each one looks up the lock word for the
+ * target via __atomic_setup(), calls the matching __atomic_xxx()
+ * routine, and returns the ".val" member of the result.
+ */
+
+int _atomic_xchg(atomic_t *v, int n)
+{
+	int *lock = __atomic_setup(v);
+
+	return __atomic_xchg(&v->counter, lock, n).val;
+}
+EXPORT_SYMBOL(_atomic_xchg);
+
+int _atomic_xchg_add(atomic_t *v, int i)
+{
+	int *lock = __atomic_setup(v);
+
+	return __atomic_xchg_add(&v->counter, lock, i).val;
+}
+EXPORT_SYMBOL(_atomic_xchg_add);
+
+int _atomic_xchg_add_unless(atomic_t *v, int a, int u)
+{
+	int *lock = __atomic_setup(v);
+
+	/*
+	 * Note: argument order is switched here since it is easier
+	 * to use the first argument consistently as the "old value"
+	 * in the assembly, as is done for _atomic_cmpxchg().
+	 */
+	return __atomic_xchg_add_unless(&v->counter, lock, u, a).val;
+}
+EXPORT_SYMBOL(_atomic_xchg_add_unless);
+
+int _atomic_cmpxchg(atomic_t *v, int o, int n)
+{
+	int *lock = __atomic_setup(v);
+
+	return __atomic_cmpxchg(&v->counter, lock, o, n).val;
+}
+EXPORT_SYMBOL(_atomic_cmpxchg);
+
+unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask)
+{
+	int *lock = __atomic_setup(p);
+
+	return __atomic_or((int *)p, lock, mask).val;
+}
+EXPORT_SYMBOL(_atomic_or);
+
+unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask)
+{
+	int *lock = __atomic_setup(p);
+
+	return __atomic_andn((int *)p, lock, mask).val;
+}
+EXPORT_SYMBOL(_atomic_andn);
+
+unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask)
+{
+	int *lock = __atomic_setup(p);
+
+	return __atomic_xor((int *)p, lock, mask).val;
+}
+EXPORT_SYMBOL(_atomic_xor);
+
+
+/*
+ * 64-bit atomic entry points.  Same pattern as the 32-bit ones, except
+ * that the __atomic64_xxx() routines return the u64 value directly.
+ */
+
+u64 _atomic64_xchg(atomic64_t *v, u64 n)
+{
+	int *lock = __atomic_setup(v);
+
+	return __atomic64_xchg(&v->counter, lock, n);
+}
+EXPORT_SYMBOL(_atomic64_xchg);
+
+u64 _atomic64_xchg_add(atomic64_t *v, u64 i)
+{
+	int *lock = __atomic_setup(v);
+
+	return __atomic64_xchg_add(&v->counter, lock, i);
+}
+EXPORT_SYMBOL(_atomic64_xchg_add);
+
+u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u)
+{
+	int *lock = __atomic_setup(v);
+
+	/*
+	 * Note: argument order is switched here since it is easier
+	 * to use the first argument consistently as the "old value"
+	 * in the assembly, as is done for _atomic_cmpxchg().
+	 */
+	return __atomic64_xchg_add_unless(&v->counter, lock, u, a);
+}
+EXPORT_SYMBOL(_atomic64_xchg_add_unless);
+
+u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
+{
+	int *lock = __atomic_setup(v);
+
+	return __atomic64_cmpxchg(&v->counter, lock, o, n);
+}
+EXPORT_SYMBOL(_atomic64_cmpxchg);
+
+
+/*
+ * Futex counterpart of __atomic_setup(): prefetch the user word instead
+ * of loading it, then return the lock word hashed from its address.
+ */
+static inline int *__futex_setup(int __user *v)
+{
+ /*
+ * Issue a prefetch to the counter to bring it into cache.
+ * As for __atomic_setup, but we can't do a read into the L1
+ * since it might fault; instead we do a prefetch into the L2.
+ */
+ __insn_prefetch(v);
+ return __atomic_hashed_lock((int __force *)v);
+}
+
+/*
+ * Futex operations on a user-space word.  Each one obtains the lock via
+ * __futex_setup() and returns the whole "struct __get_user" produced by
+ * the underlying __atomic_xxx() routine.
+ */
+
+struct __get_user futex_set(u32 __user *v, int i)
+{
+	int *lock = __futex_setup(v);
+
+	return __atomic_xchg((int __force *)v, lock, i);
+}
+
+struct __get_user futex_add(u32 __user *v, int n)
+{
+	int *lock = __futex_setup(v);
+
+	return __atomic_xchg_add((int __force *)v, lock, n);
+}
+
+struct __get_user futex_or(u32 __user *v, int n)
+{
+	int *lock = __futex_setup(v);
+
+	return __atomic_or((int __force *)v, lock, n);
+}
+
+struct __get_user futex_andn(u32 __user *v, int n)
+{
+	int *lock = __futex_setup(v);
+
+	return __atomic_andn((int __force *)v, lock, n);
+}
+
+struct __get_user futex_xor(u32 __user *v, int n)
+{
+	int *lock = __futex_setup(v);
+
+	return __atomic_xor((int __force *)v, lock, n);
+}
+
+struct __get_user futex_cmpxchg(u32 __user *v, int o, int n)
+{
+	int *lock = __futex_setup(v);
+
+	return __atomic_cmpxchg((int __force *)v, lock, o, n);
+}
+
+/*
+ * If any of the atomic or futex routines hit a bad address (not in
+ * the page tables at kernel PL) this routine is called. The futex
+ * routines are never used on kernel space, and the normal atomics and
+ * bitops are never used on user space. So a fault on kernel space
+ * must be fatal, but a fault on userspace is a futex fault and we
+ * need to return -EFAULT. Note that the context this routine is
+ * invoked in is the context of the "_atomic_xxx()" routines called
+ * by the functions in this file.
+ */
+struct __get_user __atomic_bad_address(int __user *addr)
+{
+	struct __get_user fault = { .err = -EFAULT };
+
+	/* An address failing the user-access check is treated as fatal. */
+	if (unlikely(!access_ok(VERIFY_WRITE, addr, sizeof(int))))
+		panic("Bad address used for kernel atomic op: %p\n", addr);
+
+	/* Otherwise report -EFAULT back through the result struct. */
+	return fault;
+}
+
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+/*
+ * Handler for the "noatomichash" boot argument.  It only prints a
+ * deprecation warning and changes nothing else; always returns 1.
+ */
+static int __init noatomichash(char *str)
+{
+ pr_warning("noatomichash is deprecated.\n");
+ return 1;
+}
+__setup("noatomichash", noatomichash);
+#endif
+
+/*
+ * Boot-time setup of the atomic lock tables.
+ *
+ * In the table configuration, every atomic_lock_ptr[] slot is pointed
+ * at a per-cpu atomic_lock_pool, cycling through the possible cpus.
+ * In the single-page configuration nothing is written; the function
+ * only validates the size and alignment assumptions of atomic_locks.
+ */
+void __init __init_atomic_per_cpu(void)
+{
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+
+ unsigned int i;
+ int actual_cpu;
+
+ /*
+ * Before this is called from setup, we just have one lock for
+ * all atomic objects/operations. Here we replace the
+ * elements of atomic_lock_ptr so that they point at per_cpu
+ * integers. This seemingly over-complex approach stems from
+ * the fact that DEFINE_PER_CPU defines an entry for each cpu
+ * in the grid, not each cpu from 0..ATOMIC_HASH_SIZE-1. But
+ * for efficient hashing of atomics to their locks we want a
+ * compile time constant power of 2 for the size of this
+ * table, so we use ATOMIC_HASH_SIZE.
+ *
+ * Here we populate atomic_lock_ptr from the per cpu
+ * atomic_lock_pool, interspersing by actual cpu so that
+ * subsequent elements are homed on consecutive cpus.
+ *
+ * NOTE(review): the text above says ATOMIC_HASH_SIZE, but the
+ * table and the loop below are sized by ATOMIC_HASH_L1_SIZE --
+ * confirm which name the comment should use.
+ */
+
+ actual_cpu = cpumask_first(cpu_possible_mask);
+
+ for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
+ /*
+ * Preincrement to slightly bias against using cpu 0,
+ * which has plenty of stuff homed on it already.
+ */
+ actual_cpu = cpumask_next(actual_cpu, cpu_possible_mask);
+ /* Wrap around to the first possible cpu when we run off the end. */
+ if (actual_cpu >= nr_cpu_ids)
+ actual_cpu = cpumask_first(cpu_possible_mask);
+
+ atomic_lock_ptr[i] = &per_cpu(atomic_lock_pool, actual_cpu);
+ }
+
+#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+ /* Validate power-of-two and "bigger than cpus" assumption */
+ BUILD_BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1));
+ BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids);
+
+ /*
+ * On TILEPro we prefer to use a single hash-for-home
+ * page, since this means atomic operations are less
+ * likely to encounter a TLB fault and thus should
+ * in general perform faster. You may wish to disable
+ * this in situations where few hash-for-home tiles
+ * are configured.
+ */
+ BUG_ON((unsigned long)atomic_locks % PAGE_SIZE != 0);
+
+ /* The locks must all fit on one page. */
+ BUILD_BUG_ON(ATOMIC_HASH_SIZE * sizeof(int) > PAGE_SIZE);
+
+ /*
+ * We use the page offset of the atomic value's address as
+ * an index into atomic_locks, excluding the low 3 bits.
+ * That should not produce more indices than ATOMIC_HASH_SIZE.
+ */
+ BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE);
+
+#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+ /* The futex code makes this assumption, so we validate it here. */
+ BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int));
+}
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
new file mode 100644
index 00000000..30638042
--- /dev/null
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -0,0 +1,196 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Support routines for atomic operations. Each function takes:
+ *
+ * r0: address to manipulate
+ * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
+ * r2: new value to write, or for cmpxchg/add_unless, value to compare against
+ * r3: (cmpxchg/xchg_add_unless) new value to write or add;
+ * (atomic64 ops) high word of value to write
+ * r4/r5: (cmpxchg64/add_unless64) new value to write or add
+ *
+ * The 32-bit routines return a "struct __get_user" so that the futex code
+ * has an opportunity to return -EFAULT to the user if needed.
+ * The 64-bit routines just return a "long long" with the value,
+ * since they are only used from kernel space and don't expect to fault.
+ * Support for 16-bit ops is included in the framework but we don't provide
+ * any (x86_64 has an atomic_inc_short(), so we might want to some day).
+ *
+ * Note that the caller is advised to issue a suitable L1 or L2
+ * prefetch on the address being manipulated to avoid extra stalls.
+ * In addition, the hot path is on two icache lines, and we start with
+ * a jump to the second line to make sure they are both in cache so
+ * that we never stall waiting on icache fill while holding the lock.
+ * (This doesn't work out with most 64-bit ops, since they consume
+ * too many bundles, so may take an extra i-cache stall.)
+ *
+ * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
+ * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
+ * the code, just page faults.
+ *
+ * If the load or store faults in a way that can be directly fixed in
+ * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
+ * directly, return to the instruction that faulted, and retry it.
+ *
+ * If the load or store faults in a way that potentially requires us
+ * to release the atomic lock, then retry (e.g. a migrating PTE), we
+ * reset the PC in do_page_fault_ics() to the "tns" instruction so
+ * that on return we will reacquire the lock and restart the op. We
+ * are somewhat overloading the exception_table_entry notion by doing
+ * this, since those entries are not normally used for migrating PTEs.
+ *
+ * If the main page fault handler discovers a bad address, it will see
+ * the PC pointing to the "tns" instruction (due to the earlier
+ * exception_table_entry processing in do_page_fault_ics), and
+ * re-reset the PC to the fault handler, __atomic_bad_address(), which
+ * effectively takes over from the atomic op and can either return a
+ * bad "struct __get_user" (for user addresses) or can just panic (for
+ * bad kernel addresses).
+ *
+ * Note that if the value we would store is the same as what we
+ * loaded, we bypass the store. Other platforms with true atomics can
+ * make the guarantee that a non-atomic __clear_bit(), for example,
+ * can safely race with an atomic test_and_set_bit(); this example is
+ * from bit_spinlock.h in slub_lock() / slub_unlock(). We can't do
+ * that on Tile since the "atomic" op is really just a
+ * read/modify/write, and can race with the non-atomic
+ * read/modify/write. However, if we can short-circuit the write when
+ * it is not needed, in the atomic case, we avoid the race.
+ */
+
+#include <linux/linkage.h>
+#include <asm/atomic_32.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+
+ .section .text.atomic,"ax"
+ENTRY(__start_atomic_asm_code)
+
+ /*
+ * atomic_op name, bitwidth, body
+ *
+ * Emits the routine __atomic<name>.  "bitwidth" is 16, 32 or 64 and
+ * selects the load/store forms; "body" is the instruction sequence
+ * that computes the new value into r24 (and r25 for 64-bit) from the
+ * old value in r22 (and r23).
+ *
+ * Local labels:
+ * 1: load the old value and run "body"
+ * 2: store the new value
+ * 3: return the old value in r0/r1, release the lock, clear
+ * INTERRUPT_CRITICAL_SECTION
+ * 4: entry continuation: set INTERRUPT_CRITICAL_SECTION and try
+ * to take the lock with "tns"
+ * 5/6: backoff loop used while the lock is held elsewhere; the
+ * delay starts at 32 cycles, doubles on each retry, and is
+ * limited using the 2048 value held in r23
+ *
+ * Only the 32-bit variants get __ex_table entries.
+ */
+ .macro atomic_op, name, bitwidth, body
+ .align 64
+STD_ENTRY_SECTION(__atomic\name, .text.atomic)
+ {
+ movei r24, 1
+ j 4f /* branch to second cache line */
+ }
+1: {
+ .ifc \bitwidth,16
+ lh r22, r0
+ .else
+ lw r22, r0
+ addi r28, r0, 4
+ .endif
+ }
+ .ifc \bitwidth,64
+ lw r23, r28
+ .endif
+ \body /* set r24, and r25 if 64-bit */
+ {
+ seq r26, r22, r24
+ seq r27, r23, r25
+ }
+ .ifc \bitwidth,64
+ bbnst r27, 2f
+ .endif
+ bbs r26, 3f /* skip write-back if it's the same value */
+2: {
+ .ifc \bitwidth,16
+ sh r0, r24
+ .else
+ sw r0, r24
+ .endif
+ }
+ .ifc \bitwidth,64
+ sw r28, r25
+ .endif
+ mf
+3: {
+ move r0, r22
+ .ifc \bitwidth,64
+ move r1, r23
+ .else
+ move r1, zero
+ .endif
+ sw ATOMIC_LOCK_REG_NAME, zero
+ }
+ mtspr INTERRUPT_CRITICAL_SECTION, zero
+ jrp lr
+4: {
+ move ATOMIC_LOCK_REG_NAME, r1
+ mtspr INTERRUPT_CRITICAL_SECTION, r24
+ }
+#ifndef CONFIG_SMP
+ j 1b /* no atomic locks */
+#else
+ {
+ tns r21, ATOMIC_LOCK_REG_NAME
+ moveli r23, 2048 /* maximum backoff time in cycles */
+ }
+ {
+ bzt r21, 1b /* branch if lock acquired */
+ moveli r25, 32 /* starting backoff time in cycles */
+ }
+5: mtspr INTERRUPT_CRITICAL_SECTION, zero
+ mfspr r26, CYCLE_LOW /* get start point for this backoff */
+6: mfspr r22, CYCLE_LOW /* test to see if we've backed off enough */
+ sub r22, r22, r26
+ slt r22, r22, r25
+ bbst r22, 6b
+ {
+ mtspr INTERRUPT_CRITICAL_SECTION, r24
+ shli r25, r25, 1 /* double the backoff; retry the tns */
+ }
+ {
+ tns r21, ATOMIC_LOCK_REG_NAME
+ slt r26, r23, r25 /* is the proposed backoff too big? */
+ }
+ {
+ bzt r21, 1b /* branch if lock acquired */
+ mvnz r25, r26, r23
+ }
+ j 5b
+#endif
+ STD_ENDPROC(__atomic\name)
+ .ifc \bitwidth,32
+ .pushsection __ex_table,"a"
+ .word 1b, __atomic\name
+ .word 2b, __atomic\name
+ .word __atomic\name, __atomic_bad_address
+ .popsection
+ .endif
+ .endm
+
+/*
+ * 32-bit operations.  On entry to each body r22 holds the old value and
+ * r2 (plus r3 for cmpxchg/xchg_add_unless) holds the operand(s); the
+ * body leaves the new value in r24.  cmpxchg and xchg_add_unless branch
+ * straight to label 3 (no store) when the comparison says not to update.
+ */
+atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
+atomic_op _xchg, 32, "move r24, r2"
+atomic_op _xchg_add, 32, "add r24, r22, r2"
+atomic_op _xchg_add_unless, 32, \
+ "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
+atomic_op _or, 32, "or r24, r22, r2"
+atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
+atomic_op _xor, 32, "xor r24, r22, r2"
+
+/*
+ * 64-bit operations.  The old value is in r22 (low) / r23 (high); the
+ * new value is produced in r24 (low) / r25 (high).  For the additions,
+ * "slt_u r26, r24, r22" detects a carry out of the low word, which is
+ * then added into the high word.
+ *
+ * cmpxchg proceeds only when BOTH halves equal r2/r3.  xchg_add_unless
+ * must skip the add only when BOTH halves equal r2/r3, so the two
+ * "differs" flags are OR-ed together and a single branch to label 3 is
+ * taken when neither half differs.  (Branching on each half separately
+ * would skip the add whenever just one half happened to match.)
+ */
+atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
+ { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
+atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
+atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
+ slt_u r26, r24, r22; add r25, r25, r26"
+atomic_op 64_xchg_add_unless, 64, \
+ "{ sne r26, r22, r2; sne r27, r23, r3 }; \
+ { or r26, r26, r27; add r25, r23, r5 }; \
+ { bbns r26, 3f; add r24, r22, r4 }; \
+ slt_u r26, r24, r22; add r25, r25, r26"
+
+ jrp lr /* happy backtracer */
+
+ENTRY(__end_atomic_asm_code)
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
new file mode 100644
index 00000000..db4fb89e
--- /dev/null
+++ b/arch/tile/lib/cacheflush.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <asm/page.h>
+#include <asm/cacheflush.h>
+#include <arch/icache.h>
+#include <arch/spr_def.h>
+
+/* Invalidate the icache for the (end - start) bytes beginning at start. */
+void __flush_icache_range(unsigned long start, unsigned long end)
+{
+ invalidate_icache((const void *)start, end - start, PAGE_SIZE);
+}
+
+
+/* Force a load from *p to issue, discarding the value that is read. */
+static inline void force_load(char *p)
+{
+ *(volatile char *)p; /* volatile, so the unused read is not elided */
+}
+
+/*
+ * Flush and invalidate the "size"-byte VA range at "buffer", which is
+ * homed remotely on a single core (if "!hfh") or via hash-for-home (if
+ * "hfh"), waiting until the memory controller holds the flushed values.
+ */
+void finv_buffer_remote(void *buffer, size_t size, int hfh)
+{
+ char *p, *base;
+ size_t step_size, load_count;
+
+ /*
+ * On TILEPro the striping granularity is a fixed 8KB; on
+ * TILE-Gx it is configurable, and we rely on the fact that
+ * the hypervisor always configures maximum striping, so that
+ * bits 9 and 10 of the PA are part of the stripe function, so
+ * every 512 bytes we hit a striping boundary.
+ *
+ */
+#ifdef __tilegx__
+ const unsigned long STRIPE_WIDTH = 512;
+#else
+ const unsigned long STRIPE_WIDTH = 8192;
+#endif
+
+#ifdef __tilegx__
+ /*
+ * On TILE-Gx, we must disable the dstream prefetcher before doing
+ * a cache flush; otherwise, we could end up with data in the cache
+ * that we don't want there. Note that normally we'd do an mf
+ * after the SPR write to disable the prefetcher, but we do one
+ * below, before any further loads, so there's no need to do it
+ * here.
+ */
+ uint_reg_t old_dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
+ __insn_mtspr(SPR_DSTREAM_PF, 0);
+#endif
+
+ /*
+ * Flush and invalidate the buffer out of the local L1/L2
+ * and request the home cache to flush and invalidate as well.
+ */
+ __finv_buffer(buffer, size);
+
+ /*
+ * Wait for the home cache to acknowledge that it has processed
+ * all the flush-and-invalidate requests. This does not mean
+ * that the flushed data has reached the memory controller yet,
+ * but it does mean the home cache is processing the flushes.
+ */
+ __insn_mf();
+
+ /*
+ * Issue a load to the last cache line, which can't complete
+ * until all the previously-issued flushes to the same memory
+ * controller have also completed. If we weren't striping
+ * memory, that one load would be sufficient, but since we may
+ * be, we also need to back up to the last load issued to
+ * another memory controller, which would be the point where
+ * we crossed a "striping" boundary (the granularity of striping
+ * across memory controllers). Keep backing up and doing this
+ * until we are before the beginning of the buffer, or have
+ * hit all the controllers.
+ *
+ * If we are flushing a hash-for-home buffer, it's even worse.
+ * Each line may be homed on a different tile, and each tile
+ * may have up to four lines that are on different
+ * controllers. So as we walk backwards, we have to touch
+ * enough cache lines to satisfy these constraints. In
+ * practice this ends up being close enough to "load from
+ * every cache line on a full memory stripe on each
+ * controller" that we simply do that, to simplify the logic.
+ *
+ * On TILE-Gx the hash-for-home function is much more complex,
+ * with the upshot being we can't readily guarantee we have
+ * hit both entries in the 128-entry AMT that were hit by any
+ * load in the entire range, so we just re-load them all.
+ * With larger buffers, we may want to consider using a hypervisor
+ * trap to issue loads directly to each hash-for-home tile for
+ * each controller (doing it from Linux would trash the TLB).
+ */
+ if (hfh) {
+ step_size = L2_CACHE_BYTES;
+#ifdef __tilegx__
+ load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES;
+#else
+ load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
+ (1 << CHIP_LOG_NUM_MSHIMS());
+#endif
+ } else {
+ step_size = STRIPE_WIDTH;
+ load_count = (1 << CHIP_LOG_NUM_MSHIMS());
+ }
+
+ /* Load the last byte of the buffer. */
+ p = (char *)buffer + size - 1;
+ force_load(p);
+
+ /* Bump down to the end of the previous stripe or cache line. */
+ p -= step_size;
+ p = (char *)((unsigned long)p | (step_size - 1));
+
+ /* Figure out how far back we need to go for load_count loads in all. */
+ base = p - (step_size * (load_count - 2));
+ if ((unsigned long)base < (unsigned long)buffer)
+ base = buffer;
+
+ /*
+ * Fire all the loads we need. The MAF only has eight entries
+ * so we can have at most eight outstanding loads, so we
+ * unroll by that amount.
+ */
+#pragma unroll 8
+ for (; p >= base; p -= step_size)
+ force_load(p);
+
+ /*
+ * Repeat, but with inv's instead of loads, to get rid of the
+ * data we just loaded into our own cache and the old home L3.
+ * No need to unroll since inv's don't target a register.
+ */
+ p = (char *)buffer + size - 1;
+ __insn_inv(p);
+ p -= step_size;
+ p = (char *)((unsigned long)p | (step_size - 1));
+ for (; p >= base; p -= step_size)
+ __insn_inv(p);
+
+ /* Wait for the load+inv's (and thus finvs) to have completed. */
+ __insn_mf();
+
+#ifdef __tilegx__
+ /* Reenable the prefetcher. */
+ __insn_mtspr(SPR_DSTREAM_PF, old_dstream_pf);
+#endif
+}
diff --git a/arch/tile/lib/checksum.c b/arch/tile/lib/checksum.c
new file mode 100644
index 00000000..e4bab5bd
--- /dev/null
+++ b/arch/tile/lib/checksum.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ * Support code for the main lib/checksum.c.
+ */
+
+#include <net/checksum.h>
+#include <linux/module.h>
+
+static inline unsigned int longto16(unsigned long x)
+{
+	/*
+	 * Run x through the "sad" intrinsic against zero, twice.
+	 * NOTE(review): presumably pass two folds in pass one's carry.
+	 */
+#ifdef __tilegx__
+	return __insn_v2sadu(__insn_v2sadu(x, 0), 0);
+#else
+	return __insn_sadh_u(__insn_sadh_u(x, 0), 0);
+#endif
+}
+
+__wsum do_csum(const unsigned char *buff, int len)
+{
+ int odd, count; /* odd: buff started on an odd address */
+ unsigned long result = 0; /* running sum; folded by longto16() below */
+
+ if (len <= 0)
+ goto out;
+ odd = 1 & (unsigned long) buff;
+ if (odd) {
+ result = (*buff << 8); /* lone leading byte, moved up one byte */
+ len--;
+ buff++;
+ }
+ count = len >> 1; /* nr of 16-bit words.. */
+ if (count) {
+ if (2 & (unsigned long) buff) {
+ result += *(const unsigned short *)buff;
+ count--;
+ len -= 2;
+ buff += 2;
+ }
+ count >>= 1; /* nr of 32-bit words.. */
+ if (count) {
+#ifdef __tilegx__
+ if (4 & (unsigned long) buff) {
+ unsigned int w = *(const unsigned int *)buff;
+ result = __insn_v2sadau(result, w, 0);
+ count--;
+ len -= 4;
+ buff += 4;
+ }
+ count >>= 1; /* nr of 64-bit words.. */
+#endif
+
+ /*
+ * This algorithm could wrap around for very
+ * large buffers, but those should be impossible.
+ */
+ BUG_ON(count >= 65530);
+
+ while (count) { /* main loop: one aligned long per pass */
+ unsigned long w = *(const unsigned long *)buff;
+ count--;
+ buff += sizeof(w);
+#ifdef __tilegx__
+ result = __insn_v2sadau(result, w, 0);
+#else
+ result = __insn_sadah_u(result, w, 0);
+#endif
+ }
+#ifdef __tilegx__
+ if (len & 4) { /* trailing 32-bit word */
+ unsigned int w = *(const unsigned int *)buff;
+ result = __insn_v2sadau(result, w, 0);
+ buff += 4;
+ }
+#endif
+ }
+ if (len & 2) { /* trailing 16-bit word */
+ result += *(const unsigned short *) buff;
+ buff += 2;
+ }
+ }
+ if (len & 1)
+ result += *buff; /* lone trailing byte */
+ result = longto16(result);
+ if (odd)
+ result = swab16(result); /* swap bytes when buff began odd */
+out:
+ return result;
+}
diff --git a/arch/tile/lib/cpumask.c b/arch/tile/lib/cpumask.c
new file mode 100644
index 00000000..fdc40361
--- /dev/null
+++ b/arch/tile/lib/cpumask.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/ctype.h>
+#include <linux/errno.h>
+#include <linux/smp.h>
+
+/*
+ * Parse a list like "0-3,8" into maskp, cropping out bits beyond the end
+ * of the array.  Move to "lib" if more clients want to use this routine.
+ */
+int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits)
+{
+	unsigned a, b;
+
+	bitmap_zero(maskp, nmaskbits);
+	do {
+		if (!isdigit(*bp))
+			return -EINVAL;
+		a = simple_strtoul(bp, (char **)&bp, 10);
+		b = a;
+		if (*bp == '-') {	/* "a-b" range rather than one bit */
+			bp++;
+			if (!isdigit(*bp))
+				return -EINVAL;
+			b = simple_strtoul(bp, (char **)&bp, 10);
+		}
+		if (a > b)
+			return -EINVAL;
+		/* Crop the range to the bits that exist in the mask. */
+		if (b >= nmaskbits)
+			b = nmaskbits-1;
+		/* Bound "a" too: for nmaskbits == 0 the crop wraps "b". */
+		for (; a <= b && a < nmaskbits; a++)
+			set_bit(a, maskp);
+		if (*bp == ',')
+			bp++;
+	} while (*bp != '\0' && *bp != '\n');
+	return 0;
+}
diff --git a/arch/tile/lib/delay.c b/arch/tile/lib/delay.c
new file mode 100644
index 00000000..cdacdd11
--- /dev/null
+++ b/arch/tile/lib/delay.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/thread_info.h>
+#include <asm/timex.h>
+
+void __udelay(unsigned long usecs)
+{
+	const unsigned long max_usecs = ULONG_MAX / 1000;
+
+	if (WARN_ON_ONCE(usecs > max_usecs))
+		usecs = max_usecs;	/* so usecs * 1000 cannot overflow */
+	__ndelay(usecs * 1000);
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long nsecs)
+{
+	cycles_t deadline = get_cycles();	/* cycle count on entry... */
+	deadline += ns2cycles(nsecs);		/* ...plus the requested delay */
+	while (deadline > get_cycles())
+		cpu_relax();
+}
+EXPORT_SYMBOL(__ndelay);
+
+void __delay(unsigned long cycles)
+{
+	const cycles_t deadline = get_cycles() + cycles; /* spin until then */
+	while (deadline > get_cycles())
+		cpu_relax();
+}
+EXPORT_SYMBOL(__delay);
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
new file mode 100644
index 00000000..2a81d32d
--- /dev/null
+++ b/arch/tile/lib/exports.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Exports from assembler code and from libtile-cc.
+ */
+
+#include <linux/module.h>
+
+/* arch/tile/lib/usercopy.S */
+#include <linux/uaccess.h>
+EXPORT_SYMBOL(__get_user_1);
+EXPORT_SYMBOL(__get_user_2);
+EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__get_user_8);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+EXPORT_SYMBOL(strnlen_user_asm);
+EXPORT_SYMBOL(strncpy_from_user_asm);
+EXPORT_SYMBOL(clear_user_asm);
+EXPORT_SYMBOL(flush_user_asm);
+EXPORT_SYMBOL(inv_user_asm);
+EXPORT_SYMBOL(finv_user_asm);
+
+/* arch/tile/kernel/entry.S */
+#include <linux/kernel.h>
+#include <asm/processor.h>
+EXPORT_SYMBOL(current_text_addr);
+EXPORT_SYMBOL(dump_stack);
+
+/* arch/tile/kernel/head.S */
+EXPORT_SYMBOL(empty_zero_page);
+
+/* arch/tile/lib/, various memcpy files */
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(__copy_to_user_inatomic);
+EXPORT_SYMBOL(__copy_from_user_inatomic);
+EXPORT_SYMBOL(__copy_from_user_zeroing);
+#ifdef __tilegx__
+EXPORT_SYMBOL(__copy_in_user_inatomic);
+#endif
+
+/* hypervisor glue */
+#include <hv/hypervisor.h>
+EXPORT_SYMBOL(hv_dev_open);
+EXPORT_SYMBOL(hv_dev_pread);
+EXPORT_SYMBOL(hv_dev_pwrite);
+EXPORT_SYMBOL(hv_dev_preada);
+EXPORT_SYMBOL(hv_dev_pwritea);
+EXPORT_SYMBOL(hv_dev_poll);
+EXPORT_SYMBOL(hv_dev_poll_cancel);
+EXPORT_SYMBOL(hv_dev_close);
+EXPORT_SYMBOL(hv_sysconf);
+EXPORT_SYMBOL(hv_confstr);
+
+/* libgcc.a */
+uint32_t __udivsi3(uint32_t dividend, uint32_t divisor);
+EXPORT_SYMBOL(__udivsi3);
+int32_t __divsi3(int32_t dividend, int32_t divisor);
+EXPORT_SYMBOL(__divsi3);
+uint64_t __udivdi3(uint64_t dividend, uint64_t divisor);
+EXPORT_SYMBOL(__udivdi3);
+int64_t __divdi3(int64_t dividend, int64_t divisor);
+EXPORT_SYMBOL(__divdi3);
+uint32_t __umodsi3(uint32_t dividend, uint32_t divisor);
+EXPORT_SYMBOL(__umodsi3);
+int32_t __modsi3(int32_t dividend, int32_t divisor);
+EXPORT_SYMBOL(__modsi3);
+uint64_t __umoddi3(uint64_t dividend, uint64_t divisor);
+EXPORT_SYMBOL(__umoddi3);
+int64_t __moddi3(int64_t dividend, int64_t divisor);
+EXPORT_SYMBOL(__moddi3);
+#ifndef __tilegx__ /* 64-bit multiply and shift helpers, non-TILE-Gx only */
+int64_t __muldi3(int64_t, int64_t);
+EXPORT_SYMBOL(__muldi3);
+uint64_t __lshrdi3(uint64_t, unsigned int);
+EXPORT_SYMBOL(__lshrdi3);
+uint64_t __ashrdi3(uint64_t, unsigned int);
+EXPORT_SYMBOL(__ashrdi3);
+uint64_t __ashldi3(uint64_t, unsigned int);
+EXPORT_SYMBOL(__ashldi3);
+#endif /* !__tilegx__ */
diff --git a/arch/tile/lib/memchr_32.c b/arch/tile/lib/memchr_32.c
new file mode 100644
index 00000000..cc3d9bad
--- /dev/null
+++ b/arch/tile/lib/memchr_32.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+/*
+ * Return a pointer to the first byte equal to (unsigned char)c among the
+ * n bytes at s, or NULL if there is none, scanning a word at a time.
+ */
+void *memchr(const void *s, int c, size_t n)
+{
+	const uint32_t *last_word_ptr;
+	const uint32_t *p;
+	const char *last_byte_ptr;
+	uintptr_t s_int;
+	uint32_t goal, before_mask, v, bits;
+	char *ret;
+
+	if (__builtin_expect(n == 0, 0)) {
+		/* Don't dereference any memory if the array is empty. */
+		return NULL;
+	}
+
+	/* Get an aligned pointer. */
+	s_int = (uintptr_t) s;
+	p = (const uint32_t *)(s_int & -4);
+
+	/* Create four copies of the byte (unsigned, so it cannot overflow). */
+	goal = 0x01010101U * (uint8_t) c;
+
+	/* Read the first word, but munge it so that bytes before the array
+	 * will not match goal.  The shift count is masked explicitly, as C
+	 * leaves shifts by 32 or more bits undefined.
+	 */
+	before_mask = (1U << ((s_int & 3) << 3)) - 1;
+	v = (*p | before_mask) ^ (goal & before_mask);
+
+	/* Compute the address of the last byte, saturating on wraparound. */
+	last_byte_ptr = (const char *)(s_int + n - 1);
+	if (__builtin_expect((uintptr_t) last_byte_ptr < s_int, 0))
+		last_byte_ptr = (const char *)(uintptr_t) -1;
+
+	/* Compute the address of the word containing the last byte. */
+	last_word_ptr = (const uint32_t *)((uintptr_t) last_byte_ptr & -4);
+
+	while ((bits = __insn_seqb(v, goal)) == 0) {
+		if (__builtin_expect(p == last_word_ptr, 0)) {
+			/* Already read the array's last word: give up. */
+			return NULL;
+		}
+		v = *++p;
+	}
+
+	/* The match might be in a byte past the end of the array. */
+	ret = ((char *)p) + (__insn_ctz(bits) >> 3);
+	return (ret <= last_byte_ptr) ? ret : NULL;
+}
+EXPORT_SYMBOL(memchr);
diff --git a/arch/tile/lib/memchr_64.c b/arch/tile/lib/memchr_64.c
new file mode 100644
index 00000000..84fdc8d8
--- /dev/null
+++ b/arch/tile/lib/memchr_64.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+/*
+ * Return a pointer to the first byte equal to (unsigned char)c among the
+ * n bytes at s, or NULL if there is none, scanning a word at a time.
+ */
+void *memchr(const void *s, int c, size_t n)
+{
+	const uint64_t *last_word_ptr;
+	const uint64_t *p;
+	const char *last_byte_ptr;
+	uintptr_t s_int;
+	uint64_t goal, before_mask, v, bits;
+	char *ret;
+
+	if (__builtin_expect(n == 0, 0)) {
+		/* Don't dereference any memory if the array is empty. */
+		return NULL;
+	}
+
+	/* Get an aligned pointer. */
+	s_int = (uintptr_t) s;
+	p = (const uint64_t *)(s_int & -8);
+
+	/* Create eight copies of the byte for which we are looking. */
+	goal = 0x0101010101010101ULL * (uint8_t) c;
+
+	/* Read the first word, but munge it so that bytes before the array
+	 * will not match goal.  The shift count is masked explicitly, as C
+	 * leaves shifts by 64 or more bits undefined.
+	 */
+	before_mask = (1ULL << ((s_int & 7) << 3)) - 1;
+	v = (*p | before_mask) ^ (goal & before_mask);
+
+	/* Compute the address of the last byte, saturating on wraparound. */
+	last_byte_ptr = (const char *)(s_int + n - 1);
+	if (__builtin_expect((uintptr_t) last_byte_ptr < s_int, 0))
+		last_byte_ptr = (const char *)(uintptr_t) -1;
+
+	/* Compute the address of the word containing the last byte. */
+	last_word_ptr = (const uint64_t *)((uintptr_t) last_byte_ptr & -8);
+
+	while ((bits = __insn_v1cmpeq(v, goal)) == 0) {
+		if (__builtin_expect(p == last_word_ptr, 0)) {
+			/* Already read the array's last word: give up. */
+			return NULL;
+		}
+		v = *++p;
+	}
+
+	/* The match might be in a byte past the end of the array. */
+	ret = ((char *)p) + (__insn_ctz(bits) >> 3);
+	return (ret <= last_byte_ptr) ? ret : NULL;
+}
+EXPORT_SYMBOL(memchr);
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
new file mode 100644
index 00000000..2a419a61
--- /dev/null
+++ b/arch/tile/lib/memcpy_32.S
@@ -0,0 +1,618 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <arch/chip.h>
+
+
+/*
+ * This file shares the implementation of the userspace memcpy and
+ * the kernel's memcpy, copy_to_user and copy_from_user.
+ */
+
+#include <linux/linkage.h>
+
+/* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */
+#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
+#define memcpy __memcpy_asm
+#define __copy_to_user_inatomic __copy_to_user_inatomic_asm
+#define __copy_from_user_inatomic __copy_from_user_inatomic_asm
+#define __copy_from_user_zeroing __copy_from_user_zeroing_asm
+#endif
+
+#define IS_MEMCPY 0 /* zero, so "mz r0, r29, r23" at .Ldone returns dest */
+#define IS_COPY_FROM_USER 1 /* set by __copy_from_user_inatomic */
+#define IS_COPY_FROM_USER_ZEROING 2 /* set by __copy_from_user_zeroing */
+#define IS_COPY_TO_USER -1 /* set by __copy_to_user_inatomic */
+
+ .section .text.memcpy_common, "ax"
+ .align 64
+
+/* Preface each bundle that can cause an exception with this so the kernel
+ * can clean up properly: it pairs the bundle (local label 9) with
+ * memcpy_common_fixup in __ex_table. The cleanup code must not use it.
+ */
+#define EX \
+ .pushsection __ex_table, "a"; \
+ .word 9f, memcpy_common_fixup; \
+ .popsection; \
+ 9
+
+
+/* __copy_from_user_inatomic takes the kernel target address in r0,
+ * the user source in r1, and the number of bytes to copy in r2.
+ * It returns the number of uncopiable bytes (hopefully zero) in r0.
+ */
+ENTRY(__copy_from_user_inatomic)
+.type __copy_from_user_inatomic, @function
+ FEEDBACK_ENTER_EXPLICIT(__copy_from_user_inatomic, \
+ .text.memcpy_common, \
+ .Lend_memcpy_common - __copy_from_user_inatomic)
+ { movei r29, IS_COPY_FROM_USER; j memcpy_common } /* r29 selects the variant */
+ .size __copy_from_user_inatomic, . - __copy_from_user_inatomic
+
+/* __copy_from_user_zeroing is like __copy_from_user_inatomic, but
+ * any uncopiable bytes are zeroed in the target.
+ */
+ENTRY(__copy_from_user_zeroing)
+.type __copy_from_user_zeroing, @function
+ FEEDBACK_REENTER(__copy_from_user_inatomic)
+ { movei r29, IS_COPY_FROM_USER_ZEROING; j memcpy_common } /* r29 selects the variant */
+ .size __copy_from_user_zeroing, . - __copy_from_user_zeroing
+
+/* __copy_to_user_inatomic takes the user target address in r0,
+ * the kernel source in r1, and the number of bytes to copy in r2.
+ * It returns the number of uncopiable bytes (hopefully zero) in r0.
+ */
+ENTRY(__copy_to_user_inatomic)
+.type __copy_to_user_inatomic, @function
+ FEEDBACK_REENTER(__copy_from_user_inatomic)
+ { movei r29, IS_COPY_TO_USER; j memcpy_common } /* r29 selects the variant */
+ .size __copy_to_user_inatomic, . - __copy_to_user_inatomic
+
+ENTRY(memcpy) /* r0 = dest, r1 = source, r2 = size; returns dest in r0 */
+.type memcpy, @function
+ FEEDBACK_REENTER(__copy_from_user_inatomic)
+ { movei r29, IS_MEMCPY }
+ .size memcpy, . - memcpy
+ /* Fall through into memcpy_common with r29 = IS_MEMCPY. */
+
+ .type memcpy_common, @function
+memcpy_common:
+ /* On entry, r29 holds one of the IS_* macro values from above. */
+
+
+ /* r0 is the dest, r1 is the source, r2 is the size. */
+
+ /* Save aside original dest so we can return it at the end. */
+ { sw sp, lr; move r23, r0; or r4, r0, r1 }
+
+ /* Check for an empty size. */
+ { bz r2, .Ldone; andi r4, r4, 3 }
+
+ /* Save aside original values in case of a fault. */
+ { move r24, r1; move r25, r2 }
+ move r27, lr
+
+ /* Check for an unaligned source or dest. */
+ { bnz r4, .Lcopy_unaligned_maybe_many; addli r4, r2, -256 }
+
+.Lcheck_aligned_copy_size:
+ /* If we are copying < 256 bytes, branch to simple case. */
+ { blzt r4, .Lcopy_8_check; slti_u r8, r2, 8 }
+
+ /* Copying >= 256 bytes, so jump to complex prefetching loop. */
+ { andi r6, r1, 63; j .Lcopy_many }
+
+/*
+ *
+ * Aligned 4 byte at a time copy loop
+ *
+ */
+
+.Lcopy_8_loop:
+ /* Copy two words at a time to hide load latency. */
+EX: { lw r3, r1; addi r1, r1, 4; slti_u r8, r2, 16 }
+EX: { lw r4, r1; addi r1, r1, 4 }
+EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
+EX: { sw r0, r4; addi r0, r0, 4; addi r2, r2, -4 }
+.Lcopy_8_check:
+ { bzt r8, .Lcopy_8_loop; slti_u r4, r2, 4 }
+
+ /* Copy odd leftover word, if any. */
+ { bnzt r4, .Lcheck_odd_stragglers }
+EX: { lw r3, r1; addi r1, r1, 4 }
+EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
+
+.Lcheck_odd_stragglers:
+ { bnz r2, .Lcopy_unaligned_few }
+
+.Ldone:
+ /* For memcpy return original dest address, else zero. */
+ { mz r0, r29, r23; jrp lr }
+
+
+/*
+ *
+ * Prefetching multiple cache line copy handler (for large transfers).
+ *
+ */
+
+ /* Copy words until r1 is cache-line-aligned. */
+.Lalign_loop:
+EX: { lw r3, r1; addi r1, r1, 4 }
+ { andi r6, r1, 63 }
+EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
+.Lcopy_many:
+ { bnzt r6, .Lalign_loop; addi r9, r0, 63 }
+
+ { addi r3, r1, 60; andi r9, r9, -64 }
+
+#if CHIP_HAS_WH64()
+ /* No need to prefetch dst, we'll just do the wh64
+ * right before we copy a line.
+ */
+#endif
+
+EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 }
+ /* Intentionally stall for a few cycles to leave L2 cache alone. */
+ { bnzt zero, .; move r27, lr }
+EX: { lw r6, r3; addi r3, r3, 64 }
+ /* Intentionally stall for a few cycles to leave L2 cache alone. */
+ { bnzt zero, . }
+EX: { lw r7, r3; addi r3, r3, 64 }
+#if !CHIP_HAS_WH64()
+ /* Prefetch the dest */
+ /* Intentionally stall for a few cycles to leave L2 cache alone. */
+ { bnzt zero, . }
+ /* Use a real load to cause a TLB miss if necessary. We aren't using
+ * r28, so this should be fine.
+ */
+EX: { lw r28, r9; addi r9, r9, 64 }
+ /* Intentionally stall for a few cycles to leave L2 cache alone. */
+ { bnzt zero, . }
+ { prefetch r9; addi r9, r9, 64 }
+ /* Intentionally stall for a few cycles to leave L2 cache alone. */
+ { bnzt zero, . }
+ { prefetch r9; addi r9, r9, 64 }
+#endif
+ /* Intentionally stall for a few cycles to leave L2 cache alone. */
+ { bz zero, .Lbig_loop2 }
+
+ /* On entry to this loop:
+ * - r0 points to the start of dst line 0
+ * - r1 points to start of src line 0
+ * - r2 >= (256 - 60), only the first time the loop trips.
+ * - r3 contains r1 + 128 + 60 [pointer to end of source line 2]
+ * This is our prefetch address. When we get near the end
+ * rather than prefetching off the end this is changed to point
+ * to some "safe" recently loaded address.
+ * - r5 contains *(r1 + 60) [i.e. last word of source line 0]
+ * - r6 contains *(r1 + 64 + 60) [i.e. last word of source line 1]
+ * - r9 contains ((r0 + 63) & -64)
+ * [start of next dst cache line.]
+ */
+
+.Lbig_loop:
+ { jal .Lcopy_line2; add r15, r1, r2 }
+
+.Lbig_loop2:
+ /* Copy line 0, first stalling until r5 is ready. */
+EX: { move r12, r5; lw r16, r1 }
+ { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
+ /* Prefetch several lines ahead. */
+EX: { lw r5, r3; addi r3, r3, 64 }
+ { jal .Lcopy_line }
+
+ /* Copy line 1, first stalling until r6 is ready. */
+EX: { move r12, r6; lw r16, r1 }
+ { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
+ /* Prefetch several lines ahead. */
+EX: { lw r6, r3; addi r3, r3, 64 }
+ { jal .Lcopy_line }
+
+ /* Copy line 2, first stalling until r7 is ready. */
+EX: { move r12, r7; lw r16, r1 }
+ { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
+ /* Prefetch several lines ahead. */
+EX: { lw r7, r3; addi r3, r3, 64 }
+ /* Use up a caches-busy cycle by jumping back to the top of the
+ * loop. Might as well get it out of the way now.
+ */
+ { j .Lbig_loop }
+
+
+ /* On entry:
+ * - r0 points to the destination line.
+ * - r1 points to the source line.
+ * - r3 is the next prefetch address.
+ * - r9 holds the last address used for wh64.
+ * - r12 = WORD_15
+ * - r16 = WORD_0.
+ * - r17 == r1 + 16.
+ * - r27 holds saved lr to restore.
+ *
+ * On exit:
+ * - r0 is incremented by 64.
+ * - r1 is incremented by 64, unless that would point to a word
+ * beyond the end of the source array, in which case it is redirected
+ * to point to an arbitrary word already in the cache.
+ * - r2 is decremented by 64.
+ * - r3 is unchanged, unless it points to a word beyond the
+ * end of the source array, in which case it is redirected
+ * to point to an arbitrary word already in the cache.
+ * Redirecting is OK since if we are that close to the end
+ * of the array we will not come back to this subroutine
+ * and use the contents of the prefetched address.
+ * - r4 is nonzero iff r2 >= 64.
+ * - r9 is incremented by 64, unless it points beyond the
+ * end of the last full destination cache line, in which
+ * case it is redirected to a "safe address" that can be
+ * clobbered (sp - 64)
+ * - lr contains the value in r27.
+ */
+
+/* r26 unused */
+
+.Lcopy_line:
+ /* TODO: when r3 goes past the end, we would like to redirect it
+ * to prefetch the last partial cache line (if any) just once, for the
+ * benefit of the final cleanup loop. But we don't want to
+ * prefetch that line more than once, or subsequent prefetches
+ * will go into the RTF. But then .Lbig_loop should unconditionally
+ * branch to top of loop to execute final prefetch, and its
+ * nop should become a conditional branch.
+ */
+
+ /* We need two non-memory cycles here to cover the resources
+ * used by the loads initiated by the caller.
+ */
+ { add r15, r1, r2 }
+.Lcopy_line2:
+ { slt_u r13, r3, r15; addi r17, r1, 16 }
+
+ /* NOTE: this will stall for one cycle as L1 is busy. */
+
+ /* Fill second L1D line. */
+EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
+
+#if CHIP_HAS_WH64()
+ /* Prepare destination line for writing. */
+EX: { wh64 r9; addi r9, r9, 64 }
+#else
+ /* Prefetch dest line */
+ { prefetch r9; addi r9, r9, 64 }
+#endif
+ /* Load seven words that are L1D hits to cover wh64 L2 usage. */
+
+ /* Load the three remaining words from the last L1D line, which
+ * we know has already filled the L1D.
+ */
+EX: { lw r4, r1; addi r1, r1, 4; addi r20, r1, 16 } /* r4 = WORD_12 */
+EX: { lw r8, r1; addi r1, r1, 4; slt_u r13, r20, r15 }/* r8 = WORD_13 */
+EX: { lw r11, r1; addi r1, r1, -52; mvz r20, r13, r1 } /* r11 = WORD_14 */
+
+ /* Load the three remaining words from the first L1D line, first
+ * stalling until it has filled by "looking at" r16.
+ */
+EX: { lw r13, r1; addi r1, r1, 4; move zero, r16 } /* r13 = WORD_1 */
+EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_2 */
+EX: { lw r15, r1; addi r1, r1, 8; addi r10, r0, 60 } /* r15 = WORD_3 */
+
+ /* Load second word from the second L1D line, first
+ * stalling until it has filled by "looking at" r17.
+ */
+EX: { lw r19, r1; addi r1, r1, 4; move zero, r17 } /* r19 = WORD_5 */
+
+ /* Store last word to the destination line, potentially dirtying it
+ * for the first time, which keeps the L2 busy for two cycles.
+ */
+EX: { sw r10, r12 } /* store(WORD_15) */
+
+ /* Use two L1D hits to cover the sw L2 access above. */
+EX: { lw r10, r1; addi r1, r1, 4 } /* r10 = WORD_6 */
+EX: { lw r12, r1; addi r1, r1, 4 } /* r12 = WORD_7 */
+
+ /* Fill third L1D line. */
+EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */
+
+ /* Store first L1D line. */
+EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
+EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
+EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
+#if CHIP_HAS_WH64()
+EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
+#else
+ /* Back up the r9 to a cache line we are already storing to
+ * if it gets past the end of the dest vector. Strictly speaking,
+ * we don't need to back up to the start of a cache line, but it's free
+ * and tidy, so why not?
+ */
+EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */
+#endif
+ /* Store second L1D line. */
+EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */
+EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */
+EX: { sw r0, r10; addi r0, r0, 4 } /* store(WORD_6) */
+EX: { sw r0, r12; addi r0, r0, 4 } /* store(WORD_7) */
+
+EX: { lw r13, r1; addi r1, r1, 4; move zero, r18 } /* r13 = WORD_9 */
+EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_10 */
+EX: { lw r15, r1; move r1, r20 } /* r15 = WORD_11 */
+
+ /* Store third L1D line. */
+EX: { sw r0, r18; addi r0, r0, 4 } /* store(WORD_8) */
+EX: { sw r0, r13; addi r0, r0, 4 } /* store(WORD_9) */
+EX: { sw r0, r14; addi r0, r0, 4 } /* store(WORD_10) */
+EX: { sw r0, r15; addi r0, r0, 4 } /* store(WORD_11) */
+
+ /* Store rest of fourth L1D line. */
+EX: { sw r0, r4; addi r0, r0, 4 } /* store(WORD_12) */
+ {
+EX: sw r0, r8 /* store(WORD_13) */
+ addi r0, r0, 4
+ /* Will r2 be > 64 after we subtract 64 below? */
+ shri r4, r2, 7
+ }
+ {
+EX: sw r0, r11 /* store(WORD_14) */
+ addi r0, r0, 8
+ /* Record 64 bytes successfully copied. */
+ addi r2, r2, -64
+ }
+
+ { jrp lr; move lr, r27 }
+
+ /* Convey to the backtrace library that the stack frame is size
+ * zero, and the real return address is on the stack rather than
+ * in 'lr'.
+ */
+ { info 8 }
+
+ .align 64
+.Lcopy_unaligned_maybe_many:
+ /* Skip the setup overhead if we aren't copying many bytes. */
+ { slti_u r8, r2, 20; sub r4, zero, r0 }
+ { bnzt r8, .Lcopy_unaligned_few; andi r4, r4, 3 }
+ { bz r4, .Ldest_is_word_aligned; add r18, r1, r2 }
+
+/*
+ *
+ * unaligned 4 byte at a time copy handler.
+ *
+ */
+
+ /* Copy single bytes until r0 == 0 mod 4, so we can store words. */
+.Lalign_dest_loop:
+EX: { lb_u r3, r1; addi r1, r1, 1; addi r4, r4, -1 }
+EX: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
+ { bnzt r4, .Lalign_dest_loop; andi r3, r1, 3 }
+
+ /* If source and dest are now *both* aligned, do an aligned copy. */
+ { bz r3, .Lcheck_aligned_copy_size; addli r4, r2, -256 }
+
+.Ldest_is_word_aligned:
+
+#if CHIP_HAS_DWORD_ALIGN()
+EX: { andi r8, r0, 63; lwadd_na r6, r1, 4}
+ { slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned }
+
+ /* This copies unaligned words until either there are fewer
+ * than 4 bytes left to copy, or until the destination pointer
+ * is cache-aligned, whichever comes first.
+ *
+ * On entry:
+ * - r0 is the next store address.
+ * - r1 points 4 bytes past the load address corresponding to r0.
+ * - r2 >= 4
+ * - r6 is the next aligned word loaded.
+ */
+.Lcopy_unaligned_src_words:
+EX: { lwadd_na r7, r1, 4; slti_u r8, r2, 4 + 4 }
+ /* stall */
+ { dword_align r6, r7, r1; slti_u r9, r2, 64 + 4 }
+EX: { swadd r0, r6, 4; addi r2, r2, -4 }
+ { bnz r8, .Lcleanup_unaligned_words; andi r8, r0, 63 }
+ { bnzt r8, .Lcopy_unaligned_src_words; move r6, r7 }
+
+ /* On entry:
+ * - r0 is the next store address.
+ * - r1 points 4 bytes past the load address corresponding to r0.
+ * - r2 >= 4 (# of bytes left to store).
+ * - r6 is the next aligned src word value.
+ * - r9 = (r2 < 64U).
+ * - r18 points one byte past the end of source memory.
+ */
+.Ldest_is_L2_line_aligned:
+
+ {
+ /* Not a full cache line remains. */
+ bnz r9, .Lcleanup_unaligned_words
+ move r7, r6
+ }
+
+ /* r2 >= 64 */
+
+ /* Kick off two prefetches, but don't go past the end. */
+ { addi r3, r1, 63 - 4; addi r8, r1, 64 + 63 - 4 }
+ { prefetch r3; move r3, r8; slt_u r8, r8, r18 }
+ { mvz r3, r8, r1; addi r8, r3, 64 }
+ { prefetch r3; move r3, r8; slt_u r8, r8, r18 }
+ { mvz r3, r8, r1; movei r17, 0 }
+
+.Lcopy_unaligned_line:
+ /* Prefetch another line. */
+ { prefetch r3; addi r15, r1, 60; addi r3, r3, 64 }
+ /* Fire off a load of the last word we are about to copy. */
+EX: { lw_na r15, r15; slt_u r8, r3, r18 }
+
+EX: { mvz r3, r8, r1; wh64 r0 }
+
+ /* This loop runs twice.
+ *
+ * On entry:
+ * - r17 is even before the first iteration, and odd before
+ * the second. It is incremented inside the loop. Encountering
+ * an even value at the end of the loop makes it stop.
+ */
+.Lcopy_half_an_unaligned_line:
+EX: {
+ /* Stall until the last byte is ready. In the steady state this
+ * guarantees all words to load below will be in the L2 cache, which
+ * avoids shunting the loads to the RTF.
+ */
+ move zero, r15
+ lwadd_na r7, r1, 16
+ }
+EX: { lwadd_na r11, r1, 12 }
+EX: { lwadd_na r14, r1, -24 }
+EX: { lwadd_na r8, r1, 4 }
+EX: { lwadd_na r9, r1, 4 }
+EX: {
+ lwadd_na r10, r1, 8
+ /* r16 = (r2 < 64), after we subtract 32 from r2 below. */
+ slti_u r16, r2, 64 + 32
+ }
+EX: { lwadd_na r12, r1, 4; addi r17, r17, 1 }
+EX: { lwadd_na r13, r1, 8; dword_align r6, r7, r1 }
+EX: { swadd r0, r6, 4; dword_align r7, r8, r1 }
+EX: { swadd r0, r7, 4; dword_align r8, r9, r1 }
+EX: { swadd r0, r8, 4; dword_align r9, r10, r1 }
+EX: { swadd r0, r9, 4; dword_align r10, r11, r1 }
+EX: { swadd r0, r10, 4; dword_align r11, r12, r1 }
+EX: { swadd r0, r11, 4; dword_align r12, r13, r1 }
+EX: { swadd r0, r12, 4; dword_align r13, r14, r1 }
+EX: { swadd r0, r13, 4; addi r2, r2, -32 }
+ { move r6, r14; bbst r17, .Lcopy_half_an_unaligned_line }
+
+ { bzt r16, .Lcopy_unaligned_line; move r7, r6 }
+
+ /* On entry:
+ * - r0 is the next store address.
+ * - r1 points 4 bytes past the load address corresponding to r0.
+ * - r2 >= 0 (# of bytes left to store).
+ * - r7 is the next aligned src word value.
+ */
+.Lcleanup_unaligned_words:
+ /* Handle any trailing bytes. */
+ { bz r2, .Lcopy_unaligned_done; slti_u r8, r2, 4 }
+ { bzt r8, .Lcopy_unaligned_src_words; move r6, r7 }
+
+ /* Move r1 back to the point where it corresponds to r0. */
+ { addi r1, r1, -4 }
+
+#else /* !CHIP_HAS_DWORD_ALIGN() */
+
+ /* Compute right/left shift counts and load initial source words. */
+ { andi r5, r1, -4; andi r3, r1, 3 }
+EX: { lw r6, r5; addi r5, r5, 4; shli r3, r3, 3 }
+EX: { lw r7, r5; addi r5, r5, 4; sub r4, zero, r3 }
+
+ /* Load and store one word at a time, using shifts and ORs
+ * to correct for the misaligned src.
+ */
+.Lcopy_unaligned_src_loop:
+ { shr r6, r6, r3; shl r8, r7, r4 }
+EX: { lw r7, r5; or r8, r8, r6; move r6, r7 }
+EX: { sw r0, r8; addi r0, r0, 4; addi r2, r2, -4 }
+ { addi r5, r5, 4; slti_u r8, r2, 8 }
+ { bzt r8, .Lcopy_unaligned_src_loop; addi r1, r1, 4 }
+
+ { bz r2, .Lcopy_unaligned_done }
+#endif /* !CHIP_HAS_DWORD_ALIGN() */
+
+ /* Fall through */
+
+/*
+ *
+ * 1 byte at a time copy handler.
+ *
+ */
+
+.Lcopy_unaligned_few:
+EX: { lb_u r3, r1; addi r1, r1, 1 }
+EX: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
+ { bnzt r2, .Lcopy_unaligned_few }
+
+.Lcopy_unaligned_done:
+
+ /* For memcpy return original dest address, else zero. */
+ { mz r0, r29, r23; jrp lr }
+
+.Lend_memcpy_common:
+ .size memcpy_common, .Lend_memcpy_common - memcpy_common
+
+ .section .fixup,"ax"
+/*
+ * Fault recovery path for memcpy_common.
+ *
+ * Recomputes dst (r0) and src (r1) from the number of bytes still
+ * remaining (r2), then retries the rest of the copy one byte at a time.
+ * r29 selects which variant is being recovered: zero branches to
+ * memcpy_fixup_loop, a negative value branches to
+ * copy_to_user_fixup_loop, and anything else falls through to
+ * copy_from_user_fixup_loop.
+ *
+ * The user-copy variants return the remaining byte count (r2) in r0;
+ * the memcpy variant returns r23.
+ */
+memcpy_common_fixup:
+ .type memcpy_common_fixup, @function
+
+ /* Skip any bytes we already successfully copied.
+ * r2 (num remaining) is correct, but r0 (dst) and r1 (src)
+ * may not be quite right because of unrolling and prefetching.
+ * So we need to recompute their values as the address just
+ * after the last byte we are sure was successfully loaded and
+ * then stored.
+ */
+
+ /* Determine how many bytes we successfully copied.
+ * NOTE(review): r25 is presumably the original byte count, saved
+ * by code outside this section -- confirm.
+ */
+ { sub r3, r25, r2 }
+
+ /* Add this to the original r0 and r1 to get their new values. */
+ { add r0, r23, r3; add r1, r24, r3 }
+
+ /* Dispatch on r29: zero, then negative, else fall through. */
+ { bzt r29, memcpy_fixup_loop }
+ { blzt r29, copy_to_user_fixup_loop }
+
+copy_from_user_fixup_loop:
+ /* Try copying the rest one byte at a time, expecting a load fault. */
+ /* A fault on the load at .Lcfu is redirected by __ex_table to
+ * .Lcopy_from_user_fixup_zero_remainder.
+ */
+.Lcfu: { lb_u r3, r1; addi r1, r1, 1 }
+ { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
+ { bnzt r2, copy_from_user_fixup_loop }
+
+.Lcopy_from_user_fixup_zero_remainder:
+ { bbs r29, 2f } /* low bit set means IS_COPY_FROM_USER */
+ /* byte-at-a-time loop faulted, so zero the rest. */
+ /* r3 is a scratch countdown so that r2 still holds the return value. */
+ { move r3, r2; bz r2, 2f /* should be impossible, but handle it. */ }
+1: { sb r0, zero; addi r0, r0, 1; addi r3, r3, -1 }
+ { bnzt r3, 1b }
+ /* Reload lr from r27 and return the number of bytes not copied. */
+2: move lr, r27
+ { move r0, r2; jrp lr }
+
+copy_to_user_fixup_loop:
+ /* Try copying the rest one byte at a time, expecting a store fault. */
+ /* A fault on the store at .Lctu is redirected by __ex_table to
+ * .Lcopy_to_user_fixup_done.
+ */
+ { lb_u r3, r1; addi r1, r1, 1 }
+.Lctu: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
+ { bnzt r2, copy_to_user_fixup_loop }
+.Lcopy_to_user_fixup_done:
+ /* Reload lr from r27 and return the number of bytes not copied. */
+ move lr, r27
+ { move r0, r2; jrp lr }
+
+memcpy_fixup_loop:
+ /* Try copying the rest one byte at a time. We expect a disastrous
+ * fault to happen since we are in fixup code, but let it happen.
+ */
+ { lb_u r3, r1; addi r1, r1, 1 }
+ { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
+ { bnzt r2, memcpy_fixup_loop }
+ /* This should be unreachable, we should have faulted again.
+ * But be paranoid and handle it in case some interrupt changed
+ * the TLB or something.
+ */
+ move lr, r27
+ { move r0, r23; jrp lr }
+
+ .size memcpy_common_fixup, . - memcpy_common_fixup
+
+ .section __ex_table,"a"
+ .word .Lcfu, .Lcopy_from_user_fixup_zero_remainder
+ .word .Lctu, .Lcopy_to_user_fixup_done
diff --git a/arch/tile/lib/memcpy_64.c b/arch/tile/lib/memcpy_64.c
new file mode 100644
index 00000000..3fab9a6a
--- /dev/null
+++ b/arch/tile/lib/memcpy_64.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#define __memcpy memcpy
+/* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */
+
+/* Must be 8 bytes in size. */
+#define word_t uint64_t
+
+#if CHIP_L2_LINE_SIZE() != 64 && CHIP_L2_LINE_SIZE() != 128
+#error "Assumes 64 or 128 byte line size"
+#endif
+
+/* How many cache lines ahead should we prefetch? */
+#define PREFETCH_LINES_AHEAD 3
+
+/*
+ * Provide "base versions" of load and store for the normal code path.
+ * The kernel provides other versions for userspace copies.
+ */
+#define ST(p, v) (*(p) = (v))
+#define LD(p) (*(p))
+
+#ifndef USERCOPY_FUNC
+#define ST1 ST
+#define ST2 ST
+#define ST4 ST
+#define ST8 ST
+#define LD1 LD
+#define LD2 LD
+#define LD4 LD
+#define LD8 LD
+#define RETVAL dstv
+/*
+ * Copy n bytes from srcv to dstv and return dstv.
+ *
+ * Strategy:
+ *  - fewer than 16 bytes: plain byte loop;
+ *  - otherwise copy bytes until the destination is word-aligned, then
+ *    - misaligned source: load aligned words and merge neighbouring
+ *      pairs with __insn_dblalign();
+ *    - aligned source: copy whole L2 cache lines (wh64 on the
+ *      destination, prefetch on the source), then single words;
+ *  - finally store the trailing 1..7 bytes with 4/2/1-byte stores.
+ *
+ * All memory accesses go through the LDn/STn macros so that this body
+ * can be re-instantiated with fault-aware accessors (USERCOPY_FUNC).
+ */
+void *memcpy(void *__restrict dstv, const void *__restrict srcv, size_t n)
+#else
+/*
+ * Special kernel version will provide implementation of the LDn/STn
+ * macros to return a count of uncopied bytes due to mm fault.
+ */
+#define RETVAL 0
+int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
+#endif
+{
+	char *__restrict dst1 = (char *)dstv;
+	const char *__restrict src1 = (const char *)srcv;
+	const char *__restrict src1_end;
+	const char *__restrict prefetch;
+	word_t *__restrict dst8;    /* 8-byte pointer to destination memory. */
+	word_t final; /* Final bytes to write to trailing word, if any */
+	long i;
+
+	/* Short copies are not worth any setup: just move bytes. */
+	if (n < 16) {
+		for (; n; n--)
+			ST1(dst1++, LD1(src1++));
+		return RETVAL;
+	}
+
+	/*
+	 * Locate the end of source memory we will copy (the address of
+	 * the last source byte).  Don't prefetch past this.
+	 */
+	src1_end = src1 + n - 1;
+
+	/*
+	 * Prefetch ahead a few cache lines, but not past the end: once
+	 * the next line would start beyond the last source byte, fall
+	 * back to an address that is known to be inside the buffer.
+	 */
+	prefetch = src1;
+	for (i = 0; i < PREFETCH_LINES_AHEAD; i++) {
+		__insn_prefetch(prefetch);
+		prefetch += CHIP_L2_LINE_SIZE();
+		prefetch = (prefetch <= src1_end) ? prefetch : src1;
+	}
+
+	/* Copy bytes until dst is word-aligned (at most 7; n >= 16 here). */
+	for (; (uintptr_t)dst1 & (sizeof(word_t) - 1); n--)
+		ST1(dst1++, LD1(src1++));
+
+	/* 8-byte pointer to destination memory. */
+	dst8 = (word_t *)dst1;
+
+	if (__builtin_expect((uintptr_t)src1 & (sizeof(word_t) - 1), 0)) {
+		/*
+		 * Misaligned copy.  Copy 8 bytes at a time, but don't
+		 * bother with other fanciness.
+		 *
+		 * TODO: Consider prefetching and using wh64 as well.
+		 */
+
+		/* Create an aligned src8. */
+		const word_t *__restrict src8 =
+			(const word_t *)((uintptr_t)src1 & -sizeof(word_t));
+		word_t b;
+
+		/* 'a' always holds the aligned word loaded one step earlier. */
+		word_t a = LD8(src8++);
+		for (; n >= sizeof(word_t); n -= sizeof(word_t)) {
+			b = LD8(src8++);
+			a = __insn_dblalign(a, b, src1);
+			ST8(dst8++, a);
+			a = b;
+		}
+
+		if (n == 0)
+			return RETVAL;
+
+		/*
+		 * Only touch the next aligned word if it still contains
+		 * source bytes.  Go through LD8() so that the fault-aware
+		 * instantiations get an exception-table entry for this
+		 * load too, rather than a bare dereference.
+		 */
+		b = ((const char *)src8 <= src1_end) ? LD8(src8) : 0;
+
+		/*
+		 * Final source bytes to write to trailing partial
+		 * word, if any.
+		 */
+		final = __insn_dblalign(a, b, src1);
+	} else {
+		/* Aligned copy. */
+
+		const word_t *__restrict src8 = (const word_t *)src1;
+
+		/* src8 and dst8 are both word-aligned. */
+		if (n >= CHIP_L2_LINE_SIZE()) {
+			/* Copy until 'dst' is cache-line-aligned. */
+			for (; (uintptr_t)dst8 & (CHIP_L2_LINE_SIZE() - 1);
+			     n -= sizeof(word_t))
+				ST8(dst8++, LD8(src8++));
+
+			for (; n >= CHIP_L2_LINE_SIZE(); ) {
+				__insn_wh64(dst8);
+
+				/*
+				 * Prefetch and advance to next line
+				 * to prefetch, but don't go past the end:
+				 * fall back to the line being copied.
+				 */
+				__insn_prefetch(prefetch);
+				prefetch += CHIP_L2_LINE_SIZE();
+				prefetch = (prefetch <= src1_end) ? prefetch :
+					(const char *)src8;
+
+				/*
+				 * Copy an entire cache line.  Manually
+				 * unrolled to avoid idiosyncrasies of
+				 * compiler unrolling.
+				 */
+#define COPY_WORD(offset) ({ ST8(dst8+offset, LD8(src8+offset)); n -= 8; })
+				COPY_WORD(0);
+				COPY_WORD(1);
+				COPY_WORD(2);
+				COPY_WORD(3);
+				COPY_WORD(4);
+				COPY_WORD(5);
+				COPY_WORD(6);
+				COPY_WORD(7);
+#if CHIP_L2_LINE_SIZE() == 128
+				COPY_WORD(8);
+				COPY_WORD(9);
+				COPY_WORD(10);
+				COPY_WORD(11);
+				COPY_WORD(12);
+				COPY_WORD(13);
+				COPY_WORD(14);
+				COPY_WORD(15);
+#elif CHIP_L2_LINE_SIZE() != 64
+# error Fix code that assumes particular L2 cache line sizes
+#endif
+
+				dst8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
+				src8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
+			}
+		}
+
+		/* Whole words that did not make up a full cache line. */
+		for (; n >= sizeof(word_t); n -= sizeof(word_t))
+			ST8(dst8++, LD8(src8++));
+
+		if (__builtin_expect(n == 0, 1))
+			return RETVAL;
+
+		final = LD8(src8);
+	}
+
+	/*
+	 * n != 0 if we get here.  Write out any trailing bytes: the
+	 * low-order part of 'final' is stored first and then shifted out.
+	 */
+	dst1 = (char *)dst8;
+	if (n & 4) {
+		ST4((uint32_t *)dst1, final);
+		dst1 += 4;
+		final >>= 32;
+		n &= 3;
+	}
+	if (n & 2) {
+		ST2((uint16_t *)dst1, final);
+		dst1 += 2;
+		final >>= 16;
+		n &= 1;
+	}
+	if (n)
+		ST1((uint8_t *)dst1, final);
+
+	return RETVAL;
+}
+
+
+#ifdef USERCOPY_FUNC
+#undef ST1
+#undef ST2
+#undef ST4
+#undef ST8
+#undef LD1
+#undef LD2
+#undef LD4
+#undef LD8
+#undef USERCOPY_FUNC
+#endif
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
new file mode 100644
index 00000000..b2fe15e0
--- /dev/null
+++ b/arch/tile/lib/memcpy_tile64.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/string.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <asm/fixmap.h>
+#include <asm/kmap_types.h>
+#include <asm/tlbflush.h>
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+
+
+#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
+
+/* Defined in memcpy.S */
+extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
+extern unsigned long __copy_to_user_inatomic_asm(
+ void __user *to, const void *from, unsigned long n);
+extern unsigned long __copy_from_user_inatomic_asm(
+ void *to, const void __user *from, unsigned long n);
+extern unsigned long __copy_from_user_zeroing_asm(
+ void *to, const void __user *from, unsigned long n);
+
+typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);
+
+/* Size above which to consider TLB games for performance */
+#define LARGE_COPY_CUTOFF 2048
+
+/* Communicate to the simulator what we are trying to do. */
+#define sim_allow_multiple_caching(b) \
+ __insn_mtspr(SPR_SIM_CONTROL, \
+ SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS))
+
+/*
+ * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
+ *
+ * We set up our own source and destination PTEs that we fully control.
+ * This is the only way to guarantee that we don't race with another
+ * thread that is modifying the PTE; we can't afford to try the
+ * copy_{to,from}_user() technique of catching the interrupt, since
+ * we must run with interrupts disabled to avoid the risk of some
+ * other code seeing the incoherent data in our cache. (Recall that
+ * our cache is indexed by PA, so even if the other code doesn't use
+ * our kmap_atomic virtual addresses, they'll still hit in cache using
+ * the normal VAs that aren't supposed to hit in cache.)
+ *
+ * dest, source: original addresses; only their offset within the page
+ * is used here, the pages themselves come from the PTEs.
+ * dst_pte, src_pte: PTEs to install in this cpu's kmap fixmap slots.
+ * len: number of bytes to copy.
+ */
+static void memcpy_multicache(void *dest, const void *source,
+ pte_t dst_pte, pte_t src_pte, int len)
+{
+ int idx;
+ unsigned long flags, newsrc, newdst;
+ pmd_t *pmdp;
+ pte_t *ptep;
+ int type0, type1;
+ int cpu = get_cpu();
+
+ /*
+ * Disable interrupts so that we don't recurse into memcpy()
+ * in an interrupt handler, nor accidentally reference
+ * the PA of the source from an interrupt routine. Also
+ * notify the simulator that we're playing games so we don't
+ * generate spurious coherency warnings.
+ */
+ local_irq_save(flags);
+ sim_allow_multiple_caching(1);
+
+ /* Set up the new dest mapping */
+ type0 = kmap_atomic_idx_push();
+ idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
+ newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
+ pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
+ ptep = pte_offset_kernel(pmdp, newdst);
+ /* Skip the PTE write and TLB flush if the slot already maps dst_pte. */
+ if (pte_val(*ptep) != pte_val(dst_pte)) {
+ set_pte(ptep, dst_pte);
+ local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
+ }
+
+ /* Set up the new source mapping */
+ type1 = kmap_atomic_idx_push();
+ /*
+ * NOTE(review): the first slot was computed as base + type0, so the
+ * slot for type1 would be idx + (type1 - type0); this adjusts idx in
+ * the opposite direction. Confirm against the fixmap index layout.
+ */
+ idx += (type0 - type1);
+ src_pte = hv_pte_set_nc(src_pte);
+ src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */
+ newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
+ pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
+ ptep = pte_offset_kernel(pmdp, newsrc);
+ __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
+ local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
+
+ /* Actually move the data. */
+ __memcpy_asm((void *)newdst, (const void *)newsrc, len);
+
+ /*
+ * Remap the source as locally-cached and not OLOC'ed so that
+ * we can inval without also invaling the remote cpu's cache.
+ * This also avoids known errata with inv'ing cacheable oloc data.
+ */
+ src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
+ src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
+ __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
+ local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
+
+ /*
+ * Do the actual invalidation, covering the full L2 cache line
+ * at the end since __memcpy_asm() is somewhat aggressive.
+ */
+ __inv_buffer((void *)newsrc, len);
+
+ /*
+ * We're done: notify the simulator that all is back to normal,
+ * and re-enable interrupts and pre-emption.
+ */
+ /* Two pops, matching the two kmap_atomic_idx_push() calls above. */
+ kmap_atomic_idx_pop();
+ kmap_atomic_idx_pop();
+ sim_allow_multiple_caching(0);
+ local_irq_restore(flags);
+ put_cpu();
+}
+
+/*
+ * Identify large copies from remotely-cached memory, and copy them
+ * via memcpy_multicache() if they look good, otherwise fall back
+ * to the particular kind of copying passed as the memcpy_t function.
+ *
+ * The copy proceeds in chunks that cross neither a source nor a
+ * destination page boundary. As soon as a chunk does not qualify (or
+ * fewer than LARGE_COPY_CUTOFF bytes remain), everything that is left
+ * is handed to func, and func's return value is returned.
+ */
+static unsigned long fast_copy(void *dest, const void *source, int len,
+ memcpy_t func)
+{
+ /*
+ * Check if it's big enough to bother with. We may end up doing a
+ * small copy via TLB manipulation if we're near a page boundary,
+ * but presumably we'll make it up when we hit the second page.
+ */
+ while (len >= LARGE_COPY_CUTOFF) {
+ int copy_size, bytes_left_on_page;
+ pte_t *src_ptep, *dst_ptep;
+ pte_t src_pte, dst_pte;
+ struct page *src_page, *dst_page;
+
+ /* Is the source page oloc'ed to a remote cpu? */
+retry_source:
+ src_ptep = virt_to_pte(current->mm, (unsigned long)source);
+ if (src_ptep == NULL)
+ break;
+ src_pte = *src_ptep;
+ if (!hv_pte_get_present(src_pte) ||
+ !hv_pte_get_readable(src_pte) ||
+ hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
+ break;
+ if (get_remote_cache_cpu(src_pte) == smp_processor_id())
+ break;
+ src_page = pfn_to_page(hv_pte_get_pfn(src_pte));
+ get_page(src_page);
+ /*
+ * Re-read the PTE: if it changed while we were taking the page
+ * reference, drop the reference and start over.
+ */
+ if (pte_val(src_pte) != pte_val(*src_ptep)) {
+ put_page(src_page);
+ goto retry_source;
+ }
+ if (pte_huge(src_pte)) {
+ /* Adjust the PTE to correspond to a small page */
+ int pfn = hv_pte_get_pfn(src_pte);
+ pfn += (((unsigned long)source & (HPAGE_SIZE-1))
+ >> PAGE_SHIFT);
+ src_pte = pfn_pte(pfn, src_pte);
+ src_pte = pte_mksmall(src_pte);
+ }
+
+ /* Is the destination page writable? */
+retry_dest:
+ dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
+ if (dst_ptep == NULL) {
+ put_page(src_page);
+ break;
+ }
+ dst_pte = *dst_ptep;
+ if (!hv_pte_get_present(dst_pte) ||
+ !hv_pte_get_writable(dst_pte)) {
+ put_page(src_page);
+ break;
+ }
+ dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte));
+ if (dst_page == src_page) {
+ /*
+ * Source and dest are on the same page; this
+ * potentially exposes us to incoherence if any
+ * part of src and dest overlap on a cache line.
+ * Just give up rather than trying to be precise.
+ */
+ put_page(src_page);
+ break;
+ }
+ get_page(dst_page);
+ /* Same recheck as for the source; src_page stays referenced. */
+ if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
+ put_page(dst_page);
+ goto retry_dest;
+ }
+ if (pte_huge(dst_pte)) {
+ /* Adjust the PTE to correspond to a small page */
+ int pfn = hv_pte_get_pfn(dst_pte);
+ pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
+ >> PAGE_SHIFT);
+ dst_pte = pfn_pte(pfn, dst_pte);
+ dst_pte = pte_mksmall(dst_pte);
+ }
+
+ /* All looks good: create a cachable PTE and copy from it */
+ /*
+ * Clamp the chunk to what is left on the source page and on
+ * the destination page, whichever is smaller.
+ */
+ copy_size = len;
+ bytes_left_on_page =
+ PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
+ if (copy_size > bytes_left_on_page)
+ copy_size = bytes_left_on_page;
+ bytes_left_on_page =
+ PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
+ if (copy_size > bytes_left_on_page)
+ copy_size = bytes_left_on_page;
+ memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);
+
+ /* Release the pages */
+ put_page(dst_page);
+ put_page(src_page);
+
+ /* Continue on the next page */
+ dest += copy_size;
+ source += copy_size;
+ len -= copy_size;
+ }
+
+ /* Whatever remains (possibly everything) goes to the fallback. */
+ return func(dest, source, len);
+}
+
+/*
+ * Kernel memcpy(): copies of LARGE_COPY_CUTOFF bytes or more are routed
+ * through fast_copy(), smaller ones go straight to the assembly routine.
+ */
+void *memcpy(void *to, const void *from, __kernel_size_t n)
+{
+	unsigned long ret;
+
+	if (n >= LARGE_COPY_CUTOFF)
+		ret = fast_copy(to, from, n, __memcpy_asm);
+	else
+		ret = __memcpy_asm(to, from, n);
+
+	return (void *)ret;
+}
+
+/*
+ * Copy to user space; copies of LARGE_COPY_CUTOFF bytes or more are
+ * routed through fast_copy() with the assembly routine as fallback.
+ */
+unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
+				      unsigned long n)
+{
+	if (n >= LARGE_COPY_CUTOFF)
+		return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
+
+	return __copy_to_user_inatomic_asm(to, from, n);
+}
+
+/*
+ * Copy from user space; copies of LARGE_COPY_CUTOFF bytes or more are
+ * routed through fast_copy() with the assembly routine as fallback.
+ */
+unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
+					unsigned long n)
+{
+	if (n >= LARGE_COPY_CUTOFF)
+		return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
+
+	return __copy_from_user_inatomic_asm(to, from, n);
+}
+
+/*
+ * Zeroing variant of copy-from-user; copies of LARGE_COPY_CUTOFF bytes
+ * or more are routed through fast_copy() with the assembly routine as
+ * fallback.
+ */
+unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
+				       unsigned long n)
+{
+	if (n >= LARGE_COPY_CUTOFF)
+		return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
+
+	return __copy_from_user_zeroing_asm(to, from, n);
+}
+
+#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
new file mode 100644
index 00000000..37440caa
--- /dev/null
+++ b/arch/tile/lib/memcpy_user_64.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Do memcpy(), but trap and return "n" when a load or store faults.
+ *
+ * Note: this idiom only works when memcpy() compiles to a leaf function.
+ * Here leaf function not only means it does not have calls, but also
+ * requires no stack operations (sp, stack frame pointer) and no
+ * use of callee-saved registers, else "jrp lr" will be incorrect since
+ * unwinding stack frame is bypassed. memcpy() is not complex, so
+ * these conditions are satisfied here, but we need to be careful when
+ * modifying this file. This is not a clean solution but is the best
+ * one so far.
+ *
+ * Also note that we are capturing "n" from the containing scope here.
+ */
+
+/*
+ * Store v to *p using store instruction "inst", with fault recovery.
+ *
+ * The store is emitted at local label 1. Label 2, placed in section
+ * .coldtext.memcpy, holds a bundle that moves the current value of "n"
+ * (captured from the enclosing function) into r0 and jumps to lr. The
+ * pair (1b, 2b) is recorded in __ex_table so that a fault on the store
+ * resumes at label 2.
+ */
+#define _ST(p, inst, v) \
+ ({ \
+ asm("1: " #inst " %0, %1;" \
+ ".pushsection .coldtext.memcpy,\"ax\";" \
+ "2: { move r0, %2; jrp lr };" \
+ ".section __ex_table,\"a\";" \
+ ".quad 1b, 2b;" \
+ ".popsection" \
+ : "=m" (*(p)) : "r" (v), "r" (n)); \
+ })
+
+/*
+ * Load from *p using load instruction "inst", with fault recovery, and
+ * evaluate to the loaded value (as unsigned long).
+ *
+ * The load is emitted at local label 1. Label 2, placed in section
+ * .coldtext.memcpy, holds a bundle that moves the current value of "n"
+ * (captured from the enclosing function) into r0 and jumps to lr. The
+ * pair (1b, 2b) is recorded in __ex_table so that a fault on the load
+ * resumes at label 2.
+ */
+#define _LD(p, inst) \
+ ({ \
+ unsigned long __v; \
+ asm("1: " #inst " %0, %1;" \
+ ".pushsection .coldtext.memcpy,\"ax\";" \
+ "2: { move r0, %2; jrp lr };" \
+ ".section __ex_table,\"a\";" \
+ ".quad 1b, 2b;" \
+ ".popsection" \
+ : "=r" (__v) : "m" (*(p)), "r" (n)); \
+ __v; \
+ })
+
+#define USERCOPY_FUNC __copy_to_user_inatomic
+#define ST1(p, v) _ST((p), st1, (v))
+#define ST2(p, v) _ST((p), st2, (v))
+#define ST4(p, v) _ST((p), st4, (v))
+#define ST8(p, v) _ST((p), st, (v))
+#define LD1 LD
+#define LD2 LD
+#define LD4 LD
+#define LD8 LD
+#include "memcpy_64.c"
+
+#define USERCOPY_FUNC __copy_from_user_inatomic
+#define ST1 ST
+#define ST2 ST
+#define ST4 ST
+#define ST8 ST
+#define LD1(p) _LD((p), ld1u)
+#define LD2(p) _LD((p), ld2u)
+#define LD4(p) _LD((p), ld4u)
+#define LD8(p) _LD((p), ld)
+#include "memcpy_64.c"
+
+#define USERCOPY_FUNC __copy_in_user_inatomic
+#define ST1(p, v) _ST((p), st1, (v))
+#define ST2(p, v) _ST((p), st2, (v))
+#define ST4(p, v) _ST((p), st4, (v))
+#define ST8(p, v) _ST((p), st, (v))
+#define LD1(p) _LD((p), ld1u)
+#define LD2(p) _LD((p), ld2u)
+#define LD4(p) _LD((p), ld4u)
+#define LD8(p) _LD((p), ld)
+#include "memcpy_64.c"
+
+/*
+ * Copy n bytes from user space; if some bytes could not be copied,
+ * zero the corresponding tail of the destination.  Returns the number
+ * of bytes that were not copied.
+ */
+unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
+				       unsigned long n)
+{
+	unsigned long uncopied = __copy_from_user_inatomic(to, from, n);
+
+	if (unlikely(uncopied)) {
+		/* The last 'uncopied' bytes of the destination were not written. */
+		void *tail = to + n - uncopied;
+
+		memset(tail, 0, uncopied);
+	}
+
+	return uncopied;
+}
diff --git a/arch/tile/lib/memmove.c b/arch/tile/lib/memmove.c
new file mode 100644
index 00000000..fd615ae6
--- /dev/null
+++ b/arch/tile/lib/memmove.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+/*
+ * Copy n bytes from src to dest, allowing the two ranges to overlap.
+ * Returns dest.
+ */
+void *memmove(void *dest, const void *src, size_t n)
+{
+	const uint8_t *from = (const uint8_t *)src;
+	uint8_t *to = (uint8_t *)dest;
+	size_t i;
+
+	/*
+	 * Disjoint ranges: hand the whole job to memcpy, which does
+	 * the heavy lifting (prefetching, etc.)
+	 */
+	if (from >= to + n || to >= from + n)
+		return memcpy(dest, src, n);
+
+	if (from < to) {
+		/* Destination is above the source: copy from the last byte down. */
+		for (i = n; i != 0; i--)
+			to[i - 1] = from[i - 1];
+	} else {
+		/* Destination is at or below the source: copy upwards. */
+		for (i = 0; i != n; i++)
+			to[i] = from[i];
+	}
+
+	return dest;
+}
+EXPORT_SYMBOL(memmove);
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c
new file mode 100644
index 00000000..57dbb3a5
--- /dev/null
+++ b/arch/tile/lib/memset_32.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <arch/chip.h>
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+#undef memset
+
+/*
+ * Fill the first n bytes at s with the byte value c and return s.
+ *
+ * Fewer than BYTE_CUTOFF bytes are written with a plain byte loop.
+ * Otherwise the head and tail are written bytewise until both the
+ * pointer and the remaining length are multiples of 4, and the middle
+ * is written as 32-bit words, a cache line at a time where possible
+ * (with prefetching, or with wh64 when CHIP_HAS_WH64()).
+ */
+void *memset(void *s, int c, size_t n)
+{
+ uint32_t *out32;
+ int n32;
+ uint32_t v16, v32;
+ uint8_t *out8 = s;
+#if !CHIP_HAS_WH64()
+ int ahead32;
+#else
+ int to_align32;
+#endif
+
+ /* Experimentation shows that a trivial tight loop is a win up until
+ * around a size of 20, where writing a word at a time starts to win.
+ */
+#define BYTE_CUTOFF 20
+
+#if BYTE_CUTOFF < 3
+ /* This must be at least this big, or some code later
+ * on doesn't work.
+ */
+#error "BYTE_CUTOFF is too small"
+#endif
+
+ if (n < BYTE_CUTOFF) {
+ /* Strangely, this turns out to be the tightest way to
+ * write this loop.
+ */
+ if (n != 0) {
+ do {
+ /* Strangely, combining these into one line
+ * performs worse.
+ */
+ *out8 = c;
+ out8++;
+ } while (--n != 0);
+ }
+
+ return s;
+ }
+
+#if !CHIP_HAS_WH64()
+ /* Use a spare issue slot to start prefetching the first cache
+ * line early. This instruction is free as the store can be buried
+ * in otherwise idle issue slots doing ALU ops.
+ */
+ __insn_prefetch(out8);
+
+ /* We prefetch the end so that a short memset that spans two cache
+ * lines gets some prefetching benefit. Again we believe this is free
+ * to issue.
+ */
+ __insn_prefetch(&out8[n - 1]);
+#endif /* !CHIP_HAS_WH64() */
+
+
+ /* Align 'out8'. We know n >= 3 so this won't write past the end. */
+ while (((uintptr_t) out8 & 3) != 0) {
+ *out8++ = c;
+ --n;
+ }
+
+ /* Align 'n'. */
+ /* Writes the trailing bytes, from the end backwards. */
+ while (n & 3)
+ out8[--n] = c;
+
+ /* From here on the remaining region is a whole number of words. */
+ out32 = (uint32_t *) out8;
+ n32 = n >> 2;
+
+ /* Tile input byte out to 32 bits. */
+ v16 = __insn_intlb(c, c);
+ v32 = __insn_intlh(v16, v16);
+
+ /* This must be at least 8 or the following loop doesn't work. */
+#define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4)
+
+#if !CHIP_HAS_WH64()
+
+ ahead32 = CACHE_LINE_SIZE_IN_WORDS;
+
+ /* We already prefetched the first and last cache lines, so
+ * we only need to do more prefetching if we are storing
+ * to more than two cache lines.
+ */
+ if (n32 > CACHE_LINE_SIZE_IN_WORDS * 2) {
+ int i;
+
+ /* Prefetch the next several cache lines.
+ * This is the setup code for the software-pipelined
+ * loop below.
+ */
+#define MAX_PREFETCH 5
+ ahead32 = n32 & -CACHE_LINE_SIZE_IN_WORDS;
+ if (ahead32 > MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS)
+ ahead32 = MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS;
+
+ for (i = CACHE_LINE_SIZE_IN_WORDS;
+ i < ahead32; i += CACHE_LINE_SIZE_IN_WORDS)
+ __insn_prefetch(&out32[i]);
+ }
+
+ if (n32 > ahead32) {
+ while (1) {
+ int j;
+
+ /* Prefetch by reading one word several cache lines
+ * ahead. Since loads are non-blocking this will
+ * cause the full cache line to be read while we are
+ * finishing earlier cache lines. Using a store
+ * here causes microarchitectural performance
+ * problems where a victimizing store miss goes to
+ * the head of the retry FIFO and locks the pipe for
+ * a few cycles. So a few subsequent stores in this
+ * loop go into the retry FIFO, and then later
+ * stores see other stores to the same cache line
+ * are already in the retry FIFO and themselves go
+ * into the retry FIFO, filling it up and grinding
+ * to a halt waiting for the original miss to be
+ * satisfied.
+ */
+ __insn_prefetch(&out32[ahead32]);
+
+#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0
+#error "Unhandled CACHE_LINE_SIZE_IN_WORDS"
+#endif
+
+ n32 -= CACHE_LINE_SIZE_IN_WORDS;
+
+ /* Save icache space by only partially unrolling
+ * this loop.
+ */
+ for (j = CACHE_LINE_SIZE_IN_WORDS / 4; j > 0; j--) {
+ *out32++ = v32;
+ *out32++ = v32;
+ *out32++ = v32;
+ *out32++ = v32;
+ }
+
+ /* To save compiled code size, reuse this loop even
+ * when we run out of prefetching to do by dropping
+ * ahead32 down.
+ */
+ if (n32 <= ahead32) {
+ /* Not even a full cache line left,
+ * so stop now.
+ */
+ if (n32 < CACHE_LINE_SIZE_IN_WORDS)
+ break;
+
+ /* Choose a small enough value that we don't
+ * prefetch past the end. There's no sense
+ * in touching cache lines we don't have to.
+ */
+ ahead32 = CACHE_LINE_SIZE_IN_WORDS - 1;
+ }
+ }
+ }
+
+#else /* CHIP_HAS_WH64() */
+
+ /* Determine how many words we need to emit before the 'out32'
+ * pointer becomes aligned modulo the cache line size.
+ */
+ to_align32 =
+ (-((uintptr_t)out32 >> 2)) & (CACHE_LINE_SIZE_IN_WORDS - 1);
+
+ /* Only bother aligning and using wh64 if there is at least
+ * one full cache line to process. This check also prevents
+ * overrunning the end of the buffer with alignment words.
+ */
+ if (to_align32 <= n32 - CACHE_LINE_SIZE_IN_WORDS) {
+ int lines_left;
+
+ /* Align out32 mod the cache line size so we can use wh64. */
+ n32 -= to_align32;
+ for (; to_align32 != 0; to_align32--) {
+ *out32 = v32;
+ out32++;
+ }
+
+ /* Use unsigned divide to turn this into a right shift. */
+ lines_left = (unsigned)n32 / CACHE_LINE_SIZE_IN_WORDS;
+
+ do {
+ /* Only wh64 a few lines at a time, so we don't
+ * exceed the maximum number of victim lines.
+ */
+ int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS())
+ ? lines_left
+ : CHIP_MAX_OUTSTANDING_VICTIMS());
+ uint32_t *wh = out32;
+ int i = x;
+ int j;
+
+ lines_left -= x;
+
+ /* Issue wh64 for each of the next x lines... */
+ do {
+ __insn_wh64(wh);
+ wh += CACHE_LINE_SIZE_IN_WORDS;
+ } while (--i);
+
+ /* ...then fill those x lines, four words per iteration. */
+ for (j = x * (CACHE_LINE_SIZE_IN_WORDS / 4);
+ j != 0; j--) {
+ *out32++ = v32;
+ *out32++ = v32;
+ *out32++ = v32;
+ *out32++ = v32;
+ }
+ } while (lines_left != 0);
+
+ /* We processed all full lines above, so only this many
+ * words remain to be processed.
+ */
+ n32 &= CACHE_LINE_SIZE_IN_WORDS - 1;
+ }
+
+#endif /* CHIP_HAS_WH64() */
+
+ /* Now handle any leftover values. */
+ if (n32 != 0) {
+ do {
+ *out32 = v32;
+ out32++;
+ } while (--n32 != 0);
+ }
+
+ return s;
+}
+EXPORT_SYMBOL(memset);
diff --git a/arch/tile/lib/memset_64.c b/arch/tile/lib/memset_64.c
new file mode 100644
index 00000000..38730857
--- /dev/null
+++ b/arch/tile/lib/memset_64.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <arch/chip.h>
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+#undef memset
+
+/*
+ * Fill the first n bytes of s with the byte value c and return s.
+ * Fills shorter than BYTE_CUTOFF use a plain byte loop. Longer fills
+ * first align the pointer and the length to 8 bytes, then store 64-bit
+ * values, issuing wh64 on whole cache lines when at least one full
+ * line is available.
+ */
+void *memset(void *s, int c, size_t n)
+{
+ uint64_t *out64;
+ int n64, to_align64;
+ uint64_t v64;
+ uint8_t *out8 = s;
+
+ /* Experimentation shows that a trivial tight loop is a win up until
+ * around a size of 20, where writing a word at a time starts to win.
+ */
+#define BYTE_CUTOFF 20
+
+#if BYTE_CUTOFF < 7
+ /* This must be at least this big, or some code later
+ * on doesn't work.
+ */
+#error "BYTE_CUTOFF is too small"
+#endif
+
+ if (n < BYTE_CUTOFF) {
+ /* Strangely, this turns out to be the tightest way to
+ * write this loop.
+ */
+ if (n != 0) {
+ do {
+ /* Strangely, combining these into one line
+ * performs worse.
+ */
+ *out8 = c;
+ out8++;
+ } while (--n != 0);
+ }
+
+ return s;
+ }
+
+ /* Align 'out8'. We know n >= 7 so this won't write past the end. */
+ while (((uintptr_t) out8 & 7) != 0) {
+ *out8++ = c;
+ --n;
+ }
+
+ /* Align 'n' by filling the trailing bytes from the end backwards. */
+ while (n & 7)
+ out8[--n] = c;
+
+ /* From here on both the pointer and the count are multiples of 8. */
+ out64 = (uint64_t *) out8;
+ n64 = n >> 3;
+
+ /* Tile input byte out to 64 bits. */
+ /* KLUDGE */
+ v64 = 0x0101010101010101ULL * (uint8_t)c;
+
+ /* This must be at least 8 or the following loop doesn't work. */
+#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)
+
+ /* Determine how many doublewords we need to emit before the 'out64'
+ * pointer becomes aligned modulo the cache line size.
+ */
+ to_align64 = (-((uintptr_t)out64 >> 3)) &
+ (CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1);
+
+ /* Only bother aligning and using wh64 if there is at least
+ * one full cache line to process. This check also prevents
+ * overrunning the end of the buffer with alignment words.
+ */
+ if (to_align64 <= n64 - CACHE_LINE_SIZE_IN_DOUBLEWORDS) {
+ int lines_left;
+
+ /* Align out64 mod the cache line size so we can use wh64. */
+ n64 -= to_align64;
+ for (; to_align64 != 0; to_align64--) {
+ *out64 = v64;
+ out64++;
+ }
+
+ /* Use unsigned divide to turn this into a right shift. */
+ lines_left = (unsigned)n64 / CACHE_LINE_SIZE_IN_DOUBLEWORDS;
+
+ do {
+ /* Only wh64 a few lines at a time, so we don't
+ * exceed the maximum number of victim lines.
+ */
+ int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS())
+ ? lines_left
+ : CHIP_MAX_OUTSTANDING_VICTIMS());
+ uint64_t *wh = out64;
+ int i = x;
+ int j;
+
+ lines_left -= x;
+
+ /* Issue wh64 on each of the x lines in this batch. */
+ do {
+ __insn_wh64(wh);
+ wh += CACHE_LINE_SIZE_IN_DOUBLEWORDS;
+ } while (--i);
+
+ /* Fill the same x lines, four doublewords per iteration. */
+ for (j = x * (CACHE_LINE_SIZE_IN_DOUBLEWORDS / 4);
+ j != 0; j--) {
+ *out64++ = v64;
+ *out64++ = v64;
+ *out64++ = v64;
+ *out64++ = v64;
+ }
+ } while (lines_left != 0);
+
+ /* We processed all full lines above, so only this many
+ * doublewords remain to be processed.
+ */
+ n64 &= CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1;
+ }
+
+ /* Now handle any leftover values. */
+ if (n64 != 0) {
+ do {
+ *out64 = v64;
+ out64++;
+ } while (--n64 != 0);
+ }
+
+ return s;
+}
+EXPORT_SYMBOL(memset);
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
new file mode 100644
index 00000000..b16ac49a
--- /dev/null
+++ b/arch/tile/lib/spinlock_32.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+#include <arch/spr_def.h>
+
+#include "spinlock_common.h"
+
+/*
+ * Acquire the ticket spinlock: take the next ticket, then spin until
+ * current_ticket reaches it.
+ */
+void arch_spin_lock(arch_spinlock_t *lock)
+{
+ int my_ticket;
+ int iterations = 0;
+ int delta;
+
+ /*
+ * tns hands back the old next_ticket value. If its low bit is set,
+ * another tns is still outstanding on the word, so back off and retry.
+ */
+ while ((my_ticket = __insn_tns((void *)&lock->next_ticket)) & 1)
+ delay_backoff(iterations++);
+
+ /* Increment the next ticket number, implicitly releasing tns lock. */
+ lock->next_ticket = my_ticket + TICKET_QUANTUM;
+
+ /*
+ * Wait until it's our turn. The delay is scaled by how far
+ * current_ticket still is from our ticket.
+ */
+ while ((delta = my_ticket - lock->current_ticket) != 0)
+ relax((128 / CYCLES_PER_RELAX_LOOP) * delta);
+}
+EXPORT_SYMBOL(arch_spin_lock);
+
+/*
+ * Try once to take the ticket spinlock without waiting.
+ * Returns 1 if the lock was acquired, 0 otherwise.
+ */
+int arch_spin_trylock(arch_spinlock_t *lock)
+{
+ /*
+ * Grab a ticket; no need to retry if it's busy, we'll just
+ * treat that the same as "locked", since someone else
+ * will lock it momentarily anyway.
+ */
+ int my_ticket = __insn_tns((void *)&lock->next_ticket);
+
+ if (my_ticket == lock->current_ticket) {
+ /* Not currently locked, so lock it by keeping this ticket. */
+ lock->next_ticket = my_ticket + TICKET_QUANTUM;
+ /* Success! */
+ return 1;
+ }
+
+ /*
+ * Low bit clear means we fetched a real ticket value rather than
+ * another cpu's tns marker, so we must put it back unchanged.
+ */
+ if (!(my_ticket & 1)) {
+ /* Release next_ticket. */
+ lock->next_ticket = my_ticket;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(arch_spin_trylock);
+
+/* Spin, with bounded backoff, until the lock is observed to be free. */
+void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	u32 attempt;
+
+	for (attempt = 0; arch_spin_is_locked(lock); attempt++)
+		delay_backoff(attempt);
+}
+EXPORT_SYMBOL(arch_spin_unlock_wait);
+
+/*
+ * The low byte is always reserved to be the marker for a "tns" operation
+ * since the low bit is set to "1" by a tns. The next seven bits are
+ * zeroes. The next byte holds the "next" writer value, i.e. the ticket
+ * available for the next task that wants to write. The third byte holds
+ * the current writer value, i.e. the writer who holds the current ticket.
+ * If current == next == 0, there are no interested writers.
+ */
+#define WR_NEXT_SHIFT _WR_NEXT_SHIFT
+#define WR_CURR_SHIFT _WR_CURR_SHIFT
+#define WR_WIDTH _WR_WIDTH
+#define WR_MASK ((1 << WR_WIDTH) - 1)
+
+/*
+ * The last eight bits hold the active reader count. This has to be
+ * zero before a writer can start to write.
+ */
+#define RD_COUNT_SHIFT _RD_COUNT_SHIFT
+#define RD_COUNT_WIDTH _RD_COUNT_WIDTH
+#define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1)
+
+
+/*
+ * We can get the read lock if everything but the reader bits (which
+ * are in the high part of the word) is zero, i.e. no active or
+ * waiting writers, no tns.
+ *
+ * We guard the tns/store-back with an interrupt critical section to
+ * preserve the semantic that the same read lock can be acquired in an
+ * interrupt context.
+ */
+/*
+ * Try once to take the rwlock for reading.
+ * Returns 1 if the reader count was incremented, 0 if the lock word
+ * showed a writer or an outstanding tns.
+ */
+inline int arch_read_trylock(arch_rwlock_t *rwlock)
+{
+ u32 val;
+ /* Enter the interrupt critical section around the tns/store-back. */
+ __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
+ val = __insn_tns((int *)&rwlock->lock);
+ /* Shifting out the reader count leaves only the writer/tns bits. */
+ if (likely((val << _RD_COUNT_WIDTH) == 0)) {
+ val += 1 << RD_COUNT_SHIFT;
+ rwlock->lock = val;
+ __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+ BUG_ON(val == 0); /* we don't expect wraparound */
+ return 1;
+ }
+ /* We fetched a real lock word (not a tns marker): restore it. */
+ if ((val & 1) == 0)
+ rwlock->lock = val;
+ __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+ return 0;
+}
+EXPORT_SYMBOL(arch_read_trylock);
+
+/*
+ * Spin doing arch_read_trylock() until we acquire the lock.
+ * ISSUE: This approach can permanently starve readers. A reader who sees
+ * a writer could instead take a ticket lock (just like a writer would),
+ * and atomically enter read mode (with 1 reader) when it gets the ticket.
+ * This way both readers and writers would always make forward progress
+ * in a finite time.
+ */
+void arch_read_lock(arch_rwlock_t *rwlock)
+{
+	u32 attempt;
+
+	/* Keep retrying the trylock, backing off a little more each time. */
+	for (attempt = 0; unlikely(!arch_read_trylock(rwlock)); attempt++)
+		delay_backoff(attempt);
+}
+EXPORT_SYMBOL(arch_read_lock);
+
+/*
+ * Drop one reader from the rwlock, retrying with backoff while another
+ * cpu's tns is outstanding on the lock word.
+ */
+void arch_read_unlock(arch_rwlock_t *rwlock)
+{
+ u32 val, iterations = 0;
+
+ mb(); /* guarantee anything modified under the lock is visible */
+ for (;;) {
+ __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
+ val = __insn_tns((int *)&rwlock->lock);
+ /* Low bit clear: we own the word, so store it back minus one reader. */
+ if (likely((val & 1) == 0)) {
+ rwlock->lock = val - (1 << _RD_COUNT_SHIFT);
+ __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+ break;
+ }
+ /* Someone else's tns is in flight; leave the critical section and retry. */
+ __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+ delay_backoff(iterations++);
+ }
+}
+EXPORT_SYMBOL(arch_read_unlock);
+
+/*
+ * We don't need an interrupt critical section here (unlike for
+ * arch_read_lock) since we should never use a bare write lock where
+ * it could be interrupted by code that could try to re-acquire it.
+ */
+/*
+ * Acquire the rwlock for writing. The uncontended case is a single
+ * tns plus store; otherwise wait for readers to drain, take a writer
+ * ticket, and spin until the "current" field reaches that ticket.
+ */
+void arch_write_lock(arch_rwlock_t *rwlock)
+{
+ /*
+ * The trailing underscore on this variable (and curr_ below)
+ * reminds us that the high bits are garbage; we mask them out
+ * when we compare them.
+ */
+ u32 my_ticket_;
+ u32 iterations = 0;
+ u32 val = __insn_tns((int *)&rwlock->lock);
+
+ /* Fast path: the word was entirely zero, so mark it write-locked. */
+ if (likely(val == 0)) {
+ rwlock->lock = 1 << _WR_NEXT_SHIFT;
+ return;
+ }
+
+ /*
+ * Wait until there are no readers, then bump up the next
+ * field and capture the ticket value.
+ */
+ for (;;) {
+ if (!(val & 1)) {
+ if ((val >> RD_COUNT_SHIFT) == 0)
+ break;
+ /* Readers still active: restore the word before retrying. */
+ rwlock->lock = val;
+ }
+ delay_backoff(iterations++);
+ val = __insn_tns((int *)&rwlock->lock);
+ }
+
+ /* Take out the next ticket and extract my ticket value. */
+ rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT);
+ my_ticket_ = val >> WR_NEXT_SHIFT;
+
+ /* Wait until the "current" field matches our ticket. */
+ for (;;) {
+ u32 curr_ = val >> WR_CURR_SHIFT;
+ u32 delta = ((my_ticket_ - curr_) & WR_MASK);
+ if (likely(delta == 0))
+ break;
+
+ /* Delay based on how many lock-holders are still out there. */
+ relax((256 / CYCLES_PER_RELAX_LOOP) * delta);
+
+ /*
+ * Get a non-tns value to check; we don't need to tns
+ * it ourselves. Since we're not tns'ing, we retry
+ * more rapidly to get a valid value.
+ */
+ while ((val = rwlock->lock) & 1)
+ relax(4);
+ }
+}
+EXPORT_SYMBOL(arch_write_lock);
+
+/*
+ * Try once to take the rwlock for writing.
+ * Returns 1 on success, 0 if the lock word was not completely idle.
+ */
+int arch_write_trylock(arch_rwlock_t *rwlock)
+{
+	u32 old = __insn_tns((int *)&rwlock->lock);
+
+	if (likely(old == 0)) {
+		/* Completely idle: set the "next" field to mark it locked. */
+		rwlock->lock = 1 << _WR_NEXT_SHIFT;
+		return 1;
+	}
+
+	/*
+	 * A tns is in progress, or there is a waiting or active writer,
+	 * or there are active readers.  If what we fetched was a real
+	 * lock word rather than a tns marker, put it back before giving up.
+	 */
+	if ((old & 1) == 0)
+		rwlock->lock = old;
+	return 0;
+}
+EXPORT_SYMBOL(arch_write_trylock);
+
+/*
+ * Release the rwlock held for writing. The common case (we are the
+ * only writer, no waiters) just clears the word; otherwise the
+ * "current" writer field is advanced to hand the lock on.
+ */
+void arch_write_unlock(arch_rwlock_t *rwlock)
+{
+ u32 val, eq, mask;
+
+ mb(); /* guarantee anything modified under the lock is visible */
+ val = __insn_tns((int *)&rwlock->lock);
+ /* Fast path: exactly one writer ticket outstanding, so reset to 0. */
+ if (likely(val == (1 << _WR_NEXT_SHIFT))) {
+ rwlock->lock = 0;
+ return;
+ }
+ while (unlikely(val & 1)) {
+ /* Limited backoff since we are the highest-priority task. */
+ relax(4);
+ val = __insn_tns((int *)&rwlock->lock);
+ }
+ /* Bump the "current" byte by one (byte-wise add). */
+ mask = 1 << WR_CURR_SHIFT;
+ val = __insn_addb(val, mask);
+ /*
+ * Compare the "current" byte with the "next" byte (shifted up to the
+ * same position). NOTE(review): presumably __insn_mz() yields 0 when
+ * its first operand is non-zero, so the word is reset to 0 once
+ * current == next - confirm against the ISA reference.
+ */
+ eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT));
+ val = __insn_mz(eq & mask, val);
+ rwlock->lock = val;
+}
+EXPORT_SYMBOL(arch_write_unlock);
diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c
new file mode 100644
index 00000000..d6fb9581
--- /dev/null
+++ b/arch/tile/lib/spinlock_64.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+
+#include "spinlock_common.h"
+
+/*
+ * Read the spinlock value without allocating in our cache and without
+ * causing an invalidation to another cpu with a copy of the cacheline.
+ * This is important when we are spinning waiting for the lock.
+ */
+static inline u32 arch_spin_read_noalloc(void *lock)
+{
+	atomic_t *word = (atomic_t *)lock;
+
+	/*
+	 * Exchanging -1 for -1 never changes the stored value; we only
+	 * want the value the compare-and-exchange hands back.
+	 */
+	return atomic_cmpxchg(word, -1, -1);
+}
+
+/*
+ * Wait until the high bits (current) match my ticket.
+ * If we notice the overflow bit set on entry, we clear it.
+ */
+void arch_spin_lock_slow(arch_spinlock_t *lock, u32 my_ticket)
+{
+ /*
+ * Our ticket carries the overflow bit: clear it both in the lock
+ * word and in our local copy before comparing.
+ */
+ if (unlikely(my_ticket & __ARCH_SPIN_NEXT_OVERFLOW)) {
+ __insn_fetchand4(&lock->lock, ~__ARCH_SPIN_NEXT_OVERFLOW);
+ my_ticket &= ~__ARCH_SPIN_NEXT_OVERFLOW;
+ }
+
+ /* Poll the lock word, delaying in proportion to the distance left. */
+ for (;;) {
+ u32 val = arch_spin_read_noalloc(lock);
+ u32 delta = my_ticket - arch_spin_current(val);
+ if (delta == 0)
+ return;
+ relax((128 / CYCLES_PER_RELAX_LOOP) * delta);
+ }
+}
+EXPORT_SYMBOL(arch_spin_lock_slow);
+
+/*
+ * Check the lock to see if it is plausible, and try to get it with cmpxchg().
+ */
+/* Returns non-zero if the lock was acquired, 0 otherwise. */
+int arch_spin_trylock(arch_spinlock_t *lock)
+{
+ u32 val = arch_spin_read_noalloc(lock);
+ /* current != next means the lock is held (or has waiters): give up. */
+ if (unlikely(arch_spin_current(val) != arch_spin_next(val)))
+ return 0;
+ /*
+ * Try to install val + 1 (with the overflow bit masked off) in place
+ * of the value we read; we hold the lock only if nobody changed the
+ * word in between.
+ */
+ return cmpxchg(&lock->lock, val, (val + 1) & ~__ARCH_SPIN_NEXT_OVERFLOW)
+ == val;
+}
+EXPORT_SYMBOL(arch_spin_trylock);
+
+/* Spin, with bounded backoff, until the lock is observed to be free. */
+void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	u32 attempt;
+
+	for (attempt = 0; arch_spin_is_locked(lock); attempt++)
+		delay_backoff(attempt);
+}
+EXPORT_SYMBOL(arch_spin_unlock_wait);
+
+/*
+ * If the read lock fails due to a writer, we retry periodically
+ * until the value is positive and we write our incremented reader count.
+ */
+void __read_lock_failed(arch_rwlock_t *rw)
+{
+ u32 val;
+ int iterations = 0;
+ /*
+ * Back off, then retry the reader increment; stop as soon as the
+ * value returned no longer shows the lock as write-locked.
+ */
+ do {
+ delay_backoff(iterations++);
+ val = __insn_fetchaddgez4(&rw->lock, 1);
+ } while (unlikely(arch_write_val_locked(val)));
+}
+EXPORT_SYMBOL(__read_lock_failed);
+
+/*
+ * If we failed because there were readers, clear the "writer" bit
+ * so we don't block additional readers. Otherwise, there was another
+ * writer anyway, so our "fetchor" made no difference. Then wait,
+ * issuing periodic fetchor instructions, till we get the lock.
+ */
+void __write_lock_failed(arch_rwlock_t *rw, u32 val)
+{
+ int iterations = 0;
+ do {
+ /*
+ * The previous value was not write-locked, so the writer bit
+ * now set is ours: clear it again while we wait.
+ */
+ if (!arch_write_val_locked(val))
+ val = __insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT);
+ delay_backoff(iterations++);
+ /* Retry: we own the lock only if the old value was exactly 0. */
+ val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);
+ } while (val != 0);
+}
+EXPORT_SYMBOL(__write_lock_failed);
diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h
new file mode 100644
index 00000000..6ac37509
--- /dev/null
+++ b/arch/tile/lib/spinlock_common.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ * This file is included into spinlock_32.c or _64.c.
+ */
+
+/*
+ * The mfspr in relax() is 5 or 6 cycles plus 2 for loop
+ * overhead.
+ */
+#ifdef __tilegx__
+#define CYCLES_PER_RELAX_LOOP 7
+#else
+#define CYCLES_PER_RELAX_LOOP 8
+#endif
+
+/*
+ * Idle the core for CYCLES_PER_RELAX_LOOP * iterations cycles.
+ */
+static inline void
+relax(int iterations)
+{
+	/* One SPR_PASS read per requested iteration; none if <= 0. */
+	while (iterations > 0) {
+		__insn_mfspr(SPR_PASS);
+		iterations--;
+	}
+	barrier();
+}
+
+/* Perform bounded exponential backoff.*/
+/*
+ * iterations is the number of failed attempts so far; the delay
+ * doubles with each one until it reaches the cap below.
+ */
+static void delay_backoff(int iterations)
+{
+ u32 exponent, loops;
+
+ /*
+ * 2^exponent is how many times we go around the loop,
+ * which takes 8 cycles. We want to start with a 16- to 31-cycle
+ * loop, so we need to go around minimum 2 = 2^1 times, so we
+ * bias the original value up by 1.
+ */
+ exponent = iterations + 1;
+
+ /*
+ * Cap the exponent at 8, i.e. at most 256 base loops (up to 511
+ * once the random factor below is added).
+ * NOTE(review): this comment used to say the cap was 7 (128 loops,
+ * 1,024 to 2,047 cycles), which does not match the "> 8" test
+ * below; confirm which value was intended.
+ */
+ if (exponent > 8)
+ exponent = 8;
+
+ loops = 1 << exponent;
+
+ /* Add a randomness factor so two cpus never get in lock step. */
+ loops += __insn_crc32_32(stack_pointer, get_cycles_low()) &
+ (loops - 1);
+
+ relax(loops);
+}
diff --git a/arch/tile/lib/strchr_32.c b/arch/tile/lib/strchr_32.c
new file mode 100644
index 00000000..c94e6f7a
--- /dev/null
+++ b/arch/tile/lib/strchr_32.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+#undef strchr
+
+/*
+ * Return a pointer to the first occurrence of (char)c in the string s,
+ * or NULL if it does not occur before the terminating '\0'.  Searching
+ * for '\0' returns a pointer to the terminator.  The string is scanned
+ * one aligned 32-bit word at a time.
+ */
+char *strchr(const char *s, int c)
+{
+	int z, g;
+
+	/* Get an aligned pointer. */
+	const uintptr_t s_int = (uintptr_t) s;
+	const uint32_t *p = (const uint32_t *)(s_int & -4);
+
+	/* Create four copies of the byte for which we are looking. */
+	const uint32_t goal = 0x01010101 * (uint8_t) c;
+
+	/* Read the first aligned word, but force bytes before the string to
+	 * match neither zero nor goal (we make sure the high bit of each
+	 * byte is 1, and the low 7 bits are all the opposite of the goal
+	 * byte).
+	 *
+	 * The shift count is eight times the byte offset of s within its
+	 * word.  The offset is masked explicitly so the count stays below
+	 * 32: shifting by the width of the type or more is undefined in C,
+	 * so we must not rely on the hardware taking counts mod 32.
+	 */
+	const uint32_t before_mask = (1 << ((s_int & 3) << 3)) - 1;
+	uint32_t v = (*p | before_mask) ^ (goal & __insn_shrib(before_mask, 1));
+
+	uint32_t zero_matches, goal_matches;
+	while (1) {
+		/* Look for a terminating '\0'. */
+		zero_matches = __insn_seqb(v, 0);
+
+		/* Look for the goal byte. */
+		goal_matches = __insn_seqb(v, goal);
+
+		if (__builtin_expect(zero_matches | goal_matches, 0))
+			break;
+
+		v = *++p;
+	}
+
+	z = __insn_ctz(zero_matches);
+	g = __insn_ctz(goal_matches);
+
+	/* If we found c before '\0' we got a match. Note that if c == '\0'
+	 * then g == z, and we correctly return the address of the '\0'
+	 * rather than NULL.
+	 */
+	return (g <= z) ? ((char *)p) + (g >> 3) : NULL;
+}
+EXPORT_SYMBOL(strchr);
diff --git a/arch/tile/lib/strchr_64.c b/arch/tile/lib/strchr_64.c
new file mode 100644
index 00000000..617a9273
--- /dev/null
+++ b/arch/tile/lib/strchr_64.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+#undef strchr
+
+/*
+ * Return a pointer to the first occurrence of (char)c in the string s,
+ * or NULL if it does not occur before the terminating '\0'.  Searching
+ * for '\0' returns a pointer to the terminator.  The string is scanned
+ * one aligned 64-bit word at a time.
+ */
+char *strchr(const char *s, int c)
+{
+	int z, g;
+
+	/* Get an aligned pointer. */
+	const uintptr_t s_int = (uintptr_t) s;
+	const uint64_t *p = (const uint64_t *)(s_int & -8);
+
+	/* Create eight copies of the byte for which we are looking. */
+	const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
+
+	/* Read the first aligned word, but force bytes before the string to
+	 * match neither zero nor goal (we make sure the high bit of each
+	 * byte is 1, and the low 7 bits are all the opposite of the goal
+	 * byte).
+	 *
+	 * The shift count is eight times the byte offset of s within its
+	 * word.  The offset is masked explicitly so the count stays below
+	 * 64: shifting by the width of the type or more is undefined in C,
+	 * so we must not rely on the hardware taking counts mod 64.
+	 */
+	const uint64_t before_mask = (1ULL << ((s_int & 7) << 3)) - 1;
+	uint64_t v = (*p | before_mask) ^
+		(goal & __insn_v1shrsi(before_mask, 1));
+
+	uint64_t zero_matches, goal_matches;
+	while (1) {
+		/* Look for a terminating '\0'. */
+		zero_matches = __insn_v1cmpeqi(v, 0);
+
+		/* Look for the goal byte. */
+		goal_matches = __insn_v1cmpeq(v, goal);
+
+		if (__builtin_expect((zero_matches | goal_matches) != 0, 0))
+			break;
+
+		v = *++p;
+	}
+
+	z = __insn_ctz(zero_matches);
+	g = __insn_ctz(goal_matches);
+
+	/* If we found c before '\0' we got a match. Note that if c == '\0'
+	 * then g == z, and we correctly return the address of the '\0'
+	 * rather than NULL.
+	 */
+	return (g <= z) ? ((char *)p) + (g >> 3) : NULL;
+}
+EXPORT_SYMBOL(strchr);
diff --git a/arch/tile/lib/strlen_32.c b/arch/tile/lib/strlen_32.c
new file mode 100644
index 00000000..4974292a
--- /dev/null
+++ b/arch/tile/lib/strlen_32.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+#undef strlen
+
+/*
+ * Return the number of bytes in s before the terminating '\0',
+ * scanning one aligned 32-bit word at a time.
+ */
+size_t strlen(const char *s)
+{
+	/* Get an aligned pointer. */
+	const uintptr_t s_int = (uintptr_t) s;
+	const uint32_t *p = (const uint32_t *)(s_int & -4);
+
+	/* Read the first word, but force bytes before the string to be nonzero.
+	 * The shift count is eight times the byte offset of s within its word;
+	 * the offset is masked explicitly so the count stays below 32, since
+	 * shifting by the width of the type or more is undefined in C.
+	 */
+	uint32_t v = *p | ((1 << ((s_int & 3) << 3)) - 1);
+
+	/* Advance a word at a time until some byte compares equal to zero. */
+	uint32_t bits;
+	while ((bits = __insn_seqb(v, 0)) == 0)
+		v = *++p;
+
+	/* ctz >> 3 is the index of the first zero byte within the word. */
+	return ((const char *)p) + (__insn_ctz(bits) >> 3) - s;
+}
+EXPORT_SYMBOL(strlen);
diff --git a/arch/tile/lib/strlen_64.c b/arch/tile/lib/strlen_64.c
new file mode 100644
index 00000000..1c92d462
--- /dev/null
+++ b/arch/tile/lib/strlen_64.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+#undef strlen
+
+/*
+ * Return the number of bytes in s before the terminating '\0',
+ * scanning one aligned 64-bit word at a time.
+ */
+size_t strlen(const char *s)
+{
+	/* Get an aligned pointer. */
+	const uintptr_t s_int = (uintptr_t) s;
+	const uint64_t *p = (const uint64_t *)(s_int & -8);
+
+	/* Read the first word, but force bytes before the string to be nonzero.
+	 * The shift count is eight times the byte offset of s within its word;
+	 * the offset is masked explicitly so the count stays below 64, since
+	 * shifting by the width of the type or more is undefined in C.
+	 */
+	uint64_t v = *p | ((1ULL << ((s_int & 7) << 3)) - 1);
+
+	/* Advance a word at a time until some byte compares equal to zero. */
+	uint64_t bits;
+	while ((bits = __insn_v1cmpeqi(v, 0)) == 0)
+		v = *++p;
+
+	/* ctz >> 3 is the index of the first zero byte within the word. */
+	return ((const char *)p) + (__insn_ctz(bits) >> 3) - s;
+}
+EXPORT_SYMBOL(strlen);
diff --git a/arch/tile/lib/uaccess.c b/arch/tile/lib/uaccess.c
new file mode 100644
index 00000000..f8d398c9
--- /dev/null
+++ b/arch/tile/lib/uaccess.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/module.h>
+
+/*
+ * Check the range [addr, addr + size) against the current thread's
+ * address limit.  Returns 0 when the range is acceptable and 1 when
+ * it is not.
+ */
+int __range_ok(unsigned long addr, unsigned long size)
+{
+	const unsigned long limit = current_thread_info()->addr_limit.seg;
+
+	/* Fits entirely below the limit (written so it cannot overflow). */
+	if (addr < limit && size <= limit - addr)
+		return 0;
+
+	/* Otherwise it is only acceptable if the arch says it is mappable. */
+	return !is_arch_mappable_range(addr, size);
+}
+EXPORT_SYMBOL(__range_ok);
+
+#ifdef CONFIG_DEBUG_COPY_FROM_USER
+/*
+ * Emit an unconditional kernel warning reporting a buffer overflow.
+ * Only built when CONFIG_DEBUG_COPY_FROM_USER is set.
+ */
+void copy_from_user_overflow(void)
+{
+ WARN(1, "Buffer overflow detected!\n");
+}
+EXPORT_SYMBOL(copy_from_user_overflow);
+#endif
diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S
new file mode 100644
index 00000000..979f76d8
--- /dev/null
+++ b/arch/tile/lib/usercopy_32.S
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/errno.h>
+#include <asm/cache.h>
+#include <arch/chip.h>
+
+/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
+
+ .pushsection .fixup,"ax"
+
+/*
+ * Fixup target for faulting __get_user_N loads: zero the value
+ * registers r0/r1, put -EFAULT in r2 and return to the caller.
+ */
+get_user_fault:
+ { move r0, zero; move r1, zero }
+ { movei r2, -EFAULT; jrp lr }
+ ENDPROC(get_user_fault)
+
+/* Fixup target for faulting __put_user_N stores: return -EFAULT in r0. */
+put_user_fault:
+ { movei r0, -EFAULT; jrp lr }
+ ENDPROC(put_user_fault)
+
+ .popsection
+
+/*
+ * __get_user_N functions take a pointer in r0, and return 0 in r2
+ * on success, with the value in r0; or else -EFAULT in r2.
+ */
+/*
+ * Each expansion is a single load bundle that also clears r1 and r2,
+ * plus an __ex_table entry that redirects a fault on that bundle
+ * to get_user_fault.
+ */
+#define __get_user_N(bytes, LOAD) \
+ STD_ENTRY(__get_user_##bytes); \
+1: { LOAD r0, r0; move r1, zero; move r2, zero }; \
+ jrp lr; \
+ STD_ENDPROC(__get_user_##bytes); \
+ .pushsection __ex_table,"a"; \
+ .word 1b, get_user_fault; \
+ .popsection
+
+/* 1-, 2- and 4-byte variants. */
+__get_user_N(1, lb_u)
+__get_user_N(2, lh_u)
+__get_user_N(4, lw)
+
+/*
+ * __get_user_8 takes a pointer in r0, and returns 0 in r2
+ * on success, with the value in r0/r1; or else -EFAULT in r2.
+ */
+ STD_ENTRY(__get_user_8);
+ /*
+ * Load the first word into r0 while forming the address of the second
+ * word in r1. NOTE(review): this relies on the addi seeing the
+ * original pointer in r0 within the same bundle - confirm with the ISA.
+ */
+1: { lw r0, r0; addi r1, r0, 4 };
+ /* Load the second word into r1 and report success in r2. */
+2: { lw r1, r1; move r2, zero };
+ jrp lr;
+ STD_ENDPROC(__get_user_8);
+ /* Either load may fault; both are redirected to get_user_fault. */
+ .pushsection __ex_table,"a";
+ .word 1b, get_user_fault;
+ .word 2b, get_user_fault;
+ .popsection
+
+/*
+ * __put_user_N functions take a value in r0 and a pointer in r1,
+ * and return 0 in r0 on success or -EFAULT on failure.
+ */
+/*
+ * Each expansion is a single store bundle that also clears r0, plus
+ * an __ex_table entry that redirects a fault on that bundle to
+ * put_user_fault.
+ */
+#define __put_user_N(bytes, STORE) \
+ STD_ENTRY(__put_user_##bytes); \
+1: { STORE r1, r0; move r0, zero }; \
+ jrp lr; \
+ STD_ENDPROC(__put_user_##bytes); \
+ .pushsection __ex_table,"a"; \
+ .word 1b, put_user_fault; \
+ .popsection
+
+/* 1-, 2- and 4-byte variants. */
+__put_user_N(1, sb)
+__put_user_N(2, sh)
+__put_user_N(4, sw)
+
+/*
+ * __put_user_8 takes a value in r0/r1 and a pointer in r2,
+ * and returns 0 in r0 on success or -EFAULT on failure.
+ */
+STD_ENTRY(__put_user_8)
+ /* Store r0 at the pointer, then step the pointer to the next word. */
+1: { sw r2, r0; addi r2, r2, 4 }
+ /* Store r1 at pointer + 4 and report success in r0. */
+2: { sw r2, r1; move r0, zero }
+ jrp lr
+ STD_ENDPROC(__put_user_8)
+ /* Either store may fault; both are redirected to put_user_fault. */
+ .pushsection __ex_table,"a"
+ .word 1b, put_user_fault
+ .word 2b, put_user_fault
+ .popsection
+
+
+/*
+ * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
+ * It returns the length, including the terminating NUL, or zero on exception.
+ * If length is greater than the bound, returns one plus the bound.
+ */
+STD_ENTRY(strnlen_user_asm)
+ /* r3 = start - 1, so that (end - r3) counts the terminating NUL. */
+ { bz r1, 2f; addi r3, r0, -1 } /* bias down to include NUL */
+ /* Load one byte and use up one unit of the bound. */
+1: { lb_u r4, r0; addi r1, r1, -1 }
+ /* Found the NUL: r0 still points at it. */
+ bz r4, 2f
+ /* Advance; keep going while the bound has not been exhausted. */
+ { bnzt r1, 1b; addi r0, r0, 1 }
+2: { sub r0, r0, r3; jrp lr }
+ STD_ENDPROC(strnlen_user_asm)
+ .pushsection .fixup,"ax"
+/* A fault on the load at 1: above returns 0. */
+strnlen_user_fault:
+ { move r0, zero; jrp lr }
+ ENDPROC(strnlen_user_fault)
+ .section __ex_table,"a"
+ .word 1b, strnlen_user_fault
+ .popsection
+
+/*
+ * strncpy_from_user_asm takes the kernel target pointer in r0,
+ * the userspace source pointer in r1, and the length bound (including
+ * the trailing NUL) in r2. On success, it returns the string length
+ * (not including the trailing NUL), or -EFAULT on failure.
+ */
+STD_ENTRY(strncpy_from_user_asm)
+	/* r3 remembers the start of the target; a zero bound copies nothing. */
+	{ bz r2, 3f; move r3, r0 }
+	/* Load one user byte, advance the source, use up one unit of bound. */
+1:	{ lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
+	{ sb r0, r4; addi r0, r0, 1 }
+	/*
+	 * Test for the NUL before testing the bound.  Otherwise a string
+	 * whose NUL lands exactly on the last permitted byte would be
+	 * reported with the NUL included in its length, which the caller
+	 * cannot tell apart from a truncated copy.
+	 */
+	bz r4, 2f
+	bnzt r2, 1b
+	/* Bound exhausted without seeing a NUL: return the bytes copied. */
+3:	{ sub r0, r0, r3; jrp lr }
+2:	addi r0, r0, -1		/* don't count the trailing NUL */
+	{ sub r0, r0, r3; jrp lr }
+	STD_ENDPROC(strncpy_from_user_asm)
+	.pushsection .fixup,"ax"
+/* A fault on the user load at 1: above returns -EFAULT. */
+strncpy_from_user_fault:
+	{ movei r0, -EFAULT; jrp lr }
+	ENDPROC(strncpy_from_user_fault)
+	.section __ex_table,"a"
+	.word 1b, strncpy_from_user_fault
+	.popsection
+
+/*
+ * clear_user_asm takes the user target address in r0 and the
+ * number of bytes to zero in r1.
+ * It returns the number of uncopiable bytes (hopefully zero) in r0.
+ * Note that we don't use a separate .fixup section here since we fall
+ * through into the "fixup" code as the last straight-line bundle anyway.
+ */
+STD_ENTRY(clear_user_asm)
+ /* Nothing to do for a zero count; otherwise merge address and count. */
+ { bz r1, 2f; or r2, r0, r1 }
+ /* If both address and count are multiples of 4, use the word loop. */
+ andi r2, r2, 3
+ bzt r2, .Lclear_aligned_user_asm
+ /* Byte loop: zero one byte, advance, count down. */
+1: { sb r0, zero; addi r0, r0, 1; addi r1, r1, -1 }
+ bnzt r1, 1b
+ /* Return the remaining count; also the fault landing point. */
+2: { move r0, r1; jrp lr }
+ .pushsection __ex_table,"a"
+ .word 1b, 2b
+ .popsection
+
+.Lclear_aligned_user_asm:
+ /* Word loop: zero four bytes at a time. */
+1: { sw r0, zero; addi r0, r0, 4; addi r1, r1, -4 }
+ bnzt r1, 1b
+ /* Return the remaining count; also the fault landing point. */
+2: { move r0, r1; jrp lr }
+ STD_ENDPROC(clear_user_asm)
+ .pushsection __ex_table,"a"
+ .word 1b, 2b
+ .popsection
+
+/*
+ * flush_user_asm takes the user target address in r0 and the
+ * number of bytes to flush in r1.
+ * It returns the number of unflushable bytes (hopefully zero) in r0.
+ */
+STD_ENTRY(flush_user_asm)
+ bz r1, 2f
+ /* r1 = end address; r2 becomes -L2_CACHE_BYTES, used as a line mask. */
+ { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
+ { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
+ /* Round the start down and the end up to cache-line boundaries. */
+ { and r0, r0, r2; and r1, r1, r2 }
+ /* r1 = number of bytes covered by the rounded range. */
+ { sub r1, r1, r0 }
+ /* Flush one stride per iteration until the byte count reaches zero. */
+1: { flush r0; addi r1, r1, -CHIP_FLUSH_STRIDE() }
+ { addi r0, r0, CHIP_FLUSH_STRIDE(); bnzt r1, 1b }
+ /* Return the remaining count; also the fault landing point. */
+2: { move r0, r1; jrp lr }
+ STD_ENDPROC(flush_user_asm)
+ .pushsection __ex_table,"a"
+ .word 1b, 2b
+ .popsection
+
+/*
+ * inv_user_asm takes the user target address in r0 and the
+ * number of bytes to invalidate in r1.
+ * It returns the number of not inv'able bytes (hopefully zero) in r0.
+ */
+STD_ENTRY(inv_user_asm)
+ bz r1, 2f
+ /* r1 = end address; r2 becomes -L2_CACHE_BYTES, used as a line mask. */
+ { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
+ { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
+ /* Round the start down and the end up to cache-line boundaries. */
+ { and r0, r0, r2; and r1, r1, r2 }
+ /* r1 = number of bytes covered by the rounded range. */
+ { sub r1, r1, r0 }
+ /* Invalidate one stride per iteration until the count reaches zero. */
+1: { inv r0; addi r1, r1, -CHIP_INV_STRIDE() }
+ { addi r0, r0, CHIP_INV_STRIDE(); bnzt r1, 1b }
+ /* Return the remaining count; also the fault landing point. */
+2: { move r0, r1; jrp lr }
+ STD_ENDPROC(inv_user_asm)
+ .pushsection __ex_table,"a"
+ .word 1b, 2b
+ .popsection
+
+/*
+ * finv_user_asm takes the user target address in r0 and the
+ * number of bytes to flush-invalidate in r1.
+ * It returns the number of not finv'able bytes (hopefully zero) in r0.
+ */
+STD_ENTRY(finv_user_asm)
+ bz r1, 2f
+ /* r1 = end address; r2 becomes -L2_CACHE_BYTES, used as a line mask. */
+ { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
+ { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
+ /* Round the start down and the end up to cache-line boundaries. */
+ { and r0, r0, r2; and r1, r1, r2 }
+ /* r1 = number of bytes covered by the rounded range. */
+ { sub r1, r1, r0 }
+ /* Flush-invalidate one stride per iteration until the count is zero. */
+1: { finv r0; addi r1, r1, -CHIP_FINV_STRIDE() }
+ { addi r0, r0, CHIP_FINV_STRIDE(); bnzt r1, 1b }
+ /* Return the remaining count; also the fault landing point. */
+2: { move r0, r1; jrp lr }
+ STD_ENDPROC(finv_user_asm)
+ .pushsection __ex_table,"a"
+ .word 1b, 2b
+ .popsection
diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S
new file mode 100644
index 00000000..2ff44f87
--- /dev/null
+++ b/arch/tile/lib/usercopy_64.S
@@ -0,0 +1,196 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/errno.h>
+#include <asm/cache.h>
+#include <arch/chip.h>
+
+/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
+
+ .pushsection .fixup,"ax"
+
+/*
+ * Fixup target named by the __get_user_N exception table entries:
+ * zero the value register (r0), put -EFAULT in r1 and return.
+ */
+get_user_fault:
+ { movei r1, -EFAULT; move r0, zero }
+ jrp lr
+ ENDPROC(get_user_fault)
+
+/*
+ * Fixup target named by the __put_user_N exception table entries:
+ * put -EFAULT in r0 and return.
+ */
+put_user_fault:
+ { movei r0, -EFAULT; jrp lr }
+ ENDPROC(put_user_fault)
+
+ .popsection
+
+/*
+ * __get_user_N functions take a pointer in r0, and return 0 in r1
+ * on success, with the value in r0; or else -EFAULT in r1.
+ *
+ * Each use of the macro emits one function, __get_user_<bytes>, built
+ * around a single LOAD at label 1. That label is listed in __ex_table
+ * with get_user_fault as its fixup, so a fault on the user pointer
+ * resumes there rather than in the function body.
+ */
+#define __get_user_N(bytes, LOAD) \
+ STD_ENTRY(__get_user_##bytes); \
+1: { LOAD r0, r0; move r1, zero }; \
+ jrp lr; \
+ STD_ENDPROC(__get_user_##bytes); \
+ .pushsection __ex_table,"a"; \
+ .quad 1b, get_user_fault; \
+ .popsection
+
+__get_user_N(1, ld1u)
+__get_user_N(2, ld2u)
+__get_user_N(4, ld4u)
+__get_user_N(8, ld)
+
+/*
+ * __put_user_N functions take a value in r0 and a pointer in r1,
+ * and return 0 in r0 on success or -EFAULT on failure.
+ *
+ * Each use of the macro emits one function, __put_user_<bytes>, built
+ * around a single STORE at label 1. That label is listed in __ex_table
+ * with put_user_fault as its fixup, so a fault on the user pointer
+ * resumes there rather than in the function body.
+ */
+#define __put_user_N(bytes, STORE) \
+ STD_ENTRY(__put_user_##bytes); \
+1: { STORE r1, r0; move r0, zero }; \
+ jrp lr; \
+ STD_ENDPROC(__put_user_##bytes); \
+ .pushsection __ex_table,"a"; \
+ .quad 1b, put_user_fault; \
+ .popsection
+
+__put_user_N(1, st1)
+__put_user_N(2, st2)
+__put_user_N(4, st4)
+__put_user_N(8, st)
+
+/*
+ * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
+ * It returns the length, including the terminating NUL, or zero on exception.
+ * If length is greater than the bound, returns one plus the bound.
+ *
+ * r3 keeps (start - 1), so the final "r0 - r3" counts the byte r0 points
+ * at. Only the load at label 1 is covered by the exception table; its
+ * fixup, strnlen_user_fault, returns zero.
+ */
+STD_ENTRY(strnlen_user_asm)
+ { beqz r1, 2f; addi r3, r0, -1 } /* bias down to include NUL */
+1: { ld1u r4, r0; addi r1, r1, -1 } /* read one byte, use up one of the bound */
+ beqz r4, 2f /* that byte was the NUL */
+ { bnezt r1, 1b; addi r0, r0, 1 } /* step forward; loop while bound remains */
+2: { sub r0, r0, r3; jrp lr } /* r0 - (start - 1) */
+ STD_ENDPROC(strnlen_user_asm)
+ .pushsection .fixup,"ax"
+strnlen_user_fault:
+ { move r0, zero; jrp lr }
+ ENDPROC(strnlen_user_fault)
+ .section __ex_table,"a"
+ .quad 1b, strnlen_user_fault
+ .popsection
+
+/*
+ * strncpy_from_user_asm takes the kernel target pointer in r0,
+ * the userspace source pointer in r1, and the length bound (including
+ * the trailing NUL) in r2. On success, it returns the string length
+ * (not including the trailing NUL), or -EFAULT on failure.
+ * If no NUL is met within the bound, the number of bytes copied (that
+ * is, the bound) is returned instead.
+ *
+ * The NUL test comes before the bound test. With the tests the other
+ * way round, a string whose NUL is the last byte the bound permits
+ * was reported as "bound" bytes long, i.e. one too many, which a caller
+ * cannot tell apart from a truncated copy.
+ *
+ * Only the user-space load at label 1 is covered by the exception
+ * table; its fixup, strncpy_from_user_fault, returns -EFAULT.
+ */
+STD_ENTRY(strncpy_from_user_asm)
+ { beqz r2, 2f; move r3, r0 } /* r3 = start of target */
+1: { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
+ { st1 r0, r4; addi r0, r0, 1 }
+ beqz r4, 3f /* just stored the NUL: done */
+ bnezt r2, 1b /* more room: copy the next byte */
+2: { sub r0, r0, r3; jrp lr } /* bound used up: bytes copied */
+3: addi r0, r0, -1 /* don't count the trailing NUL */
+ { sub r0, r0, r3; jrp lr }
+ STD_ENDPROC(strncpy_from_user_asm)
+ .pushsection .fixup,"ax"
+strncpy_from_user_fault:
+ { movei r0, -EFAULT; jrp lr }
+ ENDPROC(strncpy_from_user_fault)
+ .section __ex_table,"a"
+ .quad 1b, strncpy_from_user_fault
+ .popsection
+
+/*
+ * clear_user_asm takes the user target address in r0 and the
+ * number of bytes to zero in r1.
+ * It returns the number of uncopiable bytes (hopefully zero) in r0.
+ * Note that we don't use a separate .fixup section here since we fall
+ * through into the "fixup" code as the last straight-line bundle anyway.
+ *
+ * Two loops are provided: a byte-at-a-time loop, and an 8-byte loop
+ * that is used only when the low three bits of both the address and
+ * the length are clear. Each loop has its own local labels 1/2 and
+ * its own exception table entry placed right after it.
+ */
+STD_ENTRY(clear_user_asm)
+ { beqz r1, 2f; or r2, r0, r1 } /* zero length: return at once */
+ andi r2, r2, 7 /* low bits of (address | length) */
+ beqzt r2, .Lclear_aligned_user_asm
+1: { st1 r0, zero; addi r0, r0, 1; addi r1, r1, -1 }
+ bnezt r1, 1b
+2: { move r0, r1; jrp lr } /* return the remaining count */
+ .pushsection __ex_table,"a"
+ .quad 1b, 2b /* fault in the byte loop resumes at its 2: */
+ .popsection
+
+.Lclear_aligned_user_asm:
+1: { st r0, zero; addi r0, r0, 8; addi r1, r1, -8 }
+ bnezt r1, 1b
+2: { move r0, r1; jrp lr } /* return the remaining count */
+ STD_ENDPROC(clear_user_asm)
+ .pushsection __ex_table,"a"
+ .quad 1b, 2b /* fault in the 8-byte loop resumes at its 2: */
+ .popsection
+
+/*
+ * flush_user_asm takes the user target address in r0 and the
+ * number of bytes to flush in r1.
+ * It returns the number of unflushable bytes (hopefully zero) in r0.
+ *
+ * Before the loop the range is widened to whole L2_CACHE_BYTES units:
+ * the start is rounded down and the end is rounded up (this treats
+ * -L2_CACHE_BYTES as a mask, i.e. presumes a power-of-two size).
+ * The exception table entry sends a fault at label 1 to label 2, which
+ * returns whatever count is still held in r1.
+ * NOTE(review): the loop only ends when r1 reaches exactly zero, so it
+ * presumably relies on CHIP_FLUSH_STRIDE() dividing the rounded length;
+ * confirm against the chip headers.
+ */
+STD_ENTRY(flush_user_asm)
+ beqz r1, 2f /* zero length: return at once */
+ { movei r2, L2_CACHE_BYTES; add r1, r0, r1 } /* r1 = end address */
+ { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 } /* r2 = -L2_CACHE_BYTES */
+ { and r0, r0, r2; and r1, r1, r2 } /* round start down, end up */
+ { sub r1, r1, r0 } /* r1 = rounded byte count */
+1: { flush r0; addi r1, r1, -CHIP_FLUSH_STRIDE() }
+ { addi r0, r0, CHIP_FLUSH_STRIDE(); bnezt r1, 1b }
+2: { move r0, r1; jrp lr } /* return the remaining count */
+ STD_ENDPROC(flush_user_asm)
+ .pushsection __ex_table,"a"
+ .quad 1b, 2b /* fault at 1: resumes at 2: */
+ .popsection
+
+/*
+ * inv_user_asm takes the user target address in r0 and the
+ * number of bytes to invalidate in r1.
+ * It returns the number of not inv'able bytes (hopefully zero) in r0.
+ *
+ * Before the loop the range is widened to whole L2_CACHE_BYTES units:
+ * the start is rounded down and the end is rounded up (this treats
+ * -L2_CACHE_BYTES as a mask, i.e. presumes a power-of-two size).
+ * The exception table entry sends a fault at label 1 to label 2, which
+ * returns whatever count is still held in r1.
+ * NOTE(review): the loop only ends when r1 reaches exactly zero, so it
+ * presumably relies on CHIP_INV_STRIDE() dividing the rounded length;
+ * confirm against the chip headers.
+ */
+STD_ENTRY(inv_user_asm)
+ beqz r1, 2f /* zero length: return at once */
+ { movei r2, L2_CACHE_BYTES; add r1, r0, r1 } /* r1 = end address */
+ { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 } /* r2 = -L2_CACHE_BYTES */
+ { and r0, r0, r2; and r1, r1, r2 } /* round start down, end up */
+ { sub r1, r1, r0 } /* r1 = rounded byte count */
+1: { inv r0; addi r1, r1, -CHIP_INV_STRIDE() }
+ { addi r0, r0, CHIP_INV_STRIDE(); bnezt r1, 1b }
+2: { move r0, r1; jrp lr } /* return the remaining count */
+ STD_ENDPROC(inv_user_asm)
+ .pushsection __ex_table,"a"
+ .quad 1b, 2b /* fault at 1: resumes at 2: */
+ .popsection
+
+/*
+ * finv_user_asm takes the user target address in r0 and the
+ * number of bytes to flush-invalidate in r1.
+ * It returns the number of not finv'able bytes (hopefully zero) in r0.
+ *
+ * Before the loop the range is widened to whole L2_CACHE_BYTES units:
+ * the start is rounded down and the end is rounded up (this treats
+ * -L2_CACHE_BYTES as a mask, i.e. presumes a power-of-two size).
+ * The exception table entry sends a fault at label 1 to label 2, which
+ * returns whatever count is still held in r1.
+ * NOTE(review): the loop only ends when r1 reaches exactly zero, so it
+ * presumably relies on CHIP_FINV_STRIDE() dividing the rounded length;
+ * confirm against the chip headers.
+ */
+STD_ENTRY(finv_user_asm)
+ beqz r1, 2f /* zero length: return at once */
+ { movei r2, L2_CACHE_BYTES; add r1, r0, r1 } /* r1 = end address */
+ { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 } /* r2 = -L2_CACHE_BYTES */
+ { and r0, r0, r2; and r1, r1, r2 } /* round start down, end up */
+ { sub r1, r1, r0 } /* r1 = rounded byte count */
+1: { finv r0; addi r1, r1, -CHIP_FINV_STRIDE() }
+ { addi r0, r0, CHIP_FINV_STRIDE(); bnezt r1, 1b }
+2: { move r0, r1; jrp lr } /* return the remaining count */
+ STD_ENDPROC(finv_user_asm)
+ .pushsection __ex_table,"a"
+ .quad 1b, 2b /* fault at 1: resumes at 2: */
+ .popsection
diff --git a/arch/tile/mm/Makefile b/arch/tile/mm/Makefile
new file mode 100644
index 00000000..e252aedd
--- /dev/null
+++ b/arch/tile/mm/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for the linux tile-specific parts of the memory manager.
+#
+
+obj-y := init.o pgtable.o fault.o extable.o elf.o \
+ mmap.o homecache.o migrate_$(BITS).o
+
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_HIGHMEM) += highmem.o
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
new file mode 100644
index 00000000..758b6038
--- /dev/null
+++ b/arch/tile/mm/elf.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/binfmts.h>
+#include <linux/compat.h>
+#include <linux/mman.h>
+#include <linux/elf.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/sections.h>
+#include <arch/sim_def.h>
+
+/*
+ * Tell a running simulator, if any, that an exec just occurred.
+ * The name is handed over one character per SPR_SIM_CONTROL write;
+ * the terminating NUL is written as well, and ends the sequence.
+ */
+static void sim_notify_exec(const char *binary_name)
+{
+ const char *cursor = binary_name;
+
+ for (;;) {
+ unsigned char ch = *cursor++;
+
+ __insn_mtspr(SPR_SIM_CONTROL,
+ (SIM_CONTROL_OS_EXEC
+ | (ch << _SIM_CONTROL_OPERATOR_BITS)));
+ if (ch == 0)
+ break;
+ }
+}
+
+/*
+ * Find the first mapping of current->mm that is flagged VM_EXECUTABLE
+ * and is backed by a file, and report that file's path to the simulator.
+ * Returns 1 if a path was reported, or 0 if no such mapping exists, the
+ * scratch page could not be allocated, or d_path() failed.
+ */
+static int notify_exec(void)
+{
+ struct vm_area_struct *exe_vma;
+ char *scratch;
+ char *name;
+ int notified = 0;
+
+ for (exe_vma = current->mm->mmap; exe_vma; exe_vma = exe_vma->vm_next) {
+ if (exe_vma->vm_file && (exe_vma->vm_flags & VM_EXECUTABLE))
+ break;
+ }
+ if (!exe_vma)
+ return 0;
+
+ /* d_path() needs a buffer to build the name in. */
+ scratch = (char *) __get_free_page(GFP_KERNEL);
+ if (!scratch)
+ return 0;
+
+ name = d_path(&exe_vma->vm_file->f_path, scratch, PAGE_SIZE);
+ if (!IS_ERR(name)) {
+ sim_notify_exec(name);
+ notified = 1;
+ }
+ free_page((unsigned long)scratch);
+
+ return notified;
+}
+
+/*
+ * Tell a running simulator, if any, that we loaded an interpreter.
+ * The load address is sent one byte per SPR_SIM_CONTROL write,
+ * starting with the least significant byte.
+ */
+static void sim_notify_interp(unsigned long load_addr)
+{
+ unsigned long remaining = load_addr;
+ size_t left;
+
+ for (left = sizeof(load_addr); left != 0; left--) {
+ unsigned char low = remaining;
+
+ __insn_mtspr(SPR_SIM_CONTROL,
+ (SIM_CONTROL_OS_INTERP
+ | (low << _SIM_CONTROL_OPERATOR_BITS)));
+ remaining >>= 8;
+ }
+}
+
+
+/* Kernel address of page used to map read-only kernel data into userspace. */
+static void *vdso_page;
+
+/* One-entry array used for install_special_mapping. */
+static struct page *vdso_pages[1];
+
+/*
+ * Boot-time initcall: allocate the zeroed vdso page, copy the code that
+ * lies between __rt_sigreturn and __rt_sigreturn_end into it, and record
+ * its struct page in vdso_pages[] for arch_setup_additional_pages().
+ *
+ * The allocation is checked, so that a failure stops the boot with a
+ * clear message instead of a memcpy() through a NULL pointer.
+ */
+static int __init vdso_setup(void)
+{
+ vdso_page = (void *)get_zeroed_page(GFP_ATOMIC);
+ if (!vdso_page)
+ panic("Unable to allocate the vdso page");
+ memcpy(vdso_page, __rt_sigreturn, __rt_sigreturn_end - __rt_sigreturn);
+ vdso_pages[0] = virt_to_page(vdso_page);
+ return 0;
+}
+device_initcall(vdso_setup);
+
+/*
+ * Name the architecture-specific mappings: "[vdso]" for the mapping
+ * whose private data is vdso_pages, and (except on tilegx) "[intrpt]"
+ * for the mapping that starts at MEM_USER_INTRPT.
+ * Returns NULL for every other mapping.
+ */
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+ const char *label = NULL;
+
+ if (vma->vm_private_data == vdso_pages)
+ label = "[vdso]";
+#ifndef __tilegx__
+ else if (vma->vm_start == MEM_USER_INTRPT)
+ label = "[intrpt]";
+#endif
+ return label;
+}
+
+/*
+ * Add the architecture's extra mappings to current->mm: the one-page
+ * vdso at VDSO_BASE and, except on tilegx, the user-interrupt area at
+ * MEM_USER_INTRPT. The simulator is told about the exec first.
+ *
+ * bprm: only bprm->filename is used, as the fallback name for the
+ * simulator notification.
+ * executable_stack: not used by this implementation.
+ *
+ * Returns 0 on success, otherwise the error from whichever mapping
+ * step failed. Both mappings are made with mm->mmap_sem held for write.
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm,
+ int executable_stack)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long vdso_base;
+ int retval = 0;
+
+ /*
+ * Notify the simulator that an exec just occurred.
+ * If we can't find the filename of the mapping, just use
+ * whatever was passed as the linux_binprm filename.
+ */
+ if (!notify_exec())
+ sim_notify_exec(bprm->filename);
+
+ down_write(&mm->mmap_sem);
+
+ /*
+ * MAYWRITE to allow gdb to COW and set breakpoints
+ */
+ vdso_base = VDSO_BASE;
+ retval = install_special_mapping(mm, vdso_base, PAGE_SIZE,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ vdso_pages);
+
+#ifndef __tilegx__
+ /*
+ * Set up a user-interrupt mapping here; the user can't
+ * create one themselves since it is above TASK_SIZE.
+ * We make it unwritable by default, so the model for adding
+ * interrupt vectors always involves an mprotect.
+ */
+ if (!retval) {
+ unsigned long addr = MEM_USER_INTRPT;
+ addr = mmap_region(NULL, addr, INTRPT_SIZE,
+ MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0);
+ /* A value in the top page of the address space is treated as an error. */
+ if (addr > (unsigned long) -PAGE_SIZE)
+ retval = (int) addr;
+ }
+#endif
+
+ up_write(&mm->mmap_sem);
+
+ return retval;
+}
+
+
+/*
+ * Prepare the register state for a freshly loaded ELF image: every
+ * register in *regs is cleared, and the interpreter's load address is
+ * passed on to the simulator.
+ */
+void elf_plat_init(struct pt_regs *regs, unsigned long load_addr)
+{
+ /* Zero all registers. */
+ memset(regs, 0, sizeof(*regs));
+
+ /* Report the interpreter's load address. */
+ sim_notify_interp(load_addr);
+}
diff --git a/arch/tile/mm/extable.c b/arch/tile/mm/extable.c
new file mode 100644
index 00000000..4fb0acb9
--- /dev/null
+++ b/arch/tile/mm/extable.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+
+/*
+ * If the faulting pc has an exception table entry, redirect the saved
+ * pc to that entry's fixup address.
+ * Returns 1 when a fixup was applied, 0 when the pc is not in the table.
+ */
+int fixup_exception(struct pt_regs *regs)
+{
+ const struct exception_table_entry *entry;
+
+ entry = search_exception_tables(regs->pc);
+ if (!entry)
+ return 0;
+
+ regs->pc = entry->fixup;
+ return 1;
+}
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
new file mode 100644
index 00000000..22e58f51
--- /dev/null
+++ b/arch/tile/mm/fault.c
@@ -0,0 +1,883 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * From i386 code copyright (C) 1995 Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h> /* For unblank_screen() */
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include <linux/hugetlb.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+
+#include <asm/pgalloc.h>
+#include <asm/sections.h>
+#include <asm/traps.h>
+#include <asm/syscalls.h>
+
+#include <arch/interrupts.h>
+
+/*
+ * Build a siginfo for a fault and force the signal on tsk.
+ * A task with pid below 2 (the idle task or init) is never signalled;
+ * the kernel panics instead. "type" is only passed to
+ * trace_unhandled_signal(), which is called before the signal is forced.
+ */
+static noinline void force_sig_info_fault(const char *type, int si_signo,
+ int si_code, unsigned long address,
+ int fault_num,
+ struct task_struct *tsk,
+ struct pt_regs *regs)
+{
+ siginfo_t sig;
+
+ if (unlikely(tsk->pid < 2)) {
+ const char *victim =
+ is_idle_task(tsk) ? "the idle task" : "init";
+
+ panic("Signal %d (code %d) at %#lx sent to %s!",
+ si_signo, si_code & 0xffff, address, victim);
+ }
+
+ sig.si_trapno = fault_num;
+ sig.si_addr = (void __user *)address;
+ sig.si_code = si_code;
+ sig.si_errno = 0;
+ sig.si_signo = si_signo;
+
+ trace_unhandled_signal(type, regs, address, si_signo);
+ force_sig_info(si_signo, &sig, tsk);
+}
+
+#ifndef __tilegx__
+/*
+ * Synthesize the fault a PL0 process would get by doing a word-load of
+ * an unaligned address or a high kernel address: SIGSEGV for an address
+ * at or above PAGE_OFFSET, SIGBUS for anything else.
+ */
+SYSCALL_DEFINE2(cmpxchg_badaddr, unsigned long, address,
+ struct pt_regs *, regs)
+{
+ if (address < PAGE_OFFSET)
+ force_sig_info_fault("atomic alignment fault", SIGBUS,
+ BUS_ADRALN, address,
+ INT_UNALIGN_DATA, current, regs);
+ else
+ force_sig_info_fault("atomic segfault", SIGSEGV, SEGV_MAPERR,
+ address, INT_DTLB_MISS, current, regs);
+
+ /*
+ * Flag this as a caller-save interrupt, as a normal page fault
+ * would be, so that the signal handler path restores r0, r1 and
+ * r2 properly for the signal handler arguments.
+ */
+ regs->flags |= PT_FLAGS_CALLER_SAVES;
+
+ /*
+ * Move pc back onto the instruction itself. Syscalls do not
+ * normally do this, but here we are claiming that the swint1
+ * caused a page fault or bus error.
+ */
+ regs->pc -= 8;
+
+ return 0;
+}
+#endif
+
+/*
+ * Bring one pmd entry of the page table rooted at "pgd" in line with
+ * the kernel reference table, init_mm.pgd, for the given address.
+ *
+ * If the reference table has no present pgd, pud or pmd entry for the
+ * address, nothing is changed and NULL is returned. Otherwise a missing
+ * pmd entry in the target table is filled in from the reference one; an
+ * entry that is already present must refer to the same page table
+ * (BUG_ON otherwise). Returns the reference pmd entry.
+ */
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
+ unsigned index = pgd_index(address);
+ pgd_t *pgd_k;
+ pud_t *pud, *pud_k;
+ pmd_t *pmd, *pmd_k;
+
+ /* Step both tables to the top-level slot for this address. */
+ pgd += index;
+ pgd_k = init_mm.pgd + index;
+
+ if (!pgd_present(*pgd_k))
+ return NULL;
+
+ pud = pud_offset(pgd, address);
+ pud_k = pud_offset(pgd_k, address);
+ if (!pud_present(*pud_k))
+ return NULL;
+
+ pmd = pmd_offset(pud, address);
+ pmd_k = pmd_offset(pud_k, address);
+ if (!pmd_present(*pmd_k))
+ return NULL;
+ if (!pmd_present(*pmd)) {
+ /* Target is missing the entry: copy it from the reference. */
+ set_pmd(pmd, *pmd_k);
+ arch_flush_lazy_mmu_mode();
+ } else
+ BUG_ON(pmd_ptfn(*pmd) != pmd_ptfn(*pmd_k));
+ return pmd_k;
+}
+
+/*
+ * Handle a fault on the vmalloc area.
+ * Returns 0 when the address is a vmalloc address that the reference
+ * page table maps (the faulting table is synchronized on the way), and
+ * -1 when the address is outside the vmalloc range or not mapped there.
+ */
+static inline int vmalloc_fault(pgd_t *pgd, unsigned long address)
+{
+ pmd_t *ref_pmd;
+
+ /* Only addresses inside the vmalloc area are handled here. */
+ if (address < VMALLOC_START || address >= VMALLOC_END)
+ return -1;
+
+ /*
+ * Bring this task's top level page-table in line with
+ * the 'reference' page table.
+ */
+ ref_pmd = vmalloc_sync_one(pgd, address);
+ if (!ref_pmd)
+ return -1;
+
+ /* support TILE huge_vmap() API */
+ if (pmd_huge(*ref_pmd))
+ return 0;
+
+ return pte_present(*pte_offset_kernel(ref_pmd, address)) ? 0 : -1;
+}
+
+/*
+ * Spin until this PTE is no longer marked as migrating.
+ * Returns at once if it is not migrating to begin with; panics if the
+ * PTE is still migrating after more than get_clock_rate() polls.
+ */
+static void wait_for_migration(pte_t *pte)
+{
+ int spins;
+ int limit;
+
+ if (!pte_migrating(*pte))
+ return;
+
+ /*
+ * The migrater will fix up this pte. The poll limit is scaled
+ * by the clock rate, so we wait for a few seconds here.
+ */
+ limit = get_clock_rate();
+ for (spins = 1; pte_migrating(*pte); spins++) {
+ barrier();
+ if (spins > limit)
+ panic("Hit migrating PTE (%#llx) and"
+ " page PFN %#lx still migrating",
+ pte->val, pte_pfn(*pte));
+ }
+}
+
+/*
+ * Return the kernel virtual address of the page table that the
+ * hypervisor context currently has installed.
+ *
+ * "current" is not a generally safe way to get the page table pointer,
+ * since we might be running an oprofile interrupt in the middle of a
+ * task switch, so the hypervisor is asked instead.
+ */
+static pgd_t *get_current_pgd(void)
+{
+ HV_Context hv_ctx = hv_inquire_context();
+ unsigned long table_pfn = hv_ctx.page_table >> PAGE_SHIFT;
+
+ /* oops, HIGHPTE? */
+ BUG_ON(PageHighMem(pfn_to_page(table_pfn)));
+
+ return (pgd_t *) __va(hv_ctx.page_table);
+}
+
+/*
+ * We can receive a page fault from a migrating PTE at any time.
+ * Handle it by just waiting until the fault resolves.
+ *
+ * It's also possible to get a migrating kernel PTE that resolves
+ * itself during the downcall from hypervisor to Linux. We just check
+ * here to see if the PTE seems valid, and if so we retry it.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may be in an
+ * interrupt or a critical region, and must do as little as possible.
+ * Similarly, we can't use atomic ops here, since we may be handling a
+ * fault caused by an atomic op access.
+ *
+ * If we find a migrating PTE while we're in an NMI context, and we're
+ * at a PC that has a registered exception handler, we don't wait,
+ * since this thread may (e.g.) have been interrupted while migrating
+ * its own stack, which would then cause us to self-deadlock.
+ *
+ * Returns 1 when the faulting access can simply be retried: either we
+ * waited for a migrating PTE, or this is a kernel-mode fault and the
+ * PTE is present and already permits the kind of access that faulted.
+ * Returns 0 when the caller has to carry on with normal fault handling.
+ */
+static int handle_migrating_pte(pgd_t *pgd, int fault_num,
+ unsigned long address, unsigned long pc,
+ int is_kernel_mode, int write)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ pte_t pteval;
+
+ if (pgd_addr_invalid(address))
+ return 0;
+
+ /* Walk down to the PTE, giving up if any level is missing. */
+ pgd += pgd_index(address);
+ pud = pud_offset(pgd, address);
+ if (!pud || !pud_present(*pud))
+ return 0;
+ pmd = pmd_offset(pud, address);
+ if (!pmd || !pmd_present(*pmd))
+ return 0;
+ /* For a huge page the pmd entry itself is used as the PTE. */
+ pte = pmd_huge_page(*pmd) ? ((pte_t *)pmd) :
+ pte_offset_kernel(pmd, address);
+ pteval = *pte;
+ if (pte_migrating(pteval)) {
+ if (in_nmi() && search_exception_tables(pc))
+ return 0;
+ wait_for_migration(pte);
+ return 1;
+ }
+
+ /* Not migrating: only a kernel fault on a present PTE may be retried. */
+ if (!is_kernel_mode || !pte_present(pteval))
+ return 0;
+ if (fault_num == INT_ITLB_MISS) {
+ if (pte_exec(pteval))
+ return 1;
+ } else if (write) {
+ if (pte_write(pteval))
+ return 1;
+ } else {
+ if (pte_read(pteval))
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * This routine is responsible for faulting in user pages.
+ * It passes the work off to one of the appropriate routines.
+ * It returns true if the fault was successfully handled.
+ *
+ * regs: saved registers of the faulting context; regs->pc may be
+ * rewritten by an exception table fixup.
+ * fault_num: interrupt number of the fault (INT_ITLB_MISS etc.).
+ * is_page_fault: zero for a protection (access) fault, which is always
+ * treated as a write.
+ * address: the faulting virtual address.
+ * write: non-zero if the faulting access was a write.
+ *
+ * Returns 1 if the fault was resolved and the access can be retried,
+ * and 0 if a signal was raised or an exception table fixup was applied.
+ * An unrecoverable kernel fault does not return: the task is killed.
+ */
+static int handle_page_fault(struct pt_regs *regs,
+ int fault_num,
+ int is_page_fault,
+ unsigned long address,
+ int write)
+{
+ struct task_struct *tsk;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned long stack_offset;
+ int fault;
+ int si_code;
+ int is_kernel_mode;
+ pgd_t *pgd;
+
+ /* on TILE, protection faults are always writes */
+ if (!is_page_fault)
+ write = 1;
+
+ is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);
+
+ tsk = validate_current();
+
+ /*
+ * Check to see if we might be overwriting the stack, and bail
+ * out if so. The page fault code is a relatively likely
+ * place to get trapped in an infinite regress, and once we
+ * overwrite the whole stack, it becomes very hard to recover.
+ */
+ stack_offset = stack_pointer & (THREAD_SIZE-1);
+ if (stack_offset < THREAD_SIZE / 8) {
+ pr_alert("Potential stack overrun: sp %#lx\n",
+ stack_pointer);
+ show_regs(regs);
+ pr_alert("Killing current process %d/%s\n",
+ tsk->pid, tsk->comm);
+ do_group_exit(SIGKILL);
+ }
+
+ /*
+ * Early on, we need to check for migrating PTE entries;
+ * see homecache.c. If we find a migrating PTE, we wait until
+ * the backing page claims to be done migrating, then we proceed.
+ * For kernel PTEs, we rewrite the PTE and return and retry.
+ * Otherwise, we treat the fault like a normal "no PTE" fault,
+ * rather than trying to patch up the existing PTE.
+ */
+ pgd = get_current_pgd();
+ if (handle_migrating_pte(pgd, fault_num, address, regs->pc,
+ is_kernel_mode, write))
+ return 1;
+
+ si_code = SEGV_MAPERR;
+
+ /*
+ * We fault-in kernel-space virtual memory on-demand. The
+ * 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
+ *
+ * This verifies that the fault happens in kernel space
+ * and that the fault was not a protection fault.
+ */
+ if (unlikely(address >= TASK_SIZE &&
+ !is_arch_mappable_range(address, 0))) {
+ if (is_kernel_mode && is_page_fault &&
+ vmalloc_fault(pgd, address) >= 0)
+ return 1;
+ /*
+ * Don't take the mm semaphore here. If we fixup a prefetch
+ * fault we could otherwise deadlock.
+ */
+ mm = NULL; /* happy compiler */
+ vma = NULL;
+ goto bad_area_nosemaphore;
+ }
+
+ /*
+ * If we're trying to touch user-space addresses, we must
+ * be either at PL0, or else with interrupts enabled in the
+ * kernel, so either way we can re-enable interrupts here
+ * unless we are doing atomic access to user space with
+ * interrupts disabled.
+ */
+ if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
+ local_irq_enable();
+
+ mm = tsk->mm;
+
+ /*
+ * If we're in an interrupt, have no user context or are running in an
+ * atomic region then we must not take the fault.
+ */
+ if (in_atomic() || !mm) {
+ vma = NULL; /* happy compiler */
+ goto bad_area_nosemaphore;
+ }
+
+ /*
+ * When running in the kernel we expect faults to occur only to
+ * addresses in user space. All other faults represent errors in the
+ * kernel and should generate an OOPS. Unfortunately, in the case of an
+ * erroneous fault occurring in a code path which already holds mmap_sem
+ * we will deadlock attempting to validate the fault against the
+ * address space. Luckily the kernel only validly references user
+ * space from well defined areas of code, which are listed in the
+ * exceptions table.
+ *
+ * As the vast majority of faults will be valid we will only perform
+ * the source reference check when there is a possibility of a deadlock.
+ * Attempt to lock the address space, if we cannot we then validate the
+ * source. If this is invalid we can skip the address space check,
+ * thus avoiding the deadlock.
+ */
+ if (!down_read_trylock(&mm->mmap_sem)) {
+ if (is_kernel_mode &&
+ !search_exception_tables(regs->pc)) {
+ vma = NULL; /* happy compiler */
+ goto bad_area_nosemaphore;
+ }
+ down_read(&mm->mmap_sem);
+ }
+
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
+ if (vma->vm_start <= address)
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+ if (regs->sp < PAGE_OFFSET) {
+ /*
+ * accessing the stack below sp is always a bug.
+ */
+ if (address < regs->sp)
+ goto bad_area;
+ }
+ if (expand_stack(vma, address))
+ goto bad_area;
+
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+ si_code = SEGV_ACCERR;
+ if (fault_num == INT_ITLB_MISS) {
+ if (!(vma->vm_flags & VM_EXEC))
+ goto bad_area;
+ } else if (write) {
+#ifdef TEST_VERIFY_AREA
+ if (!is_page_fault && regs->cs == KERNEL_CS)
+ pr_err("WP fault at "REGFMT"\n", regs->eip);
+#endif
+ if (!(vma->vm_flags & VM_WRITE))
+ goto bad_area;
+ } else {
+ if (!is_page_fault || !(vma->vm_flags & VM_READ))
+ goto bad_area;
+ }
+
+ survive:
+ /*
+ * If for any reason at all we couldn't handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+ fault = handle_mm_fault(mm, vma, address, write);
+ if (unlikely(fault & VM_FAULT_ERROR)) {
+ if (fault & VM_FAULT_OOM)
+ goto out_of_memory;
+ else if (fault & VM_FAULT_SIGBUS)
+ goto do_sigbus;
+ BUG();
+ }
+ if (fault & VM_FAULT_MAJOR)
+ tsk->maj_flt++;
+ else
+ tsk->min_flt++;
+
+#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+ /*
+ * If this was an asynchronous fault,
+ * restart the appropriate engine.
+ */
+ switch (fault_num) {
+#if CHIP_HAS_TILE_DMA()
+ case INT_DMATLB_MISS:
+ case INT_DMATLB_MISS_DWNCL:
+ case INT_DMATLB_ACCESS:
+ case INT_DMATLB_ACCESS_DWNCL:
+ __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
+ break;
+#endif
+#if CHIP_HAS_SN_PROC()
+ case INT_SNITLB_MISS:
+ case INT_SNITLB_MISS_DWNCL:
+ __insn_mtspr(SPR_SNCTL,
+ __insn_mfspr(SPR_SNCTL) &
+ ~SPR_SNCTL__FRZPROC_MASK);
+ break;
+#endif
+ }
+#endif
+
+ up_read(&mm->mmap_sem);
+ return 1;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+ up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+ /* User mode accesses just cause a SIGSEGV */
+ if (!is_kernel_mode) {
+ /*
+ * It's possible to have interrupts off here.
+ */
+ local_irq_enable();
+
+ force_sig_info_fault("segfault", SIGSEGV, si_code, address,
+ fault_num, tsk, regs);
+ return 0;
+ }
+
+no_context:
+ /* Are we prepared to handle this kernel fault? */
+ if (fixup_exception(regs))
+ return 0;
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+
+ bust_spinlocks(1);
+
+ /* FIXME: no lookup_address() yet */
+#ifdef SUPPORT_LOOKUP_ADDRESS
+ if (fault_num == INT_ITLB_MISS) {
+ pte_t *pte = lookup_address(address);
+
+ if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
+ pr_crit("kernel tried to execute"
+ " non-executable page - exploit attempt?"
+ " (uid: %d)\n", current->uid);
+ }
+#endif
+ if (address < PAGE_SIZE)
+ pr_alert("Unable to handle kernel NULL pointer dereference\n");
+ else
+ pr_alert("Unable to handle kernel paging request\n");
+ pr_alert(" at virtual address "REGFMT", pc "REGFMT"\n",
+ address, regs->pc);
+
+ show_regs(regs);
+
+ if (unlikely(tsk->pid < 2)) {
+ panic("Kernel page fault running %s!",
+ is_idle_task(tsk) ? "the idle task" : "init");
+ }
+
+ /*
+ * More FIXME: we should probably copy the i386 here and
+ * implement a generic die() routine. Not today.
+ */
+#ifdef SUPPORT_DIE
+ die("Oops", regs);
+#endif
+ /*
+ * The report is done: leave oops mode again. This pairs with the
+ * bust_spinlocks(1) above; the rest of the system keeps running
+ * after this task is killed, so the state must not stay raised.
+ */
+ bust_spinlocks(0);
+
+ do_group_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+ up_read(&mm->mmap_sem);
+ if (is_global_init(tsk)) {
+ yield();
+ down_read(&mm->mmap_sem);
+ goto survive;
+ }
+ pr_alert("VM: killing process %s\n", tsk->comm);
+ if (!is_kernel_mode)
+ do_group_exit(SIGKILL);
+ goto no_context;
+
+do_sigbus:
+ up_read(&mm->mmap_sem);
+
+ /* Kernel mode? Handle exceptions or die */
+ if (is_kernel_mode)
+ goto no_context;
+
+ force_sig_info_fault("bus error", SIGBUS, BUS_ADRERR, address,
+ fault_num, tsk, regs);
+ return 0;
+}
+
+#ifndef __tilegx__
+
+/* We must release ICS before panicking or we won't get anywhere. */
+#define ics_panic(fmt, ...) do { \
+ __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); \
+ panic(fmt, __VA_ARGS__); \
+} while (0)
+
+/*
+ * When we take an ITLB or DTLB fault or access violation in the
+ * supervisor while the critical section bit is set, the hypervisor is
+ * reluctant to write new values into the EX_CONTEXT_K_x registers,
+ * since that might indicate we have not yet squirreled the SPR
+ * contents away and can thus safely take a recursive interrupt.
+ * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_K_2.
+ *
+ * Note that this routine is called before homecache_tlb_defer_enter(),
+ * which means that we can properly unlock any atomics that might
+ * be used there (good), but also means we must be very sensitive
+ * to not touch any data structures that might be located in memory
+ * that could migrate, as we could be entering the kernel on a dataplane
+ * cpu that has been deferring kernel TLB updates. This means, for
+ * example, that we can't migrate init_mm or its pgd.
+ *
+ * regs: saved registers; sp, pc and ex1 may be rewritten below.
+ * fault_num: must be INT_DTLB_MISS or INT_DTLB_ACCESS, else we panic.
+ * address: the faulting virtual address.
+ * info: bit 0 is the write flag, the remaining bits are the faulting PC.
+ *
+ * Returns an intvec_state whose retval is 1 if the fault was dealt with
+ * completely here, or 0 if normal fault handling should continue.
+ */
+struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
+ unsigned long address,
+ unsigned long info)
+{
+ unsigned long pc = info & ~1;
+ int write = info & 1;
+ pgd_t *pgd = get_current_pgd();
+
+ /* Retval is 1 at first since we will handle the fault fully. */
+ struct intvec_state state = {
+ do_page_fault, fault_num, address, write, 1
+ };
+
+ /* Validate that we are plausibly in the right routine. */
+ if ((pc & 0x7) != 0 || pc < PAGE_OFFSET ||
+ (fault_num != INT_DTLB_MISS &&
+ fault_num != INT_DTLB_ACCESS)) {
+ unsigned long old_pc = regs->pc;
+ regs->pc = pc;
+ ics_panic("Bad ICS page fault args:"
+ " old PC %#lx, fault %d/%d at %#lx\n",
+ old_pc, fault_num, write, address);
+ }
+
+ /* We might be faulting on a vmalloc page, so check that first. */
+ if (fault_num != INT_DTLB_ACCESS && vmalloc_fault(pgd, address) >= 0)
+ return state;
+
+ /*
+ * If we faulted with ICS set in sys_cmpxchg, we are providing
+ * a user syscall service that should generate a signal on
+ * fault. We didn't set up a kernel stack on initial entry to
+ * sys_cmpxchg, but instead had one set up by the fault, which
+ * (because sys_cmpxchg never releases ICS) came to us via the
+ * SYSTEM_SAVE_K_2 mechanism, and thus EX_CONTEXT_K_[01] are
+ * still referencing the original user code. We release the
+ * atomic lock and rewrite pt_regs so that it appears that we
+ * came from user-space directly, and after we finish the
+ * fault we'll go back to user space and re-issue the swint.
+ * This way the backtrace information is correct if we need to
+ * emit a stack dump at any point while handling this.
+ *
+ * Must match register use in sys_cmpxchg().
+ */
+ if (pc >= (unsigned long) sys_cmpxchg &&
+ pc < (unsigned long) __sys_cmpxchg_end) {
+#ifdef CONFIG_SMP
+ /* Don't unlock before we could have locked. */
+ if (pc >= (unsigned long)__sys_cmpxchg_grab_lock) {
+ int *lock_ptr = (int *)(regs->regs[ATOMIC_LOCK_REG]);
+ __atomic_fault_unlock(lock_ptr);
+ }
+#endif
+ /* NOTE(review): r27 presumably holds the user sp saved by sys_cmpxchg -- confirm there. */
+ regs->sp = regs->regs[27];
+ }
+
+ /*
+ * We can also fault in the atomic assembly, in which
+ * case we use the exception table to do the first-level fixup.
+ * We may re-fixup again in the real fault handler if it
+ * turns out the faulting address is just bad, and not,
+ * for example, migrating.
+ */
+ else if (pc >= (unsigned long) __start_atomic_asm_code &&
+ pc < (unsigned long) __end_atomic_asm_code) {
+ const struct exception_table_entry *fixup;
+#ifdef CONFIG_SMP
+ /* Unlock the atomic lock. */
+ int *lock_ptr = (int *)(regs->regs[ATOMIC_LOCK_REG]);
+ __atomic_fault_unlock(lock_ptr);
+#endif
+ fixup = search_exception_tables(pc);
+ if (!fixup)
+ ics_panic("ICS atomic fault not in table:"
+ " PC %#lx, fault %d", pc, fault_num);
+ regs->pc = fixup->fixup;
+ regs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
+ }
+
+ /*
+ * Now that we have released the atomic lock (if necessary),
+ * it's safe to spin if the PTE that caused the fault was migrating.
+ */
+ if (fault_num == INT_DTLB_ACCESS)
+ write = 1;
+ if (handle_migrating_pte(pgd, fault_num, address, pc, 1, write))
+ return state;
+
+ /* Return zero so that we continue on with normal fault handling. */
+ state.retval = 0;
+ return state;
+}
+
+#endif /* !__tilegx__ */
+
+/*
+ * This routine handles page faults. It determines the address, and the
+ * problem, and then passes it to handle_page_fault() for normal DTLB and
+ * ITLB issues, and for DMA or SN processor faults when we are in user
+ * space. For the latter, if we're in kernel mode, we just save the
+ * interrupt away appropriately and return immediately. We can't do
+ * page faults for user code while in kernel mode.
+ *
+ * regs: saved register state of the faulting context.
+ * fault_num: the INT_* interrupt number; any value other than the TLB
+ * miss/access interrupts enumerated in the switch below panics.
+ * address: the faulting address, passed through unmodified.
+ * write: must be 0 or 1; any other bit set trips the BUG_ON below.
+ */
+void do_page_fault(struct pt_regs *regs, int fault_num,
+ unsigned long address, unsigned long write)
+{
+ int is_page_fault;
+
+ /* This case should have been handled by do_page_fault_ics(). */
+ BUG_ON(write & ~1);
+
+#if CHIP_HAS_TILE_DMA()
+ /*
+ * If it's a DMA fault, suspend the transfer while we're
+ * handling the miss; we'll restart after it's handled. If we
+ * don't suspend, it's possible that this process could swap
+ * out and back in, and restart the engine since the DMA is
+ * still 'running'.
+ */
+ if (fault_num == INT_DMATLB_MISS ||
+ fault_num == INT_DMATLB_ACCESS ||
+ fault_num == INT_DMATLB_MISS_DWNCL ||
+ fault_num == INT_DMATLB_ACCESS_DWNCL) {
+ __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK);
+ /* Busy-wait until the DMA status no longer reports busy. */
+ while (__insn_mfspr(SPR_DMA_USER_STATUS) &
+ SPR_DMA_STATUS__BUSY_MASK)
+ ;
+ }
+#endif
+
+ /* Validate fault num and decide if this is a first-time page fault. */
+ switch (fault_num) {
+ /* The *_MISS interrupts are treated as page faults. */
+ case INT_ITLB_MISS:
+ case INT_DTLB_MISS:
+#if CHIP_HAS_TILE_DMA()
+ case INT_DMATLB_MISS:
+ case INT_DMATLB_MISS_DWNCL:
+#endif
+#if CHIP_HAS_SN_PROC()
+ case INT_SNITLB_MISS:
+ case INT_SNITLB_MISS_DWNCL:
+#endif
+ is_page_fault = 1;
+ break;
+
+ /* The *_ACCESS interrupts are not. */
+ case INT_DTLB_ACCESS:
+#if CHIP_HAS_TILE_DMA()
+ case INT_DMATLB_ACCESS:
+ case INT_DMATLB_ACCESS_DWNCL:
+#endif
+ is_page_fault = 0;
+ break;
+
+ default:
+ panic("Bad fault number %d in do_page_fault", fault_num);
+ }
+
+#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+ /*
+ * Not at user privilege level: a DMA or SN fault is only recorded
+ * in the matching per-thread async_tlb slot here; it is handed to
+ * handle_page_fault() later by do_async_page_fault().
+ */
+ if (EX1_PL(regs->ex1) != USER_PL) {
+ struct async_tlb *async;
+ switch (fault_num) {
+#if CHIP_HAS_TILE_DMA()
+ case INT_DMATLB_MISS:
+ case INT_DMATLB_ACCESS:
+ case INT_DMATLB_MISS_DWNCL:
+ case INT_DMATLB_ACCESS_DWNCL:
+ async = &current->thread.dma_async_tlb;
+ break;
+#endif
+#if CHIP_HAS_SN_PROC()
+ case INT_SNITLB_MISS:
+ case INT_SNITLB_MISS_DWNCL:
+ async = &current->thread.sn_async_tlb;
+ break;
+#endif
+ default:
+ /* Ordinary ITLB/DTLB fault: handle it synchronously below. */
+ async = NULL;
+ }
+ if (async) {
+
+ /*
+ * No vmalloc check required, so we can allow
+ * interrupts immediately at this point.
+ */
+ local_irq_enable();
+
+ set_thread_flag(TIF_ASYNC_TLB);
+ /* Each slot holds one fault; a nonzero fault_num means occupied. */
+ if (async->fault_num != 0) {
+ panic("Second async fault %d;"
+ " old fault was %d (%#lx/%ld)",
+ fault_num, async->fault_num,
+ address, write);
+ }
+ /* Zero is the "slot empty" marker, so it cannot be stored. */
+ BUG_ON(fault_num == 0);
+ async->fault_num = fault_num;
+ async->is_fault = is_page_fault;
+ async->is_write = write;
+ async->address = address;
+ return;
+ }
+ }
+#endif
+
+ handle_page_fault(regs, fault_num, is_page_fault, address, write);
+}
+
+
+#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+/*
+ * Replay a deferred fault, if one is recorded in the given async_tlb
+ * slot, by handing it to handle_page_fault().  A fault_num of zero
+ * means the slot is empty and nothing is done.
+ */
+static void handle_async_page_fault(struct pt_regs *regs,
+				    struct async_tlb *async)
+{
+	int pending = async->fault_num;
+
+	if (pending == 0)
+		return;
+
+	/*
+	 * Release the slot before running the handler: if we are
+	 * re-interrupted before the handler returns, the new fault
+	 * needs an empty slot to be recorded in.
+	 */
+	async->fault_num = 0;
+	handle_page_fault(regs, pending, async->is_fault,
+			  async->address, async->is_write);
+}
+
+/*
+ * This routine effectively re-issues asynchronous page faults
+ * when we are returning to user space.
+ *
+ * Each per-thread async_tlb slot that the chip supports is checked in
+ * turn; a slot with a nonzero fault_num is passed to handle_page_fault().
+ */
+void do_async_page_fault(struct pt_regs *regs)
+{
+ /*
+ * Clear thread flag early. If we re-interrupt while processing
+ * code here, we will reset it and recall this routine before
+ * returning to user space.
+ */
+ clear_thread_flag(TIF_ASYNC_TLB);
+
+#if CHIP_HAS_TILE_DMA()
+ /* Replay a deferred DMA TLB fault, if any. */
+ handle_async_page_fault(regs, &current->thread.dma_async_tlb);
+#endif
+#if CHIP_HAS_SN_PROC()
+ /* Replay a deferred SN TLB fault, if any. */
+ handle_async_page_fault(regs, &current->thread.sn_async_tlb);
+#endif
+}
+#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */
+
+
+/*
+ * Propagate kernel page-table entries to every pgd on pgd_list.
+ *
+ * On tilegx this only asserts that the vmalloc range lives under a
+ * single pgd entry.  Otherwise it walks the address space in
+ * PGDIR_SIZE steps starting at "start", calling vmalloc_sync_one()
+ * on each pgd in pgd_list (under pgd_lock) for every pgd index not
+ * yet marked in the "insync" bitmap.
+ */
+void vmalloc_sync_all(void)
+{
+#ifdef __tilegx__
+ /* Currently all L1 kernel pmd's are static and shared. */
+ BUG_ON(pgd_index(VMALLOC_END) != pgd_index(VMALLOC_START));
+#else
+ /*
+ * Note that races in the updates of insync and start aren't
+ * problematic: insync can only get set bits added, and updates to
+ * start are only improving performance (without affecting correctness
+ * if undone).
+ */
+ static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+ static unsigned long start = PAGE_OFFSET;
+ unsigned long address;
+
+ BUILD_BUG_ON(PAGE_OFFSET & ~PGDIR_MASK);
+ /*
+ * NOTE(review): the loop condition only becomes false once
+ * "address" is below PAGE_OFFSET, presumably via unsigned
+ * wraparound past the top of the address space -- confirm.
+ */
+ for (address = start; address >= PAGE_OFFSET; address += PGDIR_SIZE) {
+ if (!test_bit(pgd_index(address), insync)) {
+ unsigned long flags;
+ struct list_head *pos;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ list_for_each(pos, &pgd_list)
+ if (!vmalloc_sync_one(list_to_pgd(pos),
+ address)) {
+ /* Must be at first entry in list. */
+ BUG_ON(pos != pgd_list.next);
+ break;
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ /*
+ * pos is still the first entry only if we broke out
+ * above (or the list is empty); in that case the
+ * index is not marked as synced.
+ */
+ if (pos != pgd_list.next)
+ set_bit(pgd_index(address), insync);
+ }
+ /* Advance the cached starting point past a synced prefix. */
+ if (address == start && test_bit(pgd_index(address), insync))
+ start = address + PGDIR_SIZE;
+ }
+#endif
+}
diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c
new file mode 100644
index 00000000..ef8e5a62
--- /dev/null
+++ b/arch/tile/mm/highmem.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <asm/homecache.h>
+
+/*
+ * Walk the kernel page table (pgd -> pud -> pmd -> pte) and yield a
+ * pointer to the PTE for kernel virtual address "vaddr".
+ * Note that "vaddr" is expanded several times, so it must not have
+ * side effects.
+ */
+#define kmap_get_pte(vaddr) \
+ pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)),\
+ (vaddr)), (vaddr))
+
+
+/*
+ * Return a kernel virtual address for "page".  May sleep.
+ *
+ * Lowmem pages are returned via page_address().  Highmem pages are
+ * mapped with kmap_high() and their kernel PTE is then rewritten with
+ * the protection derived from the page by page_to_kpgprot().
+ */
+void *kmap(struct page *page)
+{
+	unsigned long irqflags;
+	pte_t *kpte;
+	void *addr;
+
+	might_sleep();
+	if (!PageHighMem(page))
+		return page_address(page);
+
+	addr = kmap_high(page);
+	kpte = kmap_get_pte((unsigned long)addr);
+
+	/*
+	 * The PTE is rewritten while holding the kpte lock, which
+	 * ensures the page is not migrating at that moment.
+	 */
+	irqflags = homecache_kpte_lock();
+	set_pte_at(&init_mm, addr, kpte, mk_pte(page, page_to_kpgprot(page)));
+	homecache_kpte_unlock(irqflags);
+
+	return addr;
+}
+EXPORT_SYMBOL(kmap);
+
+/*
+ * Undo a kmap().  Must not be called from interrupt context.
+ * Nothing needs to be done for lowmem pages.
+ */
+void kunmap(struct page *page)
+{
+	BUG_ON(in_interrupt());
+	if (PageHighMem(page))
+		kunmap_high(page);
+}
+EXPORT_SYMBOL(kunmap);
+
+/*
+ * Describe a single atomic mapping of a page on a given cpu at a
+ * given address, and allow it to be linked into a list.
+ */
+struct atomic_mapped_page {
+ struct list_head list; /* linkage on amp_list */
+ struct page *page; /* the page that is mapped */
+ int cpu; /* cpu that created the mapping */
+ unsigned long va; /* kernel virtual address of the mapping */
+};
+
+/* amp_lock protects amp_list, the list of all live atomic mappings. */
+static spinlock_t amp_lock = __SPIN_LOCK_UNLOCKED(&amp_lock);
+static struct list_head amp_list = LIST_HEAD_INIT(amp_list);
+
+/*
+ * Combining this structure with a per-cpu declaration lets us give
+ * each cpu an atomic_mapped_page structure per type.
+ * The per_type array is indexed by the kmap type (0..KM_TYPE_NR-1).
+ */
+struct kmap_amps {
+ struct atomic_mapped_page per_type[KM_TYPE_NR];
+};
+static DEFINE_PER_CPU(struct kmap_amps, amps);
+
+/*
+ * Add a page and va, on this cpu, to the list of kmap_atomic pages,
+ * and write the new pte to memory. Writing the new PTE under the
+ * lock guarantees that it is either on the list before migration starts
+ * (if we won the race), or set_pte() sets the migrating bit in the PTE
+ * (if we lost the race). And doing it under the lock guarantees
+ * that when kmap_atomic_fix_one_kpte() comes along, it finds a valid
+ * PTE in memory, iff the mapping is still on the amp_list.
+ *
+ * Finally, doing it under the lock lets us safely examine the page
+ * to see if it is immutable or not, for the generic kmap_atomic() case.
+ * If we examine it earlier we are exposed to a race where it looks
+ * writable earlier, but becomes immutable before we write the PTE.
+ *
+ * page: the page being mapped.
+ * type: index into this cpu's per_type[] array of mapping records.
+ * va: the kernel virtual address the page is being mapped at.
+ * ptep: the PTE to write.
+ * pteval: the PTE value to write; if it is not readable it is replaced
+ * by a PTE built from page_to_kpgprot(page).
+ */
+static void kmap_atomic_register(struct page *page, enum km_type type,
+ unsigned long va, pte_t *ptep, pte_t pteval)
+{
+ unsigned long flags;
+ struct atomic_mapped_page *amp;
+
+ /* Lock order: the kpte lock is taken first, then amp_lock. */
+ flags = homecache_kpte_lock();
+ spin_lock(&amp_lock);
+
+ /* With interrupts disabled, now fill in the per-cpu info. */
+ amp = &__get_cpu_var(amps).per_type[type];
+ amp->page = page;
+ amp->cpu = smp_processor_id();
+ amp->va = va;
+
+ /* For generic kmap_atomic(), choose the PTE writability now. */
+ if (!pte_read(pteval))
+ pteval = mk_pte(page, page_to_kpgprot(page));
+
+ /* Publish the record and the PTE together, both under the locks. */
+ list_add(&amp->list, &amp_list);
+ set_pte(ptep, pteval);
+ arch_flush_lazy_mmu_mode();
+
+ spin_unlock(&amp_lock);
+ homecache_kpte_unlock(flags);
+}
+
+/*
+ * Drop this cpu's record of an atomic mapping of "page" at "va" from
+ * amp_list.  The search is linear; the list is expected to be short.
+ * The PTE itself is not touched here: it is zeroed unconditionally
+ * once the entry is off the list, so (unlike in
+ * kmap_atomic_register()) that does not need to happen under the lock.
+ */
+static void kmap_atomic_unregister(struct page *page, unsigned long va)
+{
+	struct atomic_mapped_page *cur, *found = NULL;
+	unsigned long irqflags;
+	int this_cpu = smp_processor_id();
+
+	spin_lock_irqsave(&amp_lock, irqflags);
+	list_for_each_entry(cur, &amp_list, list) {
+		if (cur->page != page || cur->cpu != this_cpu || cur->va != va)
+			continue;
+		found = cur;
+		break;
+	}
+	/* The mapping must have been registered earlier. */
+	BUG_ON(found == NULL);
+	list_del(&found->list);
+	spin_unlock_irqrestore(&amp_lock, irqflags);
+}
+
+/*
+ * Helper routine for kmap_atomic_fix_kpte(), below: adjust the PTE
+ * of one recorded atomic mapping.
+ */
+static void kmap_atomic_fix_one_kpte(struct atomic_mapped_page *amp,
+				     int finished)
+{
+	pte_t *kpte = kmap_get_pte(amp->va);
+
+	if (finished) {
+		/*
+		 * Rewrite a default kernel PTE for this page.
+		 * We rely on the fact that set_pte() writes the
+		 * present+migrating bits last.
+		 */
+		pte_t fresh = mk_pte(amp->page, page_to_kpgprot(amp->page));
+		set_pte(kpte, fresh);
+		return;
+	}
+
+	/*
+	 * Not finished yet: mark the PTE as migrating and flush the
+	 * mapping's va from the TLB of the cpu that owns the mapping.
+	 */
+	set_pte(kpte, pte_mkmigrate(*kpte));
+	flush_remote(0, 0, NULL, amp->va, PAGE_SIZE, PAGE_SIZE,
+		     cpumask_of(amp->cpu), NULL, 0);
+}
+
+/*
+ * Helper for homecache_fix_kpte(); see that function's comments for
+ * the meaning of the "finished" argument.  Applies
+ * kmap_atomic_fix_one_kpte() to every recorded atomic mapping of
+ * "page".
+ *
+ * The remote flushes are issued with amp_lock held, which stalls any
+ * unrelated cpu attempting a kmap_atomic operation.  An alternative
+ * would be to update the PTEs under the lock, remember the va+cpu
+ * pairs, and flush after dropping the lock; doing everything under
+ * the lock was chosen as the simpler option.
+ */
+void kmap_atomic_fix_kpte(struct page *page, int finished)
+{
+	struct atomic_mapped_page *entry;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&amp_lock, irqflags);
+	list_for_each_entry(entry, &amp_list, list) {
+		if (entry->page != page)
+			continue;
+		kmap_atomic_fix_one_kpte(entry, finished);
+	}
+	spin_unlock_irqrestore(&amp_lock, irqflags);
+}
+
+/*
+ * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap
+ * because the kmap code must perform a global TLB invalidation when
+ * the kmap pool wraps.
+ *
+ * Note that they may be slower than on x86 (etc.) because unlike on
+ * those platforms, we do have to take a global lock to map and unmap
+ * pages on Tile (see above).
+ *
+ * When holding an atomic kmap it is not legal to sleep, so atomic
+ * kmaps are appropriate for short, tight code paths only.
+ */
+/*
+ * Atomically map "page" with protection "prot" and return its kernel
+ * virtual address.  Executable protections are rejected with BUG_ON.
+ *
+ * Page faults are disabled before anything else, including on the
+ * lowmem early-return path; __kunmap_atomic() re-enables them on both
+ * of its paths.
+ */
+void *kmap_atomic_prot(struct page *page, pgprot_t prot)
+{
+ unsigned long vaddr;
+ int idx, type;
+ pte_t *pte;
+
+ /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ pagefault_disable();
+
+ /* Avoid icache flushes by disallowing atomic executable mappings. */
+ BUG_ON(pte_exec(prot));
+
+ /* Lowmem pages need no new mapping. */
+ if (!PageHighMem(page))
+ return page_address(page);
+
+ /* Pick this cpu's next fixmap slot: KM_TYPE_NR slots per cpu. */
+ type = kmap_atomic_idx_push();
+ idx = type + KM_TYPE_NR*smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ pte = kmap_get_pte(vaddr);
+ /* The slot must currently be unmapped. */
+ BUG_ON(!pte_none(*pte));
+
+ /* Register that this page is mapped atomically on this cpu. */
+ kmap_atomic_register(page, type, vaddr, pte, mk_pte(page, prot));
+
+ return (void *)vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic_prot);
+
+/*
+ * Atomically map "page" with a protection chosen from the page itself:
+ * kmap_atomic_register() replaces an unreadable PTE value with one
+ * built from page_to_kpgprot(page).
+ */
+void *kmap_atomic(struct page *page)
+{
+ /* PAGE_NONE is a magic value that tells us to check immutability. */
+ return kmap_atomic_prot(page, PAGE_NONE);
+}
+EXPORT_SYMBOL(kmap_atomic);
+
+/*
+ * Undo a kmap_atomic()/kmap_atomic_prot() mapping.  "kvaddr" may be
+ * any address within the mapped page.  Addresses inside the fixmap
+ * kmap window are unregistered and their PTE cleared; any other
+ * address must be a lowmem address.  Page faults are re-enabled on
+ * both paths.
+ */
+void __kunmap_atomic(void *kvaddr)
+{
+ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+
+ if (vaddr >= __fix_to_virt(FIX_KMAP_END) &&
+ vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) {
+ pte_t *pte = kmap_get_pte(vaddr);
+ pte_t pteval = *pte;
+ int idx, type;
+
+ /* NOTE(review): idx is computed here but never read afterwards. */
+ type = kmap_atomic_idx();
+ idx = type + KM_TYPE_NR*smp_processor_id();
+
+ /*
+ * Force other mappings to Oops if they try to access this pte
+ * without first remapping it. Keeping stale mappings around
+ * is a bad idea.
+ */
+ BUG_ON(!pte_present(pteval) && !pte_migrating(pteval));
+ /* Take the record off amp_list first, then clear the PTE. */
+ kmap_atomic_unregister(pte_page(pteval), vaddr);
+ kpte_clear_flush(pte, vaddr);
+ kmap_atomic_idx_pop();
+ } else {
+ /* Must be a lowmem page */
+ BUG_ON(vaddr < PAGE_OFFSET);
+ BUG_ON(vaddr >= (unsigned long)high_memory);
+ }
+
+ arch_flush_lazy_mmu_mode();
+ pagefault_enable();
+}
+EXPORT_SYMBOL(__kunmap_atomic);
+
+/*
+ * This API is supposed to allow us to map memory without a "struct page".
+ * Currently we don't support this, though this may change in the future.
+ *
+ * Both variants below simply convert the pfn to its struct page with
+ * pfn_to_page() and defer to the page-based routines.
+ */
+void *kmap_atomic_pfn(unsigned long pfn)
+{
+ return kmap_atomic(pfn_to_page(pfn));
+}
+/* As kmap_atomic_pfn(), but with an explicit protection. */
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
+{
+ return kmap_atomic_prot(pfn_to_page(pfn), prot);
+}
+
+/*
+ * Translate an address returned by kmap_atomic() back to its page.
+ * Addresses below FIXADDR_START are converted with virt_to_page();
+ * anything else is looked up through its kernel PTE.
+ */
+struct page *kmap_atomic_to_page(void *ptr)
+{
+	unsigned long vaddr = (unsigned long)ptr;
+	pte_t *kpte;
+
+	if (vaddr >= FIXADDR_START) {
+		kpte = kmap_get_pte(vaddr);
+		return pte_page(*kpte);
+	}
+
+	return virt_to_page(ptr);
+}
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
new file mode 100644
index 00000000..499f7377
--- /dev/null
+++ b/arch/tile/mm/homecache.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * This code maintains the "home" for each page in the system.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/bootmem.h>
+#include <linux/rmap.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/pagevec.h>
+#include <linux/ptrace.h>
+#include <linux/timex.h>
+#include <linux/cache.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+
+#include <asm/page.h>
+#include <asm/sections.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm/homecache.h>
+
+#include <arch/sim.h>
+
+#include "migrate.h"
+
+
+#if CHIP_HAS_COHERENT_LOCAL_CACHE()
+
+/*
+ * The noallocl2 option suppresses all use of the L2 cache to cache
+ * locally from a remote home. There's no point in using it if we
+ * don't have coherent local caching, though.
+ */
+static int __write_once noallocl2;
+/* Early-param handler: the option's value string is ignored. */
+static int __init set_noallocl2(char *str)
+{
+ noallocl2 = 1;
+ return 0;
+}
+early_param("noallocl2", set_noallocl2);
+
+#else
+
+/* Without coherent local caching the option is compiled out as 0. */
+#define noallocl2 0
+
+#endif
+
+/* Provide no-op versions of these routines to keep flush_remote() cleaner. */
+#define mark_caches_evicted_start() 0
+#define mark_caches_evicted_finish(mask, timestamp) do {} while (0)
+
+
+/*
+ * Update the irq_stat for cpus that we are going to interrupt
+ * with TLB or cache flushes: every cpu in cache_cpumask, every cpu in
+ * tlb_cpumask (only when tlb_length is nonzero), and the cpu named by
+ * each remote ASID gets its irq_hv_flush_count incremented.
+ *
+ * NOTE(review): this comment previously also described removing
+ * dataplane cpus from the TLB flush set and setting
+ * dataplane_tlb_state; no such code is present in this function.
+ * The tlb_va argument is not used here.
+ */
+static void hv_flush_update(const struct cpumask *cache_cpumask,
+ struct cpumask *tlb_cpumask,
+ unsigned long tlb_va, unsigned long tlb_length,
+ HV_Remote_ASID *asids, int asidcount)
+{
+ struct cpumask mask;
+ int i, cpu;
+
+ /* Build the union of all cpus that will be interrupted. */
+ cpumask_clear(&mask);
+ if (cache_cpumask)
+ cpumask_or(&mask, &mask, cache_cpumask);
+ if (tlb_cpumask && tlb_length) {
+ cpumask_or(&mask, &mask, tlb_cpumask);
+ }
+
+ /* Each ASID carries x/y coordinates; convert them to a cpu number. */
+ for (i = 0; i < asidcount; ++i)
+ cpumask_set_cpu(asids[i].y * smp_width + asids[i].x, &mask);
+
+ /*
+ * Don't bother to update atomically; losing a count
+ * here is not that critical.
+ */
+ for_each_cpu(cpu, &mask)
+ ++per_cpu(irq_stat, cpu).irq_hv_flush_count;
+}
+
+/*
+ * This wrapper function around hv_flush_remote() does several things:
+ *
+ * - Provides a return value error-checking panic path, since
+ * there's never any good reason for hv_flush_remote() to fail.
+ * - Accepts a 32-bit PFN rather than a 64-bit PA, which generally
+ * is the type that Linux wants to pass around anyway.
+ * - Centralizes the mark_caches_evicted() handling.
+ * - Canonicalizes that lengths of zero make cpumasks NULL.
+ * - Handles deferring TLB flushes for dataplane tiles.
+ * - Tracks remote interrupts in the per-cpu irq_cpustat_t.
+ *
+ * Note that we have to wait until the cache flush completes before
+ * updating the per-cpu last_cache_flush word, since otherwise another
+ * concurrent flush can race, conclude the flush has already
+ * completed, and start to use the page while it's still dirty
+ * remotely (running concurrently with the actual evict, presumably).
+ *
+ * NOTE(review): no dataplane deferral is visible in this function or
+ * in hv_flush_update(), and the mark_caches_evicted_*() macros defined
+ * in this file are no-ops; the corresponding bullets above may be
+ * stale -- confirm.
+ */
+void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
+ const struct cpumask *cache_cpumask_orig,
+ HV_VirtAddr tlb_va, unsigned long tlb_length,
+ unsigned long tlb_pgsize,
+ const struct cpumask *tlb_cpumask_orig,
+ HV_Remote_ASID *asids, int asidcount)
+{
+ int rc;
+ int timestamp = 0; /* happy compiler */
+ struct cpumask cache_cpumask_copy, tlb_cpumask_copy;
+ struct cpumask *cache_cpumask, *tlb_cpumask;
+ HV_PhysAddr cache_pa;
+ /* Text buffers for the failure message only; sized by NR_CPUS. */
+ char cache_buf[NR_CPUS*5], tlb_buf[NR_CPUS*5];
+
+ mb(); /* provided just to simplify "magic hypervisor" mode */
+
+ /*
+ * Canonicalize and copy the cpumasks.
+ */
+ if (cache_cpumask_orig && cache_control) {
+ cpumask_copy(&cache_cpumask_copy, cache_cpumask_orig);
+ cache_cpumask = &cache_cpumask_copy;
+ } else {
+ cpumask_clear(&cache_cpumask_copy);
+ cache_cpumask = NULL;
+ }
+ /* No cache mask means no cache operation at all. */
+ if (cache_cpumask == NULL)
+ cache_control = 0;
+ if (tlb_cpumask_orig && tlb_length) {
+ cpumask_copy(&tlb_cpumask_copy, tlb_cpumask_orig);
+ tlb_cpumask = &tlb_cpumask_copy;
+ } else {
+ cpumask_clear(&tlb_cpumask_copy);
+ tlb_cpumask = NULL;
+ }
+
+ hv_flush_update(cache_cpumask, tlb_cpumask, tlb_va, tlb_length,
+ asids, asidcount);
+ /* Widen before shifting so the physical address is not truncated. */
+ cache_pa = (HV_PhysAddr)cache_pfn << PAGE_SHIFT;
+ if (cache_control & HV_FLUSH_EVICT_L2)
+ timestamp = mark_caches_evicted_start();
+ /*
+ * NOTE(review): cache_cpumask/tlb_cpumask may be NULL here;
+ * this presumably relies on cpumask_bits() of a NULL mask
+ * yielding a NULL pointer -- confirm.
+ */
+ rc = hv_flush_remote(cache_pa, cache_control,
+ cpumask_bits(cache_cpumask),
+ tlb_va, tlb_length, tlb_pgsize,
+ cpumask_bits(tlb_cpumask),
+ asids, asidcount);
+ if (cache_control & HV_FLUSH_EVICT_L2)
+ mark_caches_evicted_finish(cache_cpumask, timestamp);
+ if (rc == 0)
+ return;
+
+ /* Failure: log every argument, then panic. */
+ cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy);
+ cpumask_scnprintf(tlb_buf, sizeof(tlb_buf), &tlb_cpumask_copy);
+
+ pr_err("hv_flush_remote(%#llx, %#lx, %p [%s],"
+ " %#lx, %#lx, %#lx, %p [%s], %p, %d) = %d\n",
+ cache_pa, cache_control, cache_cpumask, cache_buf,
+ (unsigned long)tlb_va, tlb_length, tlb_pgsize,
+ tlb_cpumask, tlb_buf,
+ asids, asidcount, rc);
+ panic("Unsafe to continue.");
+}
+
+/*
+ * Call finv_buffer_remote() on each of the 2^order pages starting at
+ * "page", using a temporary atomic kernel mapping of each page.
+ * The last argument (hfh) is 1 only for pages whose home is
+ * PAGE_HOME_HASH; any other home must be a valid cpu number.
+ */
+void flush_remote_page(struct page *page, int order)
+{
+ int i, pages = (1 << order);
+ for (i = 0; i < pages; ++i, ++page) {
+ void *p = kmap_atomic(page);
+ int hfh = 0;
+ int home = page_home(page);
+ /*
+ * Note the "else" inside the #if: with a CBOX home map the
+ * BUG_ON below is the else-branch of the hash check; without
+ * one it is unconditional.
+ */
+#if CHIP_HAS_CBOX_HOME_MAP()
+ if (home == PAGE_HOME_HASH)
+ hfh = 1;
+ else
+#endif
+ BUG_ON(home < 0 || home >= NR_CPUS);
+ finv_buffer_remote(p, PAGE_SIZE, hfh);
+ kunmap_atomic(p);
+ }
+}
+
+/*
+ * Request an L2 eviction (HV_FLUSH_EVICT_L2) on the cpus in "mask".
+ * No TLB flush is requested.
+ */
+void homecache_evict(const struct cpumask *mask)
+{
+ flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0);
+}
+
+/*
+ * Return a mask of the cpus whose caches currently own these pages.
+ * The return value is whether the pages are all coherently cached
+ * (i.e. none are immutable, incoherent, or uncached).
+ *
+ * page, pages: the first page and the number of consecutive pages.
+ * home_mask: output; cleared on entry, then filled in.
+ */
+static int homecache_mask(struct page *page, int pages,
+ struct cpumask *home_mask)
+{
+ int i;
+ int cached_coherently = 1;
+ cpumask_clear(home_mask);
+ for (i = 0; i < pages; ++i) {
+ int home = page_home(&page[i]);
+ /*
+ * Immutable or incoherent: report every possible cpu and
+ * stop at once, without examining the remaining pages.
+ */
+ if (home == PAGE_HOME_IMMUTABLE ||
+ home == PAGE_HOME_INCOHERENT) {
+ cpumask_copy(home_mask, cpu_possible_mask);
+ return 0;
+ }
+#if CHIP_HAS_CBOX_HOME_MAP()
+ /* Hash-for-home: add the whole hash-for-home cpu set. */
+ if (home == PAGE_HOME_HASH) {
+ cpumask_or(home_mask, home_mask, &hash_for_home_map);
+ continue;
+ }
+#endif
+ /* Uncached pages add no cpu but make the result 0. */
+ if (home == PAGE_HOME_UNCACHED) {
+ cached_coherently = 0;
+ continue;
+ }
+ /* Anything else must be a specific cpu number. */
+ BUG_ON(home < 0 || home >= NR_CPUS);
+ cpumask_set_cpu(home, home_mask);
+ }
+ return cached_coherently;
+}
+
+/*
+ * Return the passed length, or HV_FLUSH_EVICT_L2 if the length is at
+ * least CHIP_L2_CACHE_SIZE(), i.e. long enough that we believe we
+ * should evict the whole L2 cache instead.
+ */
+static unsigned long cache_flush_length(unsigned long length)
+{
+ return (length >= CHIP_L2_CACHE_SIZE()) ? HV_FLUSH_EVICT_L2 : length;
+}
+
+/*
+ * Flush a page (or 2^order consecutive pages) out of whatever
+ * cache(s) it is in.
+ *
+ * The set of cpus to flush is computed by homecache_mask(); the
+ * cache-control word is either the byte length of the range or
+ * HV_FLUSH_EVICT_L2, as chosen by cache_flush_length().
+ */
+void homecache_flush_cache(struct page *page, int order)
+{
+	int pages = 1 << order;
+	/*
+	 * Keep the control word as unsigned long end to end:
+	 * cache_flush_length() returns unsigned long and flush_remote()
+	 * takes unsigned long, so passing it through an int could
+	 * truncate or sign-extend a high flag bit.
+	 */
+	unsigned long length = cache_flush_length(pages * PAGE_SIZE);
+	unsigned long pfn = page_to_pfn(page);
+	struct cpumask home_mask;
+
+	homecache_mask(page, pages, &home_mask);
+	flush_remote(pfn, length, &home_mask, 0, 0, 0, NULL, NULL, 0);
+	sim_validate_lines_evicted(PFN_PHYS(pfn), pages * PAGE_SIZE);
+}
+
+
+/*
+ * Report the home corresponding to a given PTE.
+ *
+ * A PTE with the NC bit set is reported as PAGE_HOME_IMMUTABLE
+ * regardless of its mode; otherwise the result follows the PTE's
+ * caching mode.  Panics on a mode not listed below.
+ */
+static int pte_to_home(pte_t pte)
+{
+ if (hv_pte_get_nc(pte))
+ return PAGE_HOME_IMMUTABLE;
+ switch (hv_pte_get_mode(pte)) {
+ case HV_PTE_MODE_CACHE_TILE_L3:
+ /* Homed on one specific cpu: return that cpu. */
+ return get_remote_cache_cpu(pte);
+ case HV_PTE_MODE_CACHE_NO_L3:
+ return PAGE_HOME_INCOHERENT;
+ case HV_PTE_MODE_UNCACHED:
+ return PAGE_HOME_UNCACHED;
+#if CHIP_HAS_CBOX_HOME_MAP()
+ case HV_PTE_MODE_CACHE_HASH_L3:
+ return PAGE_HOME_HASH;
+#endif
+ }
+ /* No case matched. */
+ panic("Bad PTE %#llx\n", pte.val);
+}
+
+/*
+ * Update the home of a PTE if necessary (can also be used for a pgprot_t).
+ *
+ * pte: the PTE value to adjust.
+ * home: one of the PAGE_HOME_* constants handled below, or a cpu
+ * number in [0, NR_CPUS) that passes cpu_is_valid_lotar().
+ *
+ * Returns the adjusted PTE value.  File PTEs (and MMIO PTEs, where
+ * the chip has MMIO) are returned unchanged.
+ */
+pte_t pte_set_home(pte_t pte, int home)
+{
+ /* Check for non-linear file mapping "PTEs" and pass them through. */
+ if (pte_file(pte))
+ return pte;
+
+#if CHIP_HAS_MMIO()
+ /* Check for MMIO mappings and pass them through. */
+ if (hv_pte_get_mode(pte) == HV_PTE_MODE_MMIO)
+ return pte;
+#endif
+
+
+ /*
+ * Only immutable pages get NC mappings. If we have a
+ * non-coherent PTE, but the underlying page is not
+ * immutable, it's likely the result of a forced
+ * caching setting running up against ptrace setting
+ * the page to be writable underneath. In this case,
+ * just keep the PTE coherent.
+ */
+ if (hv_pte_get_nc(pte) && home != PAGE_HOME_IMMUTABLE) {
+ pte = hv_pte_clear_nc(pte);
+ pr_err("non-immutable page incoherently referenced: %#llx\n",
+ pte.val);
+ }
+
+ switch (home) {
+
+ case PAGE_HOME_UNCACHED:
+ pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED);
+ break;
+
+ case PAGE_HOME_INCOHERENT:
+ pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
+ break;
+
+ case PAGE_HOME_IMMUTABLE:
+ /*
+ * We could home this page anywhere, since it's immutable,
+ * but by default just home it to follow "hash_default".
+ */
+ BUG_ON(hv_pte_get_writable(pte));
+ if (pte_get_forcecache(pte)) {
+ /* Upgrade "force any cpu" to "No L3" for immutable. */
+ if (hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_TILE_L3
+ && pte_get_anyhome(pte)) {
+ pte = hv_pte_set_mode(pte,
+ HV_PTE_MODE_CACHE_NO_L3);
+ }
+ } else
+ /*
+ * No forced caching: the statement selected below is the
+ * body of the "else" above.  With a CBOX home map it is the
+ * hash_default if/else; without one it is the NO_L3
+ * assignment alone.
+ */
+#if CHIP_HAS_CBOX_HOME_MAP()
+ if (hash_default)
+ pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
+ else
+#endif
+ pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
+ /* Executed on every path through this case. */
+ pte = hv_pte_set_nc(pte);
+ break;
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+ case PAGE_HOME_HASH:
+ pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
+ break;
+#endif
+
+ default:
+ /* Any other value must name a specific, valid cpu. */
+ BUG_ON(home < 0 || home >= NR_CPUS ||
+ !cpu_is_valid_lotar(home));
+ pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3);
+ pte = set_remote_cache_cpu(pte, home);
+ break;
+ }
+
+#if CHIP_HAS_NC_AND_NOALLOC_BITS()
+ /* Apply the "noallocl2" boot option, if it was given. */
+ if (noallocl2)
+ pte = hv_pte_set_no_alloc_l2(pte);
+
+ /* Simplify "no local and no l3" to "uncached" */
+ if (hv_pte_get_no_alloc_l2(pte) && hv_pte_get_no_alloc_l1(pte) &&
+ hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) {
+ pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED);
+ }
+#endif
+
+ /* Checking this case here gives a better panic than from the hv. */
+ BUG_ON(hv_pte_get_mode(pte) == 0);
+
+ return pte;
+}
+EXPORT_SYMBOL(pte_set_home);
+
+/*
+ * The routines in this section are the "static" versions of the normal
+ * dynamic homecaching routines; they just set the home cache
+ * of a kernel page once, and require a full-chip cache/TLB flush,
+ * so they're not suitable for anything but infrequent use.
+ */
+
+/*
+ * The home that page_home() reports for highmem pages and that
+ * homecache_free_pages() restores before freeing: hash-for-home when
+ * the chip has a CBOX home map, otherwise the value 0 (which
+ * pte_set_home() treats as cpu number 0).
+ */
+#if CHIP_HAS_CBOX_HOME_MAP()
+static inline int initial_page_home(void) { return PAGE_HOME_HASH; }
+#else
+static inline int initial_page_home(void) { return 0; }
+#endif
+
+/*
+ * Report the current home of "page".  Highmem pages always report
+ * initial_page_home(); for lowmem pages the home is decoded from the
+ * page's kernel PTE.
+ */
+int page_home(struct page *page)
+{
+	unsigned long kva;
+
+	if (PageHighMem(page))
+		return initial_page_home();
+
+	kva = (unsigned long)page_address(page);
+	return pte_to_home(*virt_to_pte(NULL, kva));
+}
+EXPORT_SYMBOL(page_home);
+
+/*
+ * Change the home of 2^order consecutive lowmem pages to "home".
+ *
+ * The first page must be lowmem, have a page count of at most one and
+ * a map count of zero (each enforced by BUG_ON).  The routine first
+ * requests an L2 eviction on the cpus in cpu_cacheable_map and a TLB
+ * flush of the pages' kernel addresses on the online cpus, then
+ * rewrites each kernel PTE with the new home.
+ */
+void homecache_change_page_home(struct page *page, int order, int home)
+{
+ int i, pages = (1 << order);
+ unsigned long kva;
+
+ BUG_ON(PageHighMem(page));
+ BUG_ON(page_count(page) > 1);
+ BUG_ON(page_mapcount(page) != 0);
+ kva = (unsigned long) page_address(page);
+ flush_remote(0, HV_FLUSH_EVICT_L2, &cpu_cacheable_map,
+ kva, pages * PAGE_SIZE, PAGE_SIZE, cpu_online_mask,
+ NULL, 0);
+
+ for (i = 0; i < pages; ++i, kva += PAGE_SIZE) {
+ pte_t *ptep = virt_to_pte(NULL, kva);
+ pte_t pteval = *ptep;
+ /* Only present, non-huge kernel PTEs can be re-homed here. */
+ BUG_ON(!pte_present(pteval) || pte_huge(pteval));
+ __set_pte(ptep, pte_set_home(pteval, home));
+ }
+}
+
+/*
+ * Allocate 2^order lowmem pages with alloc_pages() and set their home
+ * to "home".  Returns NULL if the allocation fails.
+ */
+struct page *homecache_alloc_pages(gfp_t gfp_mask,
+				   unsigned int order, int home)
+{
+	struct page *pages;
+
+	/* Highmem requests are rejected: the pages must be lowmem. */
+	BUG_ON(gfp_mask & __GFP_HIGHMEM);
+
+	pages = alloc_pages(gfp_mask, order);
+	if (!pages)
+		return pages;
+
+	homecache_change_page_home(pages, order, home);
+	return pages;
+}
+EXPORT_SYMBOL(homecache_alloc_pages);
+
+/*
+ * As homecache_alloc_pages(), but allocating via alloc_pages_node()
+ * for node "nid".  Returns NULL if the allocation fails.
+ */
+struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
+					unsigned int order, int home)
+{
+	struct page *pages;
+
+	/* Highmem requests are rejected: the pages must be lowmem. */
+	BUG_ON(gfp_mask & __GFP_HIGHMEM);
+
+	pages = alloc_pages_node(nid, gfp_mask, order);
+	if (!pages)
+		return pages;
+
+	homecache_change_page_home(pages, order, home);
+	return pages;
+}
+
+/*
+ * Drop a reference to the 2^order pages at kernel address "addr".
+ * When the last reference goes away, the pages' home is reset to
+ * initial_page_home() before they are freed.  An "addr" of zero is
+ * ignored.
+ */
+void homecache_free_pages(unsigned long addr, unsigned int order)
+{
+	struct page *first;
+
+	if (addr == 0)
+		return;
+
+	VM_BUG_ON(!virt_addr_valid((void *)addr));
+	first = virt_to_page((void *)addr);
+
+	/* Someone else still holds a reference: nothing more to do. */
+	if (!put_page_testzero(first))
+		return;
+
+	homecache_change_page_home(first, order, initial_page_home());
+	if (order != 0) {
+		/* Reinitialize the page count before __free_pages(). */
+		init_page_count(first);
+		__free_pages(first, order);
+	} else {
+		free_hot_cold_page(first, 0);
+	}
+}
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
new file mode 100644
index 00000000..42cfcba4
--- /dev/null
+++ b/arch/tile/mm/hugetlbpage.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE Huge TLB Page Support for Kernel.
+ * Taken from i386 hugetlb implementation:
+ * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <linux/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Allocate page-table levels down to the pmd for "addr" in "mm" and
+ * return the pmd slot, viewed as a pte_t pointer, for use as the huge
+ * PTE.  Returns NULL if the pud allocation fails (or if pmd_alloc()
+ * returns NULL).  Only PMD_SIZE huge pages are accepted.
+ */
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+		      unsigned long addr, unsigned long sz)
+{
+	pte_t *hpte = NULL;
+	pgd_t *pgd;
+	pud_t *pud;
+
+	/* We do not yet support multiple huge page sizes. */
+	BUG_ON(sz != PMD_SIZE);
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (pud != NULL)
+		hpte = (pte_t *) pmd_alloc(mm, pud, addr);
+
+	/* An existing entry in the slot must already be a huge one. */
+	BUG_ON(hpte && !pte_none(*hpte) && !pte_huge(*hpte));
+
+	return hpte;
+}
+
+/*
+ * Look up, without allocating, the pmd slot for "addr" in "mm" and
+ * return it as a pte_t pointer.  Returns NULL if the pgd or pud
+ * entry is not present.
+ */
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset(mm, addr);
+	pud_t *pud;
+
+	if (!pgd_present(*pgd))
+		return NULL;
+
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return NULL;
+
+	return (pte_t *) pmd_offset(pud, addr);
+}
+
+#ifdef HUGETLB_TEST
+/*
+ * HUGETLB_TEST variant: look up the page backing "address" in a
+ * hugetlb vma of "mm".
+ *
+ * Returns ERR_PTR(-EINVAL) if "address" is not inside a hugetlb vma.
+ * Otherwise returns the page at the offset of "address" within the
+ * huge page mapped by its huge PTE.
+ *
+ * This variant previously referred to undeclared identifiers (addr,
+ * pte, vpfn) and so could not be compiled with HUGETLB_TEST defined;
+ * they are now declared and derived from "address".
+ *
+ * NOTE(review): a missing huge PTE is only warned about and is then
+ * dereferenced, as before; confirm whether callers can ever reach
+ * this with an unpopulated mapping.
+ */
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+			      int write)
+{
+	/* Index of the small page containing "address". */
+	unsigned long vpfn = address >> PAGE_SHIFT;
+	struct vm_area_struct *vma;
+	struct page *page;
+	pte_t *pte;
+
+	vma = find_vma(mm, address);
+	if (!vma || !is_vm_hugetlb_page(vma))
+		return ERR_PTR(-EINVAL);
+
+	pte = huge_pte_offset(mm, address);
+
+	/* hugetlb should be locked, and hence, prefaulted */
+	WARN_ON(!pte || pte_none(*pte));
+
+	page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
+
+	WARN_ON(!PageHead(page));
+
+	return page;
+}
+
+/* HUGETLB_TEST variant: never reports a pmd as huge. */
+int pmd_huge(pmd_t pmd)
+{
+ return 0;
+}
+
+/* HUGETLB_TEST variant: never reports a pud as huge. */
+int pud_huge(pud_t pud)
+{
+ return 0;
+}
+
+/* HUGETLB_TEST variant: always returns NULL. */
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+ pmd_t *pmd, int write)
+{
+ return NULL;
+}
+
+#else
+
+/* Address-based huge page lookup is not provided: always -EINVAL. */
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+ int write)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+/* Return 1 if the pmd entry has _PAGE_HUGE_PAGE set, else 0. */
+int pmd_huge(pmd_t pmd)
+{
+ return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE);
+}
+
+/* Return 1 if the pud entry has _PAGE_HUGE_PAGE set, else 0. */
+int pud_huge(pud_t pud)
+{
+ return !!(pud_val(pud) & _PAGE_HUGE_PAGE);
+}
+
+/*
+ * Return the page backing "address" inside the huge page mapped by
+ * "pmd": the first page of the huge page plus the offset of
+ * "address" within the PMD-sized region.  Returns NULL if pte_page()
+ * yields NULL.
+ */
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+			     pmd_t *pmd, int write)
+{
+	struct page *head = pte_page(*(pte_t *)pmd);
+
+	if (!head)
+		return head;
+
+	return head + ((address & ~PMD_MASK) >> PAGE_SHIFT);
+}
+
+/*
+ * Return the page backing "address" inside the huge page mapped by
+ * "pud": the first page of the huge page plus the offset of
+ * "address" within the PUD-sized region.  Returns NULL if pte_page()
+ * yields NULL.
+ */
+struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
+			     pud_t *pud, int write)
+{
+	struct page *head = pte_page(*(pte_t *)pud);
+
+	if (!head)
+		return head;
+
+	return head + ((address & ~PUD_MASK) >> PAGE_SHIFT);
+}
+
+/* Nothing is done here; always returns 0. */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+ return 0;
+}
+
+#endif
+
+#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+/*
+ * Bottom-up search for a free, huge-page-aligned range of "len" bytes
+ * in the current mm.  The incoming "addr" value is not used as a
+ * hint; the search starts at mm->free_area_cache, or at
+ * TASK_UNMAPPED_BASE when "len" fits within the cached hole size.
+ *
+ * Returns the chosen address and advances mm->free_area_cache past
+ * it, or -ENOMEM if no range is found below TASK_SIZE.
+ */
+static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long start_addr;
+
+ if (len > mm->cached_hole_size) {
+ start_addr = mm->free_area_cache;
+ } else {
+ start_addr = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = 0;
+ }
+
+full_search:
+ addr = ALIGN(start_addr, huge_page_size(h));
+
+ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+ /* At this point: (!vma || addr < vma->vm_end). */
+ if (TASK_SIZE - len < addr) {
+ /*
+ * Start a new search - just in case we missed
+ * some holes.
+ */
+ if (start_addr != TASK_UNMAPPED_BASE) {
+ start_addr = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = 0;
+ goto full_search;
+ }
+ return -ENOMEM;
+ }
+ /* The range fits before the next vma (or there is none). */
+ if (!vma || addr + len <= vma->vm_start) {
+ mm->free_area_cache = addr + len;
+ return addr;
+ }
+ /* Track the largest hole that was too small for this request. */
+ if (addr + mm->cached_hole_size < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
+ /* Retry just above this vma, at the next huge-page boundary. */
+ addr = ALIGN(vma->vm_end, huge_page_size(h));
+ }
+}
+
+/*
+ * Top-down search for a free, huge-page-aligned range of "len" bytes
+ * in the current mm, starting below mm->free_area_cache (capped at
+ * mm->mmap_base) and working towards lower addresses.
+ *
+ * If the search fails it is retried once from mm->mmap_base; if that
+ * also fails, the bottom-up allocator is used as a last resort and
+ * the top-down cache state is restored afterwards.
+ *
+ * Returns the chosen address, or whatever the bottom-up fallback
+ * returns (which may be -ENOMEM).
+ */
+static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
+		unsigned long addr0, unsigned long len,
+		unsigned long pgoff, unsigned long flags)
+{
+	struct hstate *h = hstate_file(file);
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma, *prev_vma;
+	unsigned long base = mm->mmap_base, addr = addr0;
+	unsigned long largest_hole = mm->cached_hole_size;
+	int first_time = 1;
+
+	/* don't allow allocations above current base */
+	if (mm->free_area_cache > base)
+		mm->free_area_cache = base;
+
+	if (len <= largest_hole) {
+		largest_hole = 0;
+		mm->free_area_cache = base;
+	}
+try_again:
+	/* make sure it can fit in the remaining address space */
+	if (mm->free_area_cache < len)
+		goto fail;
+
+	/* either no address requested or can't fit in requested address hole */
+	addr = (mm->free_area_cache - len) & huge_page_mask(h);
+	do {
+		/*
+		 * Lookup failure means no vma is above this address,
+		 * i.e. return with success:
+		 */
+		vma = find_vma_prev(mm, addr, &prev_vma);
+		if (!vma)
+			return addr;
+
+		/*
+		 * new region fits between prev_vma->vm_end and
+		 * vma->vm_start, use it:
+		 */
+		if (addr + len <= vma->vm_start &&
+		    (!prev_vma || (addr >= prev_vma->vm_end))) {
+			/* remember the address as a hint for next time */
+			mm->cached_hole_size = largest_hole;
+			mm->free_area_cache = addr;
+			return addr;
+		} else {
+			/* pull free_area_cache down to the first hole */
+			if (mm->free_area_cache == vma->vm_end) {
+				mm->free_area_cache = vma->vm_start;
+				mm->cached_hole_size = largest_hole;
+			}
+		}
+
+		/* remember the largest hole we saw so far */
+		if (addr + largest_hole < vma->vm_start)
+			largest_hole = vma->vm_start - addr;
+
+		/* try just below the current vma->vm_start */
+		addr = (vma->vm_start - len) & huge_page_mask(h);
+
+	} while (len <= vma->vm_start);
+
+fail:
+	/*
+	 * if hint left us with no space for the requested
+	 * mapping then try again:
+	 */
+	if (first_time) {
+		mm->free_area_cache = base;
+		largest_hole = 0;
+		first_time = 0;
+		goto try_again;
+	}
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	mm->cached_hole_size = ~0UL;
+	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
+			len, pgoff, flags);
+
+	/*
+	 * Restore the topdown base:
+	 */
+	mm->free_area_cache = base;
+	mm->cached_hole_size = ~0UL;
+
+	return addr;
+}
+
+/*
+ * Choose an address for a hugetlb mapping of "len" bytes.
+ *
+ * Returns -EINVAL if "len" is not a multiple of the huge page size or
+ * if a MAP_FIXED range fails prepare_hugepage_range(); -ENOMEM if
+ * "len" exceeds TASK_SIZE.  Otherwise returns the MAP_FIXED address,
+ * the aligned hint if it is free, or the result of the bottom-up or
+ * top-down search.
+ */
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+
+ if (len & ~huge_page_mask(h))
+ return -EINVAL;
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED) {
+ if (prepare_hugepage_range(file, addr, len))
+ return -EINVAL;
+ return addr;
+ }
+
+ /* Try the caller's hint, rounded up to a huge-page boundary. */
+ if (addr) {
+ addr = ALIGN(addr, huge_page_size(h));
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+ /* Search in the same direction as the mm's own allocator. */
+ if (current->mm->get_unmapped_area == arch_get_unmapped_area)
+ return hugetlb_get_unmapped_area_bottomup(file, addr, len,
+ pgoff, flags);
+ else
+ return hugetlb_get_unmapped_area_topdown(file, addr, len,
+ pgoff, flags);
+}
+
+/*
+ * Handler for the "hugepagesz=" boot option.  Only PMD_SIZE and
+ * PUD_SIZE are accepted; the matching hstate is registered and 1 is
+ * returned.  Any other size is reported with pr_err() and 0 is
+ * returned.
+ */
+static __init int setup_hugepagesz(char *opt)
+{
+	unsigned long ps = memparse(opt, &opt);
+
+	if (ps != PMD_SIZE && ps != PUD_SIZE) {
+		pr_err("hugepagesz: Unsupported page size %lu M\n",
+			ps >> 20);
+		return 0;
+	}
+
+	/* PMD_SIZE is tested first, so it wins if the two sizes coincide. */
+	if (ps == PMD_SIZE)
+		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
+	else
+		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+	return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
+
+#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
new file mode 100644
index 00000000..6a9d20dd
--- /dev/null
+++ b/arch/tile/mm/init.c
@@ -0,0 +1,1085 @@
+/*
+ * Copyright (C) 1995 Linus Torvalds
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/poison.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/efi.h>
+#include <linux/memory_hotplug.h>
+#include <linux/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/fixmap.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/homecache.h>
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+
+#include "migrate.h"
+
+#define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0))
+
+#ifndef __tilegx__
+unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE;
+EXPORT_SYMBOL(VMALLOC_RESERVE);
+#endif
+
+/*
+ * Create an L2 page table: L2_KERNEL_PGTABLE_SIZE bytes taken from the
+ * bootmem allocator, aligned to HV_PAGE_TABLE_ALIGN, with a goal of 0.
+ */
+static pte_t * __init alloc_pte(void)
+{
+ return __alloc_bootmem(L2_KERNEL_PGTABLE_SIZE, HV_PAGE_TABLE_ALIGN, 0);
+}
+
+/*
+ * L2 page tables per controller. We allocate these all at once from
+ * the bootmem allocator and store them here. This saves on kernel L2
+ * page table memory, compared to allocating a full 64K page per L2
+ * page table, and also means that in cases where we use huge pages,
+ * we are guaranteed to later be able to shatter those huge pages and
+ * switch to using these page tables instead, without requiring
+ * further allocation. Each l2_ptes[] entry points to the first page
+ * table for the first hugepage-size piece of memory on the
+ * controller; other page tables are just indexed directly, i.e. the
+ * L2 page tables are contiguous in memory for each controller.
+ */
+static pte_t *l2_ptes[MAX_NUMNODES];
+static int num_l2_ptes[MAX_NUMNODES];
+
+/*
+ * Allocate, in one bootmem chunk, the pte_t entries backing "pages"
+ * pages of memory on "node", and record the chunk in l2_ptes[] and
+ * num_l2_ptes[].  "pages" must be a multiple of HV_L2_ENTRIES; a count
+ * of zero leaves the node's entries untouched.
+ *
+ * Marked __init because it calls the boot-time-only __alloc_bootmem()
+ * and is only reached from __init code.
+ */
+static void __init init_prealloc_ptes(int node, int pages)
+{
+	BUG_ON(pages & (HV_L2_ENTRIES-1));
+	if (!pages)
+		return;
+
+	num_l2_ptes[node] = pages;
+	l2_ptes[node] = __alloc_bootmem(pages * sizeof(pte_t),
+					HV_PAGE_TABLE_ALIGN, 0);
+}
+
+/*
+ * Return the preallocated PTE slot for "pfn".  The node is looked up
+ * from the full pfn; the index into that node's l2_ptes[] array is the
+ * pfn with everything above bit (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT)
+ * masked off.  Out-of-range node or index values are fatal.
+ */
+pte_t *get_prealloc_pte(unsigned long pfn)
+{
+	int nid = pfn_to_nid(pfn);
+
+	pfn &= ~(-1UL << (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT));
+
+	BUG_ON(nid >= MAX_NUMNODES);
+	BUG_ON(pfn >= num_l2_ptes[nid]);
+
+	return l2_ptes[nid] + pfn;
+}
+
+/*
+ * What caching do we expect pages from the heap to have when
+ * they are allocated during bootup? (Once we've installed the
+ * "real" swapper_pg_dir.)
+ *
+ * Returns PAGE_HOME_HASH when the chip has a CBOX home map and
+ * hash_default is set; otherwise returns smp_processor_id(), i.e.
+ * the cpu executing this call.
+ */
+static int initial_heap_home(void)
+{
+#if CHIP_HAS_CBOX_HOME_MAP()
+ if (hash_default)
+ return PAGE_HOME_HASH;
+#endif
+ return smp_processor_id();
+}
+
+/*
+ * Point a middle page directory entry at an L2 page table.
+ *
+ * The entry is built from _PAGE_TABLE plus the table's physical frame
+ * number (its physical address shifted by HV_LOG2_PAGE_TABLE_ALIGN) and
+ * is homed like initial heap memory.  The table must be aligned to
+ * HV_PAGE_TABLE_ALIGN, and the stored entry must read back as the same
+ * table; either failure is a BUG.
+ */
+static void __init assign_pte(pmd_t *pmd, pte_t *page_table)
+{
+	phys_addr_t table_pa = __pa(page_table);
+	unsigned long ptfn = table_pa >> HV_LOG2_PAGE_TABLE_ALIGN;
+	pte_t entry = hv_pte_set_ptfn(__pgprot(_PAGE_TABLE), ptfn);
+
+	BUG_ON((table_pa & (HV_PAGE_TABLE_ALIGN-1)) != 0);
+
+	entry = pte_set_home(entry, initial_heap_home());
+	*(pte_t *)pmd = entry;
+
+	/* Sanity check: the entry must decode back to the same table. */
+	BUG_ON(page_table != (pte_t *)pmd_page_vaddr(*pmd));
+}
+
+#ifdef __tilegx__
+
+#if HV_L1_SIZE != HV_L2_SIZE
+# error Rework assumption that L1 and L2 page tables are same size.
+#endif
+
+/*
+ * Since pmd_t arrays and pte_t arrays are the same size, just use casts.
+ * (The #error check on HV_L1_SIZE and HV_L2_SIZE above enforces this.)
+ * alloc_pmd() returns a table from alloc_pte(); assign_pmd() installs a
+ * pmd table into a pud entry by reusing assign_pte().
+ */
+static inline pmd_t *alloc_pmd(void)
+{
+ return (pmd_t *)alloc_pte();
+}
+
+static inline void assign_pmd(pud_t *pud, pmd_t *pmd)
+{
+ assign_pte((pmd_t *)pud, (pte_t *)pmd);
+}
+
+#endif /* __tilegx__ */
+
+/* Replace the given pmd with a full PTE table. */
+void __init shatter_pmd(pmd_t *pmd)
+{
+	/*
+	 * The pfn currently stored in the pmd entry selects the
+	 * preallocated L2 table that takes over the mapping.
+	 */
+	unsigned long pfn = pte_pfn(*(pte_t *)pmd);
+
+	assign_pte(pmd, get_prealloc_pte(pfn));
+}
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * This function initializes a certain range of kernel virtual memory
+ * with new bootmem page tables, everywhere page tables are missing in
+ * the given range.
+ */
+
+/*
+ * NOTE: The pagetables are allocated contiguous on the physical space
+ * so we can cache the place of the first one and move around without
+ * checking the pgd every time.
+ */
+static void __init page_table_range_init(unsigned long start,
+		unsigned long end, pgd_t *pgd_base)
+{
+	unsigned long vaddr = start;
+	int pgd_idx = pgd_index(vaddr);
+	pgd_t *pgd = pgd_base + pgd_idx;
+
+	/*
+	 * Step one pgd entry and PMD_SIZE of address space at a time,
+	 * installing a fresh L2 table wherever the pmd is still empty.
+	 * Stops at "end" or at the last pgd entry, whichever is first.
+	 */
+	while (pgd_idx < PTRS_PER_PGD && vaddr != end) {
+		pmd_t *pmd = pmd_offset(pud_offset(pgd, vaddr), vaddr);
+
+		if (pmd_none(*pmd))
+			assign_pte(pmd, alloc_pte());
+
+		vaddr += PMD_SIZE;
+		pgd++;
+		pgd_idx++;
+	}
+}
+#endif /* CONFIG_HIGHMEM */
+
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+
+static int __initdata ktext_hash = 1; /* .text pages */
+static int __initdata kdata_hash = 1; /* .data and .bss pages */
+int __write_once hash_default = 1; /* kernel allocator pages */
+EXPORT_SYMBOL(hash_default);
+int __write_once kstack_hash = 1; /* if no homecaching, use h4h */
+#endif /* CHIP_HAS_CBOX_HOME_MAP */
+
+/*
+ * CPUs to use for striping the pages of kernel data. If hash-for-home
+ * is available, this is only relevant if kcache_hash sets up the
+ * .data and .bss to be page-homed, and we don't want the default mode
+ * of using the full set of kernel cpus for the striping.
+ */
+static __initdata struct cpumask kdata_mask;
+static __initdata int kdata_arg_seen;
+
+int __write_once kdata_huge; /* if no homecaching, small pages */
+
+
+/*
+ * Combine a generic pgprot_t with a cache home to get a cache-aware
+ * pgprot.  On chips with a CBOX home map, an immutable home also gets
+ * an explicit cache mode: hash-for-home L3 when ktext_hash is set,
+ * no-L3 otherwise.
+ */
+static pgprot_t __init construct_pgprot(pgprot_t prot, int home)
+{
+	prot = pte_set_home(prot, home);
+#if CHIP_HAS_CBOX_HOME_MAP()
+	if (home != PAGE_HOME_IMMUTABLE)
+		return prot;
+	if (ktext_hash)
+		return hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3);
+	prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3);
+#endif
+	return prot;
+}
+
+/*
+ * For a given kernel data VA, how should it be cached?
+ * We return the complete pgprot_t with caching bits set.
+ *
+ * The checks below are applied in order and the first match wins:
+ *  - kdata_huge set (hash-for-home chips only): PAGE_KERNEL, hash home;
+ *  - address below _sinittext - CODE_DELTA: PAGE_NONE;
+ *  - read-only data, or empty_zero_page: PAGE_KERNEL_RO, immutable home;
+ *  - the atomic_locks[] page (non-tilegx builds that do not find the
+ *    locks via a table): PAGE_KERNEL, hash home;
+ *  - at or above _end, or below _einitdata: initial_heap_home();
+ *  - kdata_hash set (hash-for-home chips only): PAGE_KERNEL, hash home;
+ *  - inside the __w1data range: initial_heap_home();
+ *  - anything else: a cpu chosen by walking kdata_mask round-robin,
+ *    one step per page from the end of __w1data up to the address.
+ *
+ * NOTE(review): the round-robin wrap test compares against NR_CPUS,
+ * while cpumask_next() is documented as returning >= nr_cpu_ids when
+ * the mask is exhausted; confirm the two agree at this point in boot.
+ */
+static pgprot_t __init init_pgprot(ulong address)
+{
+ int cpu;
+ unsigned long page;
+ enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+ /* For kdata=huge, everything is just hash-for-home. */
+ if (kdata_huge)
+ return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
+#endif
+
+ /* We map the aliased pages of permanent text inaccessible. */
+ if (address < (ulong) _sinittext - CODE_DELTA)
+ return PAGE_NONE;
+
+ /*
+ * We map read-only data non-coherent for performance. We could
+ * use neighborhood caching on TILE64, but it's not clear it's a win.
+ */
+ if ((address >= (ulong) __start_rodata &&
+ address < (ulong) __end_rodata) ||
+ address == (ulong) empty_zero_page) {
+ return construct_pgprot(PAGE_KERNEL_RO, PAGE_HOME_IMMUTABLE);
+ }
+
+#ifndef __tilegx__
+#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
+ /* Force the atomic_locks[] array page to be hash-for-home. */
+ if (address == (ulong) atomic_locks)
+ return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
+#endif
+#endif
+
+ /*
+ * Everything else that isn't data or bss is heap, so mark it
+ * with the initial heap home (hash-for-home, or this cpu). This
+ * includes any addresses after the loaded image and any address before
+ * _einitdata, since we already captured the case of text before
+ * _sinittext, and __pa(einittext) is approximately __pa(sinitdata).
+ *
+ * All the LOWMEM pages that we mark this way will get their
+ * struct page homecache properly marked later, in set_page_homes().
+ * The HIGHMEM pages we leave with a default zero for their
+ * homes, but with a zero free_time we don't have to actually
+ * do a flush action the first time we use them, either.
+ */
+ if (address >= (ulong) _end || address < (ulong) _einitdata)
+ return construct_pgprot(PAGE_KERNEL, initial_heap_home());
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+ /* Use hash-for-home if requested for data/bss. */
+ if (kdata_hash)
+ return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
+#endif
+
+ /*
+ * Make the w1data homed like heap to start with, to avoid
+ * making it part of the page-striped data area when we're just
+ * going to convert it to read-only soon anyway.
+ */
+ if (address >= (ulong)__w1data_begin && address < (ulong)__w1data_end)
+ return construct_pgprot(PAGE_KERNEL, initial_heap_home());
+
+ /*
+ * Otherwise we just hand out consecutive cpus. To avoid
+ * requiring this function to hold state, we just walk forward from
+ * _sdata by PAGE_SIZE, skipping the readonly and init data, to reach
+ * the requested address, while walking cpu home around kdata_mask.
+ * This is typically no more than a dozen or so iterations.
+ */
+ page = (((ulong)__w1data_end) + PAGE_SIZE - 1) & PAGE_MASK;
+ BUG_ON(address < page || address >= (ulong)_end);
+ cpu = cpumask_first(&kdata_mask);
+ for (; page < address; page += PAGE_SIZE) {
+ if (page >= (ulong)&init_thread_union &&
+ page < (ulong)&init_thread_union + THREAD_SIZE)
+ continue;
+ if (page == (ulong)empty_zero_page)
+ continue;
+#ifndef __tilegx__
+#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
+ if (page == (ulong)atomic_locks)
+ continue;
+#endif
+#endif
+ cpu = cpumask_next(cpu, &kdata_mask);
+ if (cpu == NR_CPUS)
+ cpu = cpumask_first(&kdata_mask);
+ }
+ return construct_pgprot(PAGE_KERNEL, cpu);
+}
+
+/*
+ * This function sets up how we cache the kernel text. If we have
+ * hash-for-home support, normally that is used instead (see the
+ * kcache_hash boot flag for more information). But if we end up
+ * using a page-based caching technique, this option sets up the
+ * details of that. In addition, the "ktext=nocache" option may
+ * always be used to disable local caching of text pages, if desired.
+ *
+ * Accepted forms of the argument, as parsed below:
+ *   nocache[,<rest>]  set ktext_nocache, then parse <rest> if present;
+ *   huge              one huge page (only logs a message);
+ *   local             small pages, cached locally;
+ *   all               small pages, caching neighborhood of all cpus;
+ *   <cpulist>         small pages if more than one cpu is listed,
+ *                     otherwise one huge page on the listed cpu.
+ * Returns 0 on success and -EINVAL for a NULL or unrecognized string.
+ * Note that ktext_nondataplane is declared here but is not set by
+ * this parser.
+ */
+
+static int __initdata ktext_arg_seen;
+static int __initdata ktext_small;
+static int __initdata ktext_local;
+static int __initdata ktext_all;
+static int __initdata ktext_nondataplane;
+static int __initdata ktext_nocache;
+static struct cpumask __initdata ktext_mask;
+
+static int __init setup_ktext(char *str)
+{
+ if (str == NULL)
+ return -EINVAL;
+
+ /* If you have a leading "nocache", turn off ktext caching */
+ if (strncmp(str, "nocache", 7) == 0) {
+ ktext_nocache = 1;
+ pr_info("ktext: disabling local caching of kernel text\n");
+ str += 7;
+ if (*str == ',')
+ ++str;
+ if (*str == '\0')
+ return 0;
+ }
+
+ ktext_arg_seen = 1;
+
+ /* Default setting on Tile64: use a huge page */
+ if (strcmp(str, "huge") == 0)
+ pr_info("ktext: using one huge locally cached page\n");
+
+ /* Pay TLB cost but get no cache benefit: cache small pages locally */
+ else if (strcmp(str, "local") == 0) {
+ ktext_small = 1;
+ ktext_local = 1;
+ pr_info("ktext: using small pages with local caching\n");
+ }
+
+ /* Neighborhood cache ktext pages on all cpus. */
+ else if (strcmp(str, "all") == 0) {
+ ktext_small = 1;
+ ktext_all = 1;
+ pr_info("ktext: using maximal caching neighborhood\n");
+ }
+
+
+ /* Neighborhood ktext pages on specified mask */
+ else if (cpulist_parse(str, &ktext_mask) == 0) {
+ char buf[NR_CPUS * 5];
+ cpulist_scnprintf(buf, sizeof(buf), &ktext_mask);
+ if (cpumask_weight(&ktext_mask) > 1) {
+ ktext_small = 1;
+ pr_info("ktext: using caching neighborhood %s "
+ "with small pages\n", buf);
+ } else {
+ pr_info("ktext: caching on cpu %s with one huge page\n",
+ buf);
+ }
+ }
+
+ else if (*str)
+ return -EINVAL;
+
+ return 0;
+}
+
+early_param("ktext", setup_ktext);
+
+
+/*
+ * Adjust a kernel-text pgprot according to the "ktext=nocache" flag:
+ * without the flag, apply hv_pte_set_nc(); with it, apply
+ * hv_pte_set_no_alloc_l2() on chips that have the NC and NOALLOC bits
+ * and leave the pgprot unchanged on chips that do not.
+ */
+static inline pgprot_t ktext_set_nocache(pgprot_t prot)
+{
+	if (!ktext_nocache)
+		return hv_pte_set_nc(prot);
+#if CHIP_HAS_NC_AND_NOALLOC_BITS()
+	prot = hv_pte_set_no_alloc_l2(prot);
+#endif
+	return prot;
+}
+
+/*
+ * Look up the pmd entry covering "va" in the given top-level table.
+ * On tilegx an empty pud entry is first populated with a newly
+ * allocated pmd table; other chips do a plain lookup.
+ */
+#ifdef __tilegx__
+static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
+{
+	pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va);
+
+	if (pud_none(*pud))
+		assign_pmd(pud, alloc_pmd());
+	return pmd_offset(pud, va);
+}
+#else
+static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
+{
+	pgd_t *pgd = &pgtables[pgd_index(va)];
+
+	return pmd_offset(pud_offset(pgd, va), va);
+}
+#endif
+
+/* Temporary page table we use for staging. */
+static pgd_t pgtables[PTRS_PER_PGD]
+ __attribute__((aligned(HV_PAGE_TABLE_ALIGN)));
+
+/*
+ * This maps the physical memory to kernel virtual address space, a total
+ * of max_low_pfn pages, by creating page tables starting from address
+ * PAGE_OFFSET.
+ *
+ * This routine transitions us from using a set of compiled-in large
+ * pages to using some more precise caching, including removing access
+ * to code pages mapped at PAGE_OFFSET (executed only at MEM_SV_START)
+ * marking read-only data as locally cacheable, striping the remaining
+ * .data and .bss across all the available tiles, and removing access
+ * to pages above the top of RAM (thus ensuring a page fault from a bad
+ * virtual address rather than a hypervisor shoot down for accessing
+ * memory outside the assigned limits).
+ *
+ * The new tables are built in the static pgtables[] staging array,
+ * installed with flush_and_install_context(), then copied into
+ * pgd_base, which is installed in turn with __install_page_table().
+ */
+static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
+{
+ unsigned long address, pfn;
+ pmd_t *pmd;
+ pte_t *pte;
+ int pte_ofs;
+ const struct cpumask *my_cpu_mask = cpumask_of(smp_processor_id());
+ struct cpumask kstripe_mask;
+ int rc, i;
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+ if (ktext_arg_seen && ktext_hash) {
+ pr_warning("warning: \"ktext\" boot argument ignored"
+ " if \"kcache_hash\" sets up text hash-for-home\n");
+ ktext_small = 0;
+ }
+
+ if (kdata_arg_seen && kdata_hash) {
+ pr_warning("warning: \"kdata\" boot argument ignored"
+ " if \"kcache_hash\" sets up data hash-for-home\n");
+ }
+
+ if (kdata_huge && !hash_default) {
+ pr_warning("warning: disabling \"kdata=huge\"; requires"
+ " kcache_hash=all or =allbutstack\n");
+ kdata_huge = 0;
+ }
+#endif
+
+ /*
+ * Set up a mask for cpus to use for kernel striping.
+ * This is normally all cpus, but minus dataplane cpus if any.
+ * If the dataplane covers the whole chip, we stripe over
+ * the whole chip too.
+ */
+ cpumask_copy(&kstripe_mask, cpu_possible_mask);
+ if (!kdata_arg_seen)
+ kdata_mask = kstripe_mask;
+
+ /* Allocate and fill in L2 page tables */
+ for (i = 0; i < MAX_NUMNODES; ++i) {
+#ifdef CONFIG_HIGHMEM
+ unsigned long end_pfn = node_lowmem_end_pfn[i];
+#else
+ unsigned long end_pfn = node_end_pfn[i];
+#endif
+ unsigned long end_huge_pfn = 0;
+
+ /* Pre-shatter the last huge page to allow per-cpu pages. */
+ if (kdata_huge)
+ end_huge_pfn = end_pfn - (HPAGE_SIZE >> PAGE_SHIFT);
+
+ pfn = node_start_pfn[i];
+
+ /* Allocate enough memory to hold L2 page tables for node. */
+ init_prealloc_ptes(i, end_pfn - pfn);
+
+ address = (unsigned long) pfn_to_kaddr(pfn);
+ while (pfn < end_pfn) {
+ BUG_ON(address & (HPAGE_SIZE-1));
+ pmd = get_pmd(pgtables, address);
+ pte = get_prealloc_pte(pfn);
+ if (pfn < end_huge_pfn) {
+ pgprot_t prot = init_pgprot(address);
+ *(pte_t *)pmd = pte_mkhuge(pfn_pte(pfn, prot));
+ for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE;
+ pfn++, pte_ofs++, address += PAGE_SIZE)
+ pte[pte_ofs] = pfn_pte(pfn, prot);
+ } else {
+ if (kdata_huge)
+ printk(KERN_DEBUG "pre-shattered huge"
+ " page at %#lx\n", address);
+ for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE;
+ pfn++, pte_ofs++, address += PAGE_SIZE) {
+ pgprot_t prot = init_pgprot(address);
+ pte[pte_ofs] = pfn_pte(pfn, prot);
+ }
+ assign_pte(pmd, pte);
+ }
+ }
+ }
+
+ /*
+ * Set or check ktext_mask now that we have cpu_possible_mask
+ * and kstripe_mask to work with.
+ */
+ if (ktext_all)
+ cpumask_copy(&ktext_mask, cpu_possible_mask);
+ else if (ktext_nondataplane)
+ ktext_mask = kstripe_mask;
+ else if (!cpumask_empty(&ktext_mask)) {
+ /* Sanity-check any mask that was requested */
+ struct cpumask bad;
+ cpumask_andnot(&bad, &ktext_mask, cpu_possible_mask);
+ cpumask_and(&ktext_mask, &ktext_mask, cpu_possible_mask);
+ if (!cpumask_empty(&bad)) {
+ char buf[NR_CPUS * 5];
+ cpulist_scnprintf(buf, sizeof(buf), &bad);
+ pr_info("ktext: not using unavailable cpus %s\n", buf);
+ }
+ if (cpumask_empty(&ktext_mask)) {
+ pr_warning("ktext: no valid cpus; caching on %d.\n",
+ smp_processor_id());
+ cpumask_copy(&ktext_mask,
+ cpumask_of(smp_processor_id()));
+ }
+ }
+
+ address = MEM_SV_INTRPT;
+ pmd = get_pmd(pgtables, address);
+ pfn = 0; /* code starts at PA 0 */
+ if (ktext_small) {
+ /* Allocate an L2 PTE for the kernel text */
+ int cpu = 0;
+ pgprot_t prot = construct_pgprot(PAGE_KERNEL_EXEC,
+ PAGE_HOME_IMMUTABLE);
+
+ if (ktext_local) {
+ if (ktext_nocache)
+ prot = hv_pte_set_mode(prot,
+ HV_PTE_MODE_UNCACHED);
+ else
+ prot = hv_pte_set_mode(prot,
+ HV_PTE_MODE_CACHE_NO_L3);
+ } else {
+ prot = hv_pte_set_mode(prot,
+ HV_PTE_MODE_CACHE_TILE_L3);
+ cpu = cpumask_first(&ktext_mask);
+
+ prot = ktext_set_nocache(prot);
+ }
+
+ BUG_ON(address != (unsigned long)_stext);
+ pte = NULL;
+ for (; address < (unsigned long)_einittext;
+ pfn++, address += PAGE_SIZE) {
+ pte_ofs = pte_index(address);
+ if (pte_ofs == 0) {
+ if (pte)
+ assign_pte(pmd++, pte);
+ pte = alloc_pte();
+ }
+ if (!ktext_local) {
+ prot = set_remote_cache_cpu(prot, cpu);
+ cpu = cpumask_next(cpu, &ktext_mask);
+ if (cpu == NR_CPUS)
+ cpu = cpumask_first(&ktext_mask);
+ }
+ pte[pte_ofs] = pfn_pte(pfn, prot);
+ }
+ if (pte)
+ assign_pte(pmd, pte);
+ } else {
+ pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC);
+ pteval = pte_mkhuge(pteval);
+#if CHIP_HAS_CBOX_HOME_MAP()
+ if (ktext_hash) {
+ pteval = hv_pte_set_mode(pteval,
+ HV_PTE_MODE_CACHE_HASH_L3);
+ pteval = ktext_set_nocache(pteval);
+ } else
+#endif /* CHIP_HAS_CBOX_HOME_MAP() */
+ if (cpumask_weight(&ktext_mask) == 1) {
+ pteval = set_remote_cache_cpu(pteval,
+ cpumask_first(&ktext_mask));
+ pteval = hv_pte_set_mode(pteval,
+ HV_PTE_MODE_CACHE_TILE_L3);
+ pteval = ktext_set_nocache(pteval);
+ } else if (ktext_nocache)
+ pteval = hv_pte_set_mode(pteval,
+ HV_PTE_MODE_UNCACHED);
+ else
+ pteval = hv_pte_set_mode(pteval,
+ HV_PTE_MODE_CACHE_NO_L3);
+ for (; address < (unsigned long)_einittext;
+ pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE)
+ *(pte_t *)(pmd++) = pfn_pte(pfn, pteval);
+ }
+
+ /* Set swapper_pgprot here so it is flushed to memory right away. */
+ swapper_pgprot = init_pgprot((unsigned long)swapper_pg_dir);
+
+ /*
+ * Since we may be changing the caching of the stack and page
+ * table itself, we invoke an assembly helper to do the
+ * following steps:
+ *
+ * - flush the cache so we start with an empty slate
+ * - install pgtables[] as the real page table
+ * - flush the TLB so the new page table takes effect
+ */
+ rc = flush_and_install_context(__pa(pgtables),
+ init_pgprot((unsigned long)pgtables),
+ __get_cpu_var(current_asid),
+ cpumask_bits(my_cpu_mask));
+ BUG_ON(rc != 0);
+
+ /* Copy the page table back to the normal swapper_pg_dir. */
+ memcpy(pgd_base, pgtables, sizeof(pgtables));
+ __install_page_table(pgd_base, __get_cpu_var(current_asid),
+ swapper_pgprot);
+
+ /*
+ * We just read swapper_pgprot and thus brought it into the cache,
+ * with its new home & caching mode. When we start the other CPUs,
+ * they're going to reference swapper_pgprot via their initial fake
+ * VA-is-PA mappings, which cache everything locally. At that
+ * time, if it's in our cache with a conflicting home, the
+ * simulator's coherence checker will complain. So, flush it out
+ * of our cache; we're not going to ever use it again anyway.
+ */
+ __insn_finv(&swapper_pgprot);
+}
+
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.
+ *
+ * On Tile, the only valid things for which we can just hand out unchecked
+ * PTEs are the kernel code and data. Anything else might change its
+ * homing with time, and we wouldn't know to adjust the /dev/mem PTEs.
+ * Note that init_thread_union is released to heap soon after boot,
+ * so we include it in the init data.
+ *
+ * For TILE-Gx, we might want to consider allowing access to PA
+ * regions corresponding to PCI space, etc.
+ *
+ * Returns non-zero only when all three clauses below hold.
+ *
+ * NOTE(review): each exclusion is written as !(pagenr >= lo || pagenr
+ * <= hi).  For the init-text clause, any pagenr satisfies one of the
+ * two inner comparisons whenever pfn(_sinittext) <= pfn(_einittext-1),
+ * so that clause is never true and the function appears to always
+ * return 0.  A range test would need "&&"; confirm the intended ranges
+ * against the linker script before changing the logic.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+ return pagenr < kaddr_to_pfn(_end) &&
+ !(pagenr >= kaddr_to_pfn(&init_thread_union) ||
+ pagenr < kaddr_to_pfn(_einitdata)) &&
+ !(pagenr >= kaddr_to_pfn(_sinittext) ||
+ pagenr <= kaddr_to_pfn(_einittext-1));
+}
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * Make sure page tables exist for the LAST_PKMAP pages starting at
+ * PKMAP_BASE, then record the PTE that maps PKMAP_BASE (looked up via
+ * swapper_pg_dir) in pkmap_page_table.
+ */
+static void __init permanent_kmaps_init(pgd_t *pgd_base)
+{
+	const unsigned long base = PKMAP_BASE;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	page_table_range_init(base, base + PAGE_SIZE*LAST_PKMAP, pgd_base);
+
+	pgd = swapper_pg_dir + pgd_index(base);
+	pud = pud_offset(pgd, base);
+	pmd = pmd_offset(pud, base);
+	pkmap_page_table = pte_offset_kernel(pmd, base);
+}
+#endif /* CONFIG_HIGHMEM */
+
+
+/*
+ * Hand the page frames in [start, end) to the page allocator and add
+ * them to totalram_pages.  Pages are released in power-of-two batches:
+ * the batch order starts from the alignment of the current pfn, is
+ * capped at MAX_ORDER-1, and is reduced until the batch fits before
+ * "end".
+ */
+static void __init init_free_pfn_range(unsigned long start, unsigned long end)
+{
+	struct page *page = pfn_to_page(start);
+	unsigned long pfn = start;
+
+	while (pfn < end) {
+		int order = __ffs(pfn);
+		int count, i;
+
+		if (order >= MAX_ORDER)
+			order = MAX_ORDER-1;
+		for (count = 1 << order; pfn + count > end; count >>= 1)
+			--order;
+
+		for (i = 0; i < count; ++i) {
+			struct page *p = page + i;
+
+			__ClearPageReserved(p);
+			/*
+			 * Hacky direct set to avoid unnecessary
+			 * lock take/release for EVERY page here.
+			 */
+			p->_count.counter = 0;
+			p->_mapcount.counter = -1;
+		}
+
+		/* Only the head page of the batch gets a live refcount. */
+		init_page_count(page);
+		__free_pages(page, order);
+		totalram_pages += count;
+
+		page += count;
+		pfn += count;
+	}
+}
+
+/*
+ * Free the pages of every zone that does not start at pfn 0 (those
+ * are treated as bootmem and skipped).  For ZONE_NORMAL the range
+ * starts at node_free_pfn[]; with kdata_huge it is cut short at the
+ * node's per-cpu pfn; with CONFIG_PCI the PCI reserved window is left
+ * out.  Highmem zones also add to totalhigh_pages.
+ */
+static void __init set_non_bootmem_pages_init(void)
+{
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		unsigned long first_pfn = zone->zone_start_pfn;
+		unsigned long last_pfn;
+		int node, zidx;
+
+		if (first_pfn == 0)
+			continue;  /* bootmem */
+
+		node = zone->zone_pgdat->node_id;
+		zidx = zone_idx(zone);
+		last_pfn = first_pfn + zone->spanned_pages;
+
+		if (zidx == ZONE_NORMAL) {
+			BUG_ON(first_pfn != node_start_pfn[node]);
+			first_pfn = node_free_pfn[node];
+		}
+#ifdef CONFIG_HIGHMEM
+		if (zidx == ZONE_HIGHMEM)
+			totalhigh_pages += zone->spanned_pages;
+#endif
+		if (kdata_huge) {
+			unsigned long percpu_pfn = node_percpu_pfn[node];
+
+			if (first_pfn < percpu_pfn && percpu_pfn < last_pfn)
+				last_pfn = percpu_pfn;
+		}
+#ifdef CONFIG_PCI
+		if (first_pfn <= pci_reserve_start_pfn &&
+		    pci_reserve_start_pfn < last_pfn) {
+			/* Free whatever lies above the reserved window. */
+			if (last_pfn > pci_reserve_end_pfn)
+				init_free_pfn_range(pci_reserve_end_pfn,
+						    last_pfn);
+			last_pfn = pci_reserve_start_pfn;
+		}
+#endif
+		init_free_pfn_range(first_pfn, last_pfn);
+	}
+}
+
+/*
+ * paging_init() sets up the page tables - note that all of lowmem is
+ * already mapped by head.S.
+ *
+ * The kernel mappings are rebuilt by kernel_physical_mapping_init().
+ * With CONFIG_HIGHMEM, page tables for the fixmap range and the pkmap
+ * range are then created.  On tilegx, the single pmd array that
+ * covers the vmalloc range is allocated and installed.
+ */
+void __init paging_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+ unsigned long vaddr, end;
+#endif
+#ifdef __tilegx__
+ pud_t *pud;
+#endif
+ pgd_t *pgd_base = swapper_pg_dir;
+
+ kernel_physical_mapping_init(pgd_base);
+
+#ifdef CONFIG_HIGHMEM
+ /*
+ * Fixed mappings, only the page table structure has to be
+ * created - mappings will be set by set_fixmap():
+ */
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+ end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+ page_table_range_init(vaddr, end, pgd_base);
+ permanent_kmaps_init(pgd_base);
+#endif
+
+#ifdef __tilegx__
+ /*
+ * Since GX allocates just one pmd_t array worth of vmalloc space,
+ * we go ahead and allocate it statically here, then share it
+ * globally. As a result we don't have to worry about any task
+ * changing init_mm once we get up and running, and there's no
+ * need for e.g. vmalloc_sync_all().
+ */
+ BUILD_BUG_ON(pgd_index(VMALLOC_START) != pgd_index(VMALLOC_END));
+ pud = pud_offset(pgd_base + pgd_index(VMALLOC_START), VMALLOC_START);
+ assign_pmd(pud, alloc_pmd());
+#endif
+}
+
+
+/*
+ * Walk the kernel page tables and derive the page_home() from
+ * the PTEs, so that set_pte() can properly validate the caching
+ * of all PTEs it sees.
+ *
+ * NOTE(review): the body below is empty, so no walk is performed at
+ * present; the text above presumably describes the intended behavior.
+ */
+void __init set_page_homes(void)
+{
+}
+
+/* With CONFIG_FLATMEM, set max_mapnr to max_low_pfn; otherwise a no-op. */
+static void __init set_max_mapnr_init(void)
+{
+#ifdef CONFIG_FLATMEM
+ max_mapnr = max_low_pfn;
+#endif
+}
+
+/*
+ * Release boot-time memory to the page allocator and report the
+ * resulting memory layout.
+ *
+ * All bootmem is freed first, then the remaining lowmem and all
+ * highmem via set_non_bootmem_pages_init().  A summary line and a set
+ * of KERN_DEBUG lines describing the kernel virtual address ranges are
+ * printed.  On non-tilegx chips the atomic locks are finally switched
+ * to their per-cpu form.
+ */
+void __init mem_init(void)
+{
+	int codesize, datasize, initsize;
+	int i;
+#ifndef __tilegx__
+	void *last;
+#endif
+
+#ifdef CONFIG_FLATMEM
+	BUG_ON(!mem_map);
+#endif
+
+#ifdef CONFIG_HIGHMEM
+	/* check that fixmap and pkmap do not overlap */
+	if (PKMAP_ADDR(LAST_PKMAP-1) >= FIXADDR_START) {
+		pr_err("fixmap and kmap areas overlap"
+		       " - this will crash\n");
+		pr_err("pkstart: %lxh pkend: %lxh fixstart %lxh\n",
+		       PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP-1),
+		       FIXADDR_START);
+		BUG();
+	}
+#endif
+
+	set_max_mapnr_init();
+
+	/* this will put all bootmem onto the freelists */
+	totalram_pages += free_all_bootmem();
+
+	/* count all remaining LOWMEM and give all HIGHMEM to page allocator */
+	set_non_bootmem_pages_init();
+
+	codesize = (unsigned long)&_etext - (unsigned long)&_text;
+	datasize = (unsigned long)&_end - (unsigned long)&_sdata;
+	initsize = (unsigned long)&_einittext - (unsigned long)&_sinittext;
+	initsize += (unsigned long)&_einitdata - (unsigned long)&_sinitdata;
+
+	/* The highmem figure is passed as unsigned long, hence %lu. */
+	pr_info("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init, %luk highmem)\n",
+		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		num_physpages << (PAGE_SHIFT-10),
+		codesize >> 10,
+		datasize >> 10,
+		initsize >> 10,
+		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
+	       );
+
+	/*
+	 * In debug mode, dump some interesting memory mappings.
+	 */
+#ifdef CONFIG_HIGHMEM
+	printk(KERN_DEBUG " KMAP %#lx - %#lx\n",
+	       FIXADDR_START, FIXADDR_TOP + PAGE_SIZE - 1);
+	printk(KERN_DEBUG " PKMAP %#lx - %#lx\n",
+	       PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1);
+#endif
+#ifdef CONFIG_HUGEVMAP
+	printk(KERN_DEBUG " HUGEMAP %#lx - %#lx\n",
+	       HUGE_VMAP_BASE, HUGE_VMAP_END - 1);
+#endif
+	printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n",
+	       _VMALLOC_START, _VMALLOC_END - 1);
+#ifdef __tilegx__
+	for (i = MAX_NUMNODES-1; i >= 0; --i) {
+		struct pglist_data *node = &node_data[i];
+		if (node->node_present_pages) {
+			unsigned long start = (unsigned long)
+				pfn_to_kaddr(node->node_start_pfn);
+			unsigned long end = start +
+				(node->node_present_pages << PAGE_SHIFT);
+			printk(KERN_DEBUG " MEM%d %#lx - %#lx\n",
+			       i, start, end - 1);
+		}
+	}
+#else
+	/* Walk nodes from the top down; each range ends where the last began. */
+	last = high_memory;
+	for (i = MAX_NUMNODES-1; i >= 0; --i) {
+		if ((unsigned long)vbase_map[i] != -1UL) {
+			printk(KERN_DEBUG " LOWMEM%d %#lx - %#lx\n",
+			       i, (unsigned long) (vbase_map[i]),
+			       (unsigned long) (last-1));
+			last = vbase_map[i];
+		}
+	}
+#endif
+
+#ifndef __tilegx__
+	/*
+	 * Convert from using one lock for all atomic operations to
+	 * one per cpu.
+	 */
+	__init_atomic_per_cpu();
+#endif
+}
+
+/*
+ * This is for the non-NUMA, single-node SMP system case.
+ * As in the x86 code, hot-added memory is always placed in the
+ * last zone (highmem, when configured) for now.
+ */
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+int arch_add_memory(u64 start, u64 size)
+{
+	/* New pages always go into the last zone of the single node. */
+	struct zone *zone = contig_page_data.node_zones + MAX_NR_ZONES-1;
+	unsigned long first_pfn = start >> PAGE_SHIFT;
+	unsigned long npages = size >> PAGE_SHIFT;
+
+	return __add_pages(zone, first_pfn, npages);
+}
+
+/* Memory removal is not implemented: always returns -EINVAL. */
+int remove_memory(u64 start, u64 size)
+{
+ return -EINVAL;
+}
+#endif
+
+struct kmem_cache *pgd_cache;
+
+/*
+ * Create the slab cache used for pgd allocations.  Objects are
+ * SIZEOF_PGD bytes, aligned to SIZEOF_PGD; failure to create the
+ * cache is fatal.
+ */
+void __init pgtable_cache_init(void)
+{
+	pgd_cache = kmem_cache_create("pgd", SIZEOF_PGD, SIZEOF_PGD, 0, NULL);
+	if (pgd_cache == NULL)
+		panic("pgtable_cache_init(): Cannot create pgd cache");
+}
+
+#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
+/*
+ * The __w1data area is written only during initialization and is
+ * read-only (and so freely cacheable) afterwards.  Rewrite the PTE of
+ * every page in [__w1data_begin, __w1data_end) as PAGE_KERNEL_RO.
+ * The area must start on a page boundary and must not be mapped by a
+ * huge page.
+ */
+static void mark_w1data_ro(void)
+{
+	unsigned long va = (unsigned long)__w1data_begin;
+	const unsigned long last = (unsigned long)__w1data_end - 1;
+
+	BUG_ON((va & (PAGE_SIZE-1)) != 0);
+
+	while (va <= last) {
+		unsigned long pfn = kaddr_to_pfn((void *)va);
+		pte_t *ptep = virt_to_pte(NULL, va);
+
+		BUG_ON(pte_huge(*ptep));  /* not relevant for kdata_huge */
+		set_pte_at(&init_mm, va, ptep, pfn_pte(pfn, PAGE_KERNEL_RO));
+		va += PAGE_SIZE;
+	}
+}
+#endif
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static long __write_once initfree;
+#else
+static long __write_once initfree = 1;
+#endif
+
+/*
+ * Handle "initfree=": select whether to free (non-zero) or mark
+ * unusable (0) the __init pages.  A value that does not parse as a
+ * number is ignored.  Always returns 1.
+ */
+static int __init set_initfree(char *str)
+{
+	long val;
+
+	if (strict_strtol(str, 0, &val) != 0)
+		return 1;
+
+	initfree = val;
+	pr_info("initfree: %s free init pages\n",
+		initfree ? "will" : "won't");
+	return 1;
+}
+__setup("initfree=", set_initfree);
+
+/*
+ * Release (or, with initfree=0, unmap) the init pages covering
+ * [begin, end); "end" is rounded up to a page boundary.
+ *
+ * @what:  label used in the log message.
+ * @begin: kernel virtual address of the first byte.
+ * @end:   kernel virtual address just past the last byte.
+ *
+ * initfree=0 cannot be honored together with kdata_huge, so it is
+ * overridden (with a warning) in that case.
+ */
+static void free_init_pages(char *what, unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+
+	if (kdata_huge && !initfree) {
+		pr_warning("Warning: ignoring initfree=0:"
+			   " incompatible with kdata=huge\n");
+		initfree = 1;
+	}
+	end = (end + PAGE_SIZE - 1) & PAGE_MASK;
+	local_flush_tlb_pages(NULL, begin, PAGE_SIZE, end - begin);
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		/*
+		 * Note we just reset the home here directly in the
+		 * page table. We know this is safe because our caller
+		 * just flushed the caches on all the other cpus,
+		 * and they won't be touching any of these pages.
+		 */
+		/* A pfn is an unsigned long, as in mark_w1data_ro(). */
+		unsigned long pfn = kaddr_to_pfn((void *)addr);
+		struct page *page = pfn_to_page(pfn);
+		pte_t *ptep = virt_to_pte(NULL, addr);
+		if (!initfree) {
+			/*
+			 * If debugging page accesses then do not free
+			 * this memory but mark them not present - any
+			 * buggy init-section access will create a
+			 * kernel page fault:
+			 */
+			pte_clear(&init_mm, addr, ptep);
+			continue;
+		}
+		__ClearPageReserved(page);
+		init_page_count(page);
+		if (pte_huge(*ptep))
+			BUG_ON(!kdata_huge);
+		else
+			set_pte_at(&init_mm, addr, ptep,
+				   pfn_pte(pfn, PAGE_KERNEL));
+		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
+		free_page(addr);
+		totalram_pages++;
+	}
+	pr_info("Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+}
+
+/*
+ * Release the init sections: evict the caches, free the init data and
+ * the PAGE_OFFSET alias of the init text, remap .w1data read-only on
+ * chips without a coherent local cache, then flush every TLB.
+ */
+void free_initmem(void)
+{
+ const unsigned long text_delta = MEM_SV_INTRPT - PAGE_OFFSET;
+
+ /*
+ * Evict the dirty initdata on the boot cpu, evict the w1data
+ * wherever it's homed, and evict all the init code everywhere.
+ * We are guaranteed that no one will touch the init pages any
+ * more, and although other cpus may be touching the w1data,
+ * we only actually change the caching on tile64, which won't
+ * be keeping local copies in the other tiles' caches anyway.
+ */
+ homecache_evict(&cpu_cacheable_map);
+
+ /* Free the data pages that we won't use again after init. */
+ free_init_pages("unused kernel data",
+ (unsigned long)_sinitdata,
+ (unsigned long)_einitdata);
+
+ /*
+ * Free the pages mapped from 0xc0000000 that correspond to code
+ * pages from MEM_SV_INTRPT that we won't use again after init.
+ */
+ free_init_pages("unused kernel text",
+ (unsigned long)_sinittext - text_delta,
+ (unsigned long)_einittext - text_delta);
+
+#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
+ /*
+ * Upgrade the .w1data section to globally cached.
+ * We don't do this on tilepro, since the cache architecture
+ * pretty much makes it irrelevant, and in any case we end
+ * up having racing issues with other tiles that may touch
+ * the data after we flush the cache but before we update
+ * the PTEs and flush the TLBs, causing sharer shootdowns
+ * later. Even though this is to clean data, it seems like
+ * an unnecessary complication.
+ */
+ mark_w1data_ro();
+#endif
+
+ /* Do a global TLB flush so everyone sees the changes. */
+ flush_tlb_all();
+}
diff --git a/arch/tile/mm/migrate.h b/arch/tile/mm/migrate.h
new file mode 100644
index 00000000..cd45a083
--- /dev/null
+++ b/arch/tile/mm/migrate.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Structure definitions for migration, exposed here for use by
+ * arch/tile/kernel/asm-offsets.c.
+ */
+
+#ifndef MM_MIGRATE_H
+#define MM_MIGRATE_H
+
+#include <linux/cpumask.h>
+#include <hv/hypervisor.h>
+
+/*
+ * This function is used as a helper when setting up the initial
+ * page table (swapper_pg_dir).
+ *
+ * The assembly implementations evict the L2 cache for the cpus in
+ * "cpumask" (documented there as a mask with just the calling cpu
+ * set), install "page_table" as the new context with "access" as the
+ * PTE used for context access and "asid" as its ASID, and finally
+ * flush the TLB, all with INTERRUPT_CRITICAL_SECTION set.  The
+ * sequence stops at the first hypervisor call that returns non-zero;
+ * the value returned is the result of the last hypervisor call made.
+ */
+extern int flush_and_install_context(HV_PhysAddr page_table, HV_PTE access,
+ HV_ASID asid,
+ const unsigned long *cpumask);
+
+/*
+ * This function supports migration as a "helper" as follows:
+ *
+ * - Set the stack PTE itself to "migrating".
+ * - Do a global TLB flush for (va,length) and the specified ASIDs.
+ * - Do a cache-evict on all necessary cpus.
+ * - Write the new stack PTE.
+ *
+ * Note that any non-NULL pointers must not point to the page that
+ * is handled by the stack_pte itself.
+ *
+ * NOTE(review): neither migrate_32.S nor migrate_64.S defines this
+ * function; confirm where (or whether) it is implemented.
+ */
+extern int homecache_migrate_stack_and_flush(pte_t stack_pte, unsigned long va,
+ size_t length, pte_t *stack_ptep,
+ const struct cpumask *cache_cpumask,
+ const struct cpumask *tlb_cpumask,
+ HV_Remote_ASID *asids,
+ int asidcount);
+
+#endif /* MM_MIGRATE_H */
diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S
new file mode 100644
index 00000000..ac01a7cd
--- /dev/null
+++ b/arch/tile/mm/migrate_32.S
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * This routine is a helper for migrating the home of a set of pages to
+ * a new cpu. See the documentation in homecache.c for more information.
+ */
+
+#include <linux/linkage.h>
+#include <linux/threads.h>
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/types.h>
+#include <asm/asm-offsets.h>
+#include <hv/hypervisor.h>
+
+ .text
+
+/*
+ * First, some definitions that apply to all the code in the file.
+ */
+
+/* Locals (caller-save) */
+#define r_tmp r10
+#define r_save_sp r11
+
+/* What we save where in the stack frame; must include all callee-saves. */
+#define FRAME_SP 4
+#define FRAME_R30 8
+#define FRAME_R31 12
+#define FRAME_R32 16
+#define FRAME_R33 20
+#define FRAME_R34 24
+#define FRAME_R35 28
+#define FRAME_SIZE 32
+
+
+
+
+/*
+ * On entry:
+ *
+ * r0 low word of the new context PA to install (moved to r_context_lo)
+ * r1 high word of the new context PA to install (moved to r_context_hi)
+ * r2 low word of PTE to use for context access (moved to r_access_lo)
+ * r3 high word of PTE to use for context access (moved to r_access_hi)
+ * r4 ASID to use for new context (moved to r_asid)
+ * r5 pointer to cpumask with just this cpu set in it (r_my_cpumask)
+ */
+
+/* Arguments (caller-save) */
+#define r_context_lo_in r0
+#define r_context_hi_in r1
+#define r_access_lo_in r2
+#define r_access_hi_in r3
+#define r_asid_in r4
+#define r_my_cpumask r5
+
+/* Locals (callee-save); must not be more than FRAME_xxx above. */
+#define r_save_ics r30
+#define r_context_lo r31
+#define r_context_hi r32
+#define r_access_lo r33
+#define r_access_hi r34
+#define r_asid r35
+
+STD_ENTRY(flush_and_install_context)
+ /*
+ * Create a stack frame; we can't touch it once we flush the
+ * cache until we install the new page table and flush the TLB.
+ * lr is kept at the incoming sp (it is reloaded from
+ * sp + FRAME_SIZE on exit); the incoming sp and r30-r35 go at
+ * their FRAME_xxx offsets in the new frame.
+ */
+ {
+ move r_save_sp, sp
+ sw sp, lr
+ addi sp, sp, -FRAME_SIZE
+ }
+ addi r_tmp, sp, FRAME_SP
+ {
+ sw r_tmp, r_save_sp
+ addi r_tmp, sp, FRAME_R30
+ }
+ {
+ sw r_tmp, r30
+ addi r_tmp, sp, FRAME_R31
+ }
+ {
+ sw r_tmp, r31
+ addi r_tmp, sp, FRAME_R32
+ }
+ {
+ sw r_tmp, r32
+ addi r_tmp, sp, FRAME_R33
+ }
+ {
+ sw r_tmp, r33
+ addi r_tmp, sp, FRAME_R34
+ }
+ {
+ sw r_tmp, r34
+ addi r_tmp, sp, FRAME_R35
+ }
+ sw r_tmp, r35
+
+ /* Move some arguments to callee-save registers. */
+ {
+ move r_context_lo, r_context_lo_in
+ move r_context_hi, r_context_hi_in
+ }
+ {
+ move r_access_lo, r_access_lo_in
+ move r_access_hi, r_access_hi_in
+ }
+ move r_asid, r_asid_in
+
+ /* Disable interrupts, since we can't use our stack. */
+ {
+ mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION
+ movei r_tmp, 1
+ }
+ mtspr INTERRUPT_CRITICAL_SECTION, r_tmp
+
+ /* First, flush our L2 cache. */
+ {
+ move r0, zero /* cache_pa */
+ move r1, zero
+ }
+ {
+ auli r2, zero, ha16(HV_FLUSH_EVICT_L2) /* cache_control */
+ move r3, r_my_cpumask /* cache_cpumask */
+ }
+ {
+ move r4, zero /* tlb_va */
+ move r5, zero /* tlb_length */
+ }
+ {
+ move r6, zero /* tlb_pgsize */
+ move r7, zero /* tlb_cpumask */
+ }
+ {
+ move r8, zero /* asids */
+ move r9, zero /* asidcount */
+ }
+ jal hv_flush_remote
+ bnz r0, .Ldone /* non-zero result: skip the rest and return it */
+
+ /* Now install the new page table. */
+ {
+ move r0, r_context_lo
+ move r1, r_context_hi
+ }
+ {
+ move r2, r_access_lo
+ move r3, r_access_hi
+ }
+ {
+ move r4, r_asid
+ movei r5, HV_CTX_DIRECTIO
+ }
+ jal hv_install_context
+ bnz r0, .Ldone /* non-zero result: skip the rest and return it */
+
+ /* Finally, flush the TLB. */
+ {
+ movei r0, 0 /* preserve_global */
+ jal hv_flush_all
+ }
+
+.Ldone:
+ /* Reset interrupts back how they were before. */
+ mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics
+
+ /* Restore the callee-saved registers and return. */
+ addli lr, sp, FRAME_SIZE
+ {
+ lw lr, lr
+ addli r_tmp, sp, FRAME_R30
+ }
+ {
+ lw r30, r_tmp
+ addli r_tmp, sp, FRAME_R31
+ }
+ {
+ lw r31, r_tmp
+ addli r_tmp, sp, FRAME_R32
+ }
+ {
+ lw r32, r_tmp
+ addli r_tmp, sp, FRAME_R33
+ }
+ {
+ lw r33, r_tmp
+ addli r_tmp, sp, FRAME_R34
+ }
+ {
+ lw r34, r_tmp
+ addli r_tmp, sp, FRAME_R35
+ }
+ {
+ lw r35, r_tmp
+ addi sp, sp, FRAME_SIZE
+ }
+ jrp lr /* r0 still holds the last hypervisor call's result */
+ STD_ENDPROC(flush_and_install_context)
diff --git a/arch/tile/mm/migrate_64.S b/arch/tile/mm/migrate_64.S
new file mode 100644
index 00000000..e76fea68
--- /dev/null
+++ b/arch/tile/mm/migrate_64.S
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * This routine is a helper for migrating the home of a set of pages to
+ * a new cpu. See the documentation in homecache.c for more information.
+ */
+
+#include <linux/linkage.h>
+#include <linux/threads.h>
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/types.h>
+#include <asm/asm-offsets.h>
+#include <hv/hypervisor.h>
+
+ .text
+
+/*
+ * First, some definitions that apply to all the code in the file.
+ */
+
+/* Locals (caller-save) */
+#define r_tmp r10
+#define r_save_sp r11
+
+/* What we save where in the stack frame; must include all callee-saves. */
+#define FRAME_SP 8
+#define FRAME_R30 16
+#define FRAME_R31 24
+#define FRAME_R32 32
+#define FRAME_R33 40
+#define FRAME_SIZE 48
+
+
+
+
+/*
+ * On entry:
+ *
+ * r0 the new context PA to install (moved to r_context)
+ * r1 PTE to use for context access (moved to r_access)
+ * r2 ASID to use for new context (moved to r_asid)
+ * r3 pointer to cpumask with just this cpu set in it (r_my_cpumask)
+ */
+
+/* Arguments (caller-save) */
+#define r_context_in r0
+#define r_access_in r1
+#define r_asid_in r2
+#define r_my_cpumask r3
+
+/* Locals (callee-save); must not be more than FRAME_xxx above. */
+#define r_save_ics r30
+#define r_context r31
+#define r_access r32
+#define r_asid r33
+
+/*
+ * Caller-save locals and frame constants are the same as
+ * for homecache_migrate_stack_and_flush.
+ */
+
+STD_ENTRY(flush_and_install_context)
+ /*
+ * Create a stack frame; we can't touch it once we flush the
+ * cache until we install the new page table and flush the TLB.
+ * lr is kept at the incoming sp (it is reloaded from
+ * sp + FRAME_SIZE on exit); the incoming sp and r30-r33 go at
+ * their FRAME_xxx offsets in the new frame.
+ */
+ {
+ move r_save_sp, sp
+ st sp, lr
+ addi sp, sp, -FRAME_SIZE
+ }
+ addi r_tmp, sp, FRAME_SP
+ {
+ st r_tmp, r_save_sp
+ addi r_tmp, sp, FRAME_R30
+ }
+ {
+ st r_tmp, r30
+ addi r_tmp, sp, FRAME_R31
+ }
+ {
+ st r_tmp, r31
+ addi r_tmp, sp, FRAME_R32
+ }
+ {
+ st r_tmp, r32
+ addi r_tmp, sp, FRAME_R33
+ }
+ st r_tmp, r33
+
+ /* Move some arguments to callee-save registers. */
+ {
+ move r_context, r_context_in
+ move r_access, r_access_in
+ }
+ move r_asid, r_asid_in
+
+ /* Disable interrupts, since we can't use our stack. */
+ {
+ mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION
+ movei r_tmp, 1
+ }
+ mtspr INTERRUPT_CRITICAL_SECTION, r_tmp
+
+ /* First, flush our L2 cache. */
+ {
+ move r0, zero /* cache_pa */
+ moveli r1, hw2_last(HV_FLUSH_EVICT_L2) /* cache_control */
+ }
+ {
+ shl16insli r1, r1, hw1(HV_FLUSH_EVICT_L2)
+ move r2, r_my_cpumask /* cache_cpumask */
+ }
+ {
+ shl16insli r1, r1, hw0(HV_FLUSH_EVICT_L2)
+ move r3, zero /* tlb_va */
+ }
+ {
+ move r4, zero /* tlb_length */
+ move r5, zero /* tlb_pgsize */
+ }
+ {
+ move r6, zero /* tlb_cpumask */
+ move r7, zero /* asids */
+ }
+ {
+ move r8, zero /* asidcount */
+ jal hv_flush_remote
+ }
+ bnez r0, 1f /* non-zero result: skip the rest and return it */
+
+ /* Now install the new page table. */
+ {
+ move r0, r_context
+ move r1, r_access
+ }
+ {
+ move r2, r_asid
+ movei r3, HV_CTX_DIRECTIO
+ }
+ jal hv_install_context
+ bnez r0, 1f /* non-zero result: skip the rest and return it */
+
+ /* Finally, flush the TLB. */
+ {
+ movei r0, 0 /* preserve_global */
+ jal hv_flush_all
+ }
+
+1: /* Reset interrupts back how they were before. */
+ mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics
+
+ /* Restore the callee-saved registers and return. */
+ addli lr, sp, FRAME_SIZE
+ {
+ ld lr, lr
+ addli r_tmp, sp, FRAME_R30
+ }
+ {
+ ld r30, r_tmp
+ addli r_tmp, sp, FRAME_R31
+ }
+ {
+ ld r31, r_tmp
+ addli r_tmp, sp, FRAME_R32
+ }
+ {
+ ld r32, r_tmp
+ addli r_tmp, sp, FRAME_R33
+ }
+ {
+ ld r33, r_tmp
+ addi sp, sp, FRAME_SIZE
+ }
+ jrp lr /* r0 still holds the last hypervisor call's result */
+ STD_ENDPROC(flush_and_install_context)
diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c
new file mode 100644
index 00000000..f96f4cec
--- /dev/null
+++ b/arch/tile/mm/mmap.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Taken from the i386 architecture and simplified.
+ */
+
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/limits.h>
+#include <linux/sched.h>
+#include <linux/mman.h>
+#include <linux/compat.h>
+
+/*
+ * Top of mmap area (just below the process stack).
+ *
+ * Leave a hole of at least ~128 MB.
+ */
+#define MIN_GAP (128*1024*1024)
+#define MAX_GAP (TASK_SIZE/6*5)
+
+/*
+ * Compute the top of the mmap area: TASK_SIZE minus the stack rlimit
+ * (kept within [MIN_GAP, MAX_GAP]) minus an optional random offset of
+ * less than 1 MB, passed through PAGE_ALIGN().
+ */
+static inline unsigned long mmap_base(struct mm_struct *mm)
+{
+	const unsigned long stack_limit = rlimit(RLIMIT_STACK);
+	unsigned long random_factor;
+	unsigned long gap;
+
+	/* Only randomize when the task asks for it. */
+	random_factor = (current->flags & PF_RANDOMIZE) ?
+		get_random_int() % (1024*1024) : 0;
+
+	gap = (stack_limit < MIN_GAP) ? MIN_GAP :
+	      (stack_limit > MAX_GAP) ? MAX_GAP : stack_limit;
+
+	return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
+}
+
+/*
+ * Called very early while a new process VM image is being created to
+ * select the VM layout functions: top-down for 32-bit tasks with a
+ * finite stack rlimit, the standard bottom-up layout otherwise.
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+#if !defined(__tilegx__)
+	const int task_is_32bit = 1;
+#elif defined(CONFIG_COMPAT)
+	const int task_is_32bit = is_compat_task();
+#else
+	const int task_is_32bit = 0;
+#endif
+
+	if (task_is_32bit && rlimit(RLIMIT_STACK) != RLIM_INFINITY) {
+		/* Bounded stack on a 32-bit task: allocate top-down. */
+		mm->mmap_base = mmap_base(mm);
+		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+		mm->unmap_area = arch_unmap_area_topdown;
+		return;
+	}
+
+	/*
+	 * Use standard layout if the expected stack growth is unlimited
+	 * or we are running native 64 bits.
+	 */
+	mm->mmap_base = TASK_UNMAPPED_BASE;
+	mm->get_unmapped_area = arch_get_unmapped_area;
+	mm->unmap_area = arch_unmap_area;
+}
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
new file mode 100644
index 00000000..2410aa89
--- /dev/null
+++ b/arch/tile/mm/pgtable.c
@@ -0,0 +1,639 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/spinlock.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/fixmap.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
+
+#define K(x) ((x) << (PAGE_SHIFT-10))
+
+/*
+ * The normal show_free_areas() is too verbose on Tile, with dozens
+ * of processors and often four NUMA zones each with high and lowmem.
+ *
+ * Print one line of global page counters, then one line per populated
+ * zone giving the total free memory and the size of the largest free
+ * block.  @filter is accepted for interface compatibility and is not
+ * used here.
+ */
+void show_mem(unsigned int filter)
+{
+	struct zone *zone;
+
+	pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu"
+	       " free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu"
+	       " pagecache:%lu swap:%lu\n",
+	       (global_page_state(NR_ACTIVE_ANON) +
+		global_page_state(NR_ACTIVE_FILE)),
+	       (global_page_state(NR_INACTIVE_ANON) +
+		global_page_state(NR_INACTIVE_FILE)),
+	       global_page_state(NR_FILE_DIRTY),
+	       global_page_state(NR_WRITEBACK),
+	       global_page_state(NR_UNSTABLE_NFS),
+	       global_page_state(NR_FREE_PAGES),
+	       (global_page_state(NR_SLAB_RECLAIMABLE) +
+		global_page_state(NR_SLAB_UNRECLAIMABLE)),
+	       global_page_state(NR_FILE_MAPPED),
+	       global_page_state(NR_PAGETABLE),
+	       global_page_state(NR_BOUNCE),
+	       global_page_state(NR_FILE_PAGES),
+	       nr_swap_pages);
+
+	for_each_zone(zone) {
+		unsigned long flags, order, total = 0, largest_order = 0;
+		int have_free = 0;
+
+		if (!populated_zone(zone))
+			continue;
+
+		spin_lock_irqsave(&zone->lock, flags);
+		for (order = 0; order < MAX_ORDER; order++) {
+			/* Keep the full width so the shift cannot overflow. */
+			unsigned long nr = zone->free_area[order].nr_free;
+
+			total += nr << order;
+			if (nr) {
+				largest_order = order;
+				have_free = 1;
+			}
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+
+		/*
+		 * Track "no free block" separately from the order: order 0
+		 * is a valid largest block, and an all-ones sentinel must
+		 * never be used as a shift count.
+		 */
+		pr_err("Node %d %7s: %lukB (largest %luKb)\n",
+		       zone_to_nid(zone), zone->name,
+		       K(total),
+		       have_free ? K(1UL) << largest_order : 0UL);
+	}
+}
+
+/*
+ * Point the kernel PTE for @vaddr at physical frame @pfn with
+ * protection @flags.  Every upper level of swapper_pg_dir must
+ * already be populated; a missing level is a BUG().
+ */
+static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
+{
+	pgd_t *pgd = swapper_pg_dir + pgd_index(vaddr);
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	if (pgd_none(*pgd)) {
+		BUG();
+		return;
+	}
+
+	pud = pud_offset(pgd, vaddr);
+	if (pud_none(*pud)) {
+		BUG();
+		return;
+	}
+
+	pmd = pmd_offset(pud, vaddr);
+	if (pmd_none(*pmd)) {
+		BUG();
+		return;
+	}
+
+	/* <pfn,flags> stored as-is, to permit clearing entries */
+	pte = pte_offset_kernel(pmd, vaddr);
+	set_pte(pte, pfn_pte(pfn, flags));
+
+	/*
+	 * Flushing just this one mapping is enough; that looks
+	 * conservative given the only caller is __set_fixmap.
+	 */
+	local_flush_tlb_page(NULL, vaddr, PAGE_SIZE);
+}
+
+/*
+ * Map fixmap slot @idx to physical address @phys with protection
+ * @flags.  An out-of-range @idx is a BUG().
+ */
+void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+{
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+
+	set_pte_pfn(__fix_to_virt(idx), phys >> PAGE_SHIFT, flags);
+}
+
+#if defined(CONFIG_HIGHPTE)
+/*
+ * Map the page holding the L2 page table referenced by @dir with
+ * kmap_atomic() and return a pointer to the PTE for @address within it.
+ */
+pte_t *_pte_offset_map(pmd_t *dir, unsigned long address)
+{
+	/*
+	 * The page table may start part-way into its page, so add only
+	 * the sub-page offset of the table.  The mask must be applied to
+	 * that offset alone: '+' binds tighter than '&', so without the
+	 * parentheses it would be applied to the whole pointer sum.
+	 */
+	pte_t *pte = kmap_atomic(pmd_page(*dir)) +
+		((pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK);
+	return &pte[pte_index(address)];
+}
+#endif
+
+/**
+ * shatter_huge_page() - ensure a given address is mapped by a small page.
+ *
+ * This function converts a huge PTE mapping kernel LOWMEM into a bunch
+ * of small PTEs with the same caching. No cache flush required, but we
+ * must do a global TLB flush.
+ *
+ * Any caller that wishes to modify a kernel mapping that might
+ * have been made with a huge page should call this function,
+ * since doing so properly avoids race conditions with installing the
+ * newly-shattered page and then flushing all the TLB entries.
+ *
+ * Nothing is done if the pmd covering the address is not a huge page;
+ * that test is made once without the lock and repeated while holding
+ * init_mm.page_table_lock. The address must be valid and at or above
+ * PAGE_OFFSET, with a present pud and pmd, or the function BUG()s.
+ *
+ * @addr: Address at which to shatter any existing huge page.
+ */
+void shatter_huge_page(unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long flags = 0; /* happy compiler */
+#ifdef __PAGETABLE_PMD_FOLDED
+ struct list_head *pos;
+#endif
+
+ /* Get a pointer to the pmd entry that we need to change. */
+ addr &= HPAGE_MASK;
+ BUG_ON(pgd_addr_invalid(addr));
+ BUG_ON(addr < PAGE_OFFSET); /* only for kernel LOWMEM */
+ pgd = swapper_pg_dir + pgd_index(addr);
+ pud = pud_offset(pgd, addr);
+ BUG_ON(!pud_present(*pud));
+ pmd = pmd_offset(pud, addr);
+ BUG_ON(!pmd_present(*pmd));
+ if (!pmd_huge_page(*pmd)) /* unlocked fast path: already small pages */
+ return;
+
+ spin_lock_irqsave(&init_mm.page_table_lock, flags);
+ if (!pmd_huge_page(*pmd)) {
+ /* Lost the race to convert the huge page. */
+ spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
+ return;
+ }
+
+ /* Shatter the huge page into the preallocated L2 page table. */
+ pmd_populate_kernel(&init_mm, pmd,
+ get_prealloc_pte(pte_pfn(*(pte_t *)pmd)));
+
+#ifdef __PAGETABLE_PMD_FOLDED
+ /* Walk every pgd on the system and update the pmd there. */
+ spin_lock(&pgd_lock); /* interrupts are already off from the irqsave above */
+ list_for_each(pos, &pgd_list) {
+ pmd_t *copy_pmd;
+ pgd = list_to_pgd(pos) + pgd_index(addr);
+ pud = pud_offset(pgd, addr);
+ copy_pmd = pmd_offset(pud, addr);
+ __set_pmd(copy_pmd, *pmd);
+ }
+ spin_unlock(&pgd_lock);
+#endif
+
+ /* Tell every cpu to notice the change. */
+ flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE,
+ cpu_possible_mask, NULL, 0);
+
+ /* Hold the lock until the TLB flush is finished to avoid races. */
+ spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
+}
+
+/*
+ * List of all pgd's needed so it can invalidate entries in both cached
+ * and uncached pgd's. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+ *
+ * The lock is always taken with interrupts disabled, unlike on x86
+ * and other platforms, because we need to take the lock in
+ * shatter_huge_page(), which may be called from an interrupt context.
+ * We are not at risk from the tlbflush IPI deadlock that was seen on
+ * x86, since we use the flush_remote() API to have the hypervisor do
+ * the TLB flushes regardless of irq disabling.
+ *
+ * In this file pgd_lock is held around every addition to, removal
+ * from, and walk of pgd_list.
+ */
+DEFINE_SPINLOCK(pgd_lock);
+LIST_HEAD(pgd_list);
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+ list_add(pgd_to_list(pgd), &pgd_list); /* link pgd at the head of pgd_list; pgd_ctor() calls this under pgd_lock */
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+ list_del(pgd_to_list(pgd)); /* unlink pgd from pgd_list; pgd_dtor() calls this under pgd_lock */
+}
+
+/* First pgd slot that maps kernel addresses, and how many follow it. */
+#define KERNEL_PGD_INDEX_START pgd_index(PAGE_OFFSET)
+#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_INDEX_START)
+
+/*
+ * Initialize a freshly allocated pgd: clear the user entries, copy
+ * the kernel entries from swapper_pg_dir, and link it onto pgd_list.
+ */
+static void pgd_ctor(pgd_t *pgd)
+{
+	pgd_t *kernel_dst = pgd + KERNEL_PGD_INDEX_START;
+	pgd_t *kernel_src = swapper_pg_dir + KERNEL_PGD_INDEX_START;
+	unsigned long irq_state;
+
+	/* The user portion starts out empty. */
+	memset(pgd, 0, KERNEL_PGD_INDEX_START*sizeof(pgd_t));
+
+	spin_lock_irqsave(&pgd_lock, irq_state);
+
+#ifndef __tilegx__
+	/*
+	 * Check that the user interrupt vector has no L2.
+	 * It never should for the swapper, and new page tables
+	 * should always start with an empty user interrupt vector.
+	 */
+	BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0);
+#endif
+
+	/* Copy the kernel entries and publish the pgd under the lock. */
+	memcpy(kernel_dst, kernel_src, KERNEL_PGD_PTRS * sizeof(pgd_t));
+	pgd_list_add(pgd);
+
+	spin_unlock_irqrestore(&pgd_lock, irq_state);
+}
+
+/*
+ * Take a pgd off pgd_list before it is freed.  The irqsave form is
+ * used because this can be called from interrupt context.
+ */
+static void pgd_dtor(pgd_t *pgd)
+{
+	unsigned long irq_state;
+
+	spin_lock_irqsave(&pgd_lock, irq_state);
+	pgd_list_del(pgd);
+	spin_unlock_irqrestore(&pgd_lock, irq_state);
+}
+
+/*
+ * Allocate a pgd from pgd_cache and run pgd_ctor() on it.
+ * Returns NULL if the allocation fails.
+ */
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+
+	if (!pgd)
+		return NULL;
+
+	pgd_ctor(pgd);
+	return pgd;
+}
+
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ pgd_dtor(pgd); /* unlink from pgd_list before the memory is released */
+ kmem_cache_free(pgd_cache, pgd); /* hand the pgd back to pgd_cache */
+}
+
+
+/* Number of pages that make up one user L2 page table. */
+#define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER)
+
+/*
+ * Allocate the zeroed page(s) for one user L2 page table.
+ * Returns the first struct page, or NULL if the allocation fails.
+ */
+struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	struct page *table;
+	gfp_t gfp = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
+#if L2_USER_PGTABLE_ORDER > 0
+	int idx;
+#endif
+
+#ifdef CONFIG_HIGHPTE
+	gfp |= __GFP_HIGHMEM;
+#endif
+
+	table = alloc_pages(gfp, L2_USER_PGTABLE_ORDER);
+	if (table == NULL)
+		return NULL;
+
+#if L2_USER_PGTABLE_ORDER > 0
+	/*
+	 * Give every page a page_count() of one, not just the first.
+	 * __GFP_COMP is avoided since it doesn't look like it works
+	 * correctly with tlb_remove_page().
+	 */
+	for (idx = 1; idx < L2_USER_PGTABLE_PAGES; ++idx) {
+		struct page *tail = table + idx;
+
+		init_page_count(tail);
+		inc_zone_page_state(tail, NR_PAGETABLE);
+	}
+#endif
+
+	pgtable_page_ctor(table);
+	return table;
+}
+
+/*
+ * Free a page table immediately (used in __pte_alloc if we raced with
+ * another process).  Everything pte_alloc_one() set up is undone
+ * before the pages go back to the allocator.
+ */
+void pte_free(struct mm_struct *mm, struct page *p)
+{
+	int idx;
+
+	pgtable_page_dtor(p);
+	__free_page(p);
+
+	/* The remaining pages were given their own count and statistic. */
+	for (idx = 1; idx < L2_USER_PGTABLE_PAGES; ++idx) {
+		struct page *tail = p + idx;
+
+		__free_page(tail);
+		dec_zone_page_state(tail, NR_PAGETABLE);
+	}
+}
+
+/*
+ * Queue the page(s) of a user L2 page table on an mmu_gather via
+ * tlb_remove_page(), undoing the accounting done by pte_alloc_one().
+ */
+void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
+		    unsigned long address)
+{
+	int idx;
+
+	pgtable_page_dtor(pte);
+	tlb_remove_page(tlb, pte);
+
+	/* The remaining pages were given their own count and statistic. */
+	for (idx = 1; idx < L2_USER_PGTABLE_PAGES; ++idx) {
+		struct page *tail = pte + idx;
+
+		tlb_remove_page(tlb, tail);
+		dec_zone_page_state(tail, NR_PAGETABLE);
+	}
+}
+
+#ifndef __tilegx__
+
+/*
+ * Clear the "accessed" bit of a PTE and report whether it was set
+ * (1) or already clear (0).
+ *
+ * FIXME: needs to be atomic vs hypervisor writes. For now we make the
+ * window of vulnerability a bit smaller by doing an unlocked 8-bit update.
+ */
+int ptep_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long addr, pte_t *ptep)
+{
+#if HV_PTE_INDEX_ACCESSED < 8 || HV_PTE_INDEX_ACCESSED >= 16
+# error Code assumes HV_PTE "accessed" bit in second byte
+#endif
+	/* Position of the "accessed" bit within the PTE's second byte. */
+	const u8 accessed_bit = 1 << (HV_PTE_INDEX_ACCESSED - 8);
+	u8 *pte_bytes = (u8 *)ptep;
+	u8 old_byte = pte_bytes[1];
+
+	if (!(old_byte & accessed_bit))
+		return 0;
+
+	pte_bytes[1] = old_byte & ~accessed_bit;
+	return 1;
+}
+
+/*
+ * Clear the "writable" bit of a PTE.
+ *
+ * This is atomic vs hypervisor writes: the hypervisor always writes
+ * the low word (where "accessed" and "dirty" live), while this
+ * routine only ever writes the high word.
+ */
+void ptep_set_wrprotect(struct mm_struct *mm,
+			unsigned long addr, pte_t *ptep)
+{
+#if HV_PTE_INDEX_WRITABLE < 32
+# error Code assumes HV_PTE "writable" bit in high word
+#endif
+	u32 *pte_words = (u32 *)ptep;
+
+	pte_words[1] &= ~(1 << (HV_PTE_INDEX_WRITABLE - 32));
+}
+
+#endif
+
+/*
+ * Look up the PTE that maps @addr in @mm, or in swapper_pg_dir when
+ * @mm is NULL.  For a huge-page mapping the pmd entry itself is
+ * returned.  Returns NULL when the address is invalid or the pud or
+ * pmd is not present.
+ */
+pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (pgd_addr_invalid(addr))
+		return NULL;
+
+	if (mm)
+		pgd = pgd_offset(mm, addr);
+	else
+		pgd = swapper_pg_dir + pgd_index(addr);
+
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return NULL;
+
+	pmd = pmd_offset(pud, addr);
+	if (pmd_huge_page(*pmd))
+		return (pte_t *)pmd;
+
+	return pmd_present(*pmd) ? pte_offset_kernel(pmd, addr) : NULL;
+}
+
+/*
+ * Return @prot with its LOTAR set to the grid coordinates of @cpu.
+ * @prot must already be in HV_PTE_MODE_CACHE_TILE_L3 mode and @cpu
+ * must be a valid LOTAR target; anything else is a BUG().
+ */
+pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu)
+{
+	unsigned int width = smp_width;
+	int tile_x = cpu % width;
+	int tile_y = cpu / width;
+
+	BUG_ON(tile_y >= smp_height);
+	BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
+	BUG_ON(cpu < 0 || cpu >= NR_CPUS);
+	BUG_ON(!cpu_is_valid_lotar(cpu));
+
+	return hv_pte_set_lotar(prot, HV_XY_TO_LOTAR(tile_x, tile_y));
+}
+
+/*
+ * Return the cpu number encoded in the LOTAR of @prot, which must be
+ * in HV_PTE_MODE_CACHE_TILE_L3 mode (BUG() otherwise).
+ */
+int get_remote_cache_cpu(pgprot_t prot)
+{
+	HV_LOTAR lotar = hv_pte_get_lotar(prot);
+
+	BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
+
+	/* cpus are numbered row by row across the grid. */
+	return HV_LOTAR_X(lotar) + HV_LOTAR_Y(lotar) * smp_width;
+}
+
+/*
+ * Convert a kernel VA to a PA and homing information: *cpa receives
+ * __pa(va) and *pte an otherwise-empty PTE carrying the home of the
+ * page that backs va.  Currently always returns 0.
+ */
+int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte)
+{
+	pte_t empty_pte = { 0 };
+	struct page *backing_page = virt_to_page(va);
+
+	*cpa = __pa(va);
+
+	/* Note that this is not writing a page table, just returning a pte. */
+	*pte = pte_set_home(empty_pte, page_home(backing_page));
+
+	return 0; /* return non-zero if not hfh? */
+}
+EXPORT_SYMBOL(va_to_cpa_and_pte);
+
+void __set_pte(pte_t *ptep, pte_t pte)
+{
+#ifdef __tilegx__
+ *ptep = pte; /* a single store of the whole PTE */
+#else
+# if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
+# error Must write the present and migrating bits last
+# endif
+ if (pte_present(pte)) { /* present PTE: write the high word first... */
+ ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
+ barrier(); /* keep the compiler from reordering the two stores */
+ ((u32 *)ptep)[0] = (u32)(pte_val(pte)); /* ...and the low word, holding the present/migrating bits, last */
+ } else { /* non-present PTE: write the low word first */
+ ((u32 *)ptep)[0] = (u32)(pte_val(pte));
+ barrier(); /* keep the compiler from reordering the two stores */
+ ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
+ }
+#endif /* __tilegx__ */
+}
+
+/*
+ * Install @pte at @ptep.  When the PTE is present and is not an MMIO
+ * mapping, its home is first taken from the struct page for its PFN;
+ * a PFN with no struct page must already carry a non-zero mode, or we
+ * panic.
+ */
+void set_pte(pte_t *ptep, pte_t pte)
+{
+	/* Does the PTE actually reference physical memory? */
+	int maps_memory = pte_present(pte) &&
+		(!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO);
+
+	if (maps_memory) {
+		unsigned long pfn = pte_pfn(pte);
+
+		if (!pfn_valid(pfn)) {
+			/* remap_pfn_range(), etc, must supply PTE mode. */
+			if (hv_pte_get_mode(pte) == 0)
+				panic("set_pte(): out-of-range PFN and mode 0\n");
+		} else {
+			/* Update the home of the PTE from the struct page. */
+			pte = pte_set_home(pte, page_home(pfn_to_page(pfn)));
+		}
+	}
+
+	__set_pte(ptep, pte);
+}
+
+/*
+ * Can this mm load a PTE with cached_priority set?
+ * Returns the mm's context.priority_cached value: non-zero once
+ * start_mm_caching() has run for this mm, until
+ * update_priority_cached() clears it again.
+ */
+static inline int mm_is_priority_cached(struct mm_struct *mm)
+{
+ return mm->context.priority_cached;
+}
+
+/*
+ * Record that an mm has a priority mapping.  The hypervisor is only
+ * told the first time; later calls for the same mm do nothing.
+ */
+void start_mm_caching(struct mm_struct *mm)
+{
+	if (mm_is_priority_cached(mm))
+		return;
+
+	mm->context.priority_cached = -1U;
+	hv_set_caching(-1U);
+}
+
+/*
+ * Validate and return the priority_cached flag.  A zero flag needs no
+ * scan, since it is set non-zero as soon as a MAP_CACHE_PRIORITY
+ * mapping is first considered.
+ *
+ * The mmap_sem semaphore is only _tried_: we are in an interrupt
+ * context (servicing switch_mm), so if it can't be acquired the flag
+ * is left set and the cache stays marked for priority.  Presumably a
+ * later call will have more luck and clear the value then.
+ */
+static unsigned int update_priority_cached(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	if (!mm->context.priority_cached ||
+	    !down_write_trylock(&mm->mmap_sem))
+		return mm->context.priority_cached;
+
+	/* Look for any mapping that still asks for priority caching. */
+	vma = mm->mmap;
+	while (vma && !hv_pte_get_cached_priority(vma->vm_page_prot))
+		vma = vma->vm_next;
+
+	if (vma == NULL)
+		mm->context.priority_cached = 0;
+	up_write(&mm->mmap_sem);
+
+	return mm->context.priority_cached;
+}
+
+/* Set caching correctly for an mm that we are switching to. */
+void check_mm_caching(struct mm_struct *prev, struct mm_struct *next)
+{
+	if (mm_is_priority_cached(next)) {
+		hv_set_caching(update_priority_cached(next));
+		return;
+	}
+
+	/*
+	 * The new mm doesn't use priority caching, so hv_set_caching()
+	 * is only needed when the old mm did; otherwise the setting
+	 * can be assumed to be zero already.
+	 */
+	if (mm_is_priority_cached(prev))
+		hv_set_caching(0);
+}
+
+#if CHIP_HAS_MMIO()
+
+/*
+ * Map an arbitrary MMIO address, homed according to pgprot, into VA space.
+ *
+ * @phys_addr and @size need not be page-aligned; the mapping covers
+ * every page touched by the range and the returned pointer keeps the
+ * sub-page offset of @phys_addr.  Only the LOTAR of @home is used.
+ * Returns NULL for a zero size, a range that wraps around, or when
+ * the VM area or the page-table mapping cannot be set up.
+ */
+void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
+			   pgprot_t home)
+{
+	void *addr;
+	struct vm_struct *area;
+	unsigned long offset;
+	/* Same width as phys_addr, so the wrap check is not truncated. */
+	resource_size_t last_addr;
+	pgprot_t pgprot;
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
+		return NULL;
+
+	/* Create a read/write, MMIO VA mapping homed at the requested shim. */
+	pgprot = PAGE_KERNEL;
+	pgprot = hv_pte_set_mode(pgprot, HV_PTE_MODE_MMIO);
+	pgprot = hv_pte_set_lotar(pgprot, hv_pte_get_lotar(home));
+
+	/*
+	 * Mappings have to be page-aligned
+	 */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
+	/*
+	 * Ok, go for it..
+	 */
+	area = get_vm_area(size, VM_IOREMAP /* | other flags? */);
+	if (!area)
+		return NULL;
+	area->phys_addr = phys_addr;
+	addr = area->addr;
+	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
+			       phys_addr, pgprot)) {
+		/*
+		 * free_vm_area() both unlinks the area and frees its
+		 * vm_struct; remove_vm_area() alone would leak it.
+		 */
+		free_vm_area(area);
+		return NULL;
+	}
+	return (__force void __iomem *) (offset + (char *)addr);
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+/*
+ * Map a PCI MMIO bus address into VA space.
+ * Not supported here: any call panics instead of returning.
+ */
+void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
+{
+ panic("ioremap for PCI MMIO is not supported");
+}
+EXPORT_SYMBOL(ioremap);
+
+/*
+ * Unmap an MMIO VA mapping.
+ *
+ * @addr_in may carry a sub-page offset (as returned by ioremap_prot());
+ * it is rounded down to its page before being handed to vunmap().
+ * x86 walks the vm area list and removes the area by hand instead of
+ * calling vunmap(); no reason to do the same is known here.
+ */
+void iounmap(volatile void __iomem *addr_in)
+{
+	volatile void __iomem *addr = (volatile void __iomem *)
+		(PAGE_MASK & (unsigned long __force)addr_in);
+
+	vunmap((void * __force)addr);
+}
+EXPORT_SYMBOL(iounmap);
+
+#endif /* CHIP_HAS_MMIO() */