Moved, renamed, and deleted files

The original directory structure was scattered and unorganized. These changes rearrange it to match the standard kernel source tree layout.
author: Srikant Patnaik 2015-01-11 12:28:04 +0530
committer: Srikant Patnaik 2015-01-11 12:28:04 +0530
commit: 871480933a1c28f8a9fed4c4d34d06c439a7a422 (patch)
tree: 8718f573808810c2a1e8cb8fb6ac469093ca2784 /arch/tile/include
parent: 9d40ac5867b9aefe0722bc1f110b965ff294d30d (diff)
download: FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.tar.gz
FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.tar.bz2
FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.zip
115 files changed, 22427 insertions, 0 deletions
diff --git a/arch/tile/include/arch/Kbuild b/arch/tile/include/arch/Kbuild
new file mode 100644
index 00000000..9c0ea24c
--- /dev/null
+++ b/arch/tile/include/arch/Kbuild
@@ -0,0 +1,17 @@
+header-y += abi.h
+header-y += chip.h
+header-y += chip_tile64.h
+header-y += chip_tilegx.h
+header-y += chip_tilepro.h
+header-y += icache.h
+header-y += interrupts.h
+header-y += interrupts_32.h
+header-y += interrupts_64.h
+header-y += opcode.h
+header-y += opcode_tilegx.h
+header-y += opcode_tilepro.h
+header-y += sim.h
+header-y += sim_def.h
+header-y += spr_def.h
+header-y += spr_def_32.h
+header-y += spr_def_64.h
diff --git a/arch/tile/include/arch/abi.h b/arch/tile/include/arch/abi.h
new file mode 100644
index 00000000..c55a3d43
--- /dev/null
+++ b/arch/tile/include/arch/abi.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file
+ *
+ * ABI-related register definitions.
+ */
+
+#ifndef __ARCH_ABI_H__
+
+#if !defined __need_int_reg_t && !defined __DOXYGEN__
+# define __ARCH_ABI_H__
+# include <arch/chip.h>
+#endif /* !__need_int_reg_t && !__DOXYGEN__ */
+
+/* Provide the basic machine types; skipped if __INT_REG_BITS is already set. */
+#ifndef __INT_REG_BITS
+
+/** Number of bits in a register. */
+#if defined __tilegx__
+# define __INT_REG_BITS 64
+#elif defined __tilepro__
+# define __INT_REG_BITS 32
+#elif !defined __need_int_reg_t
+# include <arch/chip.h>
+# define __INT_REG_BITS CHIP_WORD_SIZE()
+#else
+# error Unrecognized architecture with __need_int_reg_t
+#endif /* __INT_REG_BITS selection */
+
+#if __INT_REG_BITS == 64
+
+#ifndef __ASSEMBLER__
+/** Unsigned type that can hold a register. */
+typedef unsigned long long __uint_reg_t;
+
+/** Signed type that can hold a register. */
+typedef long long __int_reg_t;
+#endif /* !__ASSEMBLER__ */
+
+/** printf() length modifier for the register types above (long long). */
+#define __INT_REG_FMT "ll"
+
+#else /* __INT_REG_BITS != 64 */
+
+#ifndef __ASSEMBLER__
+/** Unsigned type that can hold a register. */
+typedef unsigned long __uint_reg_t;
+
+/** Signed type that can hold a register. */
+typedef long __int_reg_t;
+#endif /* !__ASSEMBLER__ */
+
+/** printf() length modifier for the register types above (long). */
+#define __INT_REG_FMT "l"
+
+#endif /* __INT_REG_BITS == 64 */
+#endif /* !__INT_REG_BITS */
+
+
+#ifndef __need_int_reg_t
+
+
+#ifndef __ASSEMBLER__
+/** Unsigned type that can hold a register. */
+typedef __uint_reg_t uint_reg_t;
+
+/** Signed type that can hold a register. */
+typedef __int_reg_t int_reg_t;
+#endif /* !__ASSEMBLER__ */
+
+/** printf() length modifier for uint_reg_t / int_reg_t values. */
+#define INT_REG_FMT __INT_REG_FMT
+
+/** Number of bits in a register. */
+#define INT_REG_BITS __INT_REG_BITS
+
+
+/* Registers 0 - 55 are "normal", but some perform special roles. */
+
+#define TREG_FP       52   /**< Frame pointer. */
+#define TREG_TP       53   /**< Thread pointer. */
+#define TREG_SP       54   /**< Stack pointer. */
+#define TREG_LR       55   /**< Link to calling function PC. */
+
+/** Index of last normal general-purpose register. */
+#define TREG_LAST_GPR 55
+
+/* Registers 56 - 62 are "special" network registers. */
+
+#define TREG_SN       56   /**< Static network access. */
+#define TREG_IDN0     57   /**< IDN demux 0 access. */
+#define TREG_IDN1     58   /**< IDN demux 1 access. */
+#define TREG_UDN0     59   /**< UDN demux 0 access. */
+#define TREG_UDN1     60   /**< UDN demux 1 access. */
+#define TREG_UDN2     61   /**< UDN demux 2 access. */
+#define TREG_UDN3     62   /**< UDN demux 3 access. */
+
+/* Register 63 is the "special" zero register. */
+
+#define TREG_ZERO     63   /**< "Zero" register; always reads as "0". */
+
+
+/** By convention, this register is used to hold the syscall number. */
+#define TREG_SYSCALL_NR      10
+
+/** Name of register that holds the syscall number, for use in assembly. */
+#define TREG_SYSCALL_NR_NAME r10
+
+
+/**
+ * The ABI requires callers to allocate a caller state save area of
+ * this many bytes at the bottom of each stack frame.
+ */
+#define C_ABI_SAVE_AREA_SIZE (2 * (INT_REG_BITS / 8))
+
+/**
+ * The operand to an 'info' opcode directing the backtracer to not
+ * try to find the calling frame.
+ */
+#define INFO_OP_CANNOT_BACKTRACE 2
+
+
+#endif /* !__need_int_reg_t */
+
+/* Clear the request so a later include can get all the definitions and declarations. */
+#undef __need_int_reg_t
+
+#endif /* !__ARCH_ABI_H__ */
diff --git a/arch/tile/include/arch/chip.h b/arch/tile/include/arch/chip.h
new file mode 100644
index 00000000..926d3db0
--- /dev/null
+++ b/arch/tile/include/arch/chip.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#if defined(__tile_chip__) && __tile_chip__ == 0 /* unset must not match 0 */
+#include <arch/chip_tile64.h>
+#elif defined(__tile_chip__) && __tile_chip__ == 1
+#include <arch/chip_tilepro.h>
+#elif defined(__tilegx__)
+#include <arch/chip_tilegx.h>
+#else /* no known chip macro: fail loudly instead of defaulting to TILE64 */
+#error Unexpected Tilera chip type
+#endif /* chip header selection */
diff --git a/arch/tile/include/arch/chip_tile64.h b/arch/tile/include/arch/chip_tile64.h
new file mode 100644
index 00000000..261aaba0
--- /dev/null
+++ b/arch/tile/include/arch/chip_tile64.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file
+ * Global header file.
+ * This header file specifies defines for TILE64.
+ */
+
+#ifndef __ARCH_CHIP_H__
+#define __ARCH_CHIP_H__
+
+/** Specify chip version.
+ * When possible, prefer the CHIP_xxx symbols below for future-proofing.
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip__ symbol.
+ */
+#define TILE_CHIP 0
+
+/** Specify chip revision.
+ * This provides for the case of a respin of a particular chip type;
+ * the normal value for this symbol is "0".
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip_rev__ symbol.
+ */
+#define TILE_CHIP_REV 0
+
+/** The name of this architecture. */
+#define CHIP_ARCH_NAME "tile64"
+
+/** The ELF e_machine type for binaries for this chip. */
+#define CHIP_ELF_TYPE() EM_TILE64
+
+/** The alternate ELF e_machine type for binaries for this chip. */
+#define CHIP_COMPAT_ELF_TYPE() 0x2506
+
+/** What is the native word size of the machine? */
+#define CHIP_WORD_SIZE() 32
+
+/** How many bits of a virtual address are used. Extra bits must be
+ * the sign extension of the low bits.
+ */
+#define CHIP_VA_WIDTH() 32
+
+/** How many bits are in a physical address? */
+#define CHIP_PA_WIDTH() 36
+
+/** Size of the L2 cache, in bytes. */
+#define CHIP_L2_CACHE_SIZE() 65536
+
+/** Log size of an L2 cache line in bytes. */
+#define CHIP_L2_LOG_LINE_SIZE() 6
+
+/** Size of an L2 cache line, in bytes. */
+#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
+
+/** Associativity of the L2 cache. */
+#define CHIP_L2_ASSOC() 2
+
+/** Size of the L1 data cache, in bytes. */
+#define CHIP_L1D_CACHE_SIZE() 8192
+
+/** Log size of an L1 data cache line in bytes. */
+#define CHIP_L1D_LOG_LINE_SIZE() 4
+
+/** Size of an L1 data cache line, in bytes. */
+#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
+
+/** Associativity of the L1 data cache. */
+#define CHIP_L1D_ASSOC() 2
+
+/** Size of the L1 instruction cache, in bytes. */
+#define CHIP_L1I_CACHE_SIZE() 8192
+
+/** Log size of an L1 instruction cache line in bytes. */
+#define CHIP_L1I_LOG_LINE_SIZE() 6
+
+/** Size of an L1 instruction cache line, in bytes. */
+#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
+
+/** Associativity of the L1 instruction cache. */
+#define CHIP_L1I_ASSOC() 1
+
+/** Stride with which flush instructions must be issued. */
+#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which inv instructions must be issued. */
+#define CHIP_INV_STRIDE() CHIP_L1D_LINE_SIZE()
+
+/** Stride with which finv instructions must be issued. */
+#define CHIP_FINV_STRIDE() CHIP_L1D_LINE_SIZE()
+
+/** Can the local cache coherently cache data that is homed elsewhere? */
+#define CHIP_HAS_COHERENT_LOCAL_CACHE() 0
+
+/** How many simultaneous outstanding victims can the L2 cache have? */
+#define CHIP_MAX_OUTSTANDING_VICTIMS() 2
+
+/** Does the TLB support the NC and NOALLOC bits? */
+#define CHIP_HAS_NC_AND_NOALLOC_BITS() 0
+
+/** Does the chip support hash-for-home caching? */
+#define CHIP_HAS_CBOX_HOME_MAP() 0
+
+/** Number of entries in the chip's home map tables. */
+/* #define CHIP_CBOX_HOME_MAP_SIZE() -- does not apply to chip 0 */
+
+/** Do uncacheable requests miss in the cache regardless of whether
+ * there is matching data? */
+#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 0
+
+/** Does the mf instruction wait for victims? */
+#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 1
+
+/** Does the chip have an "inv" instruction that doesn't also flush? */
+#define CHIP_HAS_INV() 0
+
+/** Does the chip have a "wh64" instruction? */
+#define CHIP_HAS_WH64() 0
+
+/** Does this chip have a 'dword_align' instruction? */
+#define CHIP_HAS_DWORD_ALIGN() 0
+
+/** Number of performance counters. */
+#define CHIP_PERFORMANCE_COUNTERS() 2
+
+/** Does this chip have auxiliary performance counters? */
+#define CHIP_HAS_AUX_PERF_COUNTERS() 0
+
+/** Is the CBOX_MSR1 SPR supported? */
+#define CHIP_HAS_CBOX_MSR1() 0
+
+/** Is the TILE_RTF_HWM SPR supported? */
+#define CHIP_HAS_TILE_RTF_HWM() 0
+
+/** Is the TILE_WRITE_PENDING SPR supported? */
+#define CHIP_HAS_TILE_WRITE_PENDING() 0
+
+/** Is the PROC_STATUS SPR supported? */
+#define CHIP_HAS_PROC_STATUS_SPR() 0
+
+/** Is the DSTREAM_PF SPR supported? */
+#define CHIP_HAS_DSTREAM_PF() 0
+
+/** Log of the number of mshims we have. */
+#define CHIP_LOG_NUM_MSHIMS() 2
+
+/** Are the bases of the interrupt vector areas fixed? */
+#define CHIP_HAS_FIXED_INTVEC_BASE() 1
+
+/** Are the interrupt masks split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_INTR_MASK() 1
+
+/** Is the cycle count split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_CYCLE() 1
+
+/** Does the chip have a static network? */
+#define CHIP_HAS_SN() 1
+
+/** Does the chip have a static network processor? */
+#define CHIP_HAS_SN_PROC() 1
+
+/** Size of the L1 static network processor instruction cache, in bytes. */
+#define CHIP_L1SNI_CACHE_SIZE() 2048
+
+/** Does the chip have DMA support in each tile? */
+#define CHIP_HAS_TILE_DMA() 1
+
+/** Does the chip have the second revision of the directly accessible
+ *  dynamic networks?  This encapsulates a number of characteristics,
+ *  including the absence of the catch-all, the absence of inline message
+ *  tags, the absence of support for network context-switching, and so on.
+ */
+#define CHIP_HAS_REV1_XDN() 0
+
+/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
+#define CHIP_HAS_CMPEXCH() 0
+
+/** Does the chip have memory-mapped I/O support? */
+#define CHIP_HAS_MMIO() 0
+
+/** Does the chip have post-completion interrupts? */
+#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0
+
+/** Does the chip have native single step support? */
+#define CHIP_HAS_SINGLE_STEP() 0
+
+#ifndef __OPEN_SOURCE__  /* features only relevant to hypervisor-level code */
+
+/** How many entries are present in the instruction TLB? */
+#define CHIP_ITLB_ENTRIES() 8
+
+/** How many entries are present in the data TLB? */
+#define CHIP_DTLB_ENTRIES() 16
+
+/** How many MAF entries does the XAUI shim have? */
+#define CHIP_XAUI_MAF_ENTRIES() 16
+
+/** Does the memory shim have a source-id table? */
+#define CHIP_HAS_MSHIM_SRCID_TABLE() 1
+
+/** Does the L1 instruction cache clear on reset? */
+#define CHIP_HAS_L1I_CLEAR_ON_RESET() 0
+
+/** Does the chip come out of reset with valid coordinates on all tiles?
+ * Note that if so, this also implies that the upper left is 1,1.
+ */
+#define CHIP_HAS_VALID_TILE_COORD_RESET() 0
+
+/** Does the chip have unified packet formats? */
+#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 0
+
+/** Does the chip support write reordering? */
+#define CHIP_HAS_WRITE_REORDERING() 0
+
+/** Does the chip support Y-X routing as well as X-Y? */
+#define CHIP_HAS_Y_X_ROUTING() 0
+
+/** Is INTCTRL_3 managed with the correct MPL? */
+#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 0
+
+/** Is it possible to configure the chip to be big-endian? */
+#define CHIP_HAS_BIG_ENDIAN_CONFIG() 0
+
+/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
+#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0
+
+/** Is the DIAG_TRACE_WAY SPR supported? */
+#define CHIP_HAS_DIAG_TRACE_WAY() 0
+
+/** Is the MEM_STRIPE_CONFIG SPR supported? */
+#define CHIP_HAS_MEM_STRIPE_CONFIG() 0
+
+/** Are the TLB_PERF SPRs supported? */
+#define CHIP_HAS_TLB_PERF() 0
+
+/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
+#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0
+
+/** Does the chip support rev1 DMA packets? */
+#define CHIP_HAS_REV1_DMA_PACKETS() 0
+
+/** Does the chip have an IPI shim? */
+#define CHIP_HAS_IPI() 0
+
+#endif /* !__OPEN_SOURCE__ */
+#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/arch/chip_tilegx.h b/arch/tile/include/arch/chip_tilegx.h
new file mode 100644
index 00000000..ea8e4f2c
--- /dev/null
+++ b/arch/tile/include/arch/chip_tilegx.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file
+ * Global header file.
+ * This header file specifies defines for TILE-Gx.
+ */
+
+#ifndef __ARCH_CHIP_H__
+#define __ARCH_CHIP_H__
+
+/** Specify chip version.
+ * When possible, prefer the CHIP_xxx symbols below for future-proofing.
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip__ symbol.
+ */
+#define TILE_CHIP 10
+
+/** Specify chip revision.
+ * This provides for the case of a respin of a particular chip type;
+ * the normal value for this symbol is "0".
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip_rev__ symbol.
+ */
+#define TILE_CHIP_REV 0
+
+/** The name of this architecture. */
+#define CHIP_ARCH_NAME "tilegx"
+
+/** The ELF e_machine type for binaries for this chip. */
+#define CHIP_ELF_TYPE() EM_TILEGX
+
+/** The alternate ELF e_machine type for binaries for this chip. */
+#define CHIP_COMPAT_ELF_TYPE() 0x2597
+
+/** What is the native word size of the machine? */
+#define CHIP_WORD_SIZE() 64
+
+/** How many bits of a virtual address are used. Extra bits must be
+ * the sign extension of the low bits.
+ */
+#define CHIP_VA_WIDTH() 42
+
+/** How many bits are in a physical address? */
+#define CHIP_PA_WIDTH() 40
+
+/** Size of the L2 cache, in bytes. */
+#define CHIP_L2_CACHE_SIZE() 262144
+
+/** Log size of an L2 cache line in bytes. */
+#define CHIP_L2_LOG_LINE_SIZE() 6
+
+/** Size of an L2 cache line, in bytes. */
+#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
+
+/** Associativity of the L2 cache. */
+#define CHIP_L2_ASSOC() 8
+
+/** Size of the L1 data cache, in bytes. */
+#define CHIP_L1D_CACHE_SIZE() 32768
+
+/** Log size of an L1 data cache line in bytes. */
+#define CHIP_L1D_LOG_LINE_SIZE() 6
+
+/** Size of an L1 data cache line, in bytes. */
+#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
+
+/** Associativity of the L1 data cache. */
+#define CHIP_L1D_ASSOC() 2
+
+/** Size of the L1 instruction cache, in bytes. */
+#define CHIP_L1I_CACHE_SIZE() 32768
+
+/** Log size of an L1 instruction cache line in bytes. */
+#define CHIP_L1I_LOG_LINE_SIZE() 6
+
+/** Size of an L1 instruction cache line, in bytes. */
+#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
+
+/** Associativity of the L1 instruction cache. */
+#define CHIP_L1I_ASSOC() 2
+
+/** Stride with which flush instructions must be issued. */
+#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which inv instructions must be issued. */
+#define CHIP_INV_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which finv instructions must be issued. */
+#define CHIP_FINV_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Can the local cache coherently cache data that is homed elsewhere? */
+#define CHIP_HAS_COHERENT_LOCAL_CACHE() 1
+
+/** How many simultaneous outstanding victims can the L2 cache have? */
+#define CHIP_MAX_OUTSTANDING_VICTIMS() 128
+
+/** Does the TLB support the NC and NOALLOC bits? */
+#define CHIP_HAS_NC_AND_NOALLOC_BITS() 1
+
+/** Does the chip support hash-for-home caching? */
+#define CHIP_HAS_CBOX_HOME_MAP() 1
+
+/** Number of entries in the chip's home map tables. */
+#define CHIP_CBOX_HOME_MAP_SIZE() 128
+
+/** Do uncacheable requests miss in the cache regardless of whether
+ * there is matching data? */
+#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 1
+
+/** Does the mf instruction wait for victims? */
+#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 0
+
+/** Does the chip have an "inv" instruction that doesn't also flush? */
+#define CHIP_HAS_INV() 1
+
+/** Does the chip have a "wh64" instruction? */
+#define CHIP_HAS_WH64() 1
+
+/** Does this chip have a 'dword_align' instruction? */
+#define CHIP_HAS_DWORD_ALIGN() 0
+
+/** Number of performance counters. */
+#define CHIP_PERFORMANCE_COUNTERS() 4
+
+/** Does this chip have auxiliary performance counters? */
+#define CHIP_HAS_AUX_PERF_COUNTERS() 1
+
+/** Is the CBOX_MSR1 SPR supported? */
+#define CHIP_HAS_CBOX_MSR1() 0
+
+/** Is the TILE_RTF_HWM SPR supported? */
+#define CHIP_HAS_TILE_RTF_HWM() 1
+
+/** Is the TILE_WRITE_PENDING SPR supported? */
+#define CHIP_HAS_TILE_WRITE_PENDING() 0
+
+/** Is the PROC_STATUS SPR supported? */
+#define CHIP_HAS_PROC_STATUS_SPR() 1
+
+/** Is the DSTREAM_PF SPR supported? */
+#define CHIP_HAS_DSTREAM_PF() 1
+
+/** Log of the number of mshims we have. */
+#define CHIP_LOG_NUM_MSHIMS() 2
+
+/** Are the bases of the interrupt vector areas fixed? */
+#define CHIP_HAS_FIXED_INTVEC_BASE() 0
+
+/** Are the interrupt masks split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_INTR_MASK() 0
+
+/** Is the cycle count split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_CYCLE() 0
+
+/** Does the chip have a static network? */
+#define CHIP_HAS_SN() 0
+
+/** Does the chip have a static network processor? */
+#define CHIP_HAS_SN_PROC() 0
+
+/** Size of the L1 static network processor instruction cache, in bytes. */
+/* #define CHIP_L1SNI_CACHE_SIZE() -- does not apply to chip 10 */
+
+/** Does the chip have DMA support in each tile? */
+#define CHIP_HAS_TILE_DMA() 0
+
+/** Does the chip have the second revision of the directly accessible
+ *  dynamic networks?  This encapsulates a number of characteristics,
+ *  including the absence of the catch-all, the absence of inline message
+ *  tags, the absence of support for network context-switching, and so on.
+ */
+#define CHIP_HAS_REV1_XDN() 1
+
+/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
+#define CHIP_HAS_CMPEXCH() 1
+
+/** Does the chip have memory-mapped I/O support? */
+#define CHIP_HAS_MMIO() 1
+
+/** Does the chip have post-completion interrupts? */
+#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 1
+
+/** Does the chip have native single step support? */
+#define CHIP_HAS_SINGLE_STEP() 1
+
+#ifndef __OPEN_SOURCE__  /* features only relevant to hypervisor-level code */
+
+/** How many entries are present in the instruction TLB? */
+#define CHIP_ITLB_ENTRIES() 16
+
+/** How many entries are present in the data TLB? */
+#define CHIP_DTLB_ENTRIES() 32
+
+/** How many MAF entries does the XAUI shim have? */
+#define CHIP_XAUI_MAF_ENTRIES() 32
+
+/** Does the memory shim have a source-id table? */
+#define CHIP_HAS_MSHIM_SRCID_TABLE() 0
+
+/** Does the L1 instruction cache clear on reset? */
+#define CHIP_HAS_L1I_CLEAR_ON_RESET() 1
+
+/** Does the chip come out of reset with valid coordinates on all tiles?
+ * Note that if so, this also implies that the upper left is 1,1.
+ */
+#define CHIP_HAS_VALID_TILE_COORD_RESET() 1
+
+/** Does the chip have unified packet formats? */
+#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 1
+
+/** Does the chip support write reordering? */
+#define CHIP_HAS_WRITE_REORDERING() 1
+
+/** Does the chip support Y-X routing as well as X-Y? */
+#define CHIP_HAS_Y_X_ROUTING() 1
+
+/** Is INTCTRL_3 managed with the correct MPL? */
+#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 1
+
+/** Is it possible to configure the chip to be big-endian? */
+#define CHIP_HAS_BIG_ENDIAN_CONFIG() 1
+
+/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
+#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0
+
+/** Is the DIAG_TRACE_WAY SPR supported? */
+#define CHIP_HAS_DIAG_TRACE_WAY() 0
+
+/** Is the MEM_STRIPE_CONFIG SPR supported? */
+#define CHIP_HAS_MEM_STRIPE_CONFIG() 1
+
+/** Are the TLB_PERF SPRs supported? */
+#define CHIP_HAS_TLB_PERF() 1
+
+/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
+#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0
+
+/** Does the chip support rev1 DMA packets? */
+#define CHIP_HAS_REV1_DMA_PACKETS() 1
+
+/** Does the chip have an IPI shim? */
+#define CHIP_HAS_IPI() 1
+
+#endif /* !__OPEN_SOURCE__ */
+#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/arch/chip_tilepro.h b/arch/tile/include/arch/chip_tilepro.h
new file mode 100644
index 00000000..70017699
--- /dev/null
+++ b/arch/tile/include/arch/chip_tilepro.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file
+ * Global header file.
+ * This header file specifies defines for TILEPro.
+ */
+
+#ifndef __ARCH_CHIP_H__
+#define __ARCH_CHIP_H__
+
+/** Specify chip version.
+ * When possible, prefer the CHIP_xxx symbols below for future-proofing.
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip__ symbol.
+ */
+#define TILE_CHIP 1
+
+/** Specify chip revision.
+ * This provides for the case of a respin of a particular chip type;
+ * the normal value for this symbol is "0".
+ * This is intended for cross-compiling; native compilation should
+ * use the predefined __tile_chip_rev__ symbol.
+ */
+#define TILE_CHIP_REV 0
+
+/** The name of this architecture. */
+#define CHIP_ARCH_NAME "tilepro"
+
+/** The ELF e_machine type for binaries for this chip. */
+#define CHIP_ELF_TYPE() EM_TILEPRO
+
+/** The alternate ELF e_machine type for binaries for this chip. */
+#define CHIP_COMPAT_ELF_TYPE() 0x2507
+
+/** What is the native word size of the machine? */
+#define CHIP_WORD_SIZE() 32
+
+/** How many bits of a virtual address are used. Extra bits must be
+ * the sign extension of the low bits.
+ */
+#define CHIP_VA_WIDTH() 32
+
+/** How many bits are in a physical address? */
+#define CHIP_PA_WIDTH() 36
+
+/** Size of the L2 cache, in bytes. */
+#define CHIP_L2_CACHE_SIZE() 65536
+
+/** Log size of an L2 cache line in bytes. */
+#define CHIP_L2_LOG_LINE_SIZE() 6
+
+/** Size of an L2 cache line, in bytes. */
+#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
+
+/** Associativity of the L2 cache. */
+#define CHIP_L2_ASSOC() 4
+
+/** Size of the L1 data cache, in bytes. */
+#define CHIP_L1D_CACHE_SIZE() 8192
+
+/** Log size of an L1 data cache line in bytes. */
+#define CHIP_L1D_LOG_LINE_SIZE() 4
+
+/** Size of an L1 data cache line, in bytes. */
+#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
+
+/** Associativity of the L1 data cache. */
+#define CHIP_L1D_ASSOC() 2
+
+/** Size of the L1 instruction cache, in bytes. */
+#define CHIP_L1I_CACHE_SIZE() 16384
+
+/** Log size of an L1 instruction cache line in bytes. */
+#define CHIP_L1I_LOG_LINE_SIZE() 6
+
+/** Size of an L1 instruction cache line, in bytes. */
+#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
+
+/** Associativity of the L1 instruction cache. */
+#define CHIP_L1I_ASSOC() 1
+
+/** Stride with which flush instructions must be issued. */
+#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which inv instructions must be issued. */
+#define CHIP_INV_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Stride with which finv instructions must be issued. */
+#define CHIP_FINV_STRIDE() CHIP_L2_LINE_SIZE()
+
+/** Can the local cache coherently cache data that is homed elsewhere? */
+#define CHIP_HAS_COHERENT_LOCAL_CACHE() 1
+
+/** How many simultaneous outstanding victims can the L2 cache have? */
+#define CHIP_MAX_OUTSTANDING_VICTIMS() 4
+
+/** Does the TLB support the NC and NOALLOC bits? */
+#define CHIP_HAS_NC_AND_NOALLOC_BITS() 1
+
+/** Does the chip support hash-for-home caching? */
+#define CHIP_HAS_CBOX_HOME_MAP() 1
+
+/** Number of entries in the chip's home map tables. */
+#define CHIP_CBOX_HOME_MAP_SIZE() 64
+
+/** Do uncacheable requests miss in the cache regardless of whether
+ * there is matching data? */
+#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 1
+
+/** Does the mf instruction wait for victims? */
+#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 0
+
+/** Does the chip have an "inv" instruction that doesn't also flush? */
+#define CHIP_HAS_INV() 1
+
+/** Does the chip have a "wh64" instruction? */
+#define CHIP_HAS_WH64() 1
+
+/** Does this chip have a 'dword_align' instruction? */
+#define CHIP_HAS_DWORD_ALIGN() 1
+
+/** Number of performance counters. */
+#define CHIP_PERFORMANCE_COUNTERS() 4
+
+/** Does this chip have auxiliary performance counters? */
+#define CHIP_HAS_AUX_PERF_COUNTERS() 1
+
+/** Is the CBOX_MSR1 SPR supported? */
+#define CHIP_HAS_CBOX_MSR1() 1
+
+/** Is the TILE_RTF_HWM SPR supported? */
+#define CHIP_HAS_TILE_RTF_HWM() 1
+
+/** Is the TILE_WRITE_PENDING SPR supported? */
+#define CHIP_HAS_TILE_WRITE_PENDING() 1
+
+/** Is the PROC_STATUS SPR supported? */
+#define CHIP_HAS_PROC_STATUS_SPR() 1
+
+/** Is the DSTREAM_PF SPR supported? */
+#define CHIP_HAS_DSTREAM_PF() 0
+
+/** Log of the number of mshims we have. */
+#define CHIP_LOG_NUM_MSHIMS() 2
+
+/** Are the bases of the interrupt vector areas fixed? */
+#define CHIP_HAS_FIXED_INTVEC_BASE() 1
+
+/** Are the interrupt masks split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_INTR_MASK() 1
+
+/** Is the cycle count split up into 2 SPRs? */
+#define CHIP_HAS_SPLIT_CYCLE() 1
+
+/** Does the chip have a static network? */
+#define CHIP_HAS_SN() 1
+
+/** Does the chip have a static network processor? */
+#define CHIP_HAS_SN_PROC() 0
+
+/** Size of the L1 static network processor instruction cache, in bytes. */
+/* #define CHIP_L1SNI_CACHE_SIZE() -- does not apply to chip 1 */
+
+/** Does the chip have DMA support in each tile? */
+#define CHIP_HAS_TILE_DMA() 1
+
+/** Does the chip have the second revision of the directly accessible
+ *  dynamic networks?  This encapsulates a number of characteristics,
+ *  including the absence of the catch-all, the absence of inline message
+ *  tags, the absence of support for network context-switching, and so on.
+ */
+#define CHIP_HAS_REV1_XDN() 0
+
+/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
+#define CHIP_HAS_CMPEXCH() 0
+
+/** Does the chip have memory-mapped I/O support? */
+#define CHIP_HAS_MMIO() 0
+
+/** Does the chip have post-completion interrupts? */
+#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0
+
+/** Does the chip have native single step support? */
+#define CHIP_HAS_SINGLE_STEP() 0
+
+#ifndef __OPEN_SOURCE__  /* features only relevant to hypervisor-level code */
+
+/** How many entries are present in the instruction TLB? */
+#define CHIP_ITLB_ENTRIES() 16
+
+/** How many entries are present in the data TLB? */
+#define CHIP_DTLB_ENTRIES() 16
+
+/** How many MAF entries does the XAUI shim have? */
+#define CHIP_XAUI_MAF_ENTRIES() 32
+
+/** Does the memory shim have a source-id table? */
+#define CHIP_HAS_MSHIM_SRCID_TABLE() 0
+
+/** Does the L1 instruction cache clear on reset? */
+#define CHIP_HAS_L1I_CLEAR_ON_RESET() 1
+
+/** Does the chip come out of reset with valid coordinates on all tiles?
+ * Note that if so, this also implies that the upper left is 1,1.
+ */
+#define CHIP_HAS_VALID_TILE_COORD_RESET() 1
+
+/** Does the chip have unified packet formats? */
+#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 1
+
+/** Does the chip support write reordering? */
+#define CHIP_HAS_WRITE_REORDERING() 1
+
+/** Does the chip support Y-X routing as well as X-Y? */
+#define CHIP_HAS_Y_X_ROUTING() 1
+
+/** Is INTCTRL_3 managed with the correct MPL? */
+#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 1
+
+/** Is it possible to configure the chip to be big-endian? */
+#define CHIP_HAS_BIG_ENDIAN_CONFIG() 1
+
+/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
+#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 1
+
+/** Is the DIAG_TRACE_WAY SPR supported? */
+#define CHIP_HAS_DIAG_TRACE_WAY() 1
+
+/** Is the MEM_STRIPE_CONFIG SPR supported? */
+#define CHIP_HAS_MEM_STRIPE_CONFIG() 1
+
+/** Are the TLB_PERF SPRs supported? */
+#define CHIP_HAS_TLB_PERF() 1
+
+/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
+#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 1
+
+/** Does the chip support rev1 DMA packets? */
+#define CHIP_HAS_REV1_DMA_PACKETS() 1
+
+/** Does the chip have an IPI shim? */
+#define CHIP_HAS_IPI() 0
+
+#endif /* !__OPEN_SOURCE__ */
+#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/arch/icache.h b/arch/tile/include/arch/icache.h
new file mode 100644
index 00000000..762eafa8
--- /dev/null
+++ b/arch/tile/include/arch/icache.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ */
+
+/**
+ * @file
+ *
+ * Support for invalidating bytes in the instruction cache.
+ */
+
+#ifndef __ARCH_ICACHE_H__
+#define __ARCH_ICACHE_H__
+
+#include <arch/chip.h>
+
+
+/**
+ * Invalidate the instruction cache for the given range of memory.
+ *
+ * @param addr The start of memory to be invalidated.
+ * @param size The number of bytes to be invalidated; 0 returns immediately.
+ * @param page_size The system's page size, e.g. getpagesize() in userspace.
+ * This value must be a power of two no larger than the page containing
+ * the code to be invalidated. If the value is smaller than the actual page
+ * size, this function will still work, but may run slower than necessary.
+ */
+static __inline void
+invalidate_icache(const void* addr, unsigned long size,
+                  unsigned long page_size)
+{
+  const unsigned long cache_way_size =
+    CHIP_L1I_CACHE_SIZE() / CHIP_L1I_ASSOC();  /* L1I size divided by ways */
+  unsigned long max_useful_size;
+  const char* start, *end;
+  long num_passes;
+
+  if (__builtin_expect(size == 0, 0))  /* empty range: nothing to do */
+    return;
+
+#ifdef __tilegx__
+  /* Limit the number of bytes visited to avoid redundant iterations. */
+  max_useful_size = (page_size < cache_way_size) ? page_size : cache_way_size;
+
+  /* No PA aliasing is possible, so one pass always suffices. */
+  num_passes = 1;
+#else /* !__tilegx__ */
+  /* Limit the number of bytes visited to avoid redundant iterations. */
+  max_useful_size = cache_way_size;
+
+  /*
+   * Compute how many passes we need (we'll treat 0 as if it were 1).
+   * This works because we know the page size is a power of two.
+   */
+  num_passes = cache_way_size >> __builtin_ctzl(page_size);
+#endif /* __tilegx__ */
+
+  if (__builtin_expect(size > max_useful_size, 0))  /* clamp large requests */
+    size = max_useful_size;
+
+  /* Locate the first byte (masked down by the line size) and the last byte. */
+  start = (const char *)((unsigned long)addr & -CHIP_L1I_LINE_SIZE());
+  end = (const char*)addr + size - 1;  /* inclusive */
+
+  __insn_mf();  /* presumably a memory fence before invalidating -- confirm */
+
+  do
+  {
+    const char* p;
+
+    for (p = start; p <= end; p += CHIP_L1I_LINE_SIZE())
+      __insn_icoh(p);  /* one icoh per line-size step in [start, end] */
+
+    start += page_size;  /* next pass: same offsets, one page_size further */
+    end += page_size;
+  }
+  while (--num_passes > 0);  /* do-while: a computed 0 still runs one pass */
+
+  __insn_drain();  /* presumably waits for the icoh ops to finish -- confirm */
+}
+
+
+#endif /* __ARCH_ICACHE_H__ */
diff --git a/arch/tile/include/arch/interrupts.h b/arch/tile/include/arch/interrupts.h
new file mode 100644
index 00000000..20f8f07d
--- /dev/null
+++ b/arch/tile/include/arch/interrupts.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifdef __tilegx__
+#include <arch/interrupts_64.h>
+#else /* !__tilegx__ */
+#include <arch/interrupts_32.h>
+#endif /* __tilegx__ */
diff --git a/arch/tile/include/arch/interrupts_32.h b/arch/tile/include/arch/interrupts_32.h
new file mode 100644
index 00000000..96b57105
--- /dev/null
+++ b/arch/tile/include/arch/interrupts_32.h
@@ -0,0 +1,307 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef __ARCH_INTERRUPTS_H__
+#define __ARCH_INTERRUPTS_H__
+
+/** Mask for an interrupt. */
+/* Note: must handle breaking interrupts into high and low words manually. */
+#define INT_MASK_LO(intno) (1 << (intno))  /* bit within the low word */
+#define INT_MASK_HI(intno) (1 << ((intno) - 32))  /* bit within the high word */
+
+#ifndef __ASSEMBLER__
+#define INT_MASK(intno) (1ULL << (intno))  /* single mask, unsigned long long */
+#endif /* !__ASSEMBLER__ */
+
+
+/** Where a given interrupt executes: 0xFC000000 + (pl << 24) + (i << 8). */
+#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8))
+
+/** Where to store a vector for a given interrupt (INTERRUPT_VECTOR at pl 0). */
+#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0)
+
+/** The base address of user-level interrupts (interrupt 0 at pl 0). */
+#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0)
+
+
+/** Additional synthetic interrupt; its number lies above NUM_INTERRUPTS. */
+#define INT_BREAKPOINT (63)
+
+#define INT_ITLB_MISS    0
+#define INT_MEM_ERROR    1
+#define INT_ILL    2
+#define INT_GPV    3
+#define INT_SN_ACCESS    4
+#define INT_IDN_ACCESS    5
+#define INT_UDN_ACCESS    6
+#define INT_IDN_REFILL    7
+#define INT_UDN_REFILL    8
+#define INT_IDN_COMPLETE    9
+#define INT_UDN_COMPLETE   10
+#define INT_SWINT_3   11
+#define INT_SWINT_2   12
+#define INT_SWINT_1   13
+#define INT_SWINT_0   14
+#define INT_UNALIGN_DATA   15
+#define INT_DTLB_MISS   16
+#define INT_DTLB_ACCESS   17
+#define INT_DMATLB_MISS   18
+#define INT_DMATLB_ACCESS   19
+#define INT_SNITLB_MISS   20
+#define INT_SN_NOTIFY   21
+#define INT_SN_FIREWALL   22
+#define INT_IDN_FIREWALL   23
+#define INT_UDN_FIREWALL   24
+#define INT_TILE_TIMER   25
+#define INT_IDN_TIMER   26
+#define INT_UDN_TIMER   27
+#define INT_DMA_NOTIFY   28
+#define INT_IDN_CA   29
+#define INT_UDN_CA   30
+#define INT_IDN_AVAIL   31
+#define INT_UDN_AVAIL   32
+#define INT_PERF_COUNT   33
+#define INT_INTCTRL_3   34
+#define INT_INTCTRL_2   35
+#define INT_INTCTRL_1   36
+#define INT_INTCTRL_0   37
+#define INT_BOOT_ACCESS   38
+#define INT_WORLD_ACCESS   39
+#define INT_I_ASID   40
+#define INT_D_ASID   41
+#define INT_DMA_ASID   42
+#define INT_SNI_ASID   43
+#define INT_DMA_CPL   44
+#define INT_SN_CPL   45
+#define INT_DOUBLE_FAULT   46
+#define INT_SN_STATIC_ACCESS   47
+#define INT_AUX_PERF_COUNT   48
+
+#define NUM_INTERRUPTS 49  /* one more than the highest number above (48) */
+
+#ifndef __ASSEMBLER__
+#define QUEUED_INTERRUPTS ( /* all numbers below NUM_INTERRUPTS not in NONQUEUED_INTERRUPTS; same set as NON_SYNC_INTERRUPTS */ \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_DMATLB_MISS) | \
+    INT_MASK(INT_DMATLB_ACCESS) | \
+    INT_MASK(INT_SNITLB_MISS) | \
+    INT_MASK(INT_SN_NOTIFY) | \
+    INT_MASK(INT_SN_FIREWALL) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_DMA_NOTIFY) | \
+    INT_MASK(INT_IDN_CA) | \
+    INT_MASK(INT_UDN_CA) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DMA_ASID) | \
+    INT_MASK(INT_SNI_ASID) | \
+    INT_MASK(INT_DMA_CPL) | \
+    INT_MASK(INT_SN_CPL) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    0)
+#define NONQUEUED_INTERRUPTS ( /* all numbers below NUM_INTERRUPTS not in QUEUED_INTERRUPTS; same set as SYNC_INTERRUPTS */ \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_SN_ACCESS) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_IDN_REFILL) | \
+    INT_MASK(INT_UDN_REFILL) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_SN_STATIC_ACCESS) | \
+    0)
+#define CRITICAL_MASKED_INTERRUPTS ( /* all numbers below NUM_INTERRUPTS not in CRITICAL_UNMASKED_INTERRUPTS */ \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_DMATLB_MISS) | \
+    INT_MASK(INT_DMATLB_ACCESS) | \
+    INT_MASK(INT_SNITLB_MISS) | \
+    INT_MASK(INT_SN_NOTIFY) | \
+    INT_MASK(INT_SN_FIREWALL) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_DMA_NOTIFY) | \
+    INT_MASK(INT_IDN_CA) | \
+    INT_MASK(INT_UDN_CA) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    0)
+#define CRITICAL_UNMASKED_INTERRUPTS ( /* all numbers below NUM_INTERRUPTS not in CRITICAL_MASKED_INTERRUPTS */ \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_SN_ACCESS) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_IDN_REFILL) | \
+    INT_MASK(INT_UDN_REFILL) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DMA_ASID) | \
+    INT_MASK(INT_SNI_ASID) | \
+    INT_MASK(INT_DMA_CPL) | \
+    INT_MASK(INT_SN_CPL) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_SN_STATIC_ACCESS) | \
+    0)
+#define MASKABLE_INTERRUPTS ( /* CRITICAL_MASKED_INTERRUPTS plus the IDN/UDN REFILL and COMPLETE interrupts */ \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_IDN_REFILL) | \
+    INT_MASK(INT_UDN_REFILL) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_DMATLB_MISS) | \
+    INT_MASK(INT_DMATLB_ACCESS) | \
+    INT_MASK(INT_SNITLB_MISS) | \
+    INT_MASK(INT_SN_NOTIFY) | \
+    INT_MASK(INT_SN_FIREWALL) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_DMA_NOTIFY) | \
+    INT_MASK(INT_IDN_CA) | \
+    INT_MASK(INT_UDN_CA) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    0)
+#define UNMASKABLE_INTERRUPTS ( /* all numbers below NUM_INTERRUPTS not in MASKABLE_INTERRUPTS */ \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_SN_ACCESS) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DMA_ASID) | \
+    INT_MASK(INT_SNI_ASID) | \
+    INT_MASK(INT_DMA_CPL) | \
+    INT_MASK(INT_SN_CPL) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_SN_STATIC_ACCESS) | \
+    0)
+#define SYNC_INTERRUPTS ( /* same set as NONQUEUED_INTERRUPTS in this header */ \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_SN_ACCESS) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_IDN_REFILL) | \
+    INT_MASK(INT_UDN_REFILL) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_SN_STATIC_ACCESS) | \
+    0)
+#define NON_SYNC_INTERRUPTS ( /* same set as QUEUED_INTERRUPTS in this header */ \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_DMATLB_MISS) | \
+    INT_MASK(INT_DMATLB_ACCESS) | \
+    INT_MASK(INT_SNITLB_MISS) | \
+    INT_MASK(INT_SN_NOTIFY) | \
+    INT_MASK(INT_SN_FIREWALL) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_DMA_NOTIFY) | \
+    INT_MASK(INT_IDN_CA) | \
+    INT_MASK(INT_UDN_CA) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DMA_ASID) | \
+    INT_MASK(INT_SNI_ASID) | \
+    INT_MASK(INT_DMA_CPL) | \
+    INT_MASK(INT_SN_CPL) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    0)
+#endif /* !__ASSEMBLER__ */
+#endif /* !__ARCH_INTERRUPTS_H__ */
diff --git a/arch/tile/include/arch/interrupts_64.h b/arch/tile/include/arch/interrupts_64.h
new file mode 100644
index 00000000..5bb58b2e
--- /dev/null
+++ b/arch/tile/include/arch/interrupts_64.h
@@ -0,0 +1,276 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef __ARCH_INTERRUPTS_H__
+#define __ARCH_INTERRUPTS_H__
+
+/** Mask for an interrupt: a single bit at position intno. */
+#ifdef __ASSEMBLER__
+/* Note: must handle breaking interrupts into high and low words manually. */
+#define INT_MASK(intno) (1 << (intno))
+#else /* !__ASSEMBLER__ */
+#define INT_MASK(intno) (1ULL << (intno))  /* unsigned long long mask */
+#endif /* __ASSEMBLER__ */
+
+
+/** Where a given interrupt executes: 0xFC000000 + (pl << 24) + (i << 8). */
+#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8))
+
+/** Where to store a vector for a given interrupt (INTERRUPT_VECTOR at pl 0). */
+#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0)
+
+/** The base address of user-level interrupts (interrupt 0 at pl 0). */
+#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0)
+
+
+/** Additional synthetic interrupt; its number lies above NUM_INTERRUPTS. */
+#define INT_BREAKPOINT (63)
+
+#define INT_MEM_ERROR    0
+#define INT_SINGLE_STEP_3    1
+#define INT_SINGLE_STEP_2    2
+#define INT_SINGLE_STEP_1    3
+#define INT_SINGLE_STEP_0    4
+#define INT_IDN_COMPLETE    5
+#define INT_UDN_COMPLETE    6
+#define INT_ITLB_MISS    7
+#define INT_ILL    8
+#define INT_GPV    9
+#define INT_IDN_ACCESS   10
+#define INT_UDN_ACCESS   11
+#define INT_SWINT_3   12
+#define INT_SWINT_2   13
+#define INT_SWINT_1   14
+#define INT_SWINT_0   15
+#define INT_ILL_TRANS   16
+#define INT_UNALIGN_DATA   17
+#define INT_DTLB_MISS   18
+#define INT_DTLB_ACCESS   19
+#define INT_IDN_FIREWALL   20
+#define INT_UDN_FIREWALL   21
+#define INT_TILE_TIMER   22
+#define INT_AUX_TILE_TIMER   23
+#define INT_IDN_TIMER   24
+#define INT_UDN_TIMER   25
+#define INT_IDN_AVAIL   26
+#define INT_UDN_AVAIL   27
+#define INT_IPI_3   28
+#define INT_IPI_2   29
+#define INT_IPI_1   30
+#define INT_IPI_0   31
+#define INT_PERF_COUNT   32
+#define INT_AUX_PERF_COUNT   33
+#define INT_INTCTRL_3   34
+#define INT_INTCTRL_2   35
+#define INT_INTCTRL_1   36
+#define INT_INTCTRL_0   37
+#define INT_BOOT_ACCESS   38
+#define INT_WORLD_ACCESS   39
+#define INT_I_ASID   40
+#define INT_D_ASID   41
+#define INT_DOUBLE_FAULT   42
+
+#define NUM_INTERRUPTS 43  /* one more than the highest number above (42) */
+
+#ifndef __ASSEMBLER__
+#define QUEUED_INTERRUPTS ( /* all numbers below NUM_INTERRUPTS not in NONQUEUED_INTERRUPTS */ \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_AUX_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_IPI_3) | \
+    INT_MASK(INT_IPI_2) | \
+    INT_MASK(INT_IPI_1) | \
+    INT_MASK(INT_IPI_0) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    0)
+#define NONQUEUED_INTERRUPTS ( /* SYNC_INTERRUPTS without INT_IDN_COMPLETE and INT_UDN_COMPLETE */ \
+    INT_MASK(INT_SINGLE_STEP_3) | \
+    INT_MASK(INT_SINGLE_STEP_2) | \
+    INT_MASK(INT_SINGLE_STEP_1) | \
+    INT_MASK(INT_SINGLE_STEP_0) | \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_ILL_TRANS) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    0)
+#define CRITICAL_MASKED_INTERRUPTS ( /* same set as MASKABLE_INTERRUPTS in this header */ \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_SINGLE_STEP_3) | \
+    INT_MASK(INT_SINGLE_STEP_2) | \
+    INT_MASK(INT_SINGLE_STEP_1) | \
+    INT_MASK(INT_SINGLE_STEP_0) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_AUX_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_IPI_3) | \
+    INT_MASK(INT_IPI_2) | \
+    INT_MASK(INT_IPI_1) | \
+    INT_MASK(INT_IPI_0) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    0)
+#define CRITICAL_UNMASKED_INTERRUPTS ( /* same set as UNMASKABLE_INTERRUPTS in this header */ \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_ILL_TRANS) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    0)
+#define MASKABLE_INTERRUPTS ( /* all numbers below NUM_INTERRUPTS not in UNMASKABLE_INTERRUPTS */ \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_SINGLE_STEP_3) | \
+    INT_MASK(INT_SINGLE_STEP_2) | \
+    INT_MASK(INT_SINGLE_STEP_1) | \
+    INT_MASK(INT_SINGLE_STEP_0) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_AUX_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_IPI_3) | \
+    INT_MASK(INT_IPI_2) | \
+    INT_MASK(INT_IPI_1) | \
+    INT_MASK(INT_IPI_0) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    0)
+#define UNMASKABLE_INTERRUPTS ( /* all numbers below NUM_INTERRUPTS not in MASKABLE_INTERRUPTS */ \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_ILL_TRANS) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    0)
+#define SYNC_INTERRUPTS ( /* interrupt numbers 1..19; everything below NUM_INTERRUPTS not in NON_SYNC_INTERRUPTS */ \
+    INT_MASK(INT_SINGLE_STEP_3) | \
+    INT_MASK(INT_SINGLE_STEP_2) | \
+    INT_MASK(INT_SINGLE_STEP_1) | \
+    INT_MASK(INT_SINGLE_STEP_0) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_ILL_TRANS) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    0)
+#define NON_SYNC_INTERRUPTS ( /* INT_MEM_ERROR (0) plus interrupt numbers 20..42 */ \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_AUX_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_IPI_3) | \
+    INT_MASK(INT_IPI_2) | \
+    INT_MASK(INT_IPI_1) | \
+    INT_MASK(INT_IPI_0) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    0)
+#endif /* !__ASSEMBLER__ */
+#endif /* !__ARCH_INTERRUPTS_H__ */
diff --git a/arch/tile/include/arch/opcode.h b/arch/tile/include/arch/opcode.h
new file mode 100644
index 00000000..92d15229
--- /dev/null
+++ b/arch/tile/include/arch/opcode.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#if defined(__tilepro__)
+#include <arch/opcode_tilepro.h>
+#elif defined(__tilegx__)
+#include <arch/opcode_tilegx.h>
+#else /* neither __tilepro__ nor __tilegx__ */
+#error Unexpected Tilera chip type
+#endif /* chip type selection */
diff --git a/arch/tile/include/arch/opcode_tilegx.h b/arch/tile/include/arch/opcode_tilegx.h
new file mode 100644
index 00000000..c14d02c8
--- /dev/null
+++ b/arch/tile/include/arch/opcode_tilegx.h
@@ -0,0 +1,1405 @@
+/* TILE-Gx opcode information.
+ *
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ *
+ *
+ *
+ *
+ */
+
+#ifndef __ARCH_OPCODE_H__
+#define __ARCH_OPCODE_H__
+
+#ifndef __ASSEMBLER__
+
+typedef unsigned long long tilegx_bundle_bits;  /* raw bits of one bundle */
+
+/* These are the bits (63:62) that determine if a bundle is in the X encoding. */
+#define TILEGX_BUNDLE_MODE_MASK ((tilegx_bundle_bits)3 << 62)
+
+enum
+{
+  /* Maximum number of instructions in a bundle (2 for X, 3 for Y). */
+  TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE = 3,
+
+  /* How many different pipeline encodings are there? X0, X1, Y0, Y1, Y2. */
+  TILEGX_NUM_PIPELINE_ENCODINGS = 5,
+
+  /* Log base 2 of TILEGX_BUNDLE_SIZE_IN_BYTES. */
+  TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES = 3,
+
+  /* A bundle takes this many bytes (1 << 3 = 8). */
+  TILEGX_BUNDLE_SIZE_IN_BYTES = 1 << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES,
+
+  /* Log base 2 of TILEGX_BUNDLE_ALIGNMENT_IN_BYTES. */
+  TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3,
+
+  /* Bundles should be aligned modulo this number of bytes (1 << 3 = 8). */
+  TILEGX_BUNDLE_ALIGNMENT_IN_BYTES =
+    (1 << TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES),
+
+  /* Number of registers (some are magic, such as network I/O). */
+  TILEGX_NUM_REGISTERS = 64,
+};
+
+/* Provide a few "tile_" aliases (one typedef and three macros) for the
+   TILEGX_ names above, to simplify common code between architectures.  */
+
+typedef tilegx_bundle_bits tile_bundle_bits;
+#define TILE_BUNDLE_SIZE_IN_BYTES TILEGX_BUNDLE_SIZE_IN_BYTES
+#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES
+#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \
+  TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES
+
+/* 64-bit pattern for a { bpt ; nop } bundle. */
+#define TILEGX_BPT_BUNDLE 0x286a44ae51485000ULL
+
+static __inline unsigned int
+get_BFEnd_X0(tilegx_bundle_bits num)
+{
+  /* BFEnd_X0 field: bundle bits 17:12. */
+  return ((unsigned int)num >> 12) & 0x3f;
+}
+
+static __inline unsigned int
+get_BFOpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  /* BFOpcodeExtension_X0 field: bundle bits 27:24. */
+  return ((unsigned int)num >> 24) & 0xf;
+}
+
+static __inline unsigned int
+get_BFStart_X0(tilegx_bundle_bits num)
+{
+  /* BFStart_X0 field: bundle bits 23:18. */
+  return ((unsigned int)num >> 18) & 0x3f;
+}
+
+static __inline unsigned int
+get_BrOff_X1(tilegx_bundle_bits n)
+{
+  /* Result bits 5:0 come from bundle bits 36:31, bits 16:6 from 53:43. */
+  return (unsigned int)(((n >> 31) & 0x3f) | ((n >> 37) & 0x1ffc0));
+}
+
+static __inline unsigned int
+get_BrType_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 54) & 0x1f);  /* bundle bits 58:54 */
+}
+
+static __inline unsigned int
+get_Dest_Imm8_X1(tilegx_bundle_bits n)
+{
+  /* Result bits 5:0 come from bundle bits 36:31, bits 7:6 from 50:49. */
+  return (unsigned int)(((n >> 31) & 0x3f) | ((n >> 43) & 0xc0));
+}
+
+static __inline unsigned int
+get_Dest_X0(tilegx_bundle_bits num)
+{
+  /* Dest_X0 field: bundle bits 5:0. */
+  return (unsigned int)num & 0x3f;
+}
+
+static __inline unsigned int
+get_Dest_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 31) & 0x3f);  /* bundle bits 36:31 */
+}
+
+static __inline unsigned int
+get_Dest_Y0(tilegx_bundle_bits num)
+{
+  /* Dest_Y0 field: bundle bits 5:0. */
+  return (unsigned int)num & 0x3f;
+}
+
+static __inline unsigned int
+get_Dest_Y1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 31) & 0x3f);  /* bundle bits 36:31 */
+}
+
+static __inline unsigned int
+get_Imm16_X0(tilegx_bundle_bits num)
+{
+  /* Imm16_X0 field: bundle bits 27:12. */
+  return ((unsigned int)num >> 12) & 0xffff;
+}
+
+static __inline unsigned int
+get_Imm16_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 43) & 0xffff);  /* bundle bits 58:43 */
+}
+
+static __inline unsigned int
+get_Imm8OpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  /* Imm8OpcodeExtension_X0 field: bundle bits 27:20. */
+  return ((unsigned int)num >> 20) & 0xff;
+}
+
+static __inline unsigned int
+get_Imm8OpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 51) & 0xff);  /* bundle bits 58:51 */
+}
+
+static __inline unsigned int
+get_Imm8_X0(tilegx_bundle_bits num)
+{
+  /* Imm8_X0 field: bundle bits 19:12. */
+  return ((unsigned int)num >> 12) & 0xff;
+}
+
+static __inline unsigned int
+get_Imm8_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 43) & 0xff);  /* bundle bits 50:43 */
+}
+
+static __inline unsigned int
+get_Imm8_Y0(tilegx_bundle_bits num)
+{
+  /* Imm8_Y0 field: bundle bits 19:12. */
+  return ((unsigned int)num >> 12) & 0xff;
+}
+
+static __inline unsigned int
+get_Imm8_Y1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 43) & 0xff);  /* bundle bits 50:43 */
+}
+
+static __inline unsigned int
+get_JumpOff_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 31) & 0x7ffffff);  /* bundle bits 57:31 */
+}
+
+static __inline unsigned int
+get_JumpOpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 58) & 0x1);  /* bundle bit 58 */
+}
+
+static __inline unsigned int
+get_MF_Imm14_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 37) & 0x3fff);  /* bundle bits 50:37 */
+}
+
+static __inline unsigned int
+get_MT_Imm14_X1(tilegx_bundle_bits n)
+{
+  /* Result bits 5:0 come from bundle bits 36:31, bits 13:6 from 50:43. */
+  return (unsigned int)(((n >> 31) & 0x3f) | ((n >> 37) & 0x3fc0));
+}
+
+static __inline unsigned int
+get_Mode(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 62) & 0x3);  /* bundle bits 63:62 */
+}
+
+static __inline unsigned int
+get_Opcode_X0(tilegx_bundle_bits num)
+{
+  /* Opcode_X0 field: bundle bits 30:28. */
+  return ((unsigned int)num >> 28) & 0x7;
+}
+
+static __inline unsigned int
+get_Opcode_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 59) & 0x7);  /* bundle bits 61:59 */
+}
+
+static __inline unsigned int
+get_Opcode_Y0(tilegx_bundle_bits num)
+{
+  /* Opcode_Y0 field: bundle bits 30:27. */
+  return ((unsigned int)num >> 27) & 0xf;
+}
+
+static __inline unsigned int
+get_Opcode_Y1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 58) & 0xf);  /* bundle bits 61:58 */
+}
+
+static __inline unsigned int
+get_Opcode_Y2(tilegx_bundle_bits n)
+{
+  /* Result bit 0 comes from bundle bit 26, result bit 1 from bundle bit 57. */
+  return (unsigned int)(((n >> 26) & 0x1) | ((n >> 56) & 0x2));
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  /* RRROpcodeExtension_X0 field: bundle bits 27:18. */
+  return ((unsigned int)num >> 18) & 0x3ff;
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 49) & 0x3ff);  /* bundle bits 58:49 */
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_Y0(tilegx_bundle_bits num)
+{
+  /* RRROpcodeExtension_Y0 field: bundle bits 19:18. */
+  return ((unsigned int)num >> 18) & 0x3;
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_Y1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 49) & 0x3);  /* bundle bits 50:49 */
+}
+
+static __inline unsigned int
+get_ShAmt_X0(tilegx_bundle_bits num)
+{
+  /* ShAmt_X0 field: bundle bits 17:12. */
+  return ((unsigned int)num >> 12) & 0x3f;
+}
+
+static __inline unsigned int
+get_ShAmt_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 43) & 0x3f);  /* bundle bits 48:43 */
+}
+
+static __inline unsigned int
+get_ShAmt_Y0(tilegx_bundle_bits num)
+{
+  /* ShAmt_Y0 field: bundle bits 17:12. */
+  return ((unsigned int)num >> 12) & 0x3f;
+}
+
+static __inline unsigned int
+get_ShAmt_Y1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 43) & 0x3f);  /* bundle bits 48:43 */
+}
+
+static __inline unsigned int
+get_ShiftOpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  /* ShiftOpcodeExtension_X0 field: bundle bits 27:18. */
+  return ((unsigned int)num >> 18) & 0x3ff;
+}
+
+static __inline unsigned int
+get_ShiftOpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 49) & 0x3ff);  /* bundle bits 58:49 */
+}
+
+static __inline unsigned int
+get_ShiftOpcodeExtension_Y0(tilegx_bundle_bits num)
+{
+  /* ShiftOpcodeExtension_Y0 field: bundle bits 19:18. */
+  return ((unsigned int)num >> 18) & 0x3;
+}
+
+static __inline unsigned int
+get_ShiftOpcodeExtension_Y1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 49) & 0x3);  /* bundle bits 50:49 */
+}
+
+static __inline unsigned int
+get_SrcA_X0(tilegx_bundle_bits num)
+{
+  /* SrcA_X0 field: bundle bits 11:6. */
+  return ((unsigned int)num >> 6) & 0x3f;
+}
+
+static __inline unsigned int
+get_SrcA_X1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 37) & 0x3f);  /* bundle bits 42:37 */
+}
+
+static __inline unsigned int
+get_SrcA_Y0(tilegx_bundle_bits num)
+{
+  /* SrcA_Y0 field: bundle bits 11:6. */
+  return ((unsigned int)num >> 6) & 0x3f;
+}
+
+static __inline unsigned int
+get_SrcA_Y1(tilegx_bundle_bits n)
+{
+  return (unsigned int)((n >> 37) & 0x3f);  /* bundle bits 42:37 */
+}
+
+/* Read the SrcA_Y2 field: 6 bits starting at bundle bit 20. */
+static __inline unsigned int
+get_SrcA_Y2(tilegx_bundle_bits num)
+{
+  const unsigned int low_bits = (unsigned int)num;
+  const unsigned int field = low_bits >> 20;
+  return field & 0x3f;
+}
+
+/* Read the SrcBDest_Y2 field: 6 bits starting at bundle bit 51. */
+static __inline unsigned int
+get_SrcBDest_Y2(tilegx_bundle_bits n)
+{
+  const unsigned int field = (unsigned int)(n >> 51);
+  return field & 0x3f;
+}
+
+/* Read the SrcB_X0 field: 6 bits starting at bundle bit 12. */
+static __inline unsigned int
+get_SrcB_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_bits = (unsigned int)num;
+  const unsigned int field = low_bits >> 12;
+  return field & 0x3f;
+}
+
+/* Read the SrcB_X1 field: 6 bits starting at bundle bit 43. */
+static __inline unsigned int
+get_SrcB_X1(tilegx_bundle_bits n)
+{
+  const unsigned int field = (unsigned int)(n >> 43);
+  return field & 0x3f;
+}
+
+/* Read the SrcB_Y0 field: 6 bits starting at bundle bit 12. */
+static __inline unsigned int
+get_SrcB_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int low_bits = (unsigned int)num;
+  const unsigned int field = low_bits >> 12;
+  return field & 0x3f;
+}
+
+/* Read the SrcB_Y1 field: 6 bits starting at bundle bit 43. */
+static __inline unsigned int
+get_SrcB_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int field = (unsigned int)(n >> 43);
+  return field & 0x3f;
+}
+
+/* Read the UnaryOpcodeExtension_X0 field: 6 bits starting at bundle bit 12. */
+static __inline unsigned int
+get_UnaryOpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_bits = (unsigned int)num;
+  const unsigned int field = low_bits >> 12;
+  return field & 0x3f;
+}
+
+/* Read the UnaryOpcodeExtension_X1 field: 6 bits starting at bundle bit 43. */
+static __inline unsigned int
+get_UnaryOpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  const unsigned int field = (unsigned int)(n >> 43);
+  return field & 0x3f;
+}
+
+/* Read the UnaryOpcodeExtension_Y0 field: 6 bits starting at bundle bit 12. */
+static __inline unsigned int
+get_UnaryOpcodeExtension_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int low_bits = (unsigned int)num;
+  const unsigned int field = low_bits >> 12;
+  return field & 0x3f;
+}
+
+/* Read the UnaryOpcodeExtension_Y1 field: 6 bits starting at bundle bit 43. */
+static __inline unsigned int
+get_UnaryOpcodeExtension_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int field = (unsigned int)(n >> 43);
+  return field & 0x3f;
+}
+
+
+/*
+ * Sign-extend the low num_bits bits of n to a full int.
+ *
+ * The left shift is done on an unsigned copy of n: shifting a set bit
+ * into or past the sign bit of a signed int is undefined behavior in C,
+ * and that is exactly what happens whenever the field's top bit is set.
+ * The right shift is then done on a signed int so the sign bit is
+ * replicated; like the original code, this relies on the compiler
+ * implementing signed right shift as an arithmetic shift.
+ *
+ * num_bits must be between 1 and the number of bits in an int; any other
+ * value produces an out-of-range shift count.
+ */
+static __inline int
+sign_extend(int n, int num_bits)
+{
+  int shift = (int)(sizeof(int) * 8 - num_bits);
+  return (int)((unsigned int)n << shift) >> shift;
+}
+
+
+
+/* Build the BFEnd_X0 field: low 6 bits of num placed at bundle bit 12. */
+static __inline tilegx_bundle_bits
+create_BFEnd_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 12;
+}
+
+/* Build the BFOpcodeExtension_X0 field: low 4 bits of num placed at bundle bit 24. */
+static __inline tilegx_bundle_bits
+create_BFOpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xf;
+  return field << 24;
+}
+
+/* Build the BFStart_X0 field: low 6 bits of num placed at bundle bit 18. */
+static __inline tilegx_bundle_bits
+create_BFStart_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 18;
+}
+
+/*
+ * Build the split BrOff_X1 field: bits 0..5 of num go to bundle bits
+ * 31..36 and bits 6..16 of num go to bundle bits 43..53.
+ */
+static __inline tilegx_bundle_bits
+create_BrOff_X1(int num)
+{
+  const unsigned int off = (unsigned int)num;
+  const tilegx_bundle_bits lo_part = off & 0x0000003f;
+  const tilegx_bundle_bits hi_part = off & 0x0001ffc0;
+  return (lo_part << 31) | (hi_part << 37);
+}
+
+/* Build the BrType_X1 field: low 5 bits of num placed at bundle bit 54. */
+static __inline tilegx_bundle_bits
+create_BrType_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x1f;
+  return field << 54;
+}
+
+/*
+ * Build the split Dest_Imm8_X1 field: bits 0..5 of num go to bundle bits
+ * 31..36 and bits 6..7 of num go to bundle bits 49..50.
+ */
+static __inline tilegx_bundle_bits
+create_Dest_Imm8_X1(int num)
+{
+  const unsigned int imm = (unsigned int)num;
+  const tilegx_bundle_bits lo_part = imm & 0x0000003f;
+  const tilegx_bundle_bits hi_part = imm & 0x000000c0;
+  return (lo_part << 31) | (hi_part << 43);
+}
+
+/* Build the Dest_X0 field: low 6 bits of num, located at bundle bit 0. */
+static __inline tilegx_bundle_bits
+create_Dest_X0(int num)
+{
+  /* The field sits at bit 0, so masking alone positions it. */
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field;
+}
+
+/* Build the Dest_X1 field: low 6 bits of num placed at bundle bit 31. */
+static __inline tilegx_bundle_bits
+create_Dest_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 31;
+}
+
+/* Build the Dest_Y0 field: low 6 bits of num, located at bundle bit 0. */
+static __inline tilegx_bundle_bits
+create_Dest_Y0(int num)
+{
+  /* The field sits at bit 0, so masking alone positions it. */
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field;
+}
+
+/* Build the Dest_Y1 field: low 6 bits of num placed at bundle bit 31. */
+static __inline tilegx_bundle_bits
+create_Dest_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 31;
+}
+
+/* Build the Imm16_X0 field: low 16 bits of num placed at bundle bit 12. */
+static __inline tilegx_bundle_bits
+create_Imm16_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xffff;
+  return field << 12;
+}
+
+/* Build the Imm16_X1 field: low 16 bits of num placed at bundle bit 43. */
+static __inline tilegx_bundle_bits
+create_Imm16_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xffff;
+  return field << 43;
+}
+
+/* Build the Imm8OpcodeExtension_X0 field: low 8 bits of num placed at bundle bit 20. */
+static __inline tilegx_bundle_bits
+create_Imm8OpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xff;
+  return field << 20;
+}
+
+/* Build the Imm8OpcodeExtension_X1 field: low 8 bits of num placed at bundle bit 51. */
+static __inline tilegx_bundle_bits
+create_Imm8OpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xff;
+  return field << 51;
+}
+
+/* Build the Imm8_X0 field: low 8 bits of num placed at bundle bit 12. */
+static __inline tilegx_bundle_bits
+create_Imm8_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xff;
+  return field << 12;
+}
+
+/* Build the Imm8_X1 field: low 8 bits of num placed at bundle bit 43. */
+static __inline tilegx_bundle_bits
+create_Imm8_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xff;
+  return field << 43;
+}
+
+/* Build the Imm8_Y0 field: low 8 bits of num placed at bundle bit 12. */
+static __inline tilegx_bundle_bits
+create_Imm8_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xff;
+  return field << 12;
+}
+
+/* Build the Imm8_Y1 field: low 8 bits of num placed at bundle bit 43. */
+static __inline tilegx_bundle_bits
+create_Imm8_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xff;
+  return field << 43;
+}
+
+/* Build the JumpOff_X1 field: low 27 bits of num placed at bundle bit 31. */
+static __inline tilegx_bundle_bits
+create_JumpOff_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x7ffffff;
+  return field << 31;
+}
+
+/* Build the JumpOpcodeExtension_X1 field: lowest bit of num placed at bundle bit 58. */
+static __inline tilegx_bundle_bits
+create_JumpOpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x1;
+  return field << 58;
+}
+
+/* Build the MF_Imm14_X1 field: low 14 bits of num placed at bundle bit 37. */
+static __inline tilegx_bundle_bits
+create_MF_Imm14_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fff;
+  return field << 37;
+}
+
+/*
+ * Build the split MT_Imm14_X1 field: bits 0..5 of num go to bundle bits
+ * 31..36 and bits 6..13 of num go to bundle bits 43..50.
+ */
+static __inline tilegx_bundle_bits
+create_MT_Imm14_X1(int num)
+{
+  const unsigned int imm = (unsigned int)num;
+  const tilegx_bundle_bits lo_part = imm & 0x0000003f;
+  const tilegx_bundle_bits hi_part = imm & 0x00003fc0;
+  return (lo_part << 31) | (hi_part << 37);
+}
+
+/* Build the Mode field: low 2 bits of num placed at bundle bit 62. */
+static __inline tilegx_bundle_bits
+create_Mode(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3;
+  return field << 62;
+}
+
+/* Build the Opcode_X0 field: low 3 bits of num placed at bundle bit 28. */
+static __inline tilegx_bundle_bits
+create_Opcode_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x7;
+  return field << 28;
+}
+
+/* Build the Opcode_X1 field: low 3 bits of num placed at bundle bit 59. */
+static __inline tilegx_bundle_bits
+create_Opcode_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x7;
+  return field << 59;
+}
+
+/* Build the Opcode_Y0 field: low 4 bits of num placed at bundle bit 27. */
+static __inline tilegx_bundle_bits
+create_Opcode_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xf;
+  return field << 27;
+}
+
+/* Build the Opcode_Y1 field: low 4 bits of num placed at bundle bit 58. */
+static __inline tilegx_bundle_bits
+create_Opcode_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xf;
+  return field << 58;
+}
+
+/*
+ * Build the split Opcode_Y2 field: bit 0 of num goes to bundle bit 26
+ * and bit 1 of num goes to bundle bit 57.
+ */
+static __inline tilegx_bundle_bits
+create_Opcode_Y2(int num)
+{
+  const unsigned int op = (unsigned int)num;
+  const tilegx_bundle_bits bit0 = (op & 0x00000001) << 26;
+  const tilegx_bundle_bits bit1 = op & 0x00000002;
+  return bit0 | (bit1 << 56);
+}
+
+/* Build the RRROpcodeExtension_X0 field: low 10 bits of num placed at bundle bit 18. */
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3ff;
+  return field << 18;
+}
+
+/* Build the RRROpcodeExtension_X1 field: low 10 bits of num placed at bundle bit 49. */
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3ff;
+  return field << 49;
+}
+
+/* Build the RRROpcodeExtension_Y0 field: low 2 bits of num placed at bundle bit 18. */
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3;
+  return field << 18;
+}
+
+/* Build the RRROpcodeExtension_Y1 field: low 2 bits of num placed at bundle bit 49. */
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3;
+  return field << 49;
+}
+
+/* Build the ShAmt_X0 field: low 6 bits of num placed at bundle bit 12. */
+static __inline tilegx_bundle_bits
+create_ShAmt_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 12;
+}
+
+/* Build the ShAmt_X1 field: low 6 bits of num placed at bundle bit 43. */
+static __inline tilegx_bundle_bits
+create_ShAmt_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+/* Build the ShAmt_Y0 field: low 6 bits of num placed at bundle bit 12. */
+static __inline tilegx_bundle_bits
+create_ShAmt_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 12;
+}
+
+/* Build the ShAmt_Y1 field: low 6 bits of num placed at bundle bit 43. */
+static __inline tilegx_bundle_bits
+create_ShAmt_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+/* Build the ShiftOpcodeExtension_X0 field: low 10 bits of num placed at bundle bit 18. */
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3ff;
+  return field << 18;
+}
+
+/* Build the ShiftOpcodeExtension_X1 field: low 10 bits of num placed at bundle bit 49. */
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3ff;
+  return field << 49;
+}
+
+/* Build the ShiftOpcodeExtension_Y0 field: low 2 bits of num placed at bundle bit 18. */
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3;
+  return field << 18;
+}
+
+/* Build the ShiftOpcodeExtension_Y1 field: low 2 bits of num placed at bundle bit 49. */
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3;
+  return field << 49;
+}
+
+/* Build the SrcA_X0 field: low 6 bits of num placed at bundle bit 6. */
+static __inline tilegx_bundle_bits
+create_SrcA_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 6;
+}
+
+/* Build the SrcA_X1 field: low 6 bits of num placed at bundle bit 37. */
+static __inline tilegx_bundle_bits
+create_SrcA_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 37;
+}
+
+/* Build the SrcA_Y0 field: low 6 bits of num placed at bundle bit 6. */
+static __inline tilegx_bundle_bits
+create_SrcA_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 6;
+}
+
+/* Build the SrcA_Y1 field: low 6 bits of num placed at bundle bit 37. */
+static __inline tilegx_bundle_bits
+create_SrcA_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 37;
+}
+
+/* Build the SrcA_Y2 field: low 6 bits of num placed at bundle bit 20. */
+static __inline tilegx_bundle_bits
+create_SrcA_Y2(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 20;
+}
+
+/* Build the SrcBDest_Y2 field: low 6 bits of num placed at bundle bit 51. */
+static __inline tilegx_bundle_bits
+create_SrcBDest_Y2(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 51;
+}
+
+/* Build the SrcB_X0 field: low 6 bits of num placed at bundle bit 12. */
+static __inline tilegx_bundle_bits
+create_SrcB_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 12;
+}
+
+/* Build the SrcB_X1 field: low 6 bits of num placed at bundle bit 43. */
+static __inline tilegx_bundle_bits
+create_SrcB_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+/* Build the SrcB_Y0 field: low 6 bits of num placed at bundle bit 12. */
+static __inline tilegx_bundle_bits
+create_SrcB_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 12;
+}
+
+/* Build the SrcB_Y1 field: low 6 bits of num placed at bundle bit 43. */
+static __inline tilegx_bundle_bits
+create_SrcB_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+/* Build the UnaryOpcodeExtension_X0 field: low 6 bits of num placed at bundle bit 12. */
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 12;
+}
+
+/* Build the UnaryOpcodeExtension_X1 field: low 6 bits of num placed at bundle bit 43. */
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+/* Build the UnaryOpcodeExtension_Y0 field: low 6 bits of num placed at bundle bit 12. */
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+  return field << 12;
+}
+
+/* Build the UnaryOpcodeExtension_Y1 field: low 6 bits of num placed at bundle bit 43. */
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3f;
+  return field << 43;
+}
+
+
+/*
+ * Named constants for the opcode and opcode-extension fields.
+ *
+ * Each name ends in the field suffix it belongs to (_X0, _X1, _Y0, _Y1,
+ * _Y2, or the _YA2/_YB2/_YC2 mode values), and the entries are listed in
+ * alphabetical order by name.  Values are NOT unique across the enum:
+ * many names share the same number, so a value is only meaningful
+ * together with the field it was read from.
+ *
+ * NOTE(review): presumably each constant is meant to be compared with the
+ * result of the get_*() accessor for the matching field, or passed to
+ * the matching create_*() helper -- confirm against the callers.
+ */
+enum
+{
+  ADDI_IMM8_OPCODE_X0 = 1,
+  ADDI_IMM8_OPCODE_X1 = 1,
+  ADDI_OPCODE_Y0 = 0,
+  ADDI_OPCODE_Y1 = 1,
+  ADDLI_OPCODE_X0 = 1,
+  ADDLI_OPCODE_X1 = 0,
+  ADDXI_IMM8_OPCODE_X0 = 2,
+  ADDXI_IMM8_OPCODE_X1 = 2,
+  ADDXI_OPCODE_Y0 = 1,
+  ADDXI_OPCODE_Y1 = 2,
+  ADDXLI_OPCODE_X0 = 2,
+  ADDXLI_OPCODE_X1 = 1,
+  ADDXSC_RRR_0_OPCODE_X0 = 1,
+  ADDXSC_RRR_0_OPCODE_X1 = 1,
+  ADDX_RRR_0_OPCODE_X0 = 2,
+  ADDX_RRR_0_OPCODE_X1 = 2,
+  ADDX_RRR_0_OPCODE_Y0 = 0,
+  ADDX_SPECIAL_0_OPCODE_Y1 = 0,
+  ADD_RRR_0_OPCODE_X0 = 3,
+  ADD_RRR_0_OPCODE_X1 = 3,
+  ADD_RRR_0_OPCODE_Y0 = 1,
+  ADD_SPECIAL_0_OPCODE_Y1 = 1,
+  ANDI_IMM8_OPCODE_X0 = 3,
+  ANDI_IMM8_OPCODE_X1 = 3,
+  ANDI_OPCODE_Y0 = 2,
+  ANDI_OPCODE_Y1 = 3,
+  AND_RRR_0_OPCODE_X0 = 4,
+  AND_RRR_0_OPCODE_X1 = 4,
+  AND_RRR_5_OPCODE_Y0 = 0,
+  AND_RRR_5_OPCODE_Y1 = 0,
+  BEQZT_BRANCH_OPCODE_X1 = 16,
+  BEQZ_BRANCH_OPCODE_X1 = 17,
+  BFEXTS_BF_OPCODE_X0 = 4,
+  BFEXTU_BF_OPCODE_X0 = 5,
+  BFINS_BF_OPCODE_X0 = 6,
+  BF_OPCODE_X0 = 3,
+  BGEZT_BRANCH_OPCODE_X1 = 18,
+  BGEZ_BRANCH_OPCODE_X1 = 19,
+  BGTZT_BRANCH_OPCODE_X1 = 20,
+  BGTZ_BRANCH_OPCODE_X1 = 21,
+  BLBCT_BRANCH_OPCODE_X1 = 22,
+  BLBC_BRANCH_OPCODE_X1 = 23,
+  BLBST_BRANCH_OPCODE_X1 = 24,
+  BLBS_BRANCH_OPCODE_X1 = 25,
+  BLEZT_BRANCH_OPCODE_X1 = 26,
+  BLEZ_BRANCH_OPCODE_X1 = 27,
+  BLTZT_BRANCH_OPCODE_X1 = 28,
+  BLTZ_BRANCH_OPCODE_X1 = 29,
+  BNEZT_BRANCH_OPCODE_X1 = 30,
+  BNEZ_BRANCH_OPCODE_X1 = 31,
+  BRANCH_OPCODE_X1 = 2,
+  CMOVEQZ_RRR_0_OPCODE_X0 = 5,
+  CMOVEQZ_RRR_4_OPCODE_Y0 = 0,
+  CMOVNEZ_RRR_0_OPCODE_X0 = 6,
+  CMOVNEZ_RRR_4_OPCODE_Y0 = 1,
+  CMPEQI_IMM8_OPCODE_X0 = 4,
+  CMPEQI_IMM8_OPCODE_X1 = 4,
+  CMPEQI_OPCODE_Y0 = 3,
+  CMPEQI_OPCODE_Y1 = 4,
+  CMPEQ_RRR_0_OPCODE_X0 = 7,
+  CMPEQ_RRR_0_OPCODE_X1 = 5,
+  CMPEQ_RRR_3_OPCODE_Y0 = 0,
+  CMPEQ_RRR_3_OPCODE_Y1 = 2,
+  CMPEXCH4_RRR_0_OPCODE_X1 = 6,
+  CMPEXCH_RRR_0_OPCODE_X1 = 7,
+  CMPLES_RRR_0_OPCODE_X0 = 8,
+  CMPLES_RRR_0_OPCODE_X1 = 8,
+  CMPLES_RRR_2_OPCODE_Y0 = 0,
+  CMPLES_RRR_2_OPCODE_Y1 = 0,
+  CMPLEU_RRR_0_OPCODE_X0 = 9,
+  CMPLEU_RRR_0_OPCODE_X1 = 9,
+  CMPLEU_RRR_2_OPCODE_Y0 = 1,
+  CMPLEU_RRR_2_OPCODE_Y1 = 1,
+  CMPLTSI_IMM8_OPCODE_X0 = 5,
+  CMPLTSI_IMM8_OPCODE_X1 = 5,
+  CMPLTSI_OPCODE_Y0 = 4,
+  CMPLTSI_OPCODE_Y1 = 5,
+  CMPLTS_RRR_0_OPCODE_X0 = 10,
+  CMPLTS_RRR_0_OPCODE_X1 = 10,
+  CMPLTS_RRR_2_OPCODE_Y0 = 2,
+  CMPLTS_RRR_2_OPCODE_Y1 = 2,
+  CMPLTUI_IMM8_OPCODE_X0 = 6,
+  CMPLTUI_IMM8_OPCODE_X1 = 6,
+  CMPLTU_RRR_0_OPCODE_X0 = 11,
+  CMPLTU_RRR_0_OPCODE_X1 = 11,
+  CMPLTU_RRR_2_OPCODE_Y0 = 3,
+  CMPLTU_RRR_2_OPCODE_Y1 = 3,
+  CMPNE_RRR_0_OPCODE_X0 = 12,
+  CMPNE_RRR_0_OPCODE_X1 = 12,
+  CMPNE_RRR_3_OPCODE_Y0 = 1,
+  CMPNE_RRR_3_OPCODE_Y1 = 3,
+  CMULAF_RRR_0_OPCODE_X0 = 13,
+  CMULA_RRR_0_OPCODE_X0 = 14,
+  CMULFR_RRR_0_OPCODE_X0 = 15,
+  CMULF_RRR_0_OPCODE_X0 = 16,
+  CMULHR_RRR_0_OPCODE_X0 = 17,
+  CMULH_RRR_0_OPCODE_X0 = 18,
+  CMUL_RRR_0_OPCODE_X0 = 19,
+  CNTLZ_UNARY_OPCODE_X0 = 1,
+  CNTLZ_UNARY_OPCODE_Y0 = 1,
+  CNTTZ_UNARY_OPCODE_X0 = 2,
+  CNTTZ_UNARY_OPCODE_Y0 = 2,
+  CRC32_32_RRR_0_OPCODE_X0 = 20,
+  CRC32_8_RRR_0_OPCODE_X0 = 21,
+  DBLALIGN2_RRR_0_OPCODE_X0 = 22,
+  DBLALIGN2_RRR_0_OPCODE_X1 = 13,
+  DBLALIGN4_RRR_0_OPCODE_X0 = 23,
+  DBLALIGN4_RRR_0_OPCODE_X1 = 14,
+  DBLALIGN6_RRR_0_OPCODE_X0 = 24,
+  DBLALIGN6_RRR_0_OPCODE_X1 = 15,
+  DBLALIGN_RRR_0_OPCODE_X0 = 25,
+  DRAIN_UNARY_OPCODE_X1 = 1,
+  DTLBPR_UNARY_OPCODE_X1 = 2,
+  EXCH4_RRR_0_OPCODE_X1 = 16,
+  EXCH_RRR_0_OPCODE_X1 = 17,
+  FDOUBLE_ADDSUB_RRR_0_OPCODE_X0 = 26,
+  FDOUBLE_ADD_FLAGS_RRR_0_OPCODE_X0 = 27,
+  FDOUBLE_MUL_FLAGS_RRR_0_OPCODE_X0 = 28,
+  FDOUBLE_PACK1_RRR_0_OPCODE_X0 = 29,
+  FDOUBLE_PACK2_RRR_0_OPCODE_X0 = 30,
+  FDOUBLE_SUB_FLAGS_RRR_0_OPCODE_X0 = 31,
+  FDOUBLE_UNPACK_MAX_RRR_0_OPCODE_X0 = 32,
+  FDOUBLE_UNPACK_MIN_RRR_0_OPCODE_X0 = 33,
+  FETCHADD4_RRR_0_OPCODE_X1 = 18,
+  FETCHADDGEZ4_RRR_0_OPCODE_X1 = 19,
+  FETCHADDGEZ_RRR_0_OPCODE_X1 = 20,
+  FETCHADD_RRR_0_OPCODE_X1 = 21,
+  FETCHAND4_RRR_0_OPCODE_X1 = 22,
+  FETCHAND_RRR_0_OPCODE_X1 = 23,
+  FETCHOR4_RRR_0_OPCODE_X1 = 24,
+  FETCHOR_RRR_0_OPCODE_X1 = 25,
+  FINV_UNARY_OPCODE_X1 = 3,
+  FLUSHWB_UNARY_OPCODE_X1 = 4,
+  FLUSH_UNARY_OPCODE_X1 = 5,
+  FNOP_UNARY_OPCODE_X0 = 3,
+  FNOP_UNARY_OPCODE_X1 = 6,
+  FNOP_UNARY_OPCODE_Y0 = 3,
+  FNOP_UNARY_OPCODE_Y1 = 8,
+  FSINGLE_ADD1_RRR_0_OPCODE_X0 = 34,
+  FSINGLE_ADDSUB2_RRR_0_OPCODE_X0 = 35,
+  FSINGLE_MUL1_RRR_0_OPCODE_X0 = 36,
+  FSINGLE_MUL2_RRR_0_OPCODE_X0 = 37,
+  FSINGLE_PACK1_UNARY_OPCODE_X0 = 4,
+  FSINGLE_PACK1_UNARY_OPCODE_Y0 = 4,
+  FSINGLE_PACK2_RRR_0_OPCODE_X0 = 38,
+  FSINGLE_SUB1_RRR_0_OPCODE_X0 = 39,
+  ICOH_UNARY_OPCODE_X1 = 7,
+  ILL_UNARY_OPCODE_X1 = 8,
+  ILL_UNARY_OPCODE_Y1 = 9,
+  IMM8_OPCODE_X0 = 4,
+  IMM8_OPCODE_X1 = 3,
+  INV_UNARY_OPCODE_X1 = 9,
+  IRET_UNARY_OPCODE_X1 = 10,
+  JALRP_UNARY_OPCODE_X1 = 11,
+  JALRP_UNARY_OPCODE_Y1 = 10,
+  JALR_UNARY_OPCODE_X1 = 12,
+  JALR_UNARY_OPCODE_Y1 = 11,
+  JAL_JUMP_OPCODE_X1 = 0,
+  JRP_UNARY_OPCODE_X1 = 13,
+  JRP_UNARY_OPCODE_Y1 = 12,
+  JR_UNARY_OPCODE_X1 = 14,
+  JR_UNARY_OPCODE_Y1 = 13,
+  JUMP_OPCODE_X1 = 4,
+  J_JUMP_OPCODE_X1 = 1,
+  LD1S_ADD_IMM8_OPCODE_X1 = 7,
+  LD1S_OPCODE_Y2 = 0,
+  LD1S_UNARY_OPCODE_X1 = 15,
+  LD1U_ADD_IMM8_OPCODE_X1 = 8,
+  LD1U_OPCODE_Y2 = 1,
+  LD1U_UNARY_OPCODE_X1 = 16,
+  LD2S_ADD_IMM8_OPCODE_X1 = 9,
+  LD2S_OPCODE_Y2 = 2,
+  LD2S_UNARY_OPCODE_X1 = 17,
+  LD2U_ADD_IMM8_OPCODE_X1 = 10,
+  LD2U_OPCODE_Y2 = 3,
+  LD2U_UNARY_OPCODE_X1 = 18,
+  LD4S_ADD_IMM8_OPCODE_X1 = 11,
+  LD4S_OPCODE_Y2 = 1,
+  LD4S_UNARY_OPCODE_X1 = 19,
+  LD4U_ADD_IMM8_OPCODE_X1 = 12,
+  LD4U_OPCODE_Y2 = 2,
+  LD4U_UNARY_OPCODE_X1 = 20,
+  LDNA_UNARY_OPCODE_X1 = 21,
+  LDNT1S_ADD_IMM8_OPCODE_X1 = 13,
+  LDNT1S_UNARY_OPCODE_X1 = 22,
+  LDNT1U_ADD_IMM8_OPCODE_X1 = 14,
+  LDNT1U_UNARY_OPCODE_X1 = 23,
+  LDNT2S_ADD_IMM8_OPCODE_X1 = 15,
+  LDNT2S_UNARY_OPCODE_X1 = 24,
+  LDNT2U_ADD_IMM8_OPCODE_X1 = 16,
+  LDNT2U_UNARY_OPCODE_X1 = 25,
+  LDNT4S_ADD_IMM8_OPCODE_X1 = 17,
+  LDNT4S_UNARY_OPCODE_X1 = 26,
+  LDNT4U_ADD_IMM8_OPCODE_X1 = 18,
+  LDNT4U_UNARY_OPCODE_X1 = 27,
+  LDNT_ADD_IMM8_OPCODE_X1 = 19,
+  LDNT_UNARY_OPCODE_X1 = 28,
+  LD_ADD_IMM8_OPCODE_X1 = 20,
+  LD_OPCODE_Y2 = 3,
+  LD_UNARY_OPCODE_X1 = 29,
+  LNK_UNARY_OPCODE_X1 = 30,
+  LNK_UNARY_OPCODE_Y1 = 14,
+  /*
+   * NOTE(review): every other load constant in this enum uses an "LD"
+   * prefix (compare LDNA_UNARY_OPCODE_X1 above), so the "LWNA" spelling
+   * looks like a leftover name -- confirm before renaming, since users
+   * of this header may reference it.
+   */
+  LWNA_ADD_IMM8_OPCODE_X1 = 21,
+  MFSPR_IMM8_OPCODE_X1 = 22,
+  MF_UNARY_OPCODE_X1 = 31,
+  MM_BF_OPCODE_X0 = 7,
+  MNZ_RRR_0_OPCODE_X0 = 40,
+  MNZ_RRR_0_OPCODE_X1 = 26,
+  MNZ_RRR_4_OPCODE_Y0 = 2,
+  MNZ_RRR_4_OPCODE_Y1 = 2,
+  MODE_OPCODE_YA2 = 1,
+  MODE_OPCODE_YB2 = 2,
+  MODE_OPCODE_YC2 = 3,
+  MTSPR_IMM8_OPCODE_X1 = 23,
+  MULAX_RRR_0_OPCODE_X0 = 41,
+  MULAX_RRR_3_OPCODE_Y0 = 2,
+  MULA_HS_HS_RRR_0_OPCODE_X0 = 42,
+  MULA_HS_HS_RRR_9_OPCODE_Y0 = 0,
+  MULA_HS_HU_RRR_0_OPCODE_X0 = 43,
+  MULA_HS_LS_RRR_0_OPCODE_X0 = 44,
+  MULA_HS_LU_RRR_0_OPCODE_X0 = 45,
+  MULA_HU_HU_RRR_0_OPCODE_X0 = 46,
+  MULA_HU_HU_RRR_9_OPCODE_Y0 = 1,
+  MULA_HU_LS_RRR_0_OPCODE_X0 = 47,
+  MULA_HU_LU_RRR_0_OPCODE_X0 = 48,
+  MULA_LS_LS_RRR_0_OPCODE_X0 = 49,
+  MULA_LS_LS_RRR_9_OPCODE_Y0 = 2,
+  MULA_LS_LU_RRR_0_OPCODE_X0 = 50,
+  MULA_LU_LU_RRR_0_OPCODE_X0 = 51,
+  MULA_LU_LU_RRR_9_OPCODE_Y0 = 3,
+  MULX_RRR_0_OPCODE_X0 = 52,
+  MULX_RRR_3_OPCODE_Y0 = 3,
+  MUL_HS_HS_RRR_0_OPCODE_X0 = 53,
+  MUL_HS_HS_RRR_8_OPCODE_Y0 = 0,
+  MUL_HS_HU_RRR_0_OPCODE_X0 = 54,
+  MUL_HS_LS_RRR_0_OPCODE_X0 = 55,
+  MUL_HS_LU_RRR_0_OPCODE_X0 = 56,
+  MUL_HU_HU_RRR_0_OPCODE_X0 = 57,
+  MUL_HU_HU_RRR_8_OPCODE_Y0 = 1,
+  MUL_HU_LS_RRR_0_OPCODE_X0 = 58,
+  MUL_HU_LU_RRR_0_OPCODE_X0 = 59,
+  MUL_LS_LS_RRR_0_OPCODE_X0 = 60,
+  MUL_LS_LS_RRR_8_OPCODE_Y0 = 2,
+  MUL_LS_LU_RRR_0_OPCODE_X0 = 61,
+  MUL_LU_LU_RRR_0_OPCODE_X0 = 62,
+  MUL_LU_LU_RRR_8_OPCODE_Y0 = 3,
+  MZ_RRR_0_OPCODE_X0 = 63,
+  MZ_RRR_0_OPCODE_X1 = 27,
+  MZ_RRR_4_OPCODE_Y0 = 3,
+  MZ_RRR_4_OPCODE_Y1 = 3,
+  NAP_UNARY_OPCODE_X1 = 32,
+  NOP_UNARY_OPCODE_X0 = 5,
+  NOP_UNARY_OPCODE_X1 = 33,
+  NOP_UNARY_OPCODE_Y0 = 5,
+  NOP_UNARY_OPCODE_Y1 = 15,
+  NOR_RRR_0_OPCODE_X0 = 64,
+  NOR_RRR_0_OPCODE_X1 = 28,
+  NOR_RRR_5_OPCODE_Y0 = 1,
+  NOR_RRR_5_OPCODE_Y1 = 1,
+  ORI_IMM8_OPCODE_X0 = 7,
+  ORI_IMM8_OPCODE_X1 = 24,
+  OR_RRR_0_OPCODE_X0 = 65,
+  OR_RRR_0_OPCODE_X1 = 29,
+  OR_RRR_5_OPCODE_Y0 = 2,
+  OR_RRR_5_OPCODE_Y1 = 2,
+  PCNT_UNARY_OPCODE_X0 = 6,
+  PCNT_UNARY_OPCODE_Y0 = 6,
+  REVBITS_UNARY_OPCODE_X0 = 7,
+  REVBITS_UNARY_OPCODE_Y0 = 7,
+  REVBYTES_UNARY_OPCODE_X0 = 8,
+  REVBYTES_UNARY_OPCODE_Y0 = 8,
+  ROTLI_SHIFT_OPCODE_X0 = 1,
+  ROTLI_SHIFT_OPCODE_X1 = 1,
+  ROTLI_SHIFT_OPCODE_Y0 = 0,
+  ROTLI_SHIFT_OPCODE_Y1 = 0,
+  ROTL_RRR_0_OPCODE_X0 = 66,
+  ROTL_RRR_0_OPCODE_X1 = 30,
+  ROTL_RRR_6_OPCODE_Y0 = 0,
+  ROTL_RRR_6_OPCODE_Y1 = 0,
+  RRR_0_OPCODE_X0 = 5,
+  RRR_0_OPCODE_X1 = 5,
+  RRR_0_OPCODE_Y0 = 5,
+  RRR_0_OPCODE_Y1 = 6,
+  RRR_1_OPCODE_Y0 = 6,
+  RRR_1_OPCODE_Y1 = 7,
+  RRR_2_OPCODE_Y0 = 7,
+  RRR_2_OPCODE_Y1 = 8,
+  RRR_3_OPCODE_Y0 = 8,
+  RRR_3_OPCODE_Y1 = 9,
+  RRR_4_OPCODE_Y0 = 9,
+  RRR_4_OPCODE_Y1 = 10,
+  RRR_5_OPCODE_Y0 = 10,
+  RRR_5_OPCODE_Y1 = 11,
+  RRR_6_OPCODE_Y0 = 11,
+  RRR_6_OPCODE_Y1 = 12,
+  RRR_7_OPCODE_Y0 = 12,
+  RRR_7_OPCODE_Y1 = 13,
+  RRR_8_OPCODE_Y0 = 13,
+  RRR_9_OPCODE_Y0 = 14,
+  SHIFT_OPCODE_X0 = 6,
+  SHIFT_OPCODE_X1 = 6,
+  SHIFT_OPCODE_Y0 = 15,
+  SHIFT_OPCODE_Y1 = 14,
+  SHL16INSLI_OPCODE_X0 = 7,
+  SHL16INSLI_OPCODE_X1 = 7,
+  SHL1ADDX_RRR_0_OPCODE_X0 = 67,
+  SHL1ADDX_RRR_0_OPCODE_X1 = 31,
+  SHL1ADDX_RRR_7_OPCODE_Y0 = 1,
+  SHL1ADDX_RRR_7_OPCODE_Y1 = 1,
+  SHL1ADD_RRR_0_OPCODE_X0 = 68,
+  SHL1ADD_RRR_0_OPCODE_X1 = 32,
+  SHL1ADD_RRR_1_OPCODE_Y0 = 0,
+  SHL1ADD_RRR_1_OPCODE_Y1 = 0,
+  SHL2ADDX_RRR_0_OPCODE_X0 = 69,
+  SHL2ADDX_RRR_0_OPCODE_X1 = 33,
+  SHL2ADDX_RRR_7_OPCODE_Y0 = 2,
+  SHL2ADDX_RRR_7_OPCODE_Y1 = 2,
+  SHL2ADD_RRR_0_OPCODE_X0 = 70,
+  SHL2ADD_RRR_0_OPCODE_X1 = 34,
+  SHL2ADD_RRR_1_OPCODE_Y0 = 1,
+  SHL2ADD_RRR_1_OPCODE_Y1 = 1,
+  SHL3ADDX_RRR_0_OPCODE_X0 = 71,
+  SHL3ADDX_RRR_0_OPCODE_X1 = 35,
+  SHL3ADDX_RRR_7_OPCODE_Y0 = 3,
+  SHL3ADDX_RRR_7_OPCODE_Y1 = 3,
+  SHL3ADD_RRR_0_OPCODE_X0 = 72,
+  SHL3ADD_RRR_0_OPCODE_X1 = 36,
+  SHL3ADD_RRR_1_OPCODE_Y0 = 2,
+  SHL3ADD_RRR_1_OPCODE_Y1 = 2,
+  SHLI_SHIFT_OPCODE_X0 = 2,
+  SHLI_SHIFT_OPCODE_X1 = 2,
+  SHLI_SHIFT_OPCODE_Y0 = 1,
+  SHLI_SHIFT_OPCODE_Y1 = 1,
+  SHLXI_SHIFT_OPCODE_X0 = 3,
+  SHLXI_SHIFT_OPCODE_X1 = 3,
+  SHLX_RRR_0_OPCODE_X0 = 73,
+  SHLX_RRR_0_OPCODE_X1 = 37,
+  SHL_RRR_0_OPCODE_X0 = 74,
+  SHL_RRR_0_OPCODE_X1 = 38,
+  SHL_RRR_6_OPCODE_Y0 = 1,
+  SHL_RRR_6_OPCODE_Y1 = 1,
+  SHRSI_SHIFT_OPCODE_X0 = 4,
+  SHRSI_SHIFT_OPCODE_X1 = 4,
+  SHRSI_SHIFT_OPCODE_Y0 = 2,
+  SHRSI_SHIFT_OPCODE_Y1 = 2,
+  SHRS_RRR_0_OPCODE_X0 = 75,
+  SHRS_RRR_0_OPCODE_X1 = 39,
+  SHRS_RRR_6_OPCODE_Y0 = 2,
+  SHRS_RRR_6_OPCODE_Y1 = 2,
+  SHRUI_SHIFT_OPCODE_X0 = 5,
+  SHRUI_SHIFT_OPCODE_X1 = 5,
+  SHRUI_SHIFT_OPCODE_Y0 = 3,
+  SHRUI_SHIFT_OPCODE_Y1 = 3,
+  SHRUXI_SHIFT_OPCODE_X0 = 6,
+  SHRUXI_SHIFT_OPCODE_X1 = 6,
+  SHRUX_RRR_0_OPCODE_X0 = 76,
+  SHRUX_RRR_0_OPCODE_X1 = 40,
+  SHRU_RRR_0_OPCODE_X0 = 77,
+  SHRU_RRR_0_OPCODE_X1 = 41,
+  SHRU_RRR_6_OPCODE_Y0 = 3,
+  SHRU_RRR_6_OPCODE_Y1 = 3,
+  SHUFFLEBYTES_RRR_0_OPCODE_X0 = 78,
+  ST1_ADD_IMM8_OPCODE_X1 = 25,
+  ST1_OPCODE_Y2 = 0,
+  ST1_RRR_0_OPCODE_X1 = 42,
+  ST2_ADD_IMM8_OPCODE_X1 = 26,
+  ST2_OPCODE_Y2 = 1,
+  ST2_RRR_0_OPCODE_X1 = 43,
+  ST4_ADD_IMM8_OPCODE_X1 = 27,
+  ST4_OPCODE_Y2 = 2,
+  ST4_RRR_0_OPCODE_X1 = 44,
+  STNT1_ADD_IMM8_OPCODE_X1 = 28,
+  STNT1_RRR_0_OPCODE_X1 = 45,
+  STNT2_ADD_IMM8_OPCODE_X1 = 29,
+  STNT2_RRR_0_OPCODE_X1 = 46,
+  STNT4_ADD_IMM8_OPCODE_X1 = 30,
+  STNT4_RRR_0_OPCODE_X1 = 47,
+  STNT_ADD_IMM8_OPCODE_X1 = 31,
+  STNT_RRR_0_OPCODE_X1 = 48,
+  ST_ADD_IMM8_OPCODE_X1 = 32,
+  ST_OPCODE_Y2 = 3,
+  ST_RRR_0_OPCODE_X1 = 49,
+  SUBXSC_RRR_0_OPCODE_X0 = 79,
+  SUBXSC_RRR_0_OPCODE_X1 = 50,
+  SUBX_RRR_0_OPCODE_X0 = 80,
+  SUBX_RRR_0_OPCODE_X1 = 51,
+  SUBX_RRR_0_OPCODE_Y0 = 2,
+  SUBX_RRR_0_OPCODE_Y1 = 2,
+  SUB_RRR_0_OPCODE_X0 = 81,
+  SUB_RRR_0_OPCODE_X1 = 52,
+  SUB_RRR_0_OPCODE_Y0 = 3,
+  SUB_RRR_0_OPCODE_Y1 = 3,
+  SWINT0_UNARY_OPCODE_X1 = 34,
+  SWINT1_UNARY_OPCODE_X1 = 35,
+  SWINT2_UNARY_OPCODE_X1 = 36,
+  SWINT3_UNARY_OPCODE_X1 = 37,
+  TBLIDXB0_UNARY_OPCODE_X0 = 9,
+  TBLIDXB0_UNARY_OPCODE_Y0 = 9,
+  TBLIDXB1_UNARY_OPCODE_X0 = 10,
+  TBLIDXB1_UNARY_OPCODE_Y0 = 10,
+  TBLIDXB2_UNARY_OPCODE_X0 = 11,
+  TBLIDXB2_UNARY_OPCODE_Y0 = 11,
+  TBLIDXB3_UNARY_OPCODE_X0 = 12,
+  TBLIDXB3_UNARY_OPCODE_Y0 = 12,
+  UNARY_RRR_0_OPCODE_X0 = 82,
+  UNARY_RRR_0_OPCODE_X1 = 53,
+  UNARY_RRR_1_OPCODE_Y0 = 3,
+  UNARY_RRR_1_OPCODE_Y1 = 3,
+  V1ADDI_IMM8_OPCODE_X0 = 8,
+  V1ADDI_IMM8_OPCODE_X1 = 33,
+  V1ADDUC_RRR_0_OPCODE_X0 = 83,
+  V1ADDUC_RRR_0_OPCODE_X1 = 54,
+  V1ADD_RRR_0_OPCODE_X0 = 84,
+  V1ADD_RRR_0_OPCODE_X1 = 55,
+  V1ADIFFU_RRR_0_OPCODE_X0 = 85,
+  V1AVGU_RRR_0_OPCODE_X0 = 86,
+  V1CMPEQI_IMM8_OPCODE_X0 = 9,
+  V1CMPEQI_IMM8_OPCODE_X1 = 34,
+  V1CMPEQ_RRR_0_OPCODE_X0 = 87,
+  V1CMPEQ_RRR_0_OPCODE_X1 = 56,
+  V1CMPLES_RRR_0_OPCODE_X0 = 88,
+  V1CMPLES_RRR_0_OPCODE_X1 = 57,
+  V1CMPLEU_RRR_0_OPCODE_X0 = 89,
+  V1CMPLEU_RRR_0_OPCODE_X1 = 58,
+  V1CMPLTSI_IMM8_OPCODE_X0 = 10,
+  V1CMPLTSI_IMM8_OPCODE_X1 = 35,
+  V1CMPLTS_RRR_0_OPCODE_X0 = 90,
+  V1CMPLTS_RRR_0_OPCODE_X1 = 59,
+  V1CMPLTUI_IMM8_OPCODE_X0 = 11,
+  V1CMPLTUI_IMM8_OPCODE_X1 = 36,
+  V1CMPLTU_RRR_0_OPCODE_X0 = 91,
+  V1CMPLTU_RRR_0_OPCODE_X1 = 60,
+  V1CMPNE_RRR_0_OPCODE_X0 = 92,
+  V1CMPNE_RRR_0_OPCODE_X1 = 61,
+  V1DDOTPUA_RRR_0_OPCODE_X0 = 161,
+  V1DDOTPUSA_RRR_0_OPCODE_X0 = 93,
+  V1DDOTPUS_RRR_0_OPCODE_X0 = 94,
+  V1DDOTPU_RRR_0_OPCODE_X0 = 162,
+  V1DOTPA_RRR_0_OPCODE_X0 = 95,
+  V1DOTPUA_RRR_0_OPCODE_X0 = 163,
+  V1DOTPUSA_RRR_0_OPCODE_X0 = 96,
+  V1DOTPUS_RRR_0_OPCODE_X0 = 97,
+  V1DOTPU_RRR_0_OPCODE_X0 = 164,
+  V1DOTP_RRR_0_OPCODE_X0 = 98,
+  V1INT_H_RRR_0_OPCODE_X0 = 99,
+  V1INT_H_RRR_0_OPCODE_X1 = 62,
+  V1INT_L_RRR_0_OPCODE_X0 = 100,
+  V1INT_L_RRR_0_OPCODE_X1 = 63,
+  V1MAXUI_IMM8_OPCODE_X0 = 12,
+  V1MAXUI_IMM8_OPCODE_X1 = 37,
+  V1MAXU_RRR_0_OPCODE_X0 = 101,
+  V1MAXU_RRR_0_OPCODE_X1 = 64,
+  V1MINUI_IMM8_OPCODE_X0 = 13,
+  V1MINUI_IMM8_OPCODE_X1 = 38,
+  V1MINU_RRR_0_OPCODE_X0 = 102,
+  V1MINU_RRR_0_OPCODE_X1 = 65,
+  V1MNZ_RRR_0_OPCODE_X0 = 103,
+  V1MNZ_RRR_0_OPCODE_X1 = 66,
+  V1MULTU_RRR_0_OPCODE_X0 = 104,
+  V1MULUS_RRR_0_OPCODE_X0 = 105,
+  V1MULU_RRR_0_OPCODE_X0 = 106,
+  V1MZ_RRR_0_OPCODE_X0 = 107,
+  V1MZ_RRR_0_OPCODE_X1 = 67,
+  V1SADAU_RRR_0_OPCODE_X0 = 108,
+  V1SADU_RRR_0_OPCODE_X0 = 109,
+  V1SHLI_SHIFT_OPCODE_X0 = 7,
+  V1SHLI_SHIFT_OPCODE_X1 = 7,
+  V1SHL_RRR_0_OPCODE_X0 = 110,
+  V1SHL_RRR_0_OPCODE_X1 = 68,
+  V1SHRSI_SHIFT_OPCODE_X0 = 8,
+  V1SHRSI_SHIFT_OPCODE_X1 = 8,
+  V1SHRS_RRR_0_OPCODE_X0 = 111,
+  V1SHRS_RRR_0_OPCODE_X1 = 69,
+  V1SHRUI_SHIFT_OPCODE_X0 = 9,
+  V1SHRUI_SHIFT_OPCODE_X1 = 9,
+  V1SHRU_RRR_0_OPCODE_X0 = 112,
+  V1SHRU_RRR_0_OPCODE_X1 = 70,
+  V1SUBUC_RRR_0_OPCODE_X0 = 113,
+  V1SUBUC_RRR_0_OPCODE_X1 = 71,
+  V1SUB_RRR_0_OPCODE_X0 = 114,
+  V1SUB_RRR_0_OPCODE_X1 = 72,
+  V2ADDI_IMM8_OPCODE_X0 = 14,
+  V2ADDI_IMM8_OPCODE_X1 = 39,
+  V2ADDSC_RRR_0_OPCODE_X0 = 115,
+  V2ADDSC_RRR_0_OPCODE_X1 = 73,
+  V2ADD_RRR_0_OPCODE_X0 = 116,
+  V2ADD_RRR_0_OPCODE_X1 = 74,
+  V2ADIFFS_RRR_0_OPCODE_X0 = 117,
+  V2AVGS_RRR_0_OPCODE_X0 = 118,
+  V2CMPEQI_IMM8_OPCODE_X0 = 15,
+  V2CMPEQI_IMM8_OPCODE_X1 = 40,
+  V2CMPEQ_RRR_0_OPCODE_X0 = 119,
+  V2CMPEQ_RRR_0_OPCODE_X1 = 75,
+  V2CMPLES_RRR_0_OPCODE_X0 = 120,
+  V2CMPLES_RRR_0_OPCODE_X1 = 76,
+  V2CMPLEU_RRR_0_OPCODE_X0 = 121,
+  V2CMPLEU_RRR_0_OPCODE_X1 = 77,
+  V2CMPLTSI_IMM8_OPCODE_X0 = 16,
+  V2CMPLTSI_IMM8_OPCODE_X1 = 41,
+  V2CMPLTS_RRR_0_OPCODE_X0 = 122,
+  V2CMPLTS_RRR_0_OPCODE_X1 = 78,
+  V2CMPLTUI_IMM8_OPCODE_X0 = 17,
+  V2CMPLTUI_IMM8_OPCODE_X1 = 42,
+  V2CMPLTU_RRR_0_OPCODE_X0 = 123,
+  V2CMPLTU_RRR_0_OPCODE_X1 = 79,
+  V2CMPNE_RRR_0_OPCODE_X0 = 124,
+  V2CMPNE_RRR_0_OPCODE_X1 = 80,
+  V2DOTPA_RRR_0_OPCODE_X0 = 125,
+  V2DOTP_RRR_0_OPCODE_X0 = 126,
+  V2INT_H_RRR_0_OPCODE_X0 = 127,
+  V2INT_H_RRR_0_OPCODE_X1 = 81,
+  V2INT_L_RRR_0_OPCODE_X0 = 128,
+  V2INT_L_RRR_0_OPCODE_X1 = 82,
+  V2MAXSI_IMM8_OPCODE_X0 = 18,
+  V2MAXSI_IMM8_OPCODE_X1 = 43,
+  V2MAXS_RRR_0_OPCODE_X0 = 129,
+  V2MAXS_RRR_0_OPCODE_X1 = 83,
+  V2MINSI_IMM8_OPCODE_X0 = 19,
+  V2MINSI_IMM8_OPCODE_X1 = 44,
+  V2MINS_RRR_0_OPCODE_X0 = 130,
+  V2MINS_RRR_0_OPCODE_X1 = 84,
+  V2MNZ_RRR_0_OPCODE_X0 = 131,
+  V2MNZ_RRR_0_OPCODE_X1 = 85,
+  V2MULFSC_RRR_0_OPCODE_X0 = 132,
+  V2MULS_RRR_0_OPCODE_X0 = 133,
+  V2MULTS_RRR_0_OPCODE_X0 = 134,
+  V2MZ_RRR_0_OPCODE_X0 = 135,
+  V2MZ_RRR_0_OPCODE_X1 = 86,
+  V2PACKH_RRR_0_OPCODE_X0 = 136,
+  V2PACKH_RRR_0_OPCODE_X1 = 87,
+  V2PACKL_RRR_0_OPCODE_X0 = 137,
+  V2PACKL_RRR_0_OPCODE_X1 = 88,
+  V2PACKUC_RRR_0_OPCODE_X0 = 138,
+  V2PACKUC_RRR_0_OPCODE_X1 = 89,
+  V2SADAS_RRR_0_OPCODE_X0 = 139,
+  V2SADAU_RRR_0_OPCODE_X0 = 140,
+  V2SADS_RRR_0_OPCODE_X0 = 141,
+  V2SADU_RRR_0_OPCODE_X0 = 142,
+  V2SHLI_SHIFT_OPCODE_X0 = 10,
+  V2SHLI_SHIFT_OPCODE_X1 = 10,
+  V2SHLSC_RRR_0_OPCODE_X0 = 143,
+  V2SHLSC_RRR_0_OPCODE_X1 = 90,
+  V2SHL_RRR_0_OPCODE_X0 = 144,
+  V2SHL_RRR_0_OPCODE_X1 = 91,
+  V2SHRSI_SHIFT_OPCODE_X0 = 11,
+  V2SHRSI_SHIFT_OPCODE_X1 = 11,
+  V2SHRS_RRR_0_OPCODE_X0 = 145,
+  V2SHRS_RRR_0_OPCODE_X1 = 92,
+  V2SHRUI_SHIFT_OPCODE_X0 = 12,
+  V2SHRUI_SHIFT_OPCODE_X1 = 12,
+  V2SHRU_RRR_0_OPCODE_X0 = 146,
+  V2SHRU_RRR_0_OPCODE_X1 = 93,
+  V2SUBSC_RRR_0_OPCODE_X0 = 147,
+  V2SUBSC_RRR_0_OPCODE_X1 = 94,
+  V2SUB_RRR_0_OPCODE_X0 = 148,
+  V2SUB_RRR_0_OPCODE_X1 = 95,
+  V4ADDSC_RRR_0_OPCODE_X0 = 149,
+  V4ADDSC_RRR_0_OPCODE_X1 = 96,
+  V4ADD_RRR_0_OPCODE_X0 = 150,
+  V4ADD_RRR_0_OPCODE_X1 = 97,
+  V4INT_H_RRR_0_OPCODE_X0 = 151,
+  V4INT_H_RRR_0_OPCODE_X1 = 98,
+  V4INT_L_RRR_0_OPCODE_X0 = 152,
+  V4INT_L_RRR_0_OPCODE_X1 = 99,
+  V4PACKSC_RRR_0_OPCODE_X0 = 153,
+  V4PACKSC_RRR_0_OPCODE_X1 = 100,
+  V4SHLSC_RRR_0_OPCODE_X0 = 154,
+  V4SHLSC_RRR_0_OPCODE_X1 = 101,
+  V4SHL_RRR_0_OPCODE_X0 = 155,
+  V4SHL_RRR_0_OPCODE_X1 = 102,
+  V4SHRS_RRR_0_OPCODE_X0 = 156,
+  V4SHRS_RRR_0_OPCODE_X1 = 103,
+  V4SHRU_RRR_0_OPCODE_X0 = 157,
+  V4SHRU_RRR_0_OPCODE_X1 = 104,
+  V4SUBSC_RRR_0_OPCODE_X0 = 158,
+  V4SUBSC_RRR_0_OPCODE_X1 = 105,
+  V4SUB_RRR_0_OPCODE_X0 = 159,
+  V4SUB_RRR_0_OPCODE_X1 = 106,
+  WH64_UNARY_OPCODE_X1 = 38,
+  XORI_IMM8_OPCODE_X0 = 20,
+  XORI_IMM8_OPCODE_X1 = 45,
+  XOR_RRR_0_OPCODE_X0 = 160,
+  XOR_RRR_0_OPCODE_X1 = 107,
+  XOR_RRR_5_OPCODE_Y0 = 3,
+  XOR_RRR_5_OPCODE_Y1 = 3
+};
+
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ARCH_OPCODE_H__ */
diff --git a/arch/tile/include/arch/opcode_tilepro.h b/arch/tile/include/arch/opcode_tilepro.h
new file mode 100644
index 00000000..71b763b8
--- /dev/null
+++ b/arch/tile/include/arch/opcode_tilepro.h
@@ -0,0 +1,1471 @@
+/* TILEPro opcode information.
+ *
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ *
+ *
+ *
+ *
+ */
+
+#ifndef __ARCH_OPCODE_H__
+#define __ARCH_OPCODE_H__
+
+#ifndef __ASSEMBLER__
+
+/* Integer type that carries the raw bits of one instruction bundle;
+   the accessors in this header read positions up to bit 63 of it. */
+typedef unsigned long long tilepro_bundle_bits;
+
+/* This is the bit that determines if a bundle is in the Y encoding.
+   It is bit 63, the most significant bit of a 64-bit bundle. */
+#define TILEPRO_BUNDLE_Y_ENCODING_MASK ((tilepro_bundle_bits)1 << 63)
+
+enum
+{
+  /* Upper bound on instructions in one bundle: X bundles carry 2,
+     Y bundles carry 3. */
+  TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE = 3,
+
+  /* Count of distinct pipeline encodings (X0, X1, Y0, Y1 and Y2). */
+  TILEPRO_NUM_PIPELINE_ENCODINGS = 5,
+
+  /* Bundle size, first as a power-of-two exponent and then in bytes. */
+  TILEPRO_LOG2_BUNDLE_SIZE_IN_BYTES = 3,
+  TILEPRO_BUNDLE_SIZE_IN_BYTES = (1 << TILEPRO_LOG2_BUNDLE_SIZE_IN_BYTES),
+
+  /* Required bundle alignment, as an exponent and in bytes. */
+  TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3,
+  TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES =
+    1 << TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES,
+
+  /* Static network instruction size, as an exponent and in bytes. */
+  TILEPRO_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES = 1,
+  TILEPRO_SN_INSTRUCTION_SIZE_IN_BYTES =
+    1 << TILEPRO_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES,
+
+  /* Register count, including the special ones such as network I/O. */
+  TILEPRO_NUM_REGISTERS = 64,
+
+  /* Static network register count. */
+  TILEPRO_NUM_SN_REGISTERS = 4
+};
+
+/* Make a few "tile_" variables to simplify common code between
+   architectures.  Each one is a plain alias of the TILEPRO_-prefixed
+   type or constant of the same name defined above.  */
+
+typedef tilepro_bundle_bits tile_bundle_bits;
+#define TILE_BUNDLE_SIZE_IN_BYTES TILEPRO_BUNDLE_SIZE_IN_BYTES
+#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES
+#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \
+  TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES
+
+/* 64-bit pattern for a { bpt ; nop } bundle.  The ULL suffix keeps the
+   constant unsigned and wide enough for all 64 bits. */
+#define TILEPRO_BPT_BUNDLE 0x400b3cae70166000ULL
+
+/* Extract bits 9..0 of NUM (the 10-bit BrOff_SN field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_BrOff_SN(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return word & 0x3ff;
+}
+
+/* Reassemble the 17-bit BrOff_X1 value: result bits 14..0 come from
+   bundle bits 57..43 and result bits 16..15 from bundle bits 36..35. */
+static __inline unsigned int
+get_BrOff_X1(tilepro_bundle_bits n)
+{
+  const unsigned int low = (unsigned int)(n >> 43) & 0x00007fff;
+  const unsigned int high = (unsigned int)(n >> 20) & 0x00018000;
+
+  return low | high;
+}
+
+/* Extract bits 34..31 of N (the 4-bit BrType_X1 field). */
+static __inline unsigned int
+get_BrType_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 31);
+
+  return shifted & 0xf;
+}
+
+/* Reassemble the 8-bit Dest_Imm8_X1 value: result bits 5..0 come from
+   bundle bits 36..31 and result bits 7..6 from bundle bits 50..49. */
+static __inline unsigned int
+get_Dest_Imm8_X1(tilepro_bundle_bits n)
+{
+  const unsigned int low = (unsigned int)(n >> 31) & 0x0000003f;
+  const unsigned int high = (unsigned int)(n >> 43) & 0x000000c0;
+
+  return low | high;
+}
+
+/* Extract bits 3..2 of NUM (the 2-bit Dest_SN field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_Dest_SN(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 2) & 0x3;
+}
+
+/* Extract bits 5..0 of NUM (the 6-bit Dest_X0 field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_Dest_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return word & 0x3f;
+}
+
+/* Extract bits 36..31 of N (the 6-bit Dest_X1 field). */
+static __inline unsigned int
+get_Dest_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 31);
+
+  return shifted & 0x3f;
+}
+
+/* Extract bits 5..0 of NUM (the 6-bit Dest_Y0 field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_Dest_Y0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return word & 0x3f;
+}
+
+/* Extract bits 36..31 of N (the 6-bit Dest_Y1 field). */
+static __inline unsigned int
+get_Dest_Y1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 31);
+
+  return shifted & 0x3f;
+}
+
+/* Extract bits 27..12 of NUM (the 16-bit Imm16_X0 field).  Only the
+   low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_Imm16_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 12) & 0xffff;
+}
+
+/* Extract bits 58..43 of N (the 16-bit Imm16_X1 field). */
+static __inline unsigned int
+get_Imm16_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+
+  return shifted & 0xffff;
+}
+
+/* Extract bits 7..0 of NUM (the 8-bit Imm8_SN field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_Imm8_SN(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return word & 0xff;
+}
+
+/* Extract bits 19..12 of NUM (the 8-bit Imm8_X0 field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_Imm8_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 12) & 0xff;
+}
+
+/* Extract bits 50..43 of N (the 8-bit Imm8_X1 field). */
+static __inline unsigned int
+get_Imm8_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+
+  return shifted & 0xff;
+}
+
+/* Extract bits 19..12 of NUM (the 8-bit Imm8_Y0 field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_Imm8_Y0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 12) & 0xff;
+}
+
+/* Extract bits 50..43 of N (the 8-bit Imm8_Y1 field). */
+static __inline unsigned int
+get_Imm8_Y1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+
+  return shifted & 0xff;
+}
+
+/* Extract bits 26..20 of NUM (the 7-bit ImmOpcodeExtension_X0 field).
+   Only the low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_ImmOpcodeExtension_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 20) & 0x7f;
+}
+
+/* Extract bits 57..51 of N (the 7-bit ImmOpcodeExtension_X1 field). */
+static __inline unsigned int
+get_ImmOpcodeExtension_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 51);
+
+  return shifted & 0x7f;
+}
+
+/* Extract bits 9..8 of NUM (the 2-bit ImmRROpcodeExtension_SN field).
+   Only the low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_ImmRROpcodeExtension_SN(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 8) & 0x3;
+}
+
+/* Reassemble the 29-bit JOffLong_X1 value from five bundle slices:
+     result 14..0  <- bundle 57..43
+     result 16..15 <- bundle 36..35
+     result 20..17 <- bundle 34..31
+     result 26..21 <- bundle 42..37
+     result 28..27 <- bundle 59..58  */
+static __inline unsigned int
+get_JOffLong_X1(tilepro_bundle_bits n)
+{
+  unsigned int offset;
+
+  offset = (unsigned int)(n >> 43) & 0x00007fff;
+  offset |= (unsigned int)(n >> 20) & 0x00018000;
+  offset |= (unsigned int)(n >> 14) & 0x001e0000;
+  offset |= (unsigned int)(n >> 16) & 0x07e00000;
+  offset |= (unsigned int)(n >> 31) & 0x18000000;
+
+  return offset;
+}
+
+/* Reassemble the 28-bit JOff_X1 value from five bundle slices:
+     result 14..0  <- bundle 57..43
+     result 16..15 <- bundle 36..35
+     result 20..17 <- bundle 34..31
+     result 26..21 <- bundle 42..37
+     result 27     <- bundle 58  */
+static __inline unsigned int
+get_JOff_X1(tilepro_bundle_bits n)
+{
+  unsigned int offset;
+
+  offset = (unsigned int)(n >> 43) & 0x00007fff;
+  offset |= (unsigned int)(n >> 20) & 0x00018000;
+  offset |= (unsigned int)(n >> 14) & 0x001e0000;
+  offset |= (unsigned int)(n >> 16) & 0x07e00000;
+  offset |= (unsigned int)(n >> 31) & 0x08000000;
+
+  return offset;
+}
+
+/* Reassemble the 15-bit MF_Imm15_X1 value: result bits 13..0 come from
+   bundle bits 50..37 and result bit 14 from bundle bit 58. */
+static __inline unsigned int
+get_MF_Imm15_X1(tilepro_bundle_bits n)
+{
+  const unsigned int low = (unsigned int)(n >> 37) & 0x00003fff;
+  const unsigned int top = (unsigned int)(n >> 44) & 0x00004000;
+
+  return low | top;
+}
+
+/* Extract bits 22..18 of NUM (the 5-bit MMEnd_X0 field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_MMEnd_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 18) & 0x1f;
+}
+
+/* Extract bits 53..49 of N (the 5-bit MMEnd_X1 field). */
+static __inline unsigned int
+get_MMEnd_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 49);
+
+  return shifted & 0x1f;
+}
+
+/* Extract bits 27..23 of NUM (the 5-bit MMStart_X0 field).  Only the
+   low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_MMStart_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 23) & 0x1f;
+}
+
+/* Extract bits 58..54 of N (the 5-bit MMStart_X1 field). */
+static __inline unsigned int
+get_MMStart_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 54);
+
+  return shifted & 0x1f;
+}
+
+/* Reassemble the 15-bit MT_Imm15_X1 value from three bundle slices:
+     result 5..0  <- bundle 36..31
+     result 13..6 <- bundle 50..43
+     result 14    <- bundle 58  */
+static __inline unsigned int
+get_MT_Imm15_X1(tilepro_bundle_bits n)
+{
+  const unsigned int low = (unsigned int)(n >> 31) & 0x0000003f;
+  const unsigned int mid = (unsigned int)(n >> 37) & 0x00003fc0;
+  const unsigned int top = (unsigned int)(n >> 44) & 0x00004000;
+
+  return low | mid | top;
+}
+
+/* Return bit 63 of N (the Mode field), i.e. the bit selected by
+   TILEPRO_BUNDLE_Y_ENCODING_MASK. */
+static __inline unsigned int
+get_Mode(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 63);
+
+  return shifted & 0x1;
+}
+
+/* Extract bits 3..0 of NUM (the 4-bit NoRegOpcodeExtension_SN field).
+   Only the low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_NoRegOpcodeExtension_SN(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return word & 0xf;
+}
+
+/* Extract bits 15..10 of NUM (the 6-bit Opcode_SN field).  Only the
+   low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_Opcode_SN(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 10) & 0x3f;
+}
+
+/* Extract bits 30..28 of NUM (the 3-bit Opcode_X0 field).  Only the
+   low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_Opcode_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 28) & 0x7;
+}
+
+/* Extract bits 62..59 of N (the 4-bit Opcode_X1 field). */
+static __inline unsigned int
+get_Opcode_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 59);
+
+  return shifted & 0xf;
+}
+
+/* Extract bits 30..27 of NUM (the 4-bit Opcode_Y0 field).  Only the
+   low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_Opcode_Y0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 27) & 0xf;
+}
+
+/* Extract bits 62..59 of N (the 4-bit Opcode_Y1 field). */
+static __inline unsigned int
+get_Opcode_Y1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 59);
+
+  return shifted & 0xf;
+}
+
+/* Extract bits 58..56 of N (the 3-bit Opcode_Y2 field). */
+static __inline unsigned int
+get_Opcode_Y2(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 56);
+
+  return shifted & 0x7;
+}
+
+/* Extract bits 7..4 of NUM (the 4-bit RROpcodeExtension_SN field).
+   Only the low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_RROpcodeExtension_SN(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 4) & 0xf;
+}
+
+/* Extract bits 26..18 of NUM (the 9-bit RRROpcodeExtension_X0 field).
+   Only the low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_RRROpcodeExtension_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 18) & 0x1ff;
+}
+
+/* Extract bits 57..49 of N (the 9-bit RRROpcodeExtension_X1 field). */
+static __inline unsigned int
+get_RRROpcodeExtension_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 49);
+
+  return shifted & 0x1ff;
+}
+
+/* Extract bits 19..18 of NUM (the 2-bit RRROpcodeExtension_Y0 field).
+   Only the low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_RRROpcodeExtension_Y0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 18) & 0x3;
+}
+
+/* Extract bits 50..49 of N (the 2-bit RRROpcodeExtension_Y1 field). */
+static __inline unsigned int
+get_RRROpcodeExtension_Y1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 49);
+
+  return shifted & 0x3;
+}
+
+/* Extract bits 9..0 of NUM (the 10-bit RouteOpcodeExtension_SN field).
+   Only the low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_RouteOpcodeExtension_SN(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return word & 0x3ff;
+}
+
+/* Return bit 27 of NUM (the 1-bit S_X0 field).  Only the low 32 bits
+   of NUM are examined. */
+static __inline unsigned int
+get_S_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 27) & 0x1;
+}
+
+/* Return bit 58 of N (the 1-bit S_X1 field). */
+static __inline unsigned int
+get_S_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 58);
+
+  return shifted & 0x1;
+}
+
+/* Extract bits 16..12 of NUM (the 5-bit ShAmt_X0 field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_ShAmt_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 12) & 0x1f;
+}
+
+/* Extract bits 47..43 of N (the 5-bit ShAmt_X1 field). */
+static __inline unsigned int
+get_ShAmt_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+
+  return shifted & 0x1f;
+}
+
+/* Extract bits 16..12 of NUM (the 5-bit ShAmt_Y0 field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_ShAmt_Y0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 12) & 0x1f;
+}
+
+/* Extract bits 47..43 of N (the 5-bit ShAmt_Y1 field). */
+static __inline unsigned int
+get_ShAmt_Y1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+
+  return shifted & 0x1f;
+}
+
+/* Extract bits 11..6 of NUM (the 6-bit SrcA_X0 field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_SrcA_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 6) & 0x3f;
+}
+
+/* Extract bits 42..37 of N (the 6-bit SrcA_X1 field). */
+static __inline unsigned int
+get_SrcA_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 37);
+
+  return shifted & 0x3f;
+}
+
+/* Extract bits 11..6 of NUM (the 6-bit SrcA_Y0 field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_SrcA_Y0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 6) & 0x3f;
+}
+
+/* Extract bits 42..37 of N (the 6-bit SrcA_Y1 field). */
+static __inline unsigned int
+get_SrcA_Y1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 37);
+
+  return shifted & 0x3f;
+}
+
+/* Reassemble the 6-bit SrcA_Y2 value: result bit 0 comes from bundle
+   bit 26 and result bits 5..1 from bundle bits 55..51.
+
+   Both slices are narrowed to unsigned int explicitly before masking,
+   matching the other accessors; previously the bit-26 term was computed
+   in 64 bits and narrowed only implicitly by the return.  The returned
+   value is the same either way. */
+static __inline unsigned int
+get_SrcA_Y2(tilepro_bundle_bits n)
+{
+  return (((unsigned int)(n >> 26)) & 0x00000001) |
+         (((unsigned int)(n >> 50)) & 0x0000003e);
+}
+
+/* Extract bits 25..20 of NUM (the 6-bit SrcBDest_Y2 field).  Only the
+   low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_SrcBDest_Y2(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 20) & 0x3f;
+}
+
+/* Extract bits 17..12 of NUM (the 6-bit SrcB_X0 field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_SrcB_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 12) & 0x3f;
+}
+
+/* Extract bits 48..43 of N (the 6-bit SrcB_X1 field). */
+static __inline unsigned int
+get_SrcB_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+
+  return shifted & 0x3f;
+}
+
+/* Extract bits 17..12 of NUM (the 6-bit SrcB_Y0 field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_SrcB_Y0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 12) & 0x3f;
+}
+
+/* Extract bits 48..43 of N (the 6-bit SrcB_Y1 field). */
+static __inline unsigned int
+get_SrcB_Y1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+
+  return shifted & 0x3f;
+}
+
+/* Extract bits 1..0 of NUM (the 2-bit Src_SN field).  Only the low
+   32 bits of NUM are examined. */
+static __inline unsigned int
+get_Src_SN(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return word & 0x3;
+}
+
+/* Extract bits 16..12 of NUM (the 5-bit UnOpcodeExtension_X0 field).
+   Only the low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_UnOpcodeExtension_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 12) & 0x1f;
+}
+
+/* Extract bits 47..43 of N (the 5-bit UnOpcodeExtension_X1 field). */
+static __inline unsigned int
+get_UnOpcodeExtension_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+
+  return shifted & 0x1f;
+}
+
+/* Extract bits 16..12 of NUM (the 5-bit UnOpcodeExtension_Y0 field).
+   Only the low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_UnOpcodeExtension_Y0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 12) & 0x1f;
+}
+
+/* Extract bits 47..43 of N (the 5-bit UnOpcodeExtension_Y1 field). */
+static __inline unsigned int
+get_UnOpcodeExtension_Y1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+
+  return shifted & 0x1f;
+}
+
+/* Extract bits 26..17 of NUM (the 10-bit UnShOpcodeExtension_X0 field).
+   Only the low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_UnShOpcodeExtension_X0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 17) & 0x3ff;
+}
+
+/* Extract bits 57..48 of N (the 10-bit UnShOpcodeExtension_X1 field). */
+static __inline unsigned int
+get_UnShOpcodeExtension_X1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 48);
+
+  return shifted & 0x3ff;
+}
+
+/* Extract bits 19..17 of NUM (the 3-bit UnShOpcodeExtension_Y0 field).
+   Only the low 32 bits of NUM are examined. */
+static __inline unsigned int
+get_UnShOpcodeExtension_Y0(tilepro_bundle_bits num)
+{
+  const unsigned int word = (unsigned int)num;
+
+  return (word >> 17) & 0x7;
+}
+
+/* Extract bits 50..48 of N (the 3-bit UnShOpcodeExtension_Y1 field). */
+static __inline unsigned int
+get_UnShOpcodeExtension_Y1(tilepro_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 48);
+
+  return shifted & 0x7;
+}
+
+
+/* Sign-extend the low NUM_BITS bits of N to a full int.
+
+   Bit (NUM_BITS - 1) of N is treated as the sign bit; bits above it
+   are ignored.  The work is done on unsigned values because the
+   shift-left/shift-right idiom left-shifts a signed int (undefined
+   once a set bit reaches the sign position) and depends on how the
+   compiler right-shifts negative numbers.
+
+   When NUM_BITS is not in the range 1 .. (width of int - 1) there is
+   nothing to extend, so N is returned unchanged instead of shifting
+   by an out-of-range count. */
+static __inline int
+sign_extend(int n, int num_bits)
+{
+  const int int_bits = (int)(sizeof(int) * 8);
+  unsigned int sign_bit, field;
+
+  if (num_bits <= 0 || num_bits >= int_bits)
+    return n;
+
+  sign_bit = 1u << (num_bits - 1);
+  field = (unsigned int)n & ((sign_bit << 1) - 1u);
+
+  /* Flipping the sign bit and then subtracting it maps fields with the
+     sign bit set onto the negative range; both operands fit in int. */
+  return (int)(field ^ sign_bit) - (int)sign_bit;
+}
+
+
+
+/* Encode the low 10 bits of NUM as bits 9..0 (the BrOff_SN field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_BrOff_SN(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3ff;
+
+  return field;
+}
+
+/* Scatter the low 17 bits of NUM into the BrOff_X1 positions: value
+   bits 14..0 go to bundle bits 57..43 and value bits 16..15 go to
+   bundle bits 36..35.  Every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_BrOff_X1(int num)
+{
+  const unsigned int bits = (unsigned int)num;
+  const tilepro_bundle_bits low = bits & 0x00007fff;
+  const tilepro_bundle_bits high = bits & 0x00018000;
+
+  return (low << 43) | (high << 20);
+}
+
+/* Encode the low 4 bits of NUM as bits 34..31 (the BrType_X1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_BrType_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0xf;
+
+  return field << 31;
+}
+
+/* Scatter the low 8 bits of NUM into the Dest_Imm8_X1 positions: value
+   bits 5..0 go to bundle bits 36..31 and value bits 7..6 go to bundle
+   bits 50..49.  Every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Dest_Imm8_X1(int num)
+{
+  const unsigned int bits = (unsigned int)num;
+  const tilepro_bundle_bits low = bits & 0x0000003f;
+  const tilepro_bundle_bits high = bits & 0x000000c0;
+
+  return (low << 31) | (high << 43);
+}
+
+/* Encode the low 2 bits of NUM as bits 3..2 (the Dest_SN field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Dest_SN(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3;
+
+  return field << 2;
+}
+
+/* Encode the low 6 bits of NUM as bits 5..0 (the Dest_X0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Dest_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+
+  return field;
+}
+
+/* Encode the low 6 bits of NUM as bits 36..31 (the Dest_X1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Dest_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3f;
+
+  return field << 31;
+}
+
+/* Encode the low 6 bits of NUM as bits 5..0 (the Dest_Y0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Dest_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+
+  return field;
+}
+
+/* Encode the low 6 bits of NUM as bits 36..31 (the Dest_Y1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Dest_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3f;
+
+  return field << 31;
+}
+
+/* Encode the low 16 bits of NUM as bits 27..12 (the Imm16_X0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Imm16_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xffff;
+
+  return field << 12;
+}
+
+/* Encode the low 16 bits of NUM as bits 58..43 (the Imm16_X1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Imm16_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0xffff;
+
+  return field << 43;
+}
+
+/* Encode the low 8 bits of NUM as bits 7..0 (the Imm8_SN field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Imm8_SN(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xff;
+
+  return field;
+}
+
+/* Encode the low 8 bits of NUM as bits 19..12 (the Imm8_X0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Imm8_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xff;
+
+  return field << 12;
+}
+
+/* Encode the low 8 bits of NUM as bits 50..43 (the Imm8_X1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Imm8_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0xff;
+
+  return field << 43;
+}
+
+/* Encode the low 8 bits of NUM as bits 19..12 (the Imm8_Y0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Imm8_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xff;
+
+  return field << 12;
+}
+
+/* Encode the low 8 bits of NUM as bits 50..43 (the Imm8_Y1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Imm8_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0xff;
+
+  return field << 43;
+}
+
+/* Encode the low 7 bits of NUM as bits 26..20 (the
+   ImmOpcodeExtension_X0 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_ImmOpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x7f;
+
+  return field << 20;
+}
+
+/* Encode the low 7 bits of NUM as bits 57..51 (the
+   ImmOpcodeExtension_X1 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_ImmOpcodeExtension_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x7f;
+
+  return field << 51;
+}
+
+/* Encode the low 2 bits of NUM as bits 9..8 (the
+   ImmRROpcodeExtension_SN field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_ImmRROpcodeExtension_SN(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3;
+
+  return field << 8;
+}
+
+/* Scatter the low 29 bits of NUM into the JOffLong_X1 positions:
+     value 14..0  -> bundle 57..43
+     value 16..15 -> bundle 36..35
+     value 20..17 -> bundle 34..31
+     value 26..21 -> bundle 42..37
+     value 28..27 -> bundle 59..58
+   Every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_JOffLong_X1(int num)
+{
+  const unsigned int bits = (unsigned int)num;
+  tilepro_bundle_bits encoded;
+
+  encoded = (tilepro_bundle_bits)(bits & 0x00007fff) << 43;
+  encoded |= (tilepro_bundle_bits)(bits & 0x00018000) << 20;
+  encoded |= (tilepro_bundle_bits)(bits & 0x001e0000) << 14;
+  encoded |= (tilepro_bundle_bits)(bits & 0x07e00000) << 16;
+  encoded |= (tilepro_bundle_bits)(bits & 0x18000000) << 31;
+
+  return encoded;
+}
+
+/* Scatter the low 28 bits of NUM into the JOff_X1 positions:
+     value 14..0  -> bundle 57..43
+     value 16..15 -> bundle 36..35
+     value 20..17 -> bundle 34..31
+     value 26..21 -> bundle 42..37
+     value 27     -> bundle 58
+   Every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_JOff_X1(int num)
+{
+  const unsigned int bits = (unsigned int)num;
+  tilepro_bundle_bits encoded;
+
+  encoded = (tilepro_bundle_bits)(bits & 0x00007fff) << 43;
+  encoded |= (tilepro_bundle_bits)(bits & 0x00018000) << 20;
+  encoded |= (tilepro_bundle_bits)(bits & 0x001e0000) << 14;
+  encoded |= (tilepro_bundle_bits)(bits & 0x07e00000) << 16;
+  encoded |= (tilepro_bundle_bits)(bits & 0x08000000) << 31;
+
+  return encoded;
+}
+
+/* Scatter the low 15 bits of NUM into the MF_Imm15_X1 positions: value
+   bits 13..0 go to bundle bits 50..37 and value bit 14 goes to bundle
+   bit 58.  Every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_MF_Imm15_X1(int num)
+{
+  const unsigned int bits = (unsigned int)num;
+  const tilepro_bundle_bits low = bits & 0x00003fff;
+  const tilepro_bundle_bits top = bits & 0x00004000;
+
+  return (low << 37) | (top << 44);
+}
+
+/* Encode the low 5 bits of NUM as bits 22..18 (the MMEnd_X0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_MMEnd_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1f;
+
+  return field << 18;
+}
+
+/* Encode the low 5 bits of NUM as bits 53..49 (the MMEnd_X1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_MMEnd_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1f;
+
+  return field << 49;
+}
+
+/* Encode the low 5 bits of NUM as bits 27..23 (the MMStart_X0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_MMStart_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1f;
+
+  return field << 23;
+}
+
+/* Encode the low 5 bits of NUM as bits 58..54 (the MMStart_X1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_MMStart_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1f;
+
+  return field << 54;
+}
+
+/* Scatter the low 15 bits of NUM into the MT_Imm15_X1 positions:
+     value 5..0  -> bundle 36..31
+     value 13..6 -> bundle 50..43
+     value 14    -> bundle 58
+   Every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_MT_Imm15_X1(int num)
+{
+  const unsigned int bits = (unsigned int)num;
+  const tilepro_bundle_bits low = bits & 0x0000003f;
+  const tilepro_bundle_bits mid = bits & 0x00003fc0;
+  const tilepro_bundle_bits top = bits & 0x00004000;
+
+  return (low << 31) | (mid << 37) | (top << 44);
+}
+
+/* Encode the low bit of NUM as bit 63 (the Mode field, the bit selected
+   by TILEPRO_BUNDLE_Y_ENCODING_MASK); every other bit is zero. */
+static __inline tilepro_bundle_bits
+create_Mode(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1;
+
+  return field << 63;
+}
+
+/* Encode the low 4 bits of NUM as bits 3..0 (the
+   NoRegOpcodeExtension_SN field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_NoRegOpcodeExtension_SN(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xf;
+
+  return field;
+}
+
+/* Encode the low 6 bits of NUM as bits 15..10 (the Opcode_SN field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Opcode_SN(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+
+  return field << 10;
+}
+
+/* Encode the low 3 bits of NUM as bits 30..28 (the Opcode_X0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Opcode_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x7;
+
+  return field << 28;
+}
+
+/* Encode the low 4 bits of NUM as bits 62..59 (the Opcode_X1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Opcode_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0xf;
+
+  return field << 59;
+}
+
+/* Encode the low 4 bits of NUM as bits 30..27 (the Opcode_Y0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Opcode_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xf;
+
+  return field << 27;
+}
+
+/* Encode the low 4 bits of NUM as bits 62..59 (the Opcode_Y1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Opcode_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0xf;
+
+  return field << 59;
+}
+
+/* Encode the low 3 bits of NUM as bits 58..56 (the Opcode_Y2 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Opcode_Y2(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x7;
+
+  return field << 56;
+}
+
+/* Encode the low 4 bits of NUM as bits 7..4 (the RROpcodeExtension_SN
+   field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_RROpcodeExtension_SN(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xf;
+
+  return field << 4;
+}
+
+/* Encode the low 9 bits of NUM as bits 26..18 (the
+   RRROpcodeExtension_X0 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_RRROpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1ff;
+
+  return field << 18;
+}
+
+/* Encode the low 9 bits of NUM as bits 57..49 (the
+   RRROpcodeExtension_X1 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_RRROpcodeExtension_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1ff;
+
+  return field << 49;
+}
+
+/* Encode the low 2 bits of NUM as bits 19..18 (the
+   RRROpcodeExtension_Y0 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_RRROpcodeExtension_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3;
+
+  return field << 18;
+}
+
+/* Encode the low 2 bits of NUM as bits 50..49 (the
+   RRROpcodeExtension_Y1 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_RRROpcodeExtension_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3;
+
+  return field << 49;
+}
+
+/* Encode the low 10 bits of NUM as bits 9..0 (the
+   RouteOpcodeExtension_SN field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_RouteOpcodeExtension_SN(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3ff;
+
+  return field;
+}
+
+/* Encode the low bit of NUM as bit 27 (the S_X0 field); every other
+   bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_S_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1;
+
+  return field << 27;
+}
+
+/* Encode the low bit of NUM as bit 58 (the S_X1 field); every other
+   bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_S_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1;
+
+  return field << 58;
+}
+
+/* Encode the low 5 bits of NUM as bits 16..12 (the ShAmt_X0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_ShAmt_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1f;
+
+  return field << 12;
+}
+
+/* Encode the low 5 bits of NUM as bits 47..43 (the ShAmt_X1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_ShAmt_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1f;
+
+  return field << 43;
+}
+
+/* Encode the low 5 bits of NUM as bits 16..12 (the ShAmt_Y0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_ShAmt_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1f;
+
+  return field << 12;
+}
+
+/* Encode the low 5 bits of NUM as bits 47..43 (the ShAmt_Y1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_ShAmt_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1f;
+
+  return field << 43;
+}
+
+/* Encode the low 6 bits of NUM as bits 11..6 (the SrcA_X0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_SrcA_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+
+  return field << 6;
+}
+
+/* Encode the low 6 bits of NUM as bits 42..37 (the SrcA_X1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_SrcA_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3f;
+
+  return field << 37;
+}
+
+/* Encode the low 6 bits of NUM as bits 11..6 (the SrcA_Y0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_SrcA_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+
+  return field << 6;
+}
+
+/* Encode the low 6 bits of NUM as bits 42..37 (the SrcA_Y1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_SrcA_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3f;
+
+  return field << 37;
+}
+
+/* Scatter the low 6 bits of NUM into the SrcA_Y2 positions: value bit
+   0 goes to bundle bit 26 and value bits 5..1 go to bundle bits 55..51.
+   Every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_SrcA_Y2(int num)
+{
+  const unsigned int bits = (unsigned int)num;
+  const tilepro_bundle_bits bit0 = bits & 0x00000001;
+  const tilepro_bundle_bits rest = bits & 0x0000003e;
+
+  return (bit0 << 26) | (rest << 50);
+}
+
+/* Encode the low 6 bits of NUM as bits 25..20 (the SrcBDest_Y2 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_SrcBDest_Y2(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+
+  return field << 20;
+}
+
+/* Encode the low 6 bits of NUM as bits 17..12 (the SrcB_X0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_SrcB_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+
+  return field << 12;
+}
+
+/* Encode the low 6 bits of NUM as bits 48..43 (the SrcB_X1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_SrcB_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3f;
+
+  return field << 43;
+}
+
+/* Encode the low 6 bits of NUM as bits 17..12 (the SrcB_Y0 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_SrcB_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3f;
+
+  return field << 12;
+}
+
+/* Encode the low 6 bits of NUM as bits 48..43 (the SrcB_Y1 field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_SrcB_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3f;
+
+  return field << 43;
+}
+
+/* Encode the low 2 bits of NUM as bits 1..0 (the Src_SN field);
+   every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_Src_SN(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3;
+
+  return field;
+}
+
+/* Encode the low 5 bits of NUM as bits 16..12 (the
+   UnOpcodeExtension_X0 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_UnOpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1f;
+
+  return field << 12;
+}
+
+/* Encode the low 5 bits of NUM as bits 47..43 (the
+   UnOpcodeExtension_X1 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_UnOpcodeExtension_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1f;
+
+  return field << 43;
+}
+
+/* Encode the low 5 bits of NUM as bits 16..12 (the
+   UnOpcodeExtension_Y0 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_UnOpcodeExtension_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x1f;
+
+  return field << 12;
+}
+
+/* Encode the low 5 bits of NUM as bits 47..43 (the
+   UnOpcodeExtension_Y1 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_UnOpcodeExtension_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x1f;
+
+  return field << 43;
+}
+
+/* Encode the low 10 bits of NUM as bits 26..17 (the
+   UnShOpcodeExtension_X0 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_UnShOpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3ff;
+
+  return field << 17;
+}
+
+/* Encode the low 10 bits of NUM as bits 57..48 (the
+   UnShOpcodeExtension_X1 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_UnShOpcodeExtension_X1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x3ff;
+
+  return field << 48;
+}
+
+/* Encode the low 3 bits of NUM as bits 19..17 (the
+   UnShOpcodeExtension_Y0 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_UnShOpcodeExtension_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x7;
+
+  return field << 17;
+}
+
+/* Encode the low 3 bits of NUM as bits 50..48 (the
+   UnShOpcodeExtension_Y1 field); every other bit of the result is zero. */
+static __inline tilepro_bundle_bits
+create_UnShOpcodeExtension_Y1(int num)
+{
+  const tilepro_bundle_bits field = (unsigned int)num & 0x7;
+
+  return field << 48;
+}
+
+
+enum
+{
+  ADDBS_U_SPECIAL_0_OPCODE_X0 = 98,
+  ADDBS_U_SPECIAL_0_OPCODE_X1 = 68,
+  ADDB_SPECIAL_0_OPCODE_X0 = 1,
+  ADDB_SPECIAL_0_OPCODE_X1 = 1,
+  ADDHS_SPECIAL_0_OPCODE_X0 = 99,
+  ADDHS_SPECIAL_0_OPCODE_X1 = 69,
+  ADDH_SPECIAL_0_OPCODE_X0 = 2,
+  ADDH_SPECIAL_0_OPCODE_X1 = 2,
+  ADDIB_IMM_0_OPCODE_X0 = 1,
+  ADDIB_IMM_0_OPCODE_X1 = 1,
+  ADDIH_IMM_0_OPCODE_X0 = 2,
+  ADDIH_IMM_0_OPCODE_X1 = 2,
+  ADDI_IMM_0_OPCODE_X0 = 3,
+  ADDI_IMM_0_OPCODE_X1 = 3,
+  ADDI_IMM_1_OPCODE_SN = 1,
+  ADDI_OPCODE_Y0 = 9,
+  ADDI_OPCODE_Y1 = 7,
+  ADDLIS_OPCODE_X0 = 1,
+  ADDLIS_OPCODE_X1 = 2,
+  ADDLI_OPCODE_X0 = 2,
+  ADDLI_OPCODE_X1 = 3,
+  ADDS_SPECIAL_0_OPCODE_X0 = 96,
+  ADDS_SPECIAL_0_OPCODE_X1 = 66,
+  ADD_SPECIAL_0_OPCODE_X0 = 3,
+  ADD_SPECIAL_0_OPCODE_X1 = 3,
+  ADD_SPECIAL_0_OPCODE_Y0 = 0,
+  ADD_SPECIAL_0_OPCODE_Y1 = 0,
+  ADIFFB_U_SPECIAL_0_OPCODE_X0 = 4,
+  ADIFFH_SPECIAL_0_OPCODE_X0 = 5,
+  ANDI_IMM_0_OPCODE_X0 = 1,
+  ANDI_IMM_0_OPCODE_X1 = 4,
+  ANDI_OPCODE_Y0 = 10,
+  ANDI_OPCODE_Y1 = 8,
+  AND_SPECIAL_0_OPCODE_X0 = 6,
+  AND_SPECIAL_0_OPCODE_X1 = 4,
+  AND_SPECIAL_2_OPCODE_Y0 = 0,
+  AND_SPECIAL_2_OPCODE_Y1 = 0,
+  AULI_OPCODE_X0 = 3,
+  AULI_OPCODE_X1 = 4,
+  AVGB_U_SPECIAL_0_OPCODE_X0 = 7,
+  AVGH_SPECIAL_0_OPCODE_X0 = 8,
+  BBNST_BRANCH_OPCODE_X1 = 15,
+  BBNS_BRANCH_OPCODE_X1 = 14,
+  BBNS_OPCODE_SN = 63,
+  BBST_BRANCH_OPCODE_X1 = 13,
+  BBS_BRANCH_OPCODE_X1 = 12,
+  BBS_OPCODE_SN = 62,
+  BGEZT_BRANCH_OPCODE_X1 = 7,
+  BGEZ_BRANCH_OPCODE_X1 = 6,
+  BGEZ_OPCODE_SN = 61,
+  BGZT_BRANCH_OPCODE_X1 = 5,
+  BGZ_BRANCH_OPCODE_X1 = 4,
+  BGZ_OPCODE_SN = 58,
+  BITX_UN_0_SHUN_0_OPCODE_X0 = 1,
+  BITX_UN_0_SHUN_0_OPCODE_Y0 = 1,
+  BLEZT_BRANCH_OPCODE_X1 = 11,
+  BLEZ_BRANCH_OPCODE_X1 = 10,
+  BLEZ_OPCODE_SN = 59,
+  BLZT_BRANCH_OPCODE_X1 = 9,
+  BLZ_BRANCH_OPCODE_X1 = 8,
+  BLZ_OPCODE_SN = 60,
+  BNZT_BRANCH_OPCODE_X1 = 3,
+  BNZ_BRANCH_OPCODE_X1 = 2,
+  BNZ_OPCODE_SN = 57,
+  BPT_NOREG_RR_IMM_0_OPCODE_SN = 1,
+  BRANCH_OPCODE_X1 = 5,
+  BYTEX_UN_0_SHUN_0_OPCODE_X0 = 2,
+  BYTEX_UN_0_SHUN_0_OPCODE_Y0 = 2,
+  BZT_BRANCH_OPCODE_X1 = 1,
+  BZ_BRANCH_OPCODE_X1 = 0,
+  BZ_OPCODE_SN = 56,
+  CLZ_UN_0_SHUN_0_OPCODE_X0 = 3,
+  CLZ_UN_0_SHUN_0_OPCODE_Y0 = 3,
+  CRC32_32_SPECIAL_0_OPCODE_X0 = 9,
+  CRC32_8_SPECIAL_0_OPCODE_X0 = 10,
+  CTZ_UN_0_SHUN_0_OPCODE_X0 = 4,
+  CTZ_UN_0_SHUN_0_OPCODE_Y0 = 4,
+  DRAIN_UN_0_SHUN_0_OPCODE_X1 = 1,
+  DTLBPR_UN_0_SHUN_0_OPCODE_X1 = 2,
+  DWORD_ALIGN_SPECIAL_0_OPCODE_X0 = 95,
+  FINV_UN_0_SHUN_0_OPCODE_X1 = 3,
+  FLUSH_UN_0_SHUN_0_OPCODE_X1 = 4,
+  FNOP_NOREG_RR_IMM_0_OPCODE_SN = 3,
+  FNOP_UN_0_SHUN_0_OPCODE_X0 = 5,
+  FNOP_UN_0_SHUN_0_OPCODE_X1 = 5,
+  FNOP_UN_0_SHUN_0_OPCODE_Y0 = 5,
+  FNOP_UN_0_SHUN_0_OPCODE_Y1 = 1,
+  HALT_NOREG_RR_IMM_0_OPCODE_SN = 0,
+  ICOH_UN_0_SHUN_0_OPCODE_X1 = 6,
+  ILL_UN_0_SHUN_0_OPCODE_X1 = 7,
+  ILL_UN_0_SHUN_0_OPCODE_Y1 = 2,
+  IMM_0_OPCODE_SN = 0,
+  IMM_0_OPCODE_X0 = 4,
+  IMM_0_OPCODE_X1 = 6,
+  IMM_1_OPCODE_SN = 1,
+  IMM_OPCODE_0_X0 = 5,
+  INTHB_SPECIAL_0_OPCODE_X0 = 11,
+  INTHB_SPECIAL_0_OPCODE_X1 = 5,
+  INTHH_SPECIAL_0_OPCODE_X0 = 12,
+  INTHH_SPECIAL_0_OPCODE_X1 = 6,
+  INTLB_SPECIAL_0_OPCODE_X0 = 13,
+  INTLB_SPECIAL_0_OPCODE_X1 = 7,
+  INTLH_SPECIAL_0_OPCODE_X0 = 14,
+  INTLH_SPECIAL_0_OPCODE_X1 = 8,
+  INV_UN_0_SHUN_0_OPCODE_X1 = 8,
+  IRET_UN_0_SHUN_0_OPCODE_X1 = 9,
+  JALB_OPCODE_X1 = 13,
+  JALF_OPCODE_X1 = 12,
+  JALRP_SPECIAL_0_OPCODE_X1 = 9,
+  JALRR_IMM_1_OPCODE_SN = 3,
+  JALR_RR_IMM_0_OPCODE_SN = 5,
+  JALR_SPECIAL_0_OPCODE_X1 = 10,
+  JB_OPCODE_X1 = 11,
+  JF_OPCODE_X1 = 10,
+  JRP_SPECIAL_0_OPCODE_X1 = 11,
+  JRR_IMM_1_OPCODE_SN = 2,
+  JR_RR_IMM_0_OPCODE_SN = 4,
+  JR_SPECIAL_0_OPCODE_X1 = 12,
+  LBADD_IMM_0_OPCODE_X1 = 22,
+  LBADD_U_IMM_0_OPCODE_X1 = 23,
+  LB_OPCODE_Y2 = 0,
+  LB_UN_0_SHUN_0_OPCODE_X1 = 10,
+  LB_U_OPCODE_Y2 = 1,
+  LB_U_UN_0_SHUN_0_OPCODE_X1 = 11,
+  LHADD_IMM_0_OPCODE_X1 = 24,
+  LHADD_U_IMM_0_OPCODE_X1 = 25,
+  LH_OPCODE_Y2 = 2,
+  LH_UN_0_SHUN_0_OPCODE_X1 = 12,
+  LH_U_OPCODE_Y2 = 3,
+  LH_U_UN_0_SHUN_0_OPCODE_X1 = 13,
+  LNK_SPECIAL_0_OPCODE_X1 = 13,
+  LWADD_IMM_0_OPCODE_X1 = 26,
+  LWADD_NA_IMM_0_OPCODE_X1 = 27,
+  LW_NA_UN_0_SHUN_0_OPCODE_X1 = 24,
+  LW_OPCODE_Y2 = 4,
+  LW_UN_0_SHUN_0_OPCODE_X1 = 14,
+  MAXB_U_SPECIAL_0_OPCODE_X0 = 15,
+  MAXB_U_SPECIAL_0_OPCODE_X1 = 14,
+  MAXH_SPECIAL_0_OPCODE_X0 = 16,
+  MAXH_SPECIAL_0_OPCODE_X1 = 15,
+  MAXIB_U_IMM_0_OPCODE_X0 = 4,
+  MAXIB_U_IMM_0_OPCODE_X1 = 5,
+  MAXIH_IMM_0_OPCODE_X0 = 5,
+  MAXIH_IMM_0_OPCODE_X1 = 6,
+  MFSPR_IMM_0_OPCODE_X1 = 7,
+  MF_UN_0_SHUN_0_OPCODE_X1 = 15,
+  MINB_U_SPECIAL_0_OPCODE_X0 = 17,
+  MINB_U_SPECIAL_0_OPCODE_X1 = 16,
+  MINH_SPECIAL_0_OPCODE_X0 = 18,
+  MINH_SPECIAL_0_OPCODE_X1 = 17,
+  MINIB_U_IMM_0_OPCODE_X0 = 6,
+  MINIB_U_IMM_0_OPCODE_X1 = 8,
+  MINIH_IMM_0_OPCODE_X0 = 7,
+  MINIH_IMM_0_OPCODE_X1 = 9,
+  MM_OPCODE_X0 = 6,
+  MM_OPCODE_X1 = 7,
+  MNZB_SPECIAL_0_OPCODE_X0 = 19,
+  MNZB_SPECIAL_0_OPCODE_X1 = 18,
+  MNZH_SPECIAL_0_OPCODE_X0 = 20,
+  MNZH_SPECIAL_0_OPCODE_X1 = 19,
+  MNZ_SPECIAL_0_OPCODE_X0 = 21,
+  MNZ_SPECIAL_0_OPCODE_X1 = 20,
+  MNZ_SPECIAL_1_OPCODE_Y0 = 0,
+  MNZ_SPECIAL_1_OPCODE_Y1 = 1,
+  MOVEI_IMM_1_OPCODE_SN = 0,
+  MOVE_RR_IMM_0_OPCODE_SN = 8,
+  MTSPR_IMM_0_OPCODE_X1 = 10,
+  MULHHA_SS_SPECIAL_0_OPCODE_X0 = 22,
+  MULHHA_SS_SPECIAL_7_OPCODE_Y0 = 0,
+  MULHHA_SU_SPECIAL_0_OPCODE_X0 = 23,
+  MULHHA_UU_SPECIAL_0_OPCODE_X0 = 24,
+  MULHHA_UU_SPECIAL_7_OPCODE_Y0 = 1,
+  MULHHSA_UU_SPECIAL_0_OPCODE_X0 = 25,
+  MULHH_SS_SPECIAL_0_OPCODE_X0 = 26,
+  MULHH_SS_SPECIAL_6_OPCODE_Y0 = 0,
+  MULHH_SU_SPECIAL_0_OPCODE_X0 = 27,
+  MULHH_UU_SPECIAL_0_OPCODE_X0 = 28,
+  MULHH_UU_SPECIAL_6_OPCODE_Y0 = 1,
+  MULHLA_SS_SPECIAL_0_OPCODE_X0 = 29,
+  MULHLA_SU_SPECIAL_0_OPCODE_X0 = 30,
+  MULHLA_US_SPECIAL_0_OPCODE_X0 = 31,
+  MULHLA_UU_SPECIAL_0_OPCODE_X0 = 32,
+  MULHLSA_UU_SPECIAL_0_OPCODE_X0 = 33,
+  MULHLSA_UU_SPECIAL_5_OPCODE_Y0 = 0,
+  MULHL_SS_SPECIAL_0_OPCODE_X0 = 34,
+  MULHL_SU_SPECIAL_0_OPCODE_X0 = 35,
+  MULHL_US_SPECIAL_0_OPCODE_X0 = 36,
+  MULHL_UU_SPECIAL_0_OPCODE_X0 = 37,
+  MULLLA_SS_SPECIAL_0_OPCODE_X0 = 38,
+  MULLLA_SS_SPECIAL_7_OPCODE_Y0 = 2,
+  MULLLA_SU_SPECIAL_0_OPCODE_X0 = 39,
+  MULLLA_UU_SPECIAL_0_OPCODE_X0 = 40,
+  MULLLA_UU_SPECIAL_7_OPCODE_Y0 = 3,
+  MULLLSA_UU_SPECIAL_0_OPCODE_X0 = 41,
+  MULLL_SS_SPECIAL_0_OPCODE_X0 = 42,
+  MULLL_SS_SPECIAL_6_OPCODE_Y0 = 2,
+  MULLL_SU_SPECIAL_0_OPCODE_X0 = 43,
+  MULLL_UU_SPECIAL_0_OPCODE_X0 = 44,
+  MULLL_UU_SPECIAL_6_OPCODE_Y0 = 3,
+  MVNZ_SPECIAL_0_OPCODE_X0 = 45,
+  MVNZ_SPECIAL_1_OPCODE_Y0 = 1,
+  MVZ_SPECIAL_0_OPCODE_X0 = 46,
+  MVZ_SPECIAL_1_OPCODE_Y0 = 2,
+  MZB_SPECIAL_0_OPCODE_X0 = 47,
+  MZB_SPECIAL_0_OPCODE_X1 = 21,
+  MZH_SPECIAL_0_OPCODE_X0 = 48,
+  MZH_SPECIAL_0_OPCODE_X1 = 22,
+  MZ_SPECIAL_0_OPCODE_X0 = 49,
+  MZ_SPECIAL_0_OPCODE_X1 = 23,
+  MZ_SPECIAL_1_OPCODE_Y0 = 3,
+  MZ_SPECIAL_1_OPCODE_Y1 = 2,
+  NAP_UN_0_SHUN_0_OPCODE_X1 = 16,
+  NOP_NOREG_RR_IMM_0_OPCODE_SN = 2,
+  NOP_UN_0_SHUN_0_OPCODE_X0 = 6,
+  NOP_UN_0_SHUN_0_OPCODE_X1 = 17,
+  NOP_UN_0_SHUN_0_OPCODE_Y0 = 6,
+  NOP_UN_0_SHUN_0_OPCODE_Y1 = 3,
+  NOREG_RR_IMM_0_OPCODE_SN = 0,
+  NOR_SPECIAL_0_OPCODE_X0 = 50,
+  NOR_SPECIAL_0_OPCODE_X1 = 24,
+  NOR_SPECIAL_2_OPCODE_Y0 = 1,
+  NOR_SPECIAL_2_OPCODE_Y1 = 1,
+  ORI_IMM_0_OPCODE_X0 = 8,
+  ORI_IMM_0_OPCODE_X1 = 11,
+  ORI_OPCODE_Y0 = 11,
+  ORI_OPCODE_Y1 = 9,
+  OR_SPECIAL_0_OPCODE_X0 = 51,
+  OR_SPECIAL_0_OPCODE_X1 = 25,
+  OR_SPECIAL_2_OPCODE_Y0 = 2,
+  OR_SPECIAL_2_OPCODE_Y1 = 2,
+  PACKBS_U_SPECIAL_0_OPCODE_X0 = 103,
+  PACKBS_U_SPECIAL_0_OPCODE_X1 = 73,
+  PACKHB_SPECIAL_0_OPCODE_X0 = 52,
+  PACKHB_SPECIAL_0_OPCODE_X1 = 26,
+  PACKHS_SPECIAL_0_OPCODE_X0 = 102,
+  PACKHS_SPECIAL_0_OPCODE_X1 = 72,
+  PACKLB_SPECIAL_0_OPCODE_X0 = 53,
+  PACKLB_SPECIAL_0_OPCODE_X1 = 27,
+  PCNT_UN_0_SHUN_0_OPCODE_X0 = 7,
+  PCNT_UN_0_SHUN_0_OPCODE_Y0 = 7,
+  RLI_SHUN_0_OPCODE_X0 = 1,
+  RLI_SHUN_0_OPCODE_X1 = 1,
+  RLI_SHUN_0_OPCODE_Y0 = 1,
+  RLI_SHUN_0_OPCODE_Y1 = 1,
+  RL_SPECIAL_0_OPCODE_X0 = 54,
+  RL_SPECIAL_0_OPCODE_X1 = 28,
+  RL_SPECIAL_3_OPCODE_Y0 = 0,
+  RL_SPECIAL_3_OPCODE_Y1 = 0,
+  RR_IMM_0_OPCODE_SN = 0,
+  S1A_SPECIAL_0_OPCODE_X0 = 55,
+  S1A_SPECIAL_0_OPCODE_X1 = 29,
+  S1A_SPECIAL_0_OPCODE_Y0 = 1,
+  S1A_SPECIAL_0_OPCODE_Y1 = 1,
+  S2A_SPECIAL_0_OPCODE_X0 = 56,
+  S2A_SPECIAL_0_OPCODE_X1 = 30,
+  S2A_SPECIAL_0_OPCODE_Y0 = 2,
+  S2A_SPECIAL_0_OPCODE_Y1 = 2,
+  S3A_SPECIAL_0_OPCODE_X0 = 57,
+  S3A_SPECIAL_0_OPCODE_X1 = 31,
+  S3A_SPECIAL_5_OPCODE_Y0 = 1,
+  S3A_SPECIAL_5_OPCODE_Y1 = 1,
+  SADAB_U_SPECIAL_0_OPCODE_X0 = 58,
+  SADAH_SPECIAL_0_OPCODE_X0 = 59,
+  SADAH_U_SPECIAL_0_OPCODE_X0 = 60,
+  SADB_U_SPECIAL_0_OPCODE_X0 = 61,
+  SADH_SPECIAL_0_OPCODE_X0 = 62,
+  SADH_U_SPECIAL_0_OPCODE_X0 = 63,
+  SBADD_IMM_0_OPCODE_X1 = 28,
+  SB_OPCODE_Y2 = 5,
+  SB_SPECIAL_0_OPCODE_X1 = 32,
+  SEQB_SPECIAL_0_OPCODE_X0 = 64,
+  SEQB_SPECIAL_0_OPCODE_X1 = 33,
+  SEQH_SPECIAL_0_OPCODE_X0 = 65,
+  SEQH_SPECIAL_0_OPCODE_X1 = 34,
+  SEQIB_IMM_0_OPCODE_X0 = 9,
+  SEQIB_IMM_0_OPCODE_X1 = 12,
+  SEQIH_IMM_0_OPCODE_X0 = 10,
+  SEQIH_IMM_0_OPCODE_X1 = 13,
+  SEQI_IMM_0_OPCODE_X0 = 11,
+  SEQI_IMM_0_OPCODE_X1 = 14,
+  SEQI_OPCODE_Y0 = 12,
+  SEQI_OPCODE_Y1 = 10,
+  SEQ_SPECIAL_0_OPCODE_X0 = 66,
+  SEQ_SPECIAL_0_OPCODE_X1 = 35,
+  SEQ_SPECIAL_5_OPCODE_Y0 = 2,
+  SEQ_SPECIAL_5_OPCODE_Y1 = 2,
+  SHADD_IMM_0_OPCODE_X1 = 29,
+  SHL8II_IMM_0_OPCODE_SN = 3,
+  SHLB_SPECIAL_0_OPCODE_X0 = 67,
+  SHLB_SPECIAL_0_OPCODE_X1 = 36,
+  SHLH_SPECIAL_0_OPCODE_X0 = 68,
+  SHLH_SPECIAL_0_OPCODE_X1 = 37,
+  SHLIB_SHUN_0_OPCODE_X0 = 2,
+  SHLIB_SHUN_0_OPCODE_X1 = 2,
+  SHLIH_SHUN_0_OPCODE_X0 = 3,
+  SHLIH_SHUN_0_OPCODE_X1 = 3,
+  SHLI_SHUN_0_OPCODE_X0 = 4,
+  SHLI_SHUN_0_OPCODE_X1 = 4,
+  SHLI_SHUN_0_OPCODE_Y0 = 2,
+  SHLI_SHUN_0_OPCODE_Y1 = 2,
+  SHL_SPECIAL_0_OPCODE_X0 = 69,
+  SHL_SPECIAL_0_OPCODE_X1 = 38,
+  SHL_SPECIAL_3_OPCODE_Y0 = 1,
+  SHL_SPECIAL_3_OPCODE_Y1 = 1,
+  SHR1_RR_IMM_0_OPCODE_SN = 9,
+  SHRB_SPECIAL_0_OPCODE_X0 = 70,
+  SHRB_SPECIAL_0_OPCODE_X1 = 39,
+  SHRH_SPECIAL_0_OPCODE_X0 = 71,
+  SHRH_SPECIAL_0_OPCODE_X1 = 40,
+  SHRIB_SHUN_0_OPCODE_X0 = 5,
+  SHRIB_SHUN_0_OPCODE_X1 = 5,
+  SHRIH_SHUN_0_OPCODE_X0 = 6,
+  SHRIH_SHUN_0_OPCODE_X1 = 6,
+  SHRI_SHUN_0_OPCODE_X0 = 7,
+  SHRI_SHUN_0_OPCODE_X1 = 7,
+  SHRI_SHUN_0_OPCODE_Y0 = 3,
+  SHRI_SHUN_0_OPCODE_Y1 = 3,
+  SHR_SPECIAL_0_OPCODE_X0 = 72,
+  SHR_SPECIAL_0_OPCODE_X1 = 41,
+  SHR_SPECIAL_3_OPCODE_Y0 = 2,
+  SHR_SPECIAL_3_OPCODE_Y1 = 2,
+  SHUN_0_OPCODE_X0 = 7,
+  SHUN_0_OPCODE_X1 = 8,
+  SHUN_0_OPCODE_Y0 = 13,
+  SHUN_0_OPCODE_Y1 = 11,
+  SH_OPCODE_Y2 = 6,
+  SH_SPECIAL_0_OPCODE_X1 = 42,
+  SLTB_SPECIAL_0_OPCODE_X0 = 73,
+  SLTB_SPECIAL_0_OPCODE_X1 = 43,
+  SLTB_U_SPECIAL_0_OPCODE_X0 = 74,
+  SLTB_U_SPECIAL_0_OPCODE_X1 = 44,
+  SLTEB_SPECIAL_0_OPCODE_X0 = 75,
+  SLTEB_SPECIAL_0_OPCODE_X1 = 45,
+  SLTEB_U_SPECIAL_0_OPCODE_X0 = 76,
+  SLTEB_U_SPECIAL_0_OPCODE_X1 = 46,
+  SLTEH_SPECIAL_0_OPCODE_X0 = 77,
+  SLTEH_SPECIAL_0_OPCODE_X1 = 47,
+  SLTEH_U_SPECIAL_0_OPCODE_X0 = 78,
+  SLTEH_U_SPECIAL_0_OPCODE_X1 = 48,
+  SLTE_SPECIAL_0_OPCODE_X0 = 79,
+  SLTE_SPECIAL_0_OPCODE_X1 = 49,
+  SLTE_SPECIAL_4_OPCODE_Y0 = 0,
+  SLTE_SPECIAL_4_OPCODE_Y1 = 0,
+  SLTE_U_SPECIAL_0_OPCODE_X0 = 80,
+  SLTE_U_SPECIAL_0_OPCODE_X1 = 50,
+  SLTE_U_SPECIAL_4_OPCODE_Y0 = 1,
+  SLTE_U_SPECIAL_4_OPCODE_Y1 = 1,
+  SLTH_SPECIAL_0_OPCODE_X0 = 81,
+  SLTH_SPECIAL_0_OPCODE_X1 = 51,
+  SLTH_U_SPECIAL_0_OPCODE_X0 = 82,
+  SLTH_U_SPECIAL_0_OPCODE_X1 = 52,
+  SLTIB_IMM_0_OPCODE_X0 = 12,
+  SLTIB_IMM_0_OPCODE_X1 = 15,
+  SLTIB_U_IMM_0_OPCODE_X0 = 13,
+  SLTIB_U_IMM_0_OPCODE_X1 = 16,
+  SLTIH_IMM_0_OPCODE_X0 = 14,
+  SLTIH_IMM_0_OPCODE_X1 = 17,
+  SLTIH_U_IMM_0_OPCODE_X0 = 15,
+  SLTIH_U_IMM_0_OPCODE_X1 = 18,
+  SLTI_IMM_0_OPCODE_X0 = 16,
+  SLTI_IMM_0_OPCODE_X1 = 19,
+  SLTI_OPCODE_Y0 = 14,
+  SLTI_OPCODE_Y1 = 12,
+  SLTI_U_IMM_0_OPCODE_X0 = 17,
+  SLTI_U_IMM_0_OPCODE_X1 = 20,
+  SLTI_U_OPCODE_Y0 = 15,
+  SLTI_U_OPCODE_Y1 = 13,
+  SLT_SPECIAL_0_OPCODE_X0 = 83,
+  SLT_SPECIAL_0_OPCODE_X1 = 53,
+  SLT_SPECIAL_4_OPCODE_Y0 = 2,
+  SLT_SPECIAL_4_OPCODE_Y1 = 2,
+  SLT_U_SPECIAL_0_OPCODE_X0 = 84,
+  SLT_U_SPECIAL_0_OPCODE_X1 = 54,
+  SLT_U_SPECIAL_4_OPCODE_Y0 = 3,
+  SLT_U_SPECIAL_4_OPCODE_Y1 = 3,
+  SNEB_SPECIAL_0_OPCODE_X0 = 85,
+  SNEB_SPECIAL_0_OPCODE_X1 = 55,
+  SNEH_SPECIAL_0_OPCODE_X0 = 86,
+  SNEH_SPECIAL_0_OPCODE_X1 = 56,
+  SNE_SPECIAL_0_OPCODE_X0 = 87,
+  SNE_SPECIAL_0_OPCODE_X1 = 57,
+  SNE_SPECIAL_5_OPCODE_Y0 = 3,
+  SNE_SPECIAL_5_OPCODE_Y1 = 3,
+  SPECIAL_0_OPCODE_X0 = 0,
+  SPECIAL_0_OPCODE_X1 = 1,
+  SPECIAL_0_OPCODE_Y0 = 1,
+  SPECIAL_0_OPCODE_Y1 = 1,
+  SPECIAL_1_OPCODE_Y0 = 2,
+  SPECIAL_1_OPCODE_Y1 = 2,
+  SPECIAL_2_OPCODE_Y0 = 3,
+  SPECIAL_2_OPCODE_Y1 = 3,
+  SPECIAL_3_OPCODE_Y0 = 4,
+  SPECIAL_3_OPCODE_Y1 = 4,
+  SPECIAL_4_OPCODE_Y0 = 5,
+  SPECIAL_4_OPCODE_Y1 = 5,
+  SPECIAL_5_OPCODE_Y0 = 6,
+  SPECIAL_5_OPCODE_Y1 = 6,
+  SPECIAL_6_OPCODE_Y0 = 7,
+  SPECIAL_7_OPCODE_Y0 = 8,
+  SRAB_SPECIAL_0_OPCODE_X0 = 88,
+  SRAB_SPECIAL_0_OPCODE_X1 = 58,
+  SRAH_SPECIAL_0_OPCODE_X0 = 89,
+  SRAH_SPECIAL_0_OPCODE_X1 = 59,
+  SRAIB_SHUN_0_OPCODE_X0 = 8,
+  SRAIB_SHUN_0_OPCODE_X1 = 8,
+  SRAIH_SHUN_0_OPCODE_X0 = 9,
+  SRAIH_SHUN_0_OPCODE_X1 = 9,
+  SRAI_SHUN_0_OPCODE_X0 = 10,
+  SRAI_SHUN_0_OPCODE_X1 = 10,
+  SRAI_SHUN_0_OPCODE_Y0 = 4,
+  SRAI_SHUN_0_OPCODE_Y1 = 4,
+  SRA_SPECIAL_0_OPCODE_X0 = 90,
+  SRA_SPECIAL_0_OPCODE_X1 = 60,
+  SRA_SPECIAL_3_OPCODE_Y0 = 3,
+  SRA_SPECIAL_3_OPCODE_Y1 = 3,
+  SUBBS_U_SPECIAL_0_OPCODE_X0 = 100,
+  SUBBS_U_SPECIAL_0_OPCODE_X1 = 70,
+  SUBB_SPECIAL_0_OPCODE_X0 = 91,
+  SUBB_SPECIAL_0_OPCODE_X1 = 61,
+  SUBHS_SPECIAL_0_OPCODE_X0 = 101,
+  SUBHS_SPECIAL_0_OPCODE_X1 = 71,
+  SUBH_SPECIAL_0_OPCODE_X0 = 92,
+  SUBH_SPECIAL_0_OPCODE_X1 = 62,
+  SUBS_SPECIAL_0_OPCODE_X0 = 97,
+  SUBS_SPECIAL_0_OPCODE_X1 = 67,
+  SUB_SPECIAL_0_OPCODE_X0 = 93,
+  SUB_SPECIAL_0_OPCODE_X1 = 63,
+  SUB_SPECIAL_0_OPCODE_Y0 = 3,
+  SUB_SPECIAL_0_OPCODE_Y1 = 3,
+  SWADD_IMM_0_OPCODE_X1 = 30,
+  SWINT0_UN_0_SHUN_0_OPCODE_X1 = 18,
+  SWINT1_UN_0_SHUN_0_OPCODE_X1 = 19,
+  SWINT2_UN_0_SHUN_0_OPCODE_X1 = 20,
+  SWINT3_UN_0_SHUN_0_OPCODE_X1 = 21,
+  SW_OPCODE_Y2 = 7,
+  SW_SPECIAL_0_OPCODE_X1 = 64,
+  TBLIDXB0_UN_0_SHUN_0_OPCODE_X0 = 8,
+  TBLIDXB0_UN_0_SHUN_0_OPCODE_Y0 = 8,
+  TBLIDXB1_UN_0_SHUN_0_OPCODE_X0 = 9,
+  TBLIDXB1_UN_0_SHUN_0_OPCODE_Y0 = 9,
+  TBLIDXB2_UN_0_SHUN_0_OPCODE_X0 = 10,
+  TBLIDXB2_UN_0_SHUN_0_OPCODE_Y0 = 10,
+  TBLIDXB3_UN_0_SHUN_0_OPCODE_X0 = 11,
+  TBLIDXB3_UN_0_SHUN_0_OPCODE_Y0 = 11,
+  TNS_UN_0_SHUN_0_OPCODE_X1 = 22,
+  UN_0_SHUN_0_OPCODE_X0 = 11,
+  UN_0_SHUN_0_OPCODE_X1 = 11,
+  UN_0_SHUN_0_OPCODE_Y0 = 5,
+  UN_0_SHUN_0_OPCODE_Y1 = 5,
+  WH64_UN_0_SHUN_0_OPCODE_X1 = 23,
+  XORI_IMM_0_OPCODE_X0 = 2,
+  XORI_IMM_0_OPCODE_X1 = 21,
+  XOR_SPECIAL_0_OPCODE_X0 = 94,
+  XOR_SPECIAL_0_OPCODE_X1 = 65,
+  XOR_SPECIAL_2_OPCODE_Y0 = 3,
+  XOR_SPECIAL_2_OPCODE_Y1 = 3
+};
+
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ARCH_OPCODE_H__ */
diff --git a/arch/tile/include/arch/sim.h b/arch/tile/include/arch/sim.h
new file mode 100644
index 00000000..e54b7b05
--- /dev/null
+++ b/arch/tile/include/arch/sim.h
@@ -0,0 +1,643 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file
+ *
+ * Provides an API for controlling the simulator at runtime.
+ */
+
+/**
+ * @addtogroup arch_sim
+ * @{
+ *
+ * An API for controlling the simulator at runtime.
+ *
+ * The simulator's behavior can be modified while it is running.
+ * For example, human-readable trace output can be enabled and disabled
+ * around code of interest.
+ *
+ * There are two ways to modify simulator behavior:
+ * programmatically, by calling various sim_* functions, and
+ * interactively, by entering commands like "sim set functional true"
+ * at the tile-monitor prompt.  Typing "sim help" at that prompt provides
+ * a list of interactive commands.
+ *
+ * All interactive commands can also be executed programmatically by
+ * passing a string to the sim_command function.
+ */
+
+#ifndef __ARCH_SIM_H__
+#define __ARCH_SIM_H__
+
+#include <arch/sim_def.h>
+#include <arch/abi.h>
+
+#ifndef __ASSEMBLER__
+
+#include <arch/spr_def.h>
+
+
+/**
+ * Return nonzero if the current program is running under a simulator
+ * (SPR_SIM_CONTROL reads back nonzero) rather than on real hardware.
+ * On real hardware, other "sim_xxx()" calls have no useful effect.
+ */
+static __inline int
+sim_is_simulator(void)
+{
+  return __insn_mfspr(SPR_SIM_CONTROL) != 0;
+}
+
+
+/**
+ * Checkpoint the simulator state to a checkpoint file, by writing
+ * SIM_CONTROL_CHECKPOINT to SPR_SIM_CONTROL.
+ * The checkpoint file name is either the default or the name specified
+ * on the command line with "--checkpoint-file".
+ */
+static __inline void
+sim_checkpoint(void)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_CHECKPOINT);
+}
+
+
+/**
+ * Report whether or not various kinds of simulator tracing are enabled.
+ *
+ * @return The bitwise OR (masked by SIM_TRACE_FLAG_MASK) of these values:
+ *
+ * SIM_TRACE_CYCLES (--trace-cycles),
+ * SIM_TRACE_ROUTER (--trace-router),
+ * SIM_TRACE_REGISTER_WRITES (--trace-register-writes),
+ * SIM_TRACE_DISASM (--trace-disasm),
+ * SIM_TRACE_STALL_INFO (--trace-stall-info),
+ * SIM_TRACE_MEMORY_CONTROLLER (--trace-memory-controller),
+ * SIM_TRACE_L2_CACHE (--trace-l2),
+ * SIM_TRACE_LINES (--trace-lines)
+ */
+static __inline unsigned int
+sim_get_tracing(void)
+{
+  return __insn_mfspr(SPR_SIM_CONTROL) & SIM_TRACE_FLAG_MASK;
+}
+
+
+/**
+ * Turn on or off different kinds of simulator tracing.
+ * (The mask is sent as SIM_TRACE_SPR_ARG(mask) via SPR_SIM_CONTROL.)
+ * @param mask Either one of these special values:
+ *
+ * SIM_TRACE_NONE (turns off tracing),
+ * SIM_TRACE_ALL (turns on all possible tracing),
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_TRACE_CYCLES (--trace-cycles),
+ * SIM_TRACE_ROUTER (--trace-router),
+ * SIM_TRACE_REGISTER_WRITES (--trace-register-writes),
+ * SIM_TRACE_DISASM (--trace-disasm),
+ * SIM_TRACE_STALL_INFO (--trace-stall-info),
+ * SIM_TRACE_MEMORY_CONTROLLER (--trace-memory-controller),
+ * SIM_TRACE_L2_CACHE (--trace-l2),
+ * SIM_TRACE_LINES (--trace-lines)
+ */
+static __inline void
+sim_set_tracing(unsigned int mask)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_TRACE_SPR_ARG(mask));
+}
+
+
+/**
+ * Request dumping of different kinds of simulator state.
+ * (The mask is sent as SIM_DUMP_SPR_ARG(mask) via SPR_SIM_CONTROL.)
+ * @param mask Either this special value:
+ *
+ * SIM_DUMP_ALL (dump all known state),
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_DUMP_REGS (the register file),
+ * SIM_DUMP_SPRS (the SPRs),
+ * SIM_DUMP_ITLB (the iTLB),
+ * SIM_DUMP_DTLB (the dTLB),
+ * SIM_DUMP_L1I (the L1 I-cache),
+ * SIM_DUMP_L1D (the L1 D-cache),
+ * SIM_DUMP_L2 (the L2 cache),
+ * SIM_DUMP_SNREGS (the switch register file),
+ * SIM_DUMP_SNITLB (the switch iTLB),
+ * SIM_DUMP_SNL1I (the switch L1 I-cache),
+ * SIM_DUMP_BACKTRACE (the current backtrace)
+ */
+static __inline void
+sim_dump(unsigned int mask)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_DUMP_SPR_ARG(mask));
+}
+
+
+/**
+ * Print a string to the simulator stdout.  Bytes are sent as unsigned
+ * char so a negative plain char is never sign-extended or left-shifted.
+ * @param str The string to be written.
+ */
+static __inline void
+sim_print(const char* str)
+{
+  for ( ; *str != '\0'; str++)
+  {
+    __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+                 ((unsigned char)*str << _SIM_CONTROL_OPERATOR_BITS));
+  }
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+               (SIM_PUTC_FLUSH_BINARY << _SIM_CONTROL_OPERATOR_BITS));
+}
+
+
+/**
+ * Print a string to the simulator stdout.  Bytes are sent as unsigned
+ * char so a negative plain char is never sign-extended or left-shifted.
+ * @param str The string to be written (a newline is automatically added).
+ */
+static __inline void
+sim_print_string(const char* str)
+{
+  for ( ; *str != '\0'; str++)
+  {
+    __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+                 ((unsigned char)*str << _SIM_CONTROL_OPERATOR_BITS));
+  }
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+               (SIM_PUTC_FLUSH_STRING << _SIM_CONTROL_OPERATOR_BITS));
+}
+
+
+/**
+ * Execute a simulator command string.  The string is sent one byte at
+ * a time (as unsigned char), including the terminating NUL.
+ * Type 'sim help' at the tile-monitor prompt to learn what commands
+ * are available.  Note the use of the tile-monitor "sim" command to
+ * pass commands to the simulator.
+ *
+ * The argument to sim_command() does not include the leading "sim"
+ * prefix used at the tile-monitor prompt; for example, you might call
+ * sim_command("trace disasm").
+ */
+static __inline void
+sim_command(const char* str)
+{
+  int c;
+  do
+  {
+    c = (unsigned char)*str++;
+    __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_COMMAND |
+                 (c << _SIM_CONTROL_OPERATOR_BITS));
+  }
+  while (c);
+}
+
+
+
+#ifndef __DOXYGEN__
+
+/**
+ * The underlying implementations of "_sim_syscall()", one per argument
+ * count ("_sim_syscall##nr" picks among _sim_syscall0.._sim_syscall5).
+ * We use extra "and" instructions to ensure that all the values
+ * we are passing to the simulator are actually valid in the registers
+ * (i.e. returned from memory) prior to the write to the SIM_CONTROL SPR.
+ */
+static __inline long _sim_syscall0(int val)
+{
+  long result;  /* output uses the same "R00" constraint as val */
+  __asm__ __volatile__ ("mtspr SIM_CONTROL, r0"
+                        : "=R00" (result) : "R00" (val));
+  return result;
+}
+
+static __inline long _sim_syscall1(int val, long arg1)
+{  /* The "and" names r1 (presumably arg1, via "R01") in the mtspr bundle. */
+  long result;  /* output uses the same "R00" constraint as val */
+  __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }"
+                        : "=R00" (result) : "R00" (val), "R01" (arg1));
+  return result;
+}
+
+static __inline long _sim_syscall2(int val, long arg1, long arg2)
+{  /* One "and" names both r1 and r2 in the same bundle as the mtspr. */
+  long result;  /* output uses the same "R00" constraint as val */
+  __asm__ __volatile__ ("{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }"
+                        : "=R00" (result)
+                        : "R00" (val), "R01" (arg1), "R02" (arg2));
+  return result;
+}
+
+/* Note that _sim_syscall3() and higher name arguments 3 and up in a
+   separate, earlier bundle.  An interrupt taken right before the mtspr
+   bundle could therefore leave those register values still in flight
+   to the core from a stack frame reload. */
+
+static __inline long _sim_syscall3(int val, long arg1, long arg2, long arg3)
+{
+  long result;  /* output uses the same "R00" constraint as val */
+  __asm__ __volatile__ ("{ and zero, r3, r3 };"
+                        "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }"
+                        : "=R00" (result)
+                        : "R00" (val), "R01" (arg1), "R02" (arg2),
+                          "R03" (arg3));
+  return result;
+}
+
+static __inline long _sim_syscall4(int val, long arg1, long arg2, long arg3,
+                                  long arg4)
+{  /* First bundle names r3 and r4; second names r1, r2 and does the mtspr. */
+  long result;  /* output uses the same "R00" constraint as val */
+  __asm__ __volatile__ ("{ and zero, r3, r4 };"
+                        "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }"
+                        : "=R00" (result)
+                        : "R00" (val), "R01" (arg1), "R02" (arg2),
+                          "R03" (arg3), "R04" (arg4));
+  return result;
+}
+
+static __inline long _sim_syscall5(int val, long arg1, long arg2, long arg3,
+                                  long arg4, long arg5)
+{  /* First bundle names r3, r4 and r5; second names r1, r2 and does mtspr. */
+  long result;  /* output uses the same "R00" constraint as val */
+  __asm__ __volatile__ ("{ and zero, r3, r4; and zero, r5, r5 };"
+                        "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }"
+                        : "=R00" (result)
+                        : "R00" (val), "R01" (arg1), "R02" (arg2),
+                          "R03" (arg3), "R04" (arg4), "R05" (arg5));
+  return result;
+}
+
+/**
+ * Make a special syscall to the simulator itself, if running under
+ * simulation. This is used as the implementation of other functions
+ * and should not be used outside this file.
+ *
+ * @param syscall_num The simulator syscall number.
+ * @param nr The number of additional arguments provided; it is pasted
+ *   onto "_sim_syscall", so it must be a literal digit from 0 to 5.
+ * @return Varies by syscall.
+ */
+#define _sim_syscall(syscall_num, nr, args...) \
+  _sim_syscall##nr( \
+    ((syscall_num) << _SIM_CONTROL_OPERATOR_BITS) | SIM_CONTROL_SYSCALL, \
+    ##args)
+
+
+/* Values for the "access_mask" parameters below (each a distinct bit). */
+#define SIM_WATCHPOINT_READ    1
+#define SIM_WATCHPOINT_WRITE   2
+#define SIM_WATCHPOINT_EXECUTE 4
+
+/** Pass all five arguments to SIM_SYSCALL_ADD_WATCHPOINT; return its result. */
+static __inline int
+sim_add_watchpoint(unsigned int process_id,
+                   unsigned long address,
+                   unsigned long size,
+                   unsigned int access_mask,
+                   unsigned long user_data)
+{
+  return _sim_syscall(SIM_SYSCALL_ADD_WATCHPOINT, 5, process_id,
+                     address, size, access_mask, user_data);
+}
+
+/** Pass all five arguments to SIM_SYSCALL_REMOVE_WATCHPOINT; return its result. */
+static __inline int
+sim_remove_watchpoint(unsigned int process_id,
+                      unsigned long address,
+                      unsigned long size,
+                      unsigned int access_mask,
+                      unsigned long user_data)
+{
+  return _sim_syscall(SIM_SYSCALL_REMOVE_WATCHPOINT, 5, process_id,
+                     address, size, access_mask, user_data);
+}
+
+
+/**
+ * Return value from sim_query_watchpoint(), one field per asm output.
+ */
+struct SimQueryWatchpointStatus
+{
+  /**
+   * 0 if a watchpoint fired, 1 if no watchpoint fired, or -1 for
+   * error (meaning a bad process_id).
+   */
+  int syscall_status;
+
+  /**
+   * The address of the watchpoint that fired (this is the address
+   * passed to sim_add_watchpoint, not an address within that range
+   * that actually triggered the watchpoint).
+   */
+  unsigned long address;
+
+  /** The arbitrary user_data installed by sim_add_watchpoint. */
+  unsigned long user_data;
+};
+
+/** Query watchpoint status for process_id (SIM_SYSCALL_QUERY_WATCHPOINT). */
+static __inline struct SimQueryWatchpointStatus
+sim_query_watchpoint(unsigned int process_id)
+{
+  struct SimQueryWatchpointStatus status;  /* filled by the three outputs */
+  long val = SIM_CONTROL_SYSCALL |
+    (SIM_SYSCALL_QUERY_WATCHPOINT << _SIM_CONTROL_OPERATOR_BITS);
+  __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }"
+                        : "=R00" (status.syscall_status),
+                          "=R01" (status.address),
+                          "=R02" (status.user_data)
+                        : "R00" (val), "R01" (process_id));
+  return status;
+}
+
+
+/* On the simulator, confirm lines have been evicted everywhere. */
+static __inline void
+sim_validate_lines_evicted(unsigned long long pa, unsigned long length)
+{
+#ifdef __LP64__  /* pa is passed as a single argument */
+  _sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED, 2, pa, length);
+#else  /* pa is passed as (long)pa then pa >> 32, after a dummy argument */
+  _sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED, 4,
+               0 /* dummy */, (long)(pa), (long)(pa >> 32), length);
+#endif  /* __LP64__ */
+}
+
+
+/* Return the current CPU speed in cycles per second (a simulator syscall). */
+static __inline long
+sim_query_cpu_speed(void)
+{
+  return _sim_syscall(SIM_SYSCALL_QUERY_CPU_SPEED, 0);
+}
+
+#endif /* !__DOXYGEN__ */
+
+
+
+
+/**
+ * Change the shaping parameters of one shim.
+ *
+ * @param shim Which shim to modify; one of:
+ *   SIM_CONTROL_SHAPING_GBE_0
+ *   SIM_CONTROL_SHAPING_GBE_1
+ *   SIM_CONTROL_SHAPING_GBE_2
+ *   SIM_CONTROL_SHAPING_GBE_3
+ *   SIM_CONTROL_SHAPING_XGBE_0
+ *   SIM_CONTROL_SHAPING_XGBE_1
+ *
+ * @param type The kind of shaping, which should match the kind already
+ * in place on the shim; one of:
+ *   SIM_CONTROL_SHAPING_MULTIPLIER
+ *   SIM_CONTROL_SHAPING_PPS
+ *   SIM_CONTROL_SHAPING_BPS
+ *
+ * @param units The magnitude of the rate; one of:
+ *   SIM_CONTROL_SHAPING_UNITS_SINGLE
+ *   SIM_CONTROL_SHAPING_UNITS_KILO
+ *   SIM_CONTROL_SHAPING_UNITS_MEGA
+ *   SIM_CONTROL_SHAPING_UNITS_GIGA
+ *
+ * @param rate The new rate.  It must fit in SIM_CONTROL_SHAPING_RATE_BITS
+ * bits; otherwise nothing is written to the simulator and the shaping
+ * is left unchanged.
+ *
+ * @return 0 if the request was written to SPR_SIM_CONTROL, or 1 if rate
+ * does not fit in SIM_CONTROL_SHAPING_RATE_BITS bits.
+ */
+static __inline int
+sim_set_shaping(unsigned shim,
+                unsigned type,
+                unsigned units,
+                unsigned rate)
+{
+  /* Any bit at or above SIM_CONTROL_SHAPING_RATE_BITS means "too big". */
+  if ((rate >> SIM_CONTROL_SHAPING_RATE_BITS) != 0)
+    return 1;
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_SHAPING_SPR_ARG(shim, type, units, rate));
+  return 0;
+}
+
+#ifdef __tilegx__
+
+/** Enable a set of mPIPE links.  Pass a -1 link_mask to enable all links. */
+static __inline void
+sim_enable_mpipe_links(unsigned mpipe, unsigned long link_mask)
+{  /* Layout: mpipe from bit 8, 1 (enable) at bit 16, link_mask from bit 32. */
+  __insn_mtspr(SPR_SIM_CONTROL,
+               (SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE |
+                (mpipe << 8) | (1 << 16) | ((uint_reg_t)link_mask << 32)));
+}
+
+/** Disable a set of mPIPE links.  Pass a -1 link_mask to disable all links. */
+static __inline void
+sim_disable_mpipe_links(unsigned mpipe, unsigned long link_mask)
+{  /* Layout: mpipe from bit 8, bit 16 left clear, link_mask from bit 32. */
+  __insn_mtspr(SPR_SIM_CONTROL,
+               (SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE |
+                (mpipe << 8) | (0 << 16) | ((uint_reg_t)link_mask << 32)));
+}
+
+#endif /* __tilegx__ */
+
+
+/*
+ * API for "functional" mode: each macro writes one SIM_CONTROL_* value.
+ */
+
+#ifndef __DOXYGEN__
+
+#define sim_enable_functional() \
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_ENABLE_FUNCTIONAL)
+
+#define sim_disable_functional() \
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_DISABLE_FUNCTIONAL)
+
+#endif /* !__DOXYGEN__ */
+
+
+/*
+ * Profiler support.
+ */
+
+/**
+ * Turn profiling on for the current task, by writing
+ * SIM_CONTROL_PROFILER_ENABLE to SPR_SIM_CONTROL.
+ * Note that this has no effect if run in an environment without
+ * profiling support (thus, the proper flags to the simulator must
+ * be supplied).
+ */
+static __inline void
+sim_profiler_enable(void)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_ENABLE);
+}
+
+
+/** Turn profiling off for the current task (SIM_CONTROL_PROFILER_DISABLE). */
+static __inline void
+sim_profiler_disable(void)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_DISABLE);
+}
+
+
+/**
+ * Switch profiling for the current task on or off.
+ *
+ * @param enabled Nonzero to start profiling, zero to stop it.
+ *
+ * Has no effect in an environment without profiling support, so the
+ * simulator must have been started with the proper flags.
+ */
+static __inline void
+sim_profiler_set_enabled(int enabled)
+{
+  if (enabled)
+    __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_ENABLE);
+  else
+    __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_DISABLE);
+}
+
+
+/**
+ * Return true if and only if profiling is currently enabled for the
+ * current task (SIM_PROFILER_ENABLED_MASK is set in SPR_SIM_CONTROL).
+ *
+ * This returns false, even if sim_profiler_enable() was called,
+ * if the current execution environment does not support profiling.
+ */
+static __inline int
+sim_profiler_is_enabled(void)
+{
+  return ((__insn_mfspr(SPR_SIM_CONTROL) & SIM_PROFILER_ENABLED_MASK) != 0);
+}
+
+
+/**
+ * Reset profiling counters to zero for the current task, by writing
+ * SIM_CONTROL_PROFILER_CLEAR to SPR_SIM_CONTROL.
+ * Resetting can be done while profiling is enabled.  It does not affect
+ * the chip-wide profiling counters.
+ */
+static __inline void
+sim_profiler_clear(void)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_CLEAR);
+}
+
+
+/**
+ * Enable specified chip-level profiling counters.
+ *
+ * Does not affect the per-task profiling counters.
+ *
+ * @param mask Either this special value:
+ *
+ * SIM_CHIP_ALL (enables all chip-level components),
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_CHIP_MEMCTL (enable all memory controllers),
+ * SIM_CHIP_XAUI (enable all XAUI controllers),
+ * SIM_CHIP_MPIPE (enable all MPIPE controllers)
+ */
+static __inline void
+sim_profiler_chip_enable(unsigned int mask)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_ENABLE_SPR_ARG(mask));
+}
+
+
+/**
+ * Disable specified chip-level profiling counters.
+ *
+ * Does not affect the per-task profiling counters.
+ *
+ * @param mask Either this special value:
+ *
+ * SIM_CHIP_ALL (disables all chip-level components),
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_CHIP_MEMCTL (disable all memory controllers),
+ * SIM_CHIP_XAUI (disable all XAUI controllers),
+ * SIM_CHIP_MPIPE (disable all MPIPE controllers)
+ */
+static __inline void
+sim_profiler_chip_disable(unsigned int mask)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_DISABLE_SPR_ARG(mask));
+}
+
+
+/**
+ * Reset specified chip-level profiling counters to zero.
+ *
+ * Does not affect the per-task profiling counters.
+ *
+ * @param mask Either this special value:
+ *
+ * SIM_CHIP_ALL (clears all chip-level components),
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_CHIP_MEMCTL (clear all memory controllers),
+ * SIM_CHIP_XAUI (clear all XAUI controllers),
+ * SIM_CHIP_MPIPE (clear all MPIPE controllers)
+ */
+static __inline void
+sim_profiler_chip_clear(unsigned int mask)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_CLEAR_SPR_ARG(mask));
+}
+
+
+/*
+ * Event support.
+ */
+
+#ifndef __DOXYGEN__
+
+static __inline void
+sim_event_begin(unsigned int x)
+{  /* Writes x to SPR_EVENT_BEGIN; an empty body if the #if below fails. */
+#if defined(__tile__) && !defined(__NO_EVENT_SPR__)
+  __insn_mtspr(SPR_EVENT_BEGIN, x);
+#endif
+}
+
+static __inline void
+sim_event_end(unsigned int x)
+{  /* Writes x to SPR_EVENT_END; an empty body if the #if below fails. */
+#if defined(__tile__) && !defined(__NO_EVENT_SPR__)
+  __insn_mtspr(SPR_EVENT_END, x);
+#endif
+}
+
+#endif /* !__DOXYGEN__ */
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* !__ARCH_SIM_H__ */
+
+/** @} */
diff --git a/arch/tile/include/arch/sim_def.h b/arch/tile/include/arch/sim_def.h
new file mode 100644
index 00000000..4b44a2b6
--- /dev/null
+++ b/arch/tile/include/arch/sim_def.h
@@ -0,0 +1,505 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file
+ *
+ * Some low-level simulator definitions.
+ */
+
+#ifndef __ARCH_SIM_DEF_H__
+#define __ARCH_SIM_DEF_H__
+
+
+/**
+ * Internal: the low bits of the SIM_CONTROL_* SPR values specify
+ * the operation to perform, and the remaining bits are
+ * an operation-specific parameter (often unused).
+ */
+#define _SIM_CONTROL_OPERATOR_BITS 8
+
+
+/*
+ * Values which can be written to SPR_SIM_CONTROL.
+ */
+
+/** If written to SPR_SIM_CONTROL, stops profiling. */
+#define SIM_CONTROL_PROFILER_DISABLE 0
+
+/** If written to SPR_SIM_CONTROL, starts profiling. */
+#define SIM_CONTROL_PROFILER_ENABLE 1
+
+/** If written to SPR_SIM_CONTROL, clears profiling counters. */
+#define SIM_CONTROL_PROFILER_CLEAR 2
+
+/** If written to SPR_SIM_CONTROL, checkpoints the simulator. */
+#define SIM_CONTROL_CHECKPOINT 3
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8),
+ * sets the tracing mask to the given mask. See "sim_set_tracing()".
+ */
+#define SIM_CONTROL_SET_TRACING 4
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8),
+ * dumps the requested items of machine state to the log.
+ */
+#define SIM_CONTROL_DUMP 5
+
+/** If written to SPR_SIM_CONTROL, clears chip-level profiling counters. */
+#define SIM_CONTROL_PROFILER_CHIP_CLEAR 6
+
+/** If written to SPR_SIM_CONTROL, disables chip-level profiling. */
+#define SIM_CONTROL_PROFILER_CHIP_DISABLE 7
+
+/** If written to SPR_SIM_CONTROL, enables chip-level profiling. */
+#define SIM_CONTROL_PROFILER_CHIP_ENABLE 8
+
+/** If written to SPR_SIM_CONTROL, enables chip-level functional mode. */
+#define SIM_CONTROL_ENABLE_FUNCTIONAL 9
+
+/** If written to SPR_SIM_CONTROL, disables chip-level functional mode. */
+#define SIM_CONTROL_DISABLE_FUNCTIONAL 10
+
+/**
+ * If written to SPR_SIM_CONTROL, enables chip-level functional mode.
+ * All tiles must perform this write for functional mode to be enabled.
+ * Ignored in naked boot mode unless --functional is specified.
+ * WARNING: Only the hypervisor startup code should use this!
+ */
+#define SIM_CONTROL_ENABLE_FUNCTIONAL_BARRIER 11
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * writes a string directly to the simulator output.  Written to once for
+ * each character in the string, plus a final NUL.  Instead of NUL,
+ * you can also use "SIM_PUTC_FLUSH_STRING" or "SIM_PUTC_FLUSH_BINARY".
+ */
+/* ISSUE: Document the meaning of "newline", and the handling of NUL. */
+#define SIM_CONTROL_PUTC 12
+
+/**
+ * If written to SPR_SIM_CONTROL, clears the --grind-coherence state for
+ * this core.  This is intended to be used before a loop that will
+ * invalidate the cache by loading new data and evicting all current data.
+ * Generally speaking, this API should only be used by system code.
+ */
+#define SIM_CONTROL_GRINDER_CLEAR 13
+
+/** If written to SPR_SIM_CONTROL, shuts down the simulator. */
+#define SIM_CONTROL_SHUTDOWN 14
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+ * indicates that a fork syscall just created the given process.
+ */
+#define SIM_CONTROL_OS_FORK 15
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+ * indicates that an exit syscall was just executed by the given process.
+ */
+#define SIM_CONTROL_OS_EXIT 16
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+ * indicates that the OS just switched to the given process.
+ */
+#define SIM_CONTROL_OS_SWITCH 17
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * indicates that an exec syscall was just executed. Written to once for
+ * each character in the executable name, plus a final NUL.
+ */
+#define SIM_CONTROL_OS_EXEC 18
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * indicates that an interpreter (PT_INTERP) was loaded.  Written to once
+ * for each character in "ADDR:PATH", plus a final NUL, where "ADDR" is a
+ * hex load address starting with "0x", and "PATH" is the executable name.
+ */
+#define SIM_CONTROL_OS_INTERP 19
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * indicates that a dll was loaded.  Written to once for each character
+ * in "ADDR:PATH", plus a final NUL, where "ADDR" is a hexadecimal load
+ * address starting with "0x", and "PATH" is the executable name.
+ */
+#define SIM_CONTROL_DLOPEN 20
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * indicates that a dll was unloaded.  Written to once for each character
+ * in "ADDR", plus a final NUL, where "ADDR" is a hexadecimal load
+ * address starting with "0x".
+ */
+#define SIM_CONTROL_DLCLOSE 21
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a flag (shifted by 8),
+ * indicates whether to allow data reads to remotely-cached
+ * dirty cache lines to be cached locally without grinder warnings or
+ * assertions (used by Linux kernel fast memcpy).
+ */
+#define SIM_CONTROL_ALLOW_MULTIPLE_CACHING 22
+
+/** If written to SPR_SIM_CONTROL, enables memory tracing. */
+#define SIM_CONTROL_ENABLE_MEM_LOGGING 23
+
+/** If written to SPR_SIM_CONTROL, disables memory tracing. */
+#define SIM_CONTROL_DISABLE_MEM_LOGGING 24
+
+/**
+ * If written to SPR_SIM_CONTROL, changes the shaping parameters of one of
+ * the gbe or xgbe shims. Must specify the shim id, the type, the units, and
+ * the rate, as defined in SIM_SHAPING_SPR_ARG.
+ */
+#define SIM_CONTROL_SHAPING 25
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with character (shifted by 8),
+ * requests that a simulator command be executed.  Written to once for each
+ * character in the command, plus a final NUL.
+ */
+#define SIM_CONTROL_COMMAND 26
+
+/**
+ * If written to SPR_SIM_CONTROL, indicates that the simulated system
+ * is panicking, to allow debugging via --debug-on-panic.
+ */
+#define SIM_CONTROL_PANIC 27
+
+/**
+ * If written to SPR_SIM_CONTROL, triggers a simulator syscall.
+ * See "sim_syscall()" for more info.
+ */
+#define SIM_CONTROL_SYSCALL 32
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+ * provides the pid that subsequent SIM_CONTROL_OS_FORK writes should
+ * use as the pid, rather than the default previous SIM_CONTROL_OS_SWITCH.
+ */
+#define SIM_CONTROL_OS_FORK_PARENT 33
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
+ * (shifted by 8), clears the pending magic data section.  The cleared
+ * pending magic data section and any subsequently appended magic bytes
+ * will only take effect when the classifier blast programmer is run.
+ */
+#define SIM_CONTROL_CLEAR_MPIPE_MAGIC_BYTES 34
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
+ * (shifted by 8) and a byte of data (shifted by 16), appends that byte
+ * to the shim's pending magic data section.  The pending magic data
+ * section takes effect when the classifier blast programmer is run.
+ */
+#define SIM_CONTROL_APPEND_MPIPE_MAGIC_BYTE 35
+
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
+ * (shifted by 8), an enable=1/disable=0 bit (shifted by 16), and a
+ * mask of links (shifted by 32), enable or disable the corresponding
+ * mPIPE links.
+ */
+#define SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE 36
+
+
+/*
+ * Syscall numbers for use with "sim_syscall()".
+ */
+
+/** Syscall number for sim_add_watchpoint(). */
+#define SIM_SYSCALL_ADD_WATCHPOINT 2
+
+/** Syscall number for sim_remove_watchpoint(). */
+#define SIM_SYSCALL_REMOVE_WATCHPOINT 3
+
+/** Syscall number for sim_query_watchpoint(). */
+#define SIM_SYSCALL_QUERY_WATCHPOINT 4
+
+/**
+ * Syscall number that asserts that the cache lines whose 64-bit PA
+ * is passed as the second argument to sim_syscall(), and over a
+ * range passed as the third argument, are no longer in cache.
+ * The simulator raises an error if this is not the case.
+ */
+#define SIM_SYSCALL_VALIDATE_LINES_EVICTED 5
+
+/** Syscall number for sim_query_cpu_speed(). */
+#define SIM_SYSCALL_QUERY_CPU_SPEED 6
+
+
+/*
+ * Bit masks which can be shifted by 8, combined with
+ * SIM_CONTROL_SET_TRACING, and written to SPR_SIM_CONTROL.
+ */
+
+/**
+ * @addtogroup arch_sim
+ * @{
+ */
+
+/** Enable --trace-cycle when passed to sim_set_tracing(). */
+#define SIM_TRACE_CYCLES          0x01
+
+/** Enable --trace-router when passed to sim_set_tracing(). */
+#define SIM_TRACE_ROUTER          0x02
+
+/** Enable --trace-register-writes when passed to sim_set_tracing(). */
+#define SIM_TRACE_REGISTER_WRITES 0x04
+
+/** Enable --trace-disasm when passed to sim_set_tracing(). */
+#define SIM_TRACE_DISASM          0x08
+
+/** Enable --trace-stall-info when passed to sim_set_tracing(). */
+#define SIM_TRACE_STALL_INFO      0x10
+
+/** Enable --trace-memory-controller when passed to sim_set_tracing(). */
+#define SIM_TRACE_MEMORY_CONTROLLER 0x20
+
+/** Enable --trace-l2 when passed to sim_set_tracing(). */
+#define SIM_TRACE_L2_CACHE 0x40
+
+/** Enable --trace-lines when passed to sim_set_tracing(). */
+#define SIM_TRACE_LINES 0x80
+
+/** Turn off all tracing when passed to sim_set_tracing(). */
+#define SIM_TRACE_NONE 0
+
+/** Turn on all tracing when passed to sim_set_tracing(). */
+#define SIM_TRACE_ALL (-1)
+
+/** @} */
+
+/** Computes the value to write to SPR_SIM_CONTROL to set tracing flags. */
+#define SIM_TRACE_SPR_ARG(mask) \
+  (SIM_CONTROL_SET_TRACING | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+
+/*
+ * Bit masks which can be shifted by 8, combined with
+ * SIM_CONTROL_DUMP, and written to SPR_SIM_CONTROL.
+ */
+
+/**
+ * @addtogroup arch_sim
+ * @{
+ */
+
+/** Dump the general-purpose registers. */
+#define SIM_DUMP_REGS          0x001
+
+/** Dump the SPRs. */
+#define SIM_DUMP_SPRS          0x002
+
+/** Dump the ITLB. */
+#define SIM_DUMP_ITLB          0x004
+
+/** Dump the DTLB. */
+#define SIM_DUMP_DTLB          0x008
+
+/** Dump the L1 I-cache. */
+#define SIM_DUMP_L1I           0x010
+
+/** Dump the L1 D-cache. */
+#define SIM_DUMP_L1D           0x020
+
+/** Dump the L2 cache. */
+#define SIM_DUMP_L2            0x040
+
+/** Dump the switch registers. */
+#define SIM_DUMP_SNREGS        0x080
+
+/** Dump the switch ITLB. */
+#define SIM_DUMP_SNITLB        0x100
+
+/** Dump the switch L1 I-cache. */
+#define SIM_DUMP_SNL1I         0x200
+
+/** Dump the current backtrace. */
+#define SIM_DUMP_BACKTRACE     0x400
+
+/** Only dump valid lines in caches. */
+#define SIM_DUMP_VALID_LINES   0x800
+
+/** Dump everything that is dumpable. */
+#define SIM_DUMP_ALL (-1 & ~SIM_DUMP_VALID_LINES)
+
+/** @} */
+
+/** Computes the value to write to SPR_SIM_CONTROL to dump machine state. */
+#define SIM_DUMP_SPR_ARG(mask) \
+  (SIM_CONTROL_DUMP | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+
+/*
+ * Bit masks which can be shifted by 8, combined with
+ * SIM_CONTROL_PROFILER_CHIP_xxx, and written to SPR_SIM_CONTROL.
+ */
+
+/**
+ * @addtogroup arch_sim
+ * @{
+ */
+
+/** Use with SIM_PROFILER_CHIP_xxx to control the memory controllers. */
+#define SIM_CHIP_MEMCTL        0x001
+
+/** Use with SIM_PROFILER_CHIP_xxx to control the XAUI interface. */
+#define SIM_CHIP_XAUI          0x002
+
+/** Use with SIM_PROFILER_CHIP_xxx to control the PCIe interface. */
+#define SIM_CHIP_PCIE          0x004
+
+/** Use with SIM_PROFILER_CHIP_xxx to control the MPIPE interface. */
+#define SIM_CHIP_MPIPE         0x008
+
+/** Use with SIM_PROFILER_CHIP_xxx to control the TRIO interface. */
+#define SIM_CHIP_TRIO          0x010
+
+/** Reference all chip devices. */
+#define SIM_CHIP_ALL (-1)
+
+/** @} */
+
+/** Computes the value to write to SPR_SIM_CONTROL to clear chip statistics. */
+#define SIM_PROFILER_CHIP_CLEAR_SPR_ARG(mask) \
+  (SIM_CONTROL_PROFILER_CHIP_CLEAR | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+/** Computes the value to write to SPR_SIM_CONTROL to disable chip statistics.*/
+#define SIM_PROFILER_CHIP_DISABLE_SPR_ARG(mask) \
+  (SIM_CONTROL_PROFILER_CHIP_DISABLE | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+/** Computes the value to write to SPR_SIM_CONTROL to enable chip statistics. */
+#define SIM_PROFILER_CHIP_ENABLE_SPR_ARG(mask) \
+  (SIM_CONTROL_PROFILER_CHIP_ENABLE | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
+
+
+
+/* Shim bitrate controls. */
+
+/** The number of bits used to store the shim id. */
+#define SIM_CONTROL_SHAPING_SHIM_ID_BITS 3
+
+/**
+ * @addtogroup arch_sim
+ * @{
+ */
+
+/** Change the gbe 0 bitrate. */
+#define SIM_CONTROL_SHAPING_GBE_0 0x0
+
+/** Change the gbe 1 bitrate. */
+#define SIM_CONTROL_SHAPING_GBE_1 0x1
+
+/** Change the gbe 2 bitrate. */
+#define SIM_CONTROL_SHAPING_GBE_2 0x2
+
+/** Change the gbe 3 bitrate. */
+#define SIM_CONTROL_SHAPING_GBE_3 0x3
+
+/** Change the xgbe 0 bitrate. */
+#define SIM_CONTROL_SHAPING_XGBE_0 0x4
+
+/** Change the xgbe 1 bitrate. */
+#define SIM_CONTROL_SHAPING_XGBE_1 0x5
+
+/** The number of bits used to store the type of shaping to do. */
+#define SIM_CONTROL_SHAPING_TYPE_BITS 2
+
+/** Control the multiplier. */
+#define SIM_CONTROL_SHAPING_MULTIPLIER 0
+
+/** Control the PPS. */
+#define SIM_CONTROL_SHAPING_PPS 1
+
+/** Control the BPS. */
+#define SIM_CONTROL_SHAPING_BPS 2
+
+/** The number of bits for the units for the shaping parameter. */
+#define SIM_CONTROL_SHAPING_UNITS_BITS 2
+
+/** Provide a number in single units. */
+#define SIM_CONTROL_SHAPING_UNITS_SINGLE 0
+
+/** Provide a number in kilo units. */
+#define SIM_CONTROL_SHAPING_UNITS_KILO 1
+
+/** Provide a number in mega units. */
+#define SIM_CONTROL_SHAPING_UNITS_MEGA 2
+
+/** Provide a number in giga units. */
+#define SIM_CONTROL_SHAPING_UNITS_GIGA 3
+
+/** @} */
+
+/** How many bits are available for the rate. */
+#define SIM_CONTROL_SHAPING_RATE_BITS \
+  (32 - (_SIM_CONTROL_OPERATOR_BITS + \
+         SIM_CONTROL_SHAPING_SHIM_ID_BITS + \
+         SIM_CONTROL_SHAPING_TYPE_BITS + \
+         SIM_CONTROL_SHAPING_UNITS_BITS))
+
+/** Computes the value to write to SPR_SIM_CONTROL to change a bitrate. */
+#define SIM_SHAPING_SPR_ARG(shim, type, units, rate) \
+  (SIM_CONTROL_SHAPING | \
+   ((shim) | \
+   ((type) << (SIM_CONTROL_SHAPING_SHIM_ID_BITS)) | \
+   ((units) << (SIM_CONTROL_SHAPING_SHIM_ID_BITS + \
+                SIM_CONTROL_SHAPING_TYPE_BITS)) | \
+   ((rate) << (SIM_CONTROL_SHAPING_SHIM_ID_BITS + \
+               SIM_CONTROL_SHAPING_TYPE_BITS + \
+               SIM_CONTROL_SHAPING_UNITS_BITS))) << _SIM_CONTROL_OPERATOR_BITS)
+
+
+/*
+ * Values returned when reading SPR_SIM_CONTROL.
+ * ISSUE: These names should share a longer common prefix.
+ */
+
+/**
+ * When reading SPR_SIM_CONTROL, the mask of simulator tracing bits
+ * (SIM_TRACE_xxx values).
+ */
+#define SIM_TRACE_FLAG_MASK 0xFFFF
+
+/** When reading SPR_SIM_CONTROL, the mask for whether profiling is enabled. */
+#define SIM_PROFILER_ENABLED_MASK 0x10000
+
+
+/*
+ * Special arguments for "SIM_CONTROL_PUTC".
+ */
+
+/**
+ * Flag value for forcing a PUTC string-flush, including
+ * coordinate/cycle prefix and newline.
+ */
+#define SIM_PUTC_FLUSH_STRING 0x100
+
+/**
+ * Flag value for forcing a PUTC binary-data-flush, which skips the
+ * prefix and does not append a newline.
+ */
+#define SIM_PUTC_FLUSH_BINARY 0x101
+
+
+#endif /* __ARCH_SIM_DEF_H__ */
diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h
new file mode 100644
index 00000000..d6ba449b
--- /dev/null
+++ b/arch/tile/include/arch/spr_def.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/* Include the proper base SPR definition file. */
+#ifdef __tilegx__
+#include <arch/spr_def_64.h>
+#else
+#include <arch/spr_def_32.h>
+#endif
+
+#ifdef __KERNEL__
+
+/*
+ * In addition to including the proper base SPR definition file, depending
+ * on machine architecture, this file defines several macros which allow
+ * kernel code to use protection-level dependent SPRs without worrying
+ * about which PL it's running at.  In these macros, the PL that the SPR
+ * or interrupt number applies to is replaced by K.
+ */
+
+#if CONFIG_KERNEL_PL != 1 && CONFIG_KERNEL_PL != 2
+#error CONFIG_KERNEL_PL must be 1 or 2
+#endif
+
+/* Concatenate 4 tokens; _concat4 macro-expands its arguments first. */
+#define __concat4(a, b, c, d) a ## b ## c ## d
+#define _concat4(a, b, c, d)  __concat4(a, b, c, d)
+
+#ifdef __tilegx__
+
+/* TILE-Gx dependent, protection-level dependent SPRs and interrupt numbers. */
+
+#define SPR_INTERRUPT_MASK_K \
+	_concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL,,)
+#define SPR_INTERRUPT_MASK_SET_K \
+	_concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL,,)
+#define SPR_INTERRUPT_MASK_RESET_K \
+	_concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL,,)
+#define SPR_INTERRUPT_VECTOR_BASE_K \
+	_concat4(SPR_INTERRUPT_VECTOR_BASE_, CONFIG_KERNEL_PL,,)
+
+#define SPR_IPI_MASK_K \
+	_concat4(SPR_IPI_MASK_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_MASK_RESET_K \
+	_concat4(SPR_IPI_MASK_RESET_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_MASK_SET_K \
+	_concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_EVENT_K \
+	_concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_EVENT_RESET_K \
+	_concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_EVENT_SET_K \
+	_concat4(SPR_IPI_EVENT_SET_, CONFIG_KERNEL_PL,,)
+#define INT_IPI_K \
+	_concat4(INT_IPI_, CONFIG_KERNEL_PL,,)
+
+#define SPR_SINGLE_STEP_CONTROL_K \
+	_concat4(SPR_SINGLE_STEP_CONTROL_, CONFIG_KERNEL_PL,,)
+#define SPR_SINGLE_STEP_EN_K_K \
+	_concat4(SPR_SINGLE_STEP_EN_, CONFIG_KERNEL_PL, _, CONFIG_KERNEL_PL)
+#define INT_SINGLE_STEP_K \
+	_concat4(INT_SINGLE_STEP_, CONFIG_KERNEL_PL,,)
+
+#else /* !__tilegx__ */
+
+/* TILEPro dependent, protection-level dependent SPRs. */
+
+#define SPR_INTERRUPT_MASK_K_0 \
+	_concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL, _0,)
+#define SPR_INTERRUPT_MASK_K_1 \
+	_concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL, _1,)
+#define SPR_INTERRUPT_MASK_SET_K_0 \
+	_concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL, _0,)
+#define SPR_INTERRUPT_MASK_SET_K_1 \
+	_concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL, _1,)
+#define SPR_INTERRUPT_MASK_RESET_K_0 \
+	_concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL, _0,)
+#define SPR_INTERRUPT_MASK_RESET_K_1 \
+	_concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL, _1,)
+
+#endif /* __tilegx__ */
+
+/* Generic protection-level dependent SPRs. */
+
+#define SPR_SYSTEM_SAVE_K_0 \
+	_concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _0,)
+#define SPR_SYSTEM_SAVE_K_1 \
+	_concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _1,)
+#define SPR_SYSTEM_SAVE_K_2 \
+	_concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _2,)
+#define SPR_SYSTEM_SAVE_K_3 \
+	_concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _3,)
+#define SPR_EX_CONTEXT_K_0 \
+	_concat4(SPR_EX_CONTEXT_, CONFIG_KERNEL_PL, _0,)
+#define SPR_EX_CONTEXT_K_1 \
+	_concat4(SPR_EX_CONTEXT_, CONFIG_KERNEL_PL, _1,)
+#define SPR_INTCTRL_K_STATUS \
+	_concat4(SPR_INTCTRL_, CONFIG_KERNEL_PL, _STATUS,)
+#define INT_INTCTRL_K \
+	_concat4(INT_INTCTRL_, CONFIG_KERNEL_PL,,)
+
+#endif /* __KERNEL__ */
diff --git a/arch/tile/include/arch/spr_def_32.h b/arch/tile/include/arch/spr_def_32.h
new file mode 100644
index 00000000..bbc1f4c9
--- /dev/null
+++ b/arch/tile/include/arch/spr_def_32.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef __DOXYGEN__
+
+#ifndef __ARCH_SPR_DEF_H__
+#define __ARCH_SPR_DEF_H__
+
+#define SPR_AUX_PERF_COUNT_0 0x6005
+#define SPR_AUX_PERF_COUNT_1 0x6006
+#define SPR_AUX_PERF_COUNT_CTL 0x6007
+#define SPR_AUX_PERF_COUNT_STS 0x6008
+#define SPR_CYCLE_HIGH 0x4e06
+#define SPR_CYCLE_LOW 0x4e07
+#define SPR_DMA_BYTE 0x3900
+#define SPR_DMA_CHUNK_SIZE 0x3901
+#define SPR_DMA_CTR 0x3902
+#define SPR_DMA_CTR__REQUEST_MASK  0x1
+#define SPR_DMA_CTR__SUSPEND_MASK  0x2
+#define SPR_DMA_DST_ADDR 0x3903
+#define SPR_DMA_DST_CHUNK_ADDR 0x3904
+#define SPR_DMA_SRC_ADDR 0x3905
+#define SPR_DMA_SRC_CHUNK_ADDR 0x3906
+#define SPR_DMA_STATUS__DONE_MASK  0x1
+#define SPR_DMA_STATUS__BUSY_MASK  0x2
+#define SPR_DMA_STATUS__RUNNING_MASK  0x10
+#define SPR_DMA_STRIDE 0x3907
+#define SPR_DMA_USER_STATUS 0x3908
+#define SPR_DONE 0x4e08
+#define SPR_EVENT_BEGIN 0x4e0d
+#define SPR_EVENT_END 0x4e0e
+#define SPR_EX_CONTEXT_0_0 0x4a05
+#define SPR_EX_CONTEXT_0_1 0x4a06
+#define SPR_EX_CONTEXT_0_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_0_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_0_1__PL_MASK  0x3
+#define SPR_EX_CONTEXT_0_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_0_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_0_1__ICS_MASK  0x4
+#define SPR_EX_CONTEXT_1_0 0x4805
+#define SPR_EX_CONTEXT_1_1 0x4806
+#define SPR_EX_CONTEXT_1_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_1_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_1_1__PL_MASK  0x3
+#define SPR_EX_CONTEXT_1_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_1_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_1_1__ICS_MASK  0x4
+#define SPR_EX_CONTEXT_2_0 0x4605
+#define SPR_EX_CONTEXT_2_1 0x4606
+#define SPR_EX_CONTEXT_2_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_2_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_2_1__PL_MASK  0x3
+#define SPR_EX_CONTEXT_2_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_2_1__ICS_MASK  0x4
+#define SPR_FAIL 0x4e09
+#define SPR_INTCTRL_0_STATUS 0x4a07
+#define SPR_INTCTRL_1_STATUS 0x4807
+#define SPR_INTCTRL_2_STATUS 0x4607
+#define SPR_INTERRUPT_CRITICAL_SECTION 0x4e0a
+#define SPR_INTERRUPT_MASK_0_0 0x4a08
+#define SPR_INTERRUPT_MASK_0_1 0x4a09
+#define SPR_INTERRUPT_MASK_1_0 0x4809
+#define SPR_INTERRUPT_MASK_1_1 0x480a
+#define SPR_INTERRUPT_MASK_2_0 0x4608
+#define SPR_INTERRUPT_MASK_2_1 0x4609
+#define SPR_INTERRUPT_MASK_RESET_0_0 0x4a0a
+#define SPR_INTERRUPT_MASK_RESET_0_1 0x4a0b
+#define SPR_INTERRUPT_MASK_RESET_1_0 0x480b
+#define SPR_INTERRUPT_MASK_RESET_1_1 0x480c
+#define SPR_INTERRUPT_MASK_RESET_2_0 0x460a
+#define SPR_INTERRUPT_MASK_RESET_2_1 0x460b
+#define SPR_INTERRUPT_MASK_SET_0_0 0x4a0c
+#define SPR_INTERRUPT_MASK_SET_0_1 0x4a0d
+#define SPR_INTERRUPT_MASK_SET_1_0 0x480d
+#define SPR_INTERRUPT_MASK_SET_1_1 0x480e
+#define SPR_INTERRUPT_MASK_SET_2_0 0x460c
+#define SPR_INTERRUPT_MASK_SET_2_1 0x460d
+#define SPR_MPL_DMA_CPL_SET_0 0x5800
+#define SPR_MPL_DMA_CPL_SET_1 0x5801
+#define SPR_MPL_DMA_CPL_SET_2 0x5802
+#define SPR_MPL_DMA_NOTIFY_SET_0 0x3800
+#define SPR_MPL_DMA_NOTIFY_SET_1 0x3801
+#define SPR_MPL_DMA_NOTIFY_SET_2 0x3802
+#define SPR_MPL_INTCTRL_0_SET_0 0x4a00
+#define SPR_MPL_INTCTRL_0_SET_1 0x4a01
+#define SPR_MPL_INTCTRL_0_SET_2 0x4a02
+#define SPR_MPL_INTCTRL_1_SET_0 0x4800
+#define SPR_MPL_INTCTRL_1_SET_1 0x4801
+#define SPR_MPL_INTCTRL_1_SET_2 0x4802
+#define SPR_MPL_INTCTRL_2_SET_0 0x4600
+#define SPR_MPL_INTCTRL_2_SET_1 0x4601
+#define SPR_MPL_INTCTRL_2_SET_2 0x4602
+#define SPR_MPL_SN_ACCESS_SET_0 0x0800
+#define SPR_MPL_SN_ACCESS_SET_1 0x0801
+#define SPR_MPL_SN_ACCESS_SET_2 0x0802
+#define SPR_MPL_SN_CPL_SET_0 0x5a00
+#define SPR_MPL_SN_CPL_SET_1 0x5a01
+#define SPR_MPL_SN_CPL_SET_2 0x5a02
+#define SPR_MPL_SN_FIREWALL_SET_0 0x2c00
+#define SPR_MPL_SN_FIREWALL_SET_1 0x2c01
+#define SPR_MPL_SN_FIREWALL_SET_2 0x2c02
+#define SPR_MPL_SN_NOTIFY_SET_0 0x2a00
+#define SPR_MPL_SN_NOTIFY_SET_1 0x2a01
+#define SPR_MPL_SN_NOTIFY_SET_2 0x2a02
+#define SPR_MPL_UDN_ACCESS_SET_0 0x0c00
+#define SPR_MPL_UDN_ACCESS_SET_1 0x0c01
+#define SPR_MPL_UDN_ACCESS_SET_2 0x0c02
+#define SPR_MPL_UDN_AVAIL_SET_0 0x4000
+#define SPR_MPL_UDN_AVAIL_SET_1 0x4001
+#define SPR_MPL_UDN_AVAIL_SET_2 0x4002
+#define SPR_MPL_UDN_CA_SET_0 0x3c00
+#define SPR_MPL_UDN_CA_SET_1 0x3c01
+#define SPR_MPL_UDN_CA_SET_2 0x3c02
+#define SPR_MPL_UDN_COMPLETE_SET_0 0x1400
+#define SPR_MPL_UDN_COMPLETE_SET_1 0x1401
+#define SPR_MPL_UDN_COMPLETE_SET_2 0x1402
+#define SPR_MPL_UDN_FIREWALL_SET_0 0x3000
+#define SPR_MPL_UDN_FIREWALL_SET_1 0x3001
+#define SPR_MPL_UDN_FIREWALL_SET_2 0x3002
+#define SPR_MPL_UDN_REFILL_SET_0 0x1000
+#define SPR_MPL_UDN_REFILL_SET_1 0x1001
+#define SPR_MPL_UDN_REFILL_SET_2 0x1002
+#define SPR_MPL_UDN_TIMER_SET_0 0x3600
+#define SPR_MPL_UDN_TIMER_SET_1 0x3601
+#define SPR_MPL_UDN_TIMER_SET_2 0x3602
+#define SPR_MPL_WORLD_ACCESS_SET_0 0x4e00
+#define SPR_MPL_WORLD_ACCESS_SET_1 0x4e01
+#define SPR_MPL_WORLD_ACCESS_SET_2 0x4e02
+#define SPR_PASS 0x4e0b
+#define SPR_PERF_COUNT_0 0x4205
+#define SPR_PERF_COUNT_1 0x4206
+#define SPR_PERF_COUNT_CTL 0x4207
+#define SPR_PERF_COUNT_DN_CTL 0x4210
+#define SPR_PERF_COUNT_STS 0x4208
+#define SPR_PROC_STATUS 0x4f00
+#define SPR_SIM_CONTROL 0x4e0c
+#define SPR_SNCTL 0x0805
+#define SPR_SNCTL__FRZFABRIC_MASK  0x1
+#define SPR_SNCTL__FRZPROC_MASK  0x2
+#define SPR_SNPC 0x080b
+#define SPR_SNSTATIC 0x080c
+#define SPR_SYSTEM_SAVE_0_0 0x4b00
+#define SPR_SYSTEM_SAVE_0_1 0x4b01
+#define SPR_SYSTEM_SAVE_0_2 0x4b02
+#define SPR_SYSTEM_SAVE_0_3 0x4b03
+#define SPR_SYSTEM_SAVE_1_0 0x4900
+#define SPR_SYSTEM_SAVE_1_1 0x4901
+#define SPR_SYSTEM_SAVE_1_2 0x4902
+#define SPR_SYSTEM_SAVE_1_3 0x4903
+#define SPR_SYSTEM_SAVE_2_0 0x4700
+#define SPR_SYSTEM_SAVE_2_1 0x4701
+#define SPR_SYSTEM_SAVE_2_2 0x4702
+#define SPR_SYSTEM_SAVE_2_3 0x4703
+#define SPR_TILE_COORD 0x4c17
+#define SPR_TILE_RTF_HWM 0x4e10
+#define SPR_TILE_TIMER_CONTROL 0x3205
+#define SPR_TILE_WRITE_PENDING 0x4e0f
+#define SPR_UDN_AVAIL_EN 0x4005
+#define SPR_UDN_CA_DATA 0x0d00
+#define SPR_UDN_DATA_AVAIL 0x0d03
+#define SPR_UDN_DEADLOCK_TIMEOUT 0x3606
+#define SPR_UDN_DEMUX_CA_COUNT 0x0c05
+#define SPR_UDN_DEMUX_COUNT_0 0x0c06
+#define SPR_UDN_DEMUX_COUNT_1 0x0c07
+#define SPR_UDN_DEMUX_COUNT_2 0x0c08
+#define SPR_UDN_DEMUX_COUNT_3 0x0c09
+#define SPR_UDN_DEMUX_CTL 0x0c0a
+#define SPR_UDN_DEMUX_QUEUE_SEL 0x0c0c
+#define SPR_UDN_DEMUX_STATUS 0x0c0d
+#define SPR_UDN_DEMUX_WRITE_FIFO 0x0c0e
+#define SPR_UDN_DIRECTION_PROTECT 0x3005
+#define SPR_UDN_REFILL_EN 0x1005
+#define SPR_UDN_SP_FIFO_DATA 0x0c11
+#define SPR_UDN_SP_FIFO_SEL 0x0c12
+#define SPR_UDN_SP_FREEZE 0x0c13
+#define SPR_UDN_SP_FREEZE__SP_FRZ_MASK  0x1
+#define SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK  0x2
+#define SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK  0x4
+#define SPR_UDN_SP_STATE 0x0c14
+#define SPR_UDN_TAG_0 0x0c15
+#define SPR_UDN_TAG_1 0x0c16
+#define SPR_UDN_TAG_2 0x0c17
+#define SPR_UDN_TAG_3 0x0c18
+#define SPR_UDN_TAG_VALID 0x0c19
+#define SPR_UDN_TILE_COORD 0x0c1a
+
+#endif /* !defined(__ARCH_SPR_DEF_H__) */
+
+#endif /* !defined(__DOXYGEN__) */
diff --git a/arch/tile/include/arch/spr_def_64.h b/arch/tile/include/arch/spr_def_64.h
new file mode 100644
index 00000000..cd3e5f95
--- /dev/null
+++ b/arch/tile/include/arch/spr_def_64.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef __DOXYGEN__
+
+#ifndef __ARCH_SPR_DEF_H__
+#define __ARCH_SPR_DEF_H__
+
+#define SPR_AUX_PERF_COUNT_0 0x2105
+#define SPR_AUX_PERF_COUNT_1 0x2106
+#define SPR_AUX_PERF_COUNT_CTL 0x2107
+#define SPR_AUX_PERF_COUNT_STS 0x2108
+#define SPR_CMPEXCH_VALUE 0x2780
+#define SPR_CYCLE 0x2781
+#define SPR_DONE 0x2705
+#define SPR_DSTREAM_PF 0x2706
+#define SPR_EVENT_BEGIN 0x2782
+#define SPR_EVENT_END 0x2783
+#define SPR_EX_CONTEXT_0_0 0x2580
+#define SPR_EX_CONTEXT_0_1 0x2581
+#define SPR_EX_CONTEXT_0_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_0_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_0_1__PL_MASK  0x3
+#define SPR_EX_CONTEXT_0_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_0_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_0_1__ICS_MASK  0x4
+#define SPR_EX_CONTEXT_1_0 0x2480
+#define SPR_EX_CONTEXT_1_1 0x2481
+#define SPR_EX_CONTEXT_1_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_1_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_1_1__PL_MASK  0x3
+#define SPR_EX_CONTEXT_1_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_1_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_1_1__ICS_MASK  0x4
+#define SPR_EX_CONTEXT_2_0 0x2380
+#define SPR_EX_CONTEXT_2_1 0x2381
+#define SPR_EX_CONTEXT_2_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_2_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_2_1__PL_MASK  0x3
+#define SPR_EX_CONTEXT_2_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_2_1__ICS_MASK  0x4
+#define SPR_FAIL 0x2707
+#define SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK 0x1
+#define SPR_INTCTRL_0_STATUS 0x2505
+#define SPR_INTCTRL_1_STATUS 0x2405
+#define SPR_INTCTRL_2_STATUS 0x2305
+#define SPR_INTERRUPT_CRITICAL_SECTION 0x2708
+#define SPR_INTERRUPT_MASK_0 0x2506
+#define SPR_INTERRUPT_MASK_1 0x2406
+#define SPR_INTERRUPT_MASK_2 0x2306
+#define SPR_INTERRUPT_MASK_RESET_0 0x2507
+#define SPR_INTERRUPT_MASK_RESET_1 0x2407
+#define SPR_INTERRUPT_MASK_RESET_2 0x2307
+#define SPR_INTERRUPT_MASK_SET_0 0x2508
+#define SPR_INTERRUPT_MASK_SET_1 0x2408
+#define SPR_INTERRUPT_MASK_SET_2 0x2308
+#define SPR_INTERRUPT_VECTOR_BASE_0 0x2509
+#define SPR_INTERRUPT_VECTOR_BASE_1 0x2409
+#define SPR_INTERRUPT_VECTOR_BASE_2 0x2309
+#define SPR_INTERRUPT_VECTOR_BASE_3 0x2209
+#define SPR_IPI_EVENT_0 0x1f05
+#define SPR_IPI_EVENT_1 0x1e05
+#define SPR_IPI_EVENT_2 0x1d05
+#define SPR_IPI_EVENT_RESET_0 0x1f06
+#define SPR_IPI_EVENT_RESET_1 0x1e06
+#define SPR_IPI_EVENT_RESET_2 0x1d06
+#define SPR_IPI_EVENT_SET_0 0x1f07
+#define SPR_IPI_EVENT_SET_1 0x1e07
+#define SPR_IPI_EVENT_SET_2 0x1d07
+#define SPR_IPI_MASK_0 0x1f08
+#define SPR_IPI_MASK_1 0x1e08
+#define SPR_IPI_MASK_2 0x1d08
+#define SPR_IPI_MASK_RESET_0 0x1f09
+#define SPR_IPI_MASK_RESET_1 0x1e09
+#define SPR_IPI_MASK_RESET_2 0x1d09
+#define SPR_IPI_MASK_SET_0 0x1f0a
+#define SPR_IPI_MASK_SET_1 0x1e0a
+#define SPR_IPI_MASK_SET_2 0x1d0a
+#define SPR_MPL_AUX_TILE_TIMER_SET_0 0x1700
+#define SPR_MPL_AUX_TILE_TIMER_SET_1 0x1701
+#define SPR_MPL_AUX_TILE_TIMER_SET_2 0x1702
+#define SPR_MPL_INTCTRL_0_SET_0 0x2500
+#define SPR_MPL_INTCTRL_0_SET_1 0x2501
+#define SPR_MPL_INTCTRL_0_SET_2 0x2502
+#define SPR_MPL_INTCTRL_1_SET_0 0x2400
+#define SPR_MPL_INTCTRL_1_SET_1 0x2401
+#define SPR_MPL_INTCTRL_1_SET_2 0x2402
+#define SPR_MPL_INTCTRL_2_SET_0 0x2300
+#define SPR_MPL_INTCTRL_2_SET_1 0x2301
+#define SPR_MPL_INTCTRL_2_SET_2 0x2302
+#define SPR_MPL_UDN_ACCESS_SET_0 0x0b00
+#define SPR_MPL_UDN_ACCESS_SET_1 0x0b01
+#define SPR_MPL_UDN_ACCESS_SET_2 0x0b02
+#define SPR_MPL_UDN_AVAIL_SET_0 0x1b00
+#define SPR_MPL_UDN_AVAIL_SET_1 0x1b01
+#define SPR_MPL_UDN_AVAIL_SET_2 0x1b02
+#define SPR_MPL_UDN_COMPLETE_SET_0 0x0600
+#define SPR_MPL_UDN_COMPLETE_SET_1 0x0601
+#define SPR_MPL_UDN_COMPLETE_SET_2 0x0602
+#define SPR_MPL_UDN_FIREWALL_SET_0 0x1500
+#define SPR_MPL_UDN_FIREWALL_SET_1 0x1501
+#define SPR_MPL_UDN_FIREWALL_SET_2 0x1502
+#define SPR_MPL_UDN_TIMER_SET_0 0x1900
+#define SPR_MPL_UDN_TIMER_SET_1 0x1901
+#define SPR_MPL_UDN_TIMER_SET_2 0x1902
+#define SPR_MPL_WORLD_ACCESS_SET_0 0x2700
+#define SPR_MPL_WORLD_ACCESS_SET_1 0x2701
+#define SPR_MPL_WORLD_ACCESS_SET_2 0x2702
+#define SPR_PASS 0x2709
+#define SPR_PERF_COUNT_0 0x2005
+#define SPR_PERF_COUNT_1 0x2006
+#define SPR_PERF_COUNT_CTL 0x2007
+#define SPR_PERF_COUNT_DN_CTL 0x2008
+#define SPR_PERF_COUNT_STS 0x2009
+#define SPR_PROC_STATUS 0x2784
+#define SPR_SIM_CONTROL 0x2785
+#define SPR_SINGLE_STEP_CONTROL_0 0x0405
+#define SPR_SINGLE_STEP_CONTROL_0__CANCELED_MASK  0x1
+#define SPR_SINGLE_STEP_CONTROL_0__INHIBIT_MASK  0x2
+#define SPR_SINGLE_STEP_CONTROL_1 0x0305
+#define SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK  0x1
+#define SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK  0x2
+#define SPR_SINGLE_STEP_CONTROL_2 0x0205
+#define SPR_SINGLE_STEP_CONTROL_2__CANCELED_MASK  0x1
+#define SPR_SINGLE_STEP_CONTROL_2__INHIBIT_MASK  0x2
+#define SPR_SINGLE_STEP_EN_0_0 0x250a
+#define SPR_SINGLE_STEP_EN_0_1 0x240a
+#define SPR_SINGLE_STEP_EN_0_2 0x230a
+#define SPR_SINGLE_STEP_EN_1_0 0x250b
+#define SPR_SINGLE_STEP_EN_1_1 0x240b
+#define SPR_SINGLE_STEP_EN_1_2 0x230b
+#define SPR_SINGLE_STEP_EN_2_0 0x250c
+#define SPR_SINGLE_STEP_EN_2_1 0x240c
+#define SPR_SINGLE_STEP_EN_2_2 0x230c
+#define SPR_SYSTEM_SAVE_0_0 0x2582
+#define SPR_SYSTEM_SAVE_0_1 0x2583
+#define SPR_SYSTEM_SAVE_0_2 0x2584
+#define SPR_SYSTEM_SAVE_0_3 0x2585
+#define SPR_SYSTEM_SAVE_1_0 0x2482
+#define SPR_SYSTEM_SAVE_1_1 0x2483
+#define SPR_SYSTEM_SAVE_1_2 0x2484
+#define SPR_SYSTEM_SAVE_1_3 0x2485
+#define SPR_SYSTEM_SAVE_2_0 0x2382
+#define SPR_SYSTEM_SAVE_2_1 0x2383
+#define SPR_SYSTEM_SAVE_2_2 0x2384
+#define SPR_SYSTEM_SAVE_2_3 0x2385
+#define SPR_TILE_COORD 0x270b
+#define SPR_TILE_RTF_HWM 0x270c
+#define SPR_TILE_TIMER_CONTROL 0x1605
+#define SPR_UDN_AVAIL_EN 0x1b05
+#define SPR_UDN_DATA_AVAIL 0x0b80
+#define SPR_UDN_DEADLOCK_TIMEOUT 0x1906
+#define SPR_UDN_DEMUX_COUNT_0 0x0b05
+#define SPR_UDN_DEMUX_COUNT_1 0x0b06
+#define SPR_UDN_DEMUX_COUNT_2 0x0b07
+#define SPR_UDN_DEMUX_COUNT_3 0x0b08
+#define SPR_UDN_DIRECTION_PROTECT 0x1505
+
+#endif /* !defined(__ARCH_SPR_DEF_H__) */
+
+#endif /* !defined(__DOXYGEN__) */
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
new file mode 100644
index 00000000..0bb42642
--- /dev/null
+++ b/arch/tile/include/asm/Kbuild
@@ -0,0 +1,44 @@
+include include/asm-generic/Kbuild.asm
+
+header-y += ../arch/
+
+header-y += ucontext.h
+header-y += hardwall.h
+
+generic-y += bug.h
+generic-y += bugs.h
+generic-y += cputime.h
+generic-y += device.h
+generic-y += div64.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipc.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += local.h
+generic-y += module.h
+generic-y += msgbuf.h
+generic-y += mutex.h
+generic-y += param.h
+generic-y += parport.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += scatterlist.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
+generic-y += xor.h
diff --git a/arch/tile/include/asm/asm-offsets.h b/arch/tile/include/asm/asm-offsets.h
new file mode 100644
index 00000000..d370ee36
--- /dev/null
+++ b/arch/tile/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
new file mode 100644
index 00000000..f2461429
--- /dev/null
+++ b/arch/tile/include/asm/atomic.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Atomic primitives.
+ */
+
+#ifndef _ASM_TILE_ATOMIC_H
+#define _ASM_TILE_ATOMIC_H
+
+#include <asm/cmpxchg.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+	return ACCESS_ONCE(v->counter);
+}
+
+/**
+ * atomic_sub_return - subtract integer and return
+ * @v: pointer of type atomic_t
+ * @i: integer value to subtract
+ *
+ * Atomically subtracts @i from @v and returns @v - @i
+ */
+#define atomic_sub_return(i, v)		atomic_add_return((int)(-(i)), (v))
+
+/**
+ * atomic_sub - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v.
+ */
+#define atomic_sub(i, v)		atomic_add((int)(-(i)), (v))
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns true if the result is
+ * zero, or false for all other cases.
+ */
+#define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
+
+/**
+ * atomic_inc_return - increment memory and return
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1 and returns the new value.
+ */
+#define atomic_inc_return(v)		atomic_add_return(1, (v))
+
+/**
+ * atomic_dec_return - decrement memory and return
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and returns the new value.
+ */
+#define atomic_dec_return(v)		atomic_sub_return(1, (v))
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+#define atomic_inc(v)			atomic_add(1, (v))
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1.
+ */
+#define atomic_dec(v)			atomic_sub(1, (v))
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and returns true if the result is 0.
+ */
+#define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
+
+/**
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1 and returns true if the result is 0.
+ */
+#define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @v and returns true if the result is
+ * negative, or false when result is greater than or equal to zero.
+ */
+#define atomic_add_negative(i, v)	(atomic_add_return((i), (v)) < 0)
+
+#endif /* __ASSEMBLY__ */
+
+#ifndef __tilegx__
+#include <asm/atomic_32.h>
+#else
+#include <asm/atomic_64.h>
+#endif
+
+#endif /* _ASM_TILE_ATOMIC_H */
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
new file mode 100644
index 00000000..54d1da82
--- /dev/null
+++ b/arch/tile/include/asm/atomic_32.h
@@ -0,0 +1,324 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Do not include directly; use <linux/atomic.h>.
+ */
+
+#ifndef _ASM_TILE_ATOMIC_32_H
+#define _ASM_TILE_ATOMIC_32_H
+
+#include <asm/barrier.h>
+#include <arch/chip.h>
+
+#ifndef __ASSEMBLY__
+
+/* Tile-specific routines to support <linux/atomic.h>. */
+int _atomic_xchg(atomic_t *v, int n);
+int _atomic_xchg_add(atomic_t *v, int i);
+int _atomic_xchg_add_unless(atomic_t *v, int a, int u);
+int _atomic_cmpxchg(atomic_t *v, int o, int n);
+
+/**
+ * atomic_xchg - atomically exchange contents of memory with a new value
+ * @v: pointer of type atomic_t
+ * @n: integer value to store in memory
+ *
+ * Atomically sets @v to @n and returns old @v
+ */
+static inline int atomic_xchg(atomic_t *v, int n)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic_xchg(v, n);
+}
+
+/**
+ * atomic_cmpxchg - atomically exchange contents of memory if it matches
+ * @v: pointer of type atomic_t
+ * @o: old value that memory should have
+ * @n: new value to write to memory if it matches
+ *
+ * Atomically checks if @v holds @o and replaces it with @n if so.
+ * Returns the old value at @v.
+ */
+static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic_cmpxchg(v, o, n);
+}
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v.
+ */
+static inline void atomic_add(int i, atomic_t *v)
+{
+	_atomic_xchg_add(v, i);
+}
+
+/**
+ * atomic_add_return - add integer and return
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @v and returns @i + @v
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic_xchg_add(v, i) + i;
+}
+
+/**
+ * __atomic_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as @v was not already @u.
+ * Returns the old value of @v.
+ */
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic_xchg_add_unless(v, a, u);
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @n: required value
+ *
+ * Atomically sets the value of @v to @n.
+ *
+ * atomic_set() can't be just a raw store, since it would be lost if it
+ * fell between the load and store of one of the other atomic ops.
+ */
+static inline void atomic_set(atomic_t *v, int n)
+{
+	_atomic_xchg(v, n);
+}
+
+/* A 64bit atomic type */
+
+typedef struct {
+	u64 __aligned(8) counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(val) { (val) }
+
+u64 _atomic64_xchg(atomic64_t *v, u64 n);
+u64 _atomic64_xchg_add(atomic64_t *v, u64 i);
+u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u);
+u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n);
+
+/**
+ * atomic64_read - read atomic variable
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline u64 atomic64_read(const atomic64_t *v)
+{
+	/*
+	 * Requires an atomic op to read both 32-bit parts consistently.
+	 * Casting away const is safe since the atomic support routines
+	 * do not write to memory if the value has not been modified.
+	 */
+	return _atomic64_xchg_add((atomic64_t *)v, 0);
+}
+
+/**
+ * atomic64_xchg - atomically exchange contents of memory with a new value
+ * @v: pointer of type atomic64_t
+ * @n: integer value to store in memory
+ *
+ * Atomically sets @v to @n and returns old @v
+ */
+static inline u64 atomic64_xchg(atomic64_t *v, u64 n)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic64_xchg(v, n);
+}
+
+/**
+ * atomic64_cmpxchg - atomically exchange contents of memory if it matches
+ * @v: pointer of type atomic64_t
+ * @o: old value that memory should have
+ * @n: new value to write to memory if it matches
+ *
+ * Atomically checks if @v holds @o and replaces it with @n if so.
+ * Returns the old value at @v.
+ */
+static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic64_cmpxchg(v, o, n);
+}
+
+/**
+ * atomic64_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically adds @i to @v.
+ */
+static inline void atomic64_add(u64 i, atomic64_t *v)
+{
+	_atomic64_xchg_add(v, i);
+}
+
+/**
+ * atomic64_add_return - add integer and return
+ * @v: pointer of type atomic64_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @v and returns @i + @v
+ */
+static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic64_xchg_add(v, i) + i;
+}
+
+/**
+ * atomic64_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as @v was not already @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
+{
+	smp_mb();  /* barrier for proper semantics */
+	return _atomic64_xchg_add_unless(v, a, u) != u;
+}
+
+/**
+ * atomic64_set - set atomic variable
+ * @v: pointer of type atomic64_t
+ * @n: required value
+ *
+ * Atomically sets the value of @v to @n.
+ *
+ * atomic64_set() can't be just a raw store, since it would be lost if it
+ * fell between the load and store of one of the other atomic ops.
+ */
+static inline void atomic64_set(atomic64_t *v, u64 n)
+{
+	_atomic64_xchg(v, n);
+}
+
+#define atomic64_add_negative(a, v)	((s64)atomic64_add_return((a), (v)) < 0) /* u64 result: test the sign as s64 */
+#define atomic64_inc(v)			atomic64_add(1LL, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
+#define atomic64_sub_return(i, v)	atomic64_add_return(-(i), (v))
+#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
+#define atomic64_sub(i, v)		atomic64_add(-(i), (v))
+#define atomic64_dec(v)			atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
+
+/*
+ * We need to barrier before modifying the word, since the _atomic_xxx()
+ * routines just tns the lock and then read/modify/write of the word.
+ * But after the word is updated, the routine issues an "mf" before returning,
+ * and since it's a function call, we don't even need a compiler barrier.
+ */
+#define smp_mb__before_atomic_dec()	smp_mb()
+#define smp_mb__before_atomic_inc()	smp_mb()
+#define smp_mb__after_atomic_dec()	do { } while (0)
+#define smp_mb__after_atomic_inc()	do { } while (0)
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Internal definitions only beyond this point.
+ */
+
+#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \
+  (!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP))
+
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+
+/* Number of entries in atomic_lock_ptr[]. */
+#define ATOMIC_HASH_L1_SHIFT 6
+#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT)
+
+/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */
+#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2)
+#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT)
+
+#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+/*
+ * Number of atomic locks in atomic_locks[]. Must be a power of two.
+ * There is no reason for more than PAGE_SIZE / 8 entries, since that
+ * is the maximum number of pointer bits we can use to index this.
+ * And we cannot have more than PAGE_SIZE / 4, since this has to
+ * fit on a single page and each entry takes 4 bytes.
+ */
+#define ATOMIC_HASH_SHIFT (PAGE_SHIFT - 3)
+#define ATOMIC_HASH_SIZE (1 << ATOMIC_HASH_SHIFT)
+
+#ifndef __ASSEMBLY__
+extern int atomic_locks[];
+#endif
+
+#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+/*
+ * All the code that may fault while holding an atomic lock must
+ * place the pointer to the lock in ATOMIC_LOCK_REG so the fault code
+ * can correctly release and reacquire the lock.  Note that we
+ * mention the register number in a comment in "lib/atomic_asm_32.S" to keep
+ * assembly coders from using this register by mistake, so if it
+ * is changed here, change that comment as well.
+ */
+#define ATOMIC_LOCK_REG 20
+#define ATOMIC_LOCK_REG_NAME r20
+
+#ifndef __ASSEMBLY__
+/* Called from setup to initialize a hash table to point to per_cpu locks. */
+void __init_atomic_per_cpu(void);
+
+#ifdef CONFIG_SMP
+/* Support releasing the atomic lock in do_page_fault_ics(). */
+void __atomic_fault_unlock(int *lock_ptr);
+#endif
+
+/* Private helper routines in lib/atomic_asm_32.S */
+extern struct __get_user __atomic_cmpxchg(volatile int *p,
+					  int *lock, int o, int n);
+extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
+						  int *lock, int o, int n);
+extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
+extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n);
+extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n);
+extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n);
+extern u64 __atomic64_xchg_add_unless(volatile u64 *p,
+				      int *lock, u64 o, u64 n);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_ATOMIC_32_H */
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
new file mode 100644
index 00000000..f4500c68
--- /dev/null
+++ b/arch/tile/include/asm/atomic_64.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Do not include directly; use <linux/atomic.h>.
+ */
+
+#ifndef _ASM_TILE_ATOMIC_64_H
+#define _ASM_TILE_ATOMIC_64_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/barrier.h>
+#include <arch/spr_def.h>
+
+/* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */
+
+#define atomic_set(v, i) ((v)->counter = (i))
+
+/*
+ * The smp_mb() operations throughout are to support the fact that
+ * Linux requires memory barriers before and after the operation,
+ * on any routine which updates memory and returns a value.
+ */
+
+static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
+{
+	int val;
+	__insn_mtspr(SPR_CMPEXCH_VALUE, o);
+	smp_mb();  /* barrier for proper semantics */
+	val = __insn_cmpexch4((void *)&v->counter, n);
+	smp_mb();  /* barrier for proper semantics */
+	return val;
+}
+
+static inline int atomic_xchg(atomic_t *v, int n)
+{
+	int val;
+	smp_mb();  /* barrier for proper semantics */
+	val = __insn_exch4((void *)&v->counter, n);
+	smp_mb();  /* barrier for proper semantics */
+	return val;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+	__insn_fetchadd4((void *)&v->counter, i);
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	int val;
+	smp_mb();  /* barrier for proper semantics */
+	val = __insn_fetchadd4((void *)&v->counter, i) + i;
+	barrier();  /* the "+ i" above will wait on memory */
+	return val;
+}
+
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int guess, oldval = v->counter;
+	do {
+		if (oldval == u)
+			break;
+		guess = oldval;
+		oldval = atomic_cmpxchg(v, guess, guess + a);
+	} while (guess != oldval);
+	return oldval;
+}
+
+/* Now the true 64-bit operations. */
+
+#define ATOMIC64_INIT(i)	{ (i) }
+
+#define atomic64_read(v)		((v)->counter)
+#define atomic64_set(v, i) ((v)->counter = (i))
+
+static inline long atomic64_cmpxchg(atomic64_t *v, long o, long n)
+{
+	long val;
+	smp_mb();  /* barrier for proper semantics */
+	__insn_mtspr(SPR_CMPEXCH_VALUE, o);
+	val = __insn_cmpexch((void *)&v->counter, n);
+	smp_mb();  /* barrier for proper semantics */
+	return val;
+}
+
+static inline long atomic64_xchg(atomic64_t *v, long n)
+{
+	long val;
+	smp_mb();  /* barrier for proper semantics */
+	val = __insn_exch((void *)&v->counter, n);
+	smp_mb();  /* barrier for proper semantics */
+	return val;
+}
+
+static inline void atomic64_add(long i, atomic64_t *v)
+{
+	__insn_fetchadd((void *)&v->counter, i);
+}
+
+static inline long atomic64_add_return(long i, atomic64_t *v)
+{
+	long val;  /* must be 64-bit: an int here would truncate the sum */
+	smp_mb();  /* barrier for proper semantics */
+	val = __insn_fetchadd((void *)&v->counter, i) + i;  /* old value + i == new value */
+	barrier();  /* the "+ i" above will wait on memory */
+	return val;
+}
+
+static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
+{
+	long guess, oldval = v->counter;
+	do {
+		if (oldval == u)
+			break;
+		guess = oldval;
+		oldval = atomic64_cmpxchg(v, guess, guess + a);
+	} while (guess != oldval);
+	return oldval != u;
+}
+
+#define atomic64_sub_return(i, v)	atomic64_add_return(-(i), (v))
+#define atomic64_sub(i, v)		atomic64_add(-(i), (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
+#define atomic64_inc(v)			atomic64_add(1, (v))
+#define atomic64_dec(v)			atomic64_sub(1, (v))
+
+#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
+#define atomic64_dec_and_test(v)	(atomic64_dec_return(v) == 0)
+#define atomic64_sub_and_test(i, v)	(atomic64_sub_return((i), (v)) == 0)
+#define atomic64_add_negative(i, v)	(atomic64_add_return((i), (v)) < 0)
+
+#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
+
+/* Atomic dec and inc don't implement barrier, so provide them if needed. */
+#define smp_mb__before_atomic_dec()	smp_mb()
+#define smp_mb__after_atomic_dec()	smp_mb()
+#define smp_mb__before_atomic_inc()	smp_mb()
+#define smp_mb__after_atomic_inc()	smp_mb()
+
+/* Define this to indicate that cmpxchg is an efficient operation. */
+#define __HAVE_ARCH_CMPXCHG
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_ATOMIC_64_H */
diff --git a/arch/tile/include/asm/auxvec.h b/arch/tile/include/asm/auxvec.h
new file mode 100644
index 00000000..1d393edb
--- /dev/null
+++ b/arch/tile/include/asm/auxvec.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_AUXVEC_H
+#define _ASM_TILE_AUXVEC_H
+
+/* No extensions to auxvec */
+
+#endif /* _ASM_TILE_AUXVEC_H */
diff --git a/arch/tile/include/asm/backtrace.h b/arch/tile/include/asm/backtrace.h
new file mode 100644
index 00000000..bd5399a6
--- /dev/null
+++ b/arch/tile/include/asm/backtrace.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_BACKTRACE_H
+#define _ASM_TILE_BACKTRACE_H
+
+#include <linux/types.h>
+
+/* Reads 'size' bytes from 'address' and writes the data to 'result'.
+ * Returns true if successful, else false (e.g. memory not readable).
+ */
+typedef bool (*BacktraceMemoryReader)(void *result,
+				      unsigned long address,
+				      unsigned int size,
+				      void *extra);
+
+typedef struct {
+	/* Current PC. */
+	unsigned long pc;
+
+	/* Current stack pointer value. */
+	unsigned long sp;
+
+	/* Current frame pointer value (i.e. caller's stack pointer) */
+	unsigned long fp;
+
+	/* Internal use only: caller's PC for first frame. */
+	unsigned long initial_frame_caller_pc;
+
+	/* Internal use only: callback to read memory. */
+	BacktraceMemoryReader read_memory_func;
+
+	/* Internal use only: arbitrary argument to read_memory_func. */
+	void *read_memory_func_extra;
+
+} BacktraceIterator;
+
+
+typedef enum {
+
+	/* We have no idea what the caller's pc is. */
+	PC_LOC_UNKNOWN,
+
+	/* The caller's pc is currently in lr. */
+	PC_LOC_IN_LR,
+
+	/* The caller's pc can be found by dereferencing the caller's sp. */
+	PC_LOC_ON_STACK
+
+} CallerPCLocation;
+
+
+typedef enum {
+
+	/* We have no idea what the caller's sp is. */
+	SP_LOC_UNKNOWN,
+
+	/* The caller's sp is currently in r52. */
+	SP_LOC_IN_R52,
+
+	/* The caller's sp can be found by adding a certain constant
+	 * to the current value of sp.
+	 */
+	SP_LOC_OFFSET
+
+} CallerSPLocation;
+
+
+/* Bit values ORed into CALLER_* values for info ops. */
+enum {
+	/* Setting the low bit on any of these values means the info op
+	 * applies only to one bundle ago.
+	 */
+	ONE_BUNDLE_AGO_FLAG = 1,
+
+	/* Setting this bit on a CALLER_SP_* value means the PC is in LR.
+	 * If not set, PC is on the stack.
+	 */
+	PC_IN_LR_FLAG = 2,
+
+	/* This many of the low bits of a CALLER_SP_* value are for the
+	 * flag bits above.
+	 */
+	NUM_INFO_OP_FLAGS = 2,
+
+	/* We cannot have one in the memory pipe so this is the maximum. */
+	MAX_INFO_OPS_PER_BUNDLE = 2
+};
+
+
+/* Internal constants used to define 'info' operands. */
+enum {
+	/* 0 and 1 are reserved, as are all negative numbers. */
+
+	CALLER_UNKNOWN_BASE = 2,
+
+	CALLER_SP_IN_R52_BASE = 4,
+
+	CALLER_SP_OFFSET_BASE = 8,
+};
+
+
+/* Current backtracer state describing where it thinks the caller is. */
+typedef struct {
+	/*
+	 * Public fields
+	 */
+
+	/* How do we find the caller's PC? */
+	CallerPCLocation pc_location : 8;
+
+	/* How do we find the caller's SP? */
+	CallerSPLocation sp_location : 8;
+
+	/* If sp_location == SP_LOC_OFFSET, then caller_sp == sp +
+	 * loc->sp_offset. Else this field is undefined.
+	 */
+	uint16_t sp_offset;
+
+	/* Is the most recently visited bundle a terminating bundle? */
+	bool at_terminating_bundle;
+
+	/*
+	 * Private fields
+	 */
+
+	/* Will the forward scanner see someone clobbering sp
+	 * (i.e. changing it with something other than addi sp, sp, N?)
+	 */
+	bool sp_clobber_follows;
+
+	/* Operand to next "visible" info op (no more than one bundle past
+	 * the next terminating bundle), or -32768 if none.
+	 */
+	int16_t next_info_operand;
+
+	/* Is the info op in next_info_operand in the very next bundle? */
+	bool is_next_info_operand_adjacent;
+
+} CallerLocation;
+
+extern void backtrace_init(BacktraceIterator *state,
+                          BacktraceMemoryReader read_memory_func,
+                          void *read_memory_func_extra,
+                          unsigned long pc, unsigned long lr,
+                          unsigned long sp, unsigned long r52);
+
+
+extern bool backtrace_next(BacktraceIterator *state);
+
+#endif /* _ASM_TILE_BACKTRACE_H */
diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
new file mode 100644
index 00000000..990a217a
--- /dev/null
+++ b/arch/tile/include/asm/barrier.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_BARRIER_H
+#define _ASM_TILE_BARRIER_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <arch/chip.h>
+#include <arch/spr_def.h>
+#include <asm/timex.h>
+
+/*
+ * read_barrier_depends - Flush all pending reads that subsequent reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier.  All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data returned by
+ * any of the preceding reads.  This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies.  See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	b = 2;
+ *	memory_barrier();
+ *	p = &b;				q = p;
+ *					read_barrier_depends();
+ *					d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends().  However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	a = 2;
+ *	memory_barrier();
+ *	b = 3;				y = b;
+ *					read_barrier_depends();
+ *					x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b".  Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
+ * in cases like this where there are no data dependencies.
+ */
+#define read_barrier_depends()	do { } while (0)
+
+#define __sync()	__insn_mf()
+
+#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
+#include <hv/syscall_public.h>
+/*
+ * Issue an uncacheable load to each memory controller, then
+ * wait until those loads have completed.
+ */
+static inline void __mb_incoherent(void)
+{
+	long clobber_r10;
+	asm volatile("swint2"
+		     : "=R10" (clobber_r10)
+		     : "R10" (HV_SYS_fence_incoherent)
+		     : "r0", "r1", "r2", "r3", "r4",
+		       "r5", "r6", "r7", "r8", "r9",
+		       "r11", "r12", "r13", "r14",
+		       "r15", "r16", "r17", "r18", "r19",
+		       "r20", "r21", "r22", "r23", "r24",
+		       "r25", "r26", "r27", "r28", "r29");
+}
+#endif
+
+/* Fence to guarantee visibility of stores to incoherent memory. */
+static inline void
+mb_incoherent(void)
+{
+	__insn_mf();
+
+#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
+	{
+#if CHIP_HAS_TILE_WRITE_PENDING()
+		const unsigned long WRITE_TIMEOUT_CYCLES = 400;
+		unsigned long start = get_cycles_low();
+		do {
+			if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0)
+				return;  /* SPR reads zero: skip the fallback below */
+		} while ((get_cycles_low() - start) < WRITE_TIMEOUT_CYCLES);
+#endif /* CHIP_HAS_TILE_WRITE_PENDING() */
+		(void) __mb_incoherent();  /* poll timed out, or no SPR to poll */
+	}
+#endif /* !CHIP_HAS_MF_WAITS_FOR_VICTIMS() */
+}
+
+#define fast_wmb()	__sync()
+#define fast_rmb()	__sync()
+#define fast_mb()	__sync()
+#define fast_iob()	mb_incoherent()
+
+#define wmb()		fast_wmb()
+#define rmb()		fast_rmb()
+#define mb()		fast_mb()
+#define iob()		fast_iob()
+
+#ifdef CONFIG_SMP
+#define smp_mb()	mb()
+#define smp_rmb()	rmb()
+#define smp_wmb()	wmb()
+#define smp_read_barrier_depends()	read_barrier_depends()
+#else
+#define smp_mb()	barrier()
+#define smp_rmb()	barrier()
+#define smp_wmb()	barrier()
+#define smp_read_barrier_depends()	do { } while (0)
+#endif
+
+#define set_mb(var, value) \
+	do { var = value; mb(); } while (0)
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_TILE_BARRIER_H */
diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h
new file mode 100644
index 00000000..bd186c4e
--- /dev/null
+++ b/arch/tile/include/asm/bitops.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright 1992, Linus Torvalds.
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_BITOPS_H
+#define _ASM_TILE_BITOPS_H
+
+#include <linux/types.h>
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#ifdef __tilegx__
+#include <asm/bitops_64.h>
+#else
+#include <asm/bitops_32.h>
+#endif
+
+/**
+ * __ffs - index of the least significant set bit
+ * @word: value to scan
+ *
+ * Bit numbering starts at 0.  The result is undefined when @word is 0,
+ * so callers must rule that case out themselves.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+	const int lowest = __builtin_ctzl(word);
+
+	return lowest;
+}
+
+/**
+ * ffz - index of the least significant clear bit
+ * @word: value to scan
+ *
+ * Implemented by inverting @word and counting trailing zeros.  The
+ * result is undefined when @word is ~0UL, so check for that first.
+ */
+static inline unsigned long ffz(unsigned long word)
+{
+	unsigned long inverted = ~word;
+
+	return __builtin_ctzl(inverted);
+}
+
+/**
+ * __fls - index of the most significant set bit
+ * @word: value to scan
+ *
+ * Bit numbering starts at 0.  The result is undefined when @word is 0,
+ * so callers must rule that case out themselves.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	unsigned long leading_zeros = __builtin_clzl(word);
+
+	/* Highest bit index minus the number of zero bits above the hit. */
+	return (sizeof(word) * 8 - 1) - leading_zeros;
+}
+
+/**
+ * ffs - 1-based position of the least significant set bit
+ * @x: the word to search
+ *
+ * Follows the libc / compiler-builtin ffs convention rather than the
+ * 0-based convention of the other bitops here: the result is 0 when
+ * @x is 0, otherwise the position of the first set bit, with the least
+ * significant bit counted as position 1.
+ */
+static inline int ffs(int x)
+{
+	int position = __builtin_ffs(x);
+
+	return position;
+}
+
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @w: the word to search
+ *
+ * Returns 0 if @w is 0, otherwise the 1-based position of the most
+ * significant set bit (so the top bit is at position 64).
+ *
+ * The zero case is tested explicitly: the result of __builtin_clzll(0)
+ * is undefined, so it must not be fed into the subtraction.
+ */
+static inline int fls64(__u64 w)
+{
+	if (w == 0)
+		return 0;
+	return (sizeof(__u64) * 8) - __builtin_clzll(w);
+}
+
+/**
+ * fls - 1-based position of the most significant set bit
+ * @x: the word to search
+ *
+ * Counterpart of ffs(): the documented result is 0 when @x is 0,
+ * otherwise the position of the last set bit, with the most
+ * significant bit of a 32-bit word at position 32.
+ *
+ * @x is zero-extended and handed to fls64(), which does the work.
+ */
+static inline int fls(int x)
+{
+	unsigned int bits = x;
+
+	return fls64(bits);
+}
+
+/* Number of set bits in the 32-bit value @w. */
+static inline unsigned int __arch_hweight32(unsigned int w)
+{
+	unsigned int count = __builtin_popcount(w);
+
+	return count;
+}
+
+/* Number of set bits in the low 16 bits of @w; higher bits are ignored. */
+static inline unsigned int __arch_hweight16(unsigned int w)
+{
+	unsigned int low16 = w & 0xffff;
+
+	return __builtin_popcount(low16);
+}
+
+/* Number of set bits in the low 8 bits of @w; higher bits are ignored. */
+static inline unsigned int __arch_hweight8(unsigned int w)
+{
+	unsigned int low8 = w & 0xff;
+
+	return __builtin_popcount(low8);
+}
+
+/* Number of set bits in the 64-bit value @w. */
+static inline unsigned long __arch_hweight64(__u64 w)
+{
+	unsigned long count = __builtin_popcountll(w);
+
+	return count;
+}
+
+#include <asm-generic/bitops/const_hweight.h>
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/find.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/le.h>
+
+#endif /* _ASM_TILE_BITOPS_H */
diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h
new file mode 100644
index 00000000..ddc4c1ef
--- /dev/null
+++ b/arch/tile/include/asm/bitops_32.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_BITOPS_32_H
+#define _ASM_TILE_BITOPS_32_H
+
+#include <linux/compiler.h>
+#include <linux/atomic.h>
+
+/*
+ * Tile-specific routines to support <asm/bitops.h>.
+ *
+ * Each applies @mask to the word at @p (or / and-not / xor, going by the
+ * names).  The test_and_*_bit() helpers below treat the return value as
+ * the word's previous contents.
+ */
+unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask);
+
+/**
+ * set_bit - atomically turn on one bit of a bitmap
+ * @nr: index of the bit to set
+ * @addr: base address of the bitmap
+ *
+ * This function is atomic and may not be reordered; use __set_bit()
+ * when the atomic guarantees are not needed.  The word holding bit @nr
+ * is located with BIT_WORD(), so @nr is not restricted to the first
+ * word of the bitmap.
+ */
+static inline void set_bit(unsigned nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *word = addr + BIT_WORD(nr);
+
+	_atomic_or(word, BIT_MASK(nr));
+}
+
+/**
+ * clear_bit - atomically turn off one bit of a bitmap
+ * @nr: index of the bit to clear
+ * @addr: base address of the bitmap
+ *
+ * clear_bit() is atomic and may not be reordered; use __clear_bit()
+ * when the atomic guarantees are not needed.  The word holding bit @nr
+ * is located with BIT_WORD(), so @nr is not restricted to the first
+ * word of the bitmap.
+ *
+ * clear_bit() may not contain a memory barrier.  When it is used for
+ * locking, pair it with smp_mb__before_clear_bit() and/or
+ * smp_mb__after_clear_bit() so the change is visible on other cpus.
+ */
+static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *word = addr + BIT_WORD(nr);
+
+	_atomic_andn(word, BIT_MASK(nr));
+}
+
+/**
+ * change_bit - atomically invert one bit of a bitmap
+ * @nr: index of the bit to toggle
+ * @addr: base address of the bitmap
+ *
+ * change_bit() is atomic and may not be reordered; use __change_bit()
+ * when the atomic guarantees are not needed.  The word holding bit @nr
+ * is located with BIT_WORD(), so @nr is not restricted to the first
+ * word of the bitmap.
+ */
+static inline void change_bit(unsigned nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *word = addr + BIT_WORD(nr);
+
+	_atomic_xor(word, BIT_MASK(nr));
+}
+
+/**
+ * test_and_set_bit - set a bit and report whether it was already set
+ * @nr: index of the bit to set
+ * @addr: base address of the bitmap
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ *
+ * Returns nonzero if the bit was set before the call, 0 otherwise.
+ */
+static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *word = addr + BIT_WORD(nr);
+	unsigned long bit = BIT_MASK(nr);
+	unsigned long old;
+
+	smp_mb();  /* barrier for proper semantics */
+	old = _atomic_or(word, bit);
+	return (old & bit) != 0;
+}
+
+/**
+ * test_and_clear_bit - clear a bit and report whether it was set
+ * @nr: index of the bit to clear
+ * @addr: base address of the bitmap
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ *
+ * Returns nonzero if the bit was set before the call, 0 otherwise.
+ */
+static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *word = addr + BIT_WORD(nr);
+	unsigned long bit = BIT_MASK(nr);
+	unsigned long old;
+
+	smp_mb();  /* barrier for proper semantics */
+	old = _atomic_andn(word, bit);
+	return (old & bit) != 0;
+}
+
+/**
+ * test_and_change_bit - invert a bit and report its previous state
+ * @nr: index of the bit to toggle
+ * @addr: base address of the bitmap
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ *
+ * Returns nonzero if the bit was set before the call, 0 otherwise.
+ */
+static inline int test_and_change_bit(unsigned nr,
+				      volatile unsigned long *addr)
+{
+	volatile unsigned long *word = addr + BIT_WORD(nr);
+	unsigned long bit = BIT_MASK(nr);
+	unsigned long old;
+
+	smp_mb();  /* barrier for proper semantics */
+	old = _atomic_xor(word, bit);
+	return (old & bit) != 0;
+}
+
+/*
+ * See discussion at smp_mb__before_atomic_dec() in <asm/atomic_32.h>.
+ * Here the "before" hook is a full smp_mb() and the "after" hook
+ * expands to an empty statement.
+ */
+#define smp_mb__before_clear_bit()	smp_mb()
+#define smp_mb__after_clear_bit()	do {} while (0)
+
+#include <asm-generic/bitops/ext2-atomic.h>
+
+#endif /* _ASM_TILE_BITOPS_32_H */
diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h
new file mode 100644
index 00000000..60b87ee5
--- /dev/null
+++ b/arch/tile/include/asm/bitops_64.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_BITOPS_64_H
+#define _ASM_TILE_BITOPS_64_H
+
+#include <linux/compiler.h>
+#include <linux/atomic.h>
+
+/* See <asm/bitops_32.h> for API comments. */
+
+static inline void set_bit(unsigned nr, volatile unsigned long *addr)
+{
+	/* Select the word that holds bit @nr, then OR the bit in. */
+	volatile unsigned long *word = addr + nr / BITS_PER_LONG;
+	unsigned long bit = 1UL << (nr % BITS_PER_LONG);
+
+	__insn_fetchor((void *)word, bit);
+}
+
+static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
+{
+	/* Select the word that holds bit @nr, then AND with the inverse. */
+	volatile unsigned long *word = addr + nr / BITS_PER_LONG;
+	unsigned long bit = 1UL << (nr % BITS_PER_LONG);
+
+	__insn_fetchand((void *)word, ~bit);
+}
+
+#define smp_mb__before_clear_bit()	smp_mb()	/* full barrier before clear_bit() */
+#define smp_mb__after_clear_bit()	smp_mb()	/* full barrier after clear_bit() */
+
+
+static inline void change_bit(unsigned nr, volatile unsigned long *addr)
+{
+	unsigned long bit = 1UL << (nr % BITS_PER_LONG);
+	unsigned long expected, seen;
+
+	addr += nr / BITS_PER_LONG;
+	seen = *addr;
+	/* Retry the compare-and-exchange until no one raced with us. */
+	for (;;) {
+		expected = seen;
+		seen = atomic64_cmpxchg((atomic64_t *)addr,
+					expected, expected ^ bit);
+		if (seen == expected)
+			break;
+	}
+}
+
+
+/*
+ * The test_and_xxx_bit() routines require a memory fence before we
+ * start the operation, and after the operation completes.  We use
+ * smp_mb() before, and rely on the "!= 0" comparison, plus a compiler
+ * barrier(), to block until the atomic op is complete.
+ */
+
+static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr)
+{
+	unsigned long bit = 1UL << (nr % BITS_PER_LONG);
+	unsigned long old;
+	int was_set;
+
+	smp_mb();  /* barrier for proper semantics */
+	old = __insn_fetchor((void *)(addr + nr / BITS_PER_LONG), bit);
+	/* The comparison consumes the fetched value before barrier(). */
+	was_set = (old & bit) != 0;
+	barrier();
+	return was_set;
+}
+
+
+static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr)
+{
+	unsigned long bit = 1UL << (nr % BITS_PER_LONG);
+	unsigned long old;
+	int was_set;
+
+	smp_mb();  /* barrier for proper semantics */
+	old = __insn_fetchand((void *)(addr + nr / BITS_PER_LONG), ~bit);
+	/* The comparison consumes the fetched value before barrier(). */
+	was_set = (old & bit) != 0;
+	barrier();
+	return was_set;
+}
+
+
+static inline int test_and_change_bit(unsigned nr,
+				      volatile unsigned long *addr)
+{
+	unsigned long bit = 1UL << (nr % BITS_PER_LONG);
+	unsigned long expected, seen;
+
+	addr += nr / BITS_PER_LONG;
+	seen = *addr;
+	/* Retry the compare-and-exchange until no one raced with us. */
+	for (;;) {
+		expected = seen;
+		seen = atomic64_cmpxchg((atomic64_t *)addr,
+					expected, expected ^ bit);
+		if (seen == expected)
+			break;
+	}
+	/* @seen now holds the word as it was just before the toggle. */
+	return (seen & bit) != 0;
+}
+
+#include <asm-generic/bitops/ext2-atomic-setbit.h>
+
+#endif /* _ASM_TILE_BITOPS_64_H */
diff --git a/arch/tile/include/asm/bitsperlong.h b/arch/tile/include/asm/bitsperlong.h
new file mode 100644
index 00000000..58c771f2
--- /dev/null
+++ b/arch/tile/include/asm/bitsperlong.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_BITSPERLONG_H
+#define _ASM_TILE_BITSPERLONG_H
+
+#ifdef __LP64__
+# define __BITS_PER_LONG 64
+#else
+# define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* _ASM_TILE_BITSPERLONG_H */
diff --git a/arch/tile/include/asm/byteorder.h b/arch/tile/include/asm/byteorder.h
new file mode 100644
index 00000000..9558416d
--- /dev/null
+++ b/arch/tile/include/asm/byteorder.h
@@ -0,0 +1 @@
+#include <linux/byteorder/little_endian.h>
diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
new file mode 100644
index 00000000..392e5333
--- /dev/null
+++ b/arch/tile/include/asm/cache.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_CACHE_H
+#define _ASM_TILE_CACHE_H
+
+#include <arch/chip.h>
+
+/* bytes per L1 data cache line */
+#define L1_CACHE_SHIFT		CHIP_L1D_LOG_LINE_SIZE()
+#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
+
+/*
+ * bytes per L2 cache line
+ *
+ * L2_CACHE_ALIGN(x) rounds x up to the next multiple of L2_CACHE_BYTES
+ * (add size-1, then mask off the low bits).
+ */
+#define L2_CACHE_SHIFT		CHIP_L2_LOG_LINE_SIZE()
+#define L2_CACHE_BYTES		(1 << L2_CACHE_SHIFT)
+#define L2_CACHE_ALIGN(x)	(((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES)
+
+/*
+ * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN.
+ */
+#ifndef __tilegx__
+#define ARCH_DMA_MINALIGN	L2_CACHE_BYTES
+#endif
+
+/* use the cache line size for the L2, which is where it counts */
+#define SMP_CACHE_BYTES_SHIFT	L2_CACHE_SHIFT
+#define SMP_CACHE_BYTES		L2_CACHE_BYTES
+#define INTERNODE_CACHE_SHIFT   L2_CACHE_SHIFT
+#define INTERNODE_CACHE_BYTES   L2_CACHE_BYTES
+
+/* Group together read-mostly things to avoid cache false sharing */
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+/*
+ * Attribute for data that is kept read/write coherent until the end of
+ * initialization, then bumped to read/only incoherent for performance.
+ */
+#define __write_once __attribute__((__section__(".w1data")))
+
+#endif /* _ASM_TILE_CACHE_H */
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
new file mode 100644
index 00000000..0fc63c48
--- /dev/null
+++ b/arch/tile/include/asm/cacheflush.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_CACHEFLUSH_H
+#define _ASM_TILE_CACHEFLUSH_H
+
+#include <arch/chip.h>
+
+/* Keep includes the same across arches.  */
+#include <linux/mm.h>
+#include <linux/cache.h>
+#include <arch/icache.h>
+
+/*
+ * Caches are physically-indexed and so don't need special treatment:
+ * every hook below expands to an empty statement, and
+ * ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE is defined as 0.
+ */
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define flush_dcache_page(page)			do { } while (0)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+#define flush_icache_page(vma, pg)		do { } while (0)
+#define flush_icache_user_range(vma, pg, adr, len)	do { } while (0)
+
+/* Flush the icache just on this cpu */
+extern void __flush_icache_range(unsigned long start, unsigned long end);
+
+/* Flush the entire icache on this cpu. */
+#define __flush_icache() __flush_icache_range(0, CHIP_L1I_CACHE_SIZE())
+
+#ifdef CONFIG_SMP
+/*
+ * When the kernel writes to its own text we need to do an SMP
+ * broadcast to make the L1I coherent everywhere.  This includes
+ * module load and single step.
+ */
+extern void flush_icache_range(unsigned long start, unsigned long end);
+#else
+#define flush_icache_range __flush_icache_range
+#endif
+
+/*
+ * Copy @len bytes from @src to @dst on behalf of a user mapping.
+ *
+ * Writing into an executable user page means the icache must be
+ * flushed.  Only the tiles running this process strictly need it, and
+ * the per-context-switch icache flush could be relied on to do less
+ * here, but for now the conservative choice is taken: a
+ * flush_icache_range() over the copied bytes whenever the vma has
+ * VM_EXEC set.
+ */
+static inline void copy_to_user_page(struct vm_area_struct *vma,
+				     struct page *page, unsigned long vaddr,
+				     void *dst, void *src, int len)
+{
+	unsigned long start = (unsigned long) dst;
+
+	memcpy(dst, src, len);
+
+	/* Non-executable mappings need no icache maintenance. */
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+
+	flush_icache_range(start, start + len);
+}
+
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy((dst), (src), (len))
+
+/*
+ * Invalidate a VA range.  The range is widened to L2 cacheline
+ * boundaries: the start is rounded down and the end rounded up.
+ *
+ * Note that on TILE64, __inv_buffer() actually flushes modified
+ * cache lines in addition to invalidating them, i.e., it's the
+ * same as __finv_buffer().
+ */
+static inline void __inv_buffer(void *buffer, size_t size)
+{
+	char *line = (char *)((long)buffer & -L2_CACHE_BYTES);
+	char *limit = (char *)L2_CACHE_ALIGN((long)buffer + size);
+
+	for (; line < limit; line += CHIP_INV_STRIDE())
+		__insn_inv(line);
+}
+
+/*
+ * Flush a VA range.  The range is widened to L2 cacheline boundaries:
+ * the start is rounded down and the end rounded up.
+ */
+static inline void __flush_buffer(void *buffer, size_t size)
+{
+	char *line = (char *)((long)buffer & -L2_CACHE_BYTES);
+	char *limit = (char *)L2_CACHE_ALIGN((long)buffer + size);
+
+	for (; line < limit; line += CHIP_FLUSH_STRIDE())
+		__insn_flush(line);
+}
+
+/*
+ * Flush and invalidate a VA range.  The range is widened to L2
+ * cacheline boundaries: the start is rounded down and the end rounded up.
+ */
+static inline void __finv_buffer(void *buffer, size_t size)
+{
+	char *line = (char *)((long)buffer & -L2_CACHE_BYTES);
+	char *limit = (char *)L2_CACHE_ALIGN((long)buffer + size);
+
+	for (; line < limit; line += CHIP_FINV_STRIDE())
+		__insn_finv(line);
+}
+
+
+/*
+ * Invalidate a VA range and wait for it to be complete.
+ * Runs __inv_buffer() over the range, then issues mb().
+ */
+static inline void inv_buffer(void *buffer, size_t size)
+{
+	__inv_buffer(buffer, size);
+	mb();
+}
+
+/*
+ * Flush a locally-homecached VA range and wait for the evicted
+ * cachelines to hit memory.
+ * Runs __flush_buffer() over the range, then issues mb_incoherent().
+ */
+static inline void flush_buffer_local(void *buffer, size_t size)
+{
+	__flush_buffer(buffer, size);
+	mb_incoherent();
+}
+
+/*
+ * Flush and invalidate a locally-homecached VA range and wait for the
+ * evicted cachelines to hit memory.
+ * Runs __finv_buffer() over the range, then issues mb_incoherent().
+ */
+static inline void finv_buffer_local(void *buffer, size_t size)
+{
+	__finv_buffer(buffer, size);
+	mb_incoherent();
+}
+
+/*
+ * Flush and invalidate a VA range that is homed remotely, waiting
+ * until the memory controller holds the flushed values.  If "hfh" is
+ * true, we will do a more expensive flush involving additional loads
+ * to make sure we have touched all the possible home cpus of a buffer
+ * that is homed with "hash for home".
+ */
+void finv_buffer_remote(void *buffer, size_t size, int hfh);
+
+/*
+ * On SMP systems, when the scheduler does migration-cost autodetection,
+ * it needs a way to flush as much of the CPU's caches as possible:
+ *
+ * TODO: fill this in!  For now this is an empty stub that flushes
+ * nothing.
+ */
+static inline void sched_cacheflush(void)
+{
+}
+
+#endif /* _ASM_TILE_CACHEFLUSH_H */
diff --git a/arch/tile/include/asm/checksum.h b/arch/tile/include/asm/checksum.h
new file mode 100644
index 00000000..a120766c
--- /dev/null
+++ b/arch/tile/include/asm/checksum.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_CHECKSUM_H
+#define _ASM_TILE_CHECKSUM_H
+
+#include <asm-generic/checksum.h>
+
+/*
+ * Allow us to provide a more optimized do_csum().
+ * The self-referential #define makes the name visible to the
+ * preprocessor -- presumably so generic code can test for an
+ * arch-provided version with #ifndef do_csum (confirm against the
+ * generic checksum implementation).
+ */
+__wsum do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+
+#endif /* _ASM_TILE_CHECKSUM_H */
diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h
new file mode 100644
index 00000000..276f067e
--- /dev/null
+++ b/arch/tile/include/asm/cmpxchg.h
@@ -0,0 +1,73 @@
+/*
+ * cmpxchg.h -- forked from asm/atomic.h with this copyright:
+ *
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ */
+
+#ifndef _ASM_TILE_CMPXCHG_H
+#define _ASM_TILE_CMPXCHG_H
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Nonexistent functions intended to cause link errors.
+ *
+ * xchg() and cmpxchg() below dispatch on sizeof(*(ptr)): 4-byte objects
+ * go through atomic_xchg()/atomic_cmpxchg(), 8-byte objects through
+ * atomic64_xchg()/atomic64_cmpxchg(), and any other size falls into the
+ * default case, which references one of these undefined functions.
+ * Both macros evaluate to a value of type typeof(*(ptr)).
+ * tas() is xchg() with a new value of 1.
+ */
+extern unsigned long __xchg_called_with_bad_pointer(void);
+extern unsigned long __cmpxchg_called_with_bad_pointer(void);
+
+#define xchg(ptr, x)							\
+	({								\
+		typeof(*(ptr)) __x;					\
+		switch (sizeof(*(ptr))) {				\
+		case 4:							\
+			__x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \
+				(atomic_t *)(ptr),			\
+				(u32)(typeof((x)-(x)))(x));		\
+			break;						\
+		case 8:							\
+			__x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \
+				(atomic64_t *)(ptr),			\
+				(u64)(typeof((x)-(x)))(x));		\
+			break;						\
+		default:						\
+			__xchg_called_with_bad_pointer();		\
+		}							\
+		__x;							\
+	})
+
+#define cmpxchg(ptr, o, n)						\
+	({								\
+		typeof(*(ptr)) __x;					\
+		switch (sizeof(*(ptr))) {				\
+		case 4:							\
+			__x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \
+				(atomic_t *)(ptr),			\
+				(u32)(typeof((o)-(o)))(o),		\
+				(u32)(typeof((n)-(n)))(n));		\
+			break;						\
+		case 8:							\
+			__x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \
+				(atomic64_t *)(ptr),			\
+				(u64)(typeof((o)-(o)))(o),		\
+				(u64)(typeof((n)-(n)))(n));		\
+			break;						\
+		default:						\
+			__cmpxchg_called_with_bad_pointer();		\
+		}							\
+		__x;							\
+	})
+
+#define tas(ptr) (xchg((ptr), 1))
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_TILE_CMPXCHG_H */
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
new file mode 100644
index 00000000..4b4b2896
--- /dev/null
+++ b/arch/tile/include/asm/compat.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_COMPAT_H
+#define _ASM_TILE_COMPAT_H
+
+/*
+ * Architecture specific compatibility types
+ */
+#include <linux/types.h>
+#include <linux/sched.h>
+
+#define COMPAT_USER_HZ	100
+
+/* "long" and pointer-based types are different. */
+typedef s32		compat_long_t;
+typedef u32		compat_ulong_t;
+typedef u32		compat_size_t;
+typedef s32		compat_ssize_t;
+typedef s32		compat_off_t;
+typedef s32		compat_time_t;
+typedef s32		compat_clock_t;
+typedef u32		compat_ino_t;
+typedef u32		compat_caddr_t;
+typedef	u32		compat_uptr_t;
+
+/*
+ * Many types are "int" or otherwise the same, so the native kernel
+ * typedefs are reused directly for compat tasks.
+ */
+typedef __kernel_pid_t compat_pid_t;
+typedef __kernel_uid_t __compat_uid_t;
+typedef __kernel_gid_t __compat_gid_t;
+typedef __kernel_uid32_t __compat_uid32_t;
+/* The gid alias is derived from the gid (not the uid) kernel typedef. */
+typedef __kernel_gid32_t __compat_gid32_t;
+typedef __kernel_mode_t compat_mode_t;
+typedef __kernel_dev_t compat_dev_t;
+typedef __kernel_loff_t compat_loff_t;
+typedef __kernel_nlink_t compat_nlink_t;
+typedef __kernel_ipc_pid_t compat_ipc_pid_t;
+typedef __kernel_daddr_t compat_daddr_t;
+typedef __kernel_fsid_t	compat_fsid_t;
+typedef __kernel_timer_t compat_timer_t;
+typedef __kernel_key_t compat_key_t;
+typedef int compat_int_t;
+typedef s64 compat_s64;
+typedef uint compat_uint_t;
+typedef u64 compat_u64;
+
+/*
+ * We use the same register dump format in 32-bit images.
+ * COMPAT_ELF_NGREG is the number of compat_elf_greg_t slots that fit in
+ * a struct pt_regs, and the regset is an array of that many slots.
+ */
+typedef unsigned long compat_elf_greg_t;
+#define COMPAT_ELF_NGREG (sizeof(struct pt_regs) / sizeof(compat_elf_greg_t))
+typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
+
+struct compat_timespec {
+	compat_time_t	tv_sec;
+	s32		tv_nsec;
+};
+
+struct compat_timeval {
+	compat_time_t	tv_sec;
+	s32		tv_usec;
+};
+
+#define compat_stat stat
+#define compat_statfs statfs
+
+struct compat_sysctl {
+	unsigned int	name;
+	int		nlen;
+	unsigned int	oldval;
+	unsigned int	oldlenp;
+	unsigned int	newval;
+	unsigned int	newlen;
+	unsigned int	__unused[4];
+};
+
+
+struct compat_flock {
+	short		l_type;
+	short		l_whence;
+	compat_off_t	l_start;
+	compat_off_t	l_len;
+	compat_pid_t	l_pid;
+};
+
+#define F_GETLK64	12	/*  using 'struct flock64' */
+#define F_SETLK64	13
+#define F_SETLKW64	14
+
+struct compat_flock64 {
+	short		l_type;
+	short		l_whence;
+	compat_loff_t	l_start;
+	compat_loff_t	l_len;
+	compat_pid_t	l_pid;
+};
+
+#define COMPAT_RLIM_INFINITY		0xffffffff
+
+#define _COMPAT_NSIG		64
+#define _COMPAT_NSIG_BPW	32
+
+typedef u32               compat_sigset_word;
+
+#define COMPAT_OFF_T_MAX	0x7fffffff
+#define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
+
+struct compat_ipc64_perm {
+	compat_key_t key;
+	__compat_uid32_t uid;
+	__compat_gid32_t gid;
+	__compat_uid32_t cuid;
+	__compat_gid32_t cgid;
+	unsigned short mode;
+	unsigned short __pad1;
+	unsigned short seq;
+	unsigned short __pad2;
+	compat_ulong_t unused1;
+	compat_ulong_t unused2;
+};
+
+struct compat_semid64_ds {
+	struct compat_ipc64_perm sem_perm;
+	compat_time_t  sem_otime;
+	compat_ulong_t __unused1;
+	compat_time_t  sem_ctime;
+	compat_ulong_t __unused2;
+	compat_ulong_t sem_nsems;
+	compat_ulong_t __unused3;
+	compat_ulong_t __unused4;
+};
+
+struct compat_msqid64_ds {
+	struct compat_ipc64_perm msg_perm;
+	compat_time_t  msg_stime;
+	compat_ulong_t __unused1;
+	compat_time_t  msg_rtime;
+	compat_ulong_t __unused2;
+	compat_time_t  msg_ctime;
+	compat_ulong_t __unused3;
+	compat_ulong_t msg_cbytes;
+	compat_ulong_t msg_qnum;
+	compat_ulong_t msg_qbytes;
+	compat_pid_t   msg_lspid;
+	compat_pid_t   msg_lrpid;
+	compat_ulong_t __unused4;
+	compat_ulong_t __unused5;
+};
+
+struct compat_shmid64_ds {
+	struct compat_ipc64_perm shm_perm;
+	compat_size_t  shm_segsz;
+	compat_time_t  shm_atime;
+	compat_ulong_t __unused1;
+	compat_time_t  shm_dtime;
+	compat_ulong_t __unused2;
+	compat_time_t  shm_ctime;
+	compat_ulong_t __unused3;
+	compat_pid_t   shm_cpid;
+	compat_pid_t   shm_lpid;
+	compat_ulong_t shm_nattch;
+	compat_ulong_t __unused4;
+	compat_ulong_t __unused5;
+};
+
+/*
+ * A pointer passed in from user mode. This should not
+ * be used for syscall parameters, just declare them
+ * as pointers because the syscall entry code will have
+ * appropriately converted them already.
+ */
+
+static inline void __user *compat_ptr(compat_uptr_t uptr)
+{
+	/* Reinterpret the 32-bit value as signed so it sign-extends. */
+	long extended = (s32)uptr;
+
+	return (void __user *)extended;
+}
+
+static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+{
+	unsigned long raw = (unsigned long)uptr;
+
+	/* Keep only the low 32 bits of the pointer value. */
+	return (u32)raw;
+}
+
+/*
+ * Convert a pointer for storage in a user's ptregs: the value is
+ * truncated to 32 bits and then sign-extended back to register width.
+ */
+static inline unsigned long ptr_to_compat_reg(void __user *uptr)
+{
+	long raw = (long __force)uptr;
+	int low32 = (int)raw;
+
+	return (long)low32;
+}
+
+static inline void __user *arch_compat_alloc_user_space(long len)
+{
+	/* Start from the current task's saved user stack pointer. */
+	void __user *sp = (void __user *)task_pt_regs(current)->sp;
+
+	/* Hand back the address @len bytes below it. */
+	return sp - len;
+}
+
+static inline int is_compat_task(void)
+{
+	struct thread_info *ti = current_thread_info();
+
+	/* Nonzero when TS_COMPAT is set in the thread's status word. */
+	return ti->status & TS_COMPAT;
+}
+
+extern int compat_setup_rt_frame(int sig, struct k_sigaction *ka,
+				 siginfo_t *info, sigset_t *set,
+				 struct pt_regs *regs);
+
+/* Compat syscalls. */
+struct compat_sigaction;
+struct compat_siginfo;
+struct compat_sigaltstack;
+long compat_sys_execve(const char __user *path,
+		       compat_uptr_t __user *argv,
+		       compat_uptr_t __user *envp, struct pt_regs *);
+long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
+			     struct compat_sigaction __user *oact,
+			     size_t sigsetsize);
+long compat_sys_rt_sigqueueinfo(int pid, int sig,
+				struct compat_siginfo __user *uinfo);
+long compat_sys_rt_sigreturn(struct pt_regs *);
+long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
+			    struct compat_sigaltstack __user *uoss_ptr,
+			    struct pt_regs *);
+long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high);
+long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high);
+long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count,
+			u32 dummy, u32 low, u32 high);
+long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count,
+			 u32 dummy, u32 low, u32 high);
+long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len);
+long compat_sys_sync_file_range2(int fd, unsigned int flags,
+				 u32 offset_lo, u32 offset_hi,
+				 u32 nbytes_lo, u32 nbytes_hi);
+long compat_sys_fallocate(int fd, int mode,
+			  u32 offset_lo, u32 offset_hi,
+			  u32 len_lo, u32 len_hi);
+long compat_sys_sched_rr_get_interval(compat_pid_t pid,
+				      struct compat_timespec __user *interval);
+
+/* Tilera Linux syscalls that don't have "compat" versions. */
+#define compat_sys_flush_cache sys_flush_cache
+
+/* These are the intvec_64.S trampolines. */
+long _compat_sys_execve(const char __user *path,
+			const compat_uptr_t __user *argv,
+			const compat_uptr_t __user *envp);
+long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
+			    struct compat_sigaltstack __user *uoss_ptr);
+long _compat_sys_rt_sigreturn(void);
+
+#endif /* _ASM_TILE_COMPAT_H */
diff --git a/arch/tile/include/asm/current.h b/arch/tile/include/asm/current.h
new file mode 100644
index 00000000..da21acf0
--- /dev/null
+++ b/arch/tile/include/asm/current.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_CURRENT_H
+#define _ASM_TILE_CURRENT_H
+
+#include <linux/thread_info.h>
+
+struct task_struct;
+
+static inline struct task_struct *get_current(void)
+{
+	/* The running task is recorded in the current thread_info. */
+	struct thread_info *ti = current_thread_info();
+
+	return ti->task;
+}
+#define current get_current()
+
+/* Return a usable "task_struct" pointer even if the real one is corrupt. */
+struct task_struct *validate_current(void);
+
+#endif /* _ASM_TILE_CURRENT_H */
diff --git a/arch/tile/include/asm/delay.h b/arch/tile/include/asm/delay.h
new file mode 100644
index 00000000..97b0e69e
--- /dev/null
+++ b/arch/tile/include/asm/delay.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_DELAY_H
+#define _ASM_TILE_DELAY_H
+
+/* Deliberately undefined: a too-large constant delay then fails to link. */
+extern void __bad_udelay(void);
+extern void __bad_ndelay(void);
+
+extern void __udelay(unsigned long usecs);
+extern void __ndelay(unsigned long nsecs);
+extern void __delay(unsigned long loops);
+
+#define udelay(n) (__builtin_constant_p(n) ? \
+	((n) > 20000 ? __bad_udelay() : __ndelay((n) * 1000)) : \
+	__udelay(n))
+
+#define ndelay(n) (__builtin_constant_p(n) ? \
+	((n) > 20000 ? __bad_ndelay() : __ndelay(n)) : \
+	__ndelay(n))
+
+#endif /* _ASM_TILE_DELAY_H */
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
new file mode 100644
index 00000000..eaa06d17
--- /dev/null
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_DMA_MAPPING_H
+#define _ASM_TILE_DMA_MAPPING_H
+
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/cache.h>
+#include <linux/io.h>
+
+/*
+ * Note that on x86 and powerpc, there is a "struct dma_mapping_ops"
+ * that is used for all the DMA operations.  For now, we don't have an
+ * equivalent on tile, because we only have a single way of doing DMA.
+ * (Tilera bug 7994 to use dma_mapping_ops.)
+ */
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
+			  enum dma_data_direction);
+extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
+			     size_t size, enum dma_data_direction);
+extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+	       enum dma_data_direction);
+extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+			 int nhwentries, enum dma_data_direction);
+extern dma_addr_t dma_map_page(struct device *dev, struct page *page,
+			       unsigned long offset, size_t size,
+			       enum dma_data_direction);
+extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
+			   size_t size, enum dma_data_direction);
+extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+				int nelems, enum dma_data_direction);
+extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+				   int nelems, enum dma_data_direction);
+
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			   dma_addr_t *dma_handle, gfp_t flag);
+
+void dma_free_coherent(struct device *dev, size_t size,
+			 void *vaddr, dma_addr_t dma_handle);
+
+extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t,
+				    enum dma_data_direction);
+extern void dma_sync_single_for_device(struct device *, dma_addr_t,
+				       size_t, enum dma_data_direction);
+extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t,
+					  unsigned long offset, size_t,
+					  enum dma_data_direction);
+extern void dma_sync_single_range_for_device(struct device *, dma_addr_t,
+					     unsigned long offset, size_t,
+					     enum dma_data_direction);
+extern void dma_cache_sync(struct device *dev, void *vaddr, size_t,
+			   enum dma_data_direction);
+
+static inline int
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;	/* unconditional: no mapping is ever reported as failed */
+}
+
+static inline int
+dma_supported(struct device *dev, u64 mask)
+{
+	return 1;	/* unconditional: every mask is accepted */
+}
+
+static inline int
+dma_set_mask(struct device *dev, u64 mask)
+{
+	/* -EIO when the device has no mask slot or the mask is unsupported. */
+	if (dev->dma_mask && dma_supported(dev, mask)) {
+		*dev->dma_mask = mask;	/* store the caller's mask */
+		return 0;
+	}
+	return -EIO;
+}
+
+#endif /* _ASM_TILE_DMA_MAPPING_H */
diff --git a/arch/tile/include/asm/dma.h b/arch/tile/include/asm/dma.h
new file mode 100644
index 00000000..12a7ca16
--- /dev/null
+++ b/arch/tile/include/asm/dma.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_DMA_H
+#define _ASM_TILE_DMA_H
+
+#include <asm-generic/dma.h>
+
+/* Needed by drivers/pci/quirks.c */
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#endif
+
+#endif /* _ASM_TILE_DMA_H */
diff --git a/arch/tile/include/asm/edac.h b/arch/tile/include/asm/edac.h
new file mode 100644
index 00000000..87fc83ee
--- /dev/null
+++ b/arch/tile/include/asm/edac.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_EDAC_H
+#define _ASM_TILE_EDAC_H
+
+/* ECC atomic, DMA, SMP and interrupt safe scrub function */
+
+static inline void atomic_scrub(void *va, u32 size)
+{
+	/*
+	 * There is nothing to be done here because CE is
+	 * corrected by the mshim; va and size go unused and
+	 * the body is left intentionally empty.
+	 */
+}
+
+#endif /* _ASM_TILE_EDAC_H */
diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
new file mode 100644
index 00000000..623a6bb7
--- /dev/null
+++ b/arch/tile/include/asm/elf.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_ELF_H
+#define _ASM_TILE_ELF_H
+
+/*
+ * ELF register definitions.
+ */
+
+#include <arch/chip.h>
+
+#include <linux/ptrace.h>
+#include <asm/byteorder.h>
+#include <asm/page.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+#define EM_TILE64  187
+#define EM_TILEPRO 188
+#define EM_TILEGX  191
+
+/* Provide a nominal data structure. */
+#define ELF_NFPREG	0
+typedef double elf_fpreg_t;
+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+
+#ifdef __tilegx__
+#define ELF_CLASS	ELFCLASS64
+#else
+#define ELF_CLASS	ELFCLASS32
+#endif
+#define ELF_DATA	ELFDATA2LSB
+
+/*
+ * There seems to be a bug in how compat_binfmt_elf.c works: it
+ * #undefs ELF_ARCH, but it is then used in binfmt_elf.c for fill_note_info().
+ * Hack around this by providing an enum value of ELF_ARCH.
+ */
+enum { ELF_ARCH = CHIP_ELF_TYPE() };
+#define ELF_ARCH ELF_ARCH
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x)  \
+	((x)->e_ident[EI_CLASS] == ELF_CLASS && \
+	 (x)->e_machine == CHIP_ELF_TYPE())
+
+/* The module loader only handles a few relocation types. */
+#ifndef __tilegx__
+#define R_TILE_32                 1
+#define R_TILE_JOFFLONG_X1       15
+#define R_TILE_IMM16_X0_LO       25
+#define R_TILE_IMM16_X1_LO       26
+#define R_TILE_IMM16_X0_HA       29
+#define R_TILE_IMM16_X1_HA       30
+#else
+#define R_TILEGX_64                       1
+#define R_TILEGX_JUMPOFF_X1              21
+#define R_TILEGX_IMM16_X0_HW0            36
+#define R_TILEGX_IMM16_X1_HW0            37
+#define R_TILEGX_IMM16_X0_HW1            38
+#define R_TILEGX_IMM16_X1_HW1            39
+#define R_TILEGX_IMM16_X0_HW2_LAST       48
+#define R_TILEGX_IMM16_X1_HW2_LAST       49
+#endif
+
+/* Use standard page size for core dumps. */
+#define ELF_EXEC_PAGESIZE	PAGE_SIZE
+
+/*
+ * This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+ * use of this is to invoke "./ld.so someprog" to test out a new version of
+ * the loader.  We need to make sure that it is out of the way of the program
+ * that it will "exec", and that there is sufficient room for the brk.
+ */
+#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
+
+#define ELF_CORE_COPY_REGS(_dest, _regs)			\
+	memcpy((char *) &_dest, (char *) _regs,			\
+	       sizeof(struct pt_regs));
+
+/* No additional FP registers to copy. */
+#define ELF_CORE_COPY_FPREGS(t, fpu) 0
+
+/*
+ * This yields a mask that user programs can use to figure out what
+ * instruction set this CPU supports.  This could be done in user space,
+ * but it's not easy, and we've already done it here.
+ */
+#define ELF_HWCAP	(0)
+
+/*
+ * This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization.  This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+ */
+#define ELF_PLATFORM  (NULL)
+
+extern void elf_plat_init(struct pt_regs *regs, unsigned long load_addr);
+
+#define ELF_PLAT_INIT(_r, load_addr) elf_plat_init(_r, load_addr)
+
+extern int dump_task_regs(struct task_struct *, elf_gregset_t *);
+#define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs)
+
+/* Tilera Linux has no personalities currently, so no need to do anything. */
+#define SET_PERSONALITY(ex) do { } while (0)
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+/* Support auto-mapping of the user interrupt vectors. */
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+				       int executable_stack);
+#ifdef CONFIG_COMPAT
+
+#define COMPAT_ELF_PLATFORM "tilegx-m32"
+
+/*
+ * "Compat" binaries have the same machine type, but 32-bit class,
+ * since they're not a separate machine type, but just a 32-bit
+ * variant of the standard 64-bit architecture.
+ */
+#define compat_elf_check_arch(x)  \
+	((x)->e_ident[EI_CLASS] == ELFCLASS32 && \
+	 (x)->e_machine == CHIP_ELF_TYPE())
+
+#define compat_start_thread(regs, ip, usp) do { /* set compat pc and sp */ \
+		(regs)->pc = ptr_to_compat_reg((void *)(ip)); \
+		(regs)->sp = ptr_to_compat_reg((void *)(usp)); \
+	} while (0)
+
+/*
+ * Use SET_PERSONALITY to indicate compatibility via TS_COMPAT.
+ */
+#undef SET_PERSONALITY
+#define SET_PERSONALITY(ex) \
+do { \
+	current->personality = PER_LINUX; \
+	current_thread_info()->status &= ~TS_COMPAT; \
+} while (0)
+#define COMPAT_SET_PERSONALITY(ex) \
+do { \
+	current->personality = PER_LINUX_32BIT; \
+	current_thread_info()->status |= TS_COMPAT; \
+} while (0)
+
+#define COMPAT_ELF_ET_DYN_BASE (0xffffffff / 3 * 2)
+
+#endif /* CONFIG_COMPAT */
+
+#endif /* _ASM_TILE_ELF_H */
diff --git a/arch/tile/include/asm/exec.h b/arch/tile/include/asm/exec.h
new file mode 100644
index 00000000..a714e195
--- /dev/null
+++ b/arch/tile/include/asm/exec.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_EXEC_H
+#define _ASM_TILE_EXEC_H
+
+#define arch_align_stack(x) (x)
+
+#endif /* _ASM_TILE_EXEC_H */
diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h
new file mode 100644
index 00000000..c66f7933
--- /dev/null
+++ b/arch/tile/include/asm/fixmap.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 1998 Ingo Molnar
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_FIXMAP_H
+#define _ASM_TILE_FIXMAP_H
+
+#include <asm/page.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/kernel.h>
+#ifdef CONFIG_HIGHMEM
+#include <linux/threads.h>
+#include <asm/kmap_types.h>
+#endif
+
+#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the end of supervisor virtual memory backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * page-sized (or larger if used with an increment
+ * higher than 1).  Use set_fixmap(idx, phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ *
+ * We don't bother with a FIX_HOLE since above the fixmaps
+ * is unmapped memory in any case.
+ */
+enum fixed_addresses {
+#ifdef CONFIG_HIGHMEM
+	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
+	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, /* KM_TYPE_NR per cpu */
+#endif
+	__end_of_permanent_fixed_addresses,	/* count of permanent slots */
+
+	/*
+	 * Temporary boot-time mappings, used before ioremap() is functional.
+	 * Not currently needed by the Tile architecture.
+	 */
+#define NR_FIX_BTMAPS	0	/* zero, so the #else arm below is the live one */
+#if NR_FIX_BTMAPS
+	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
+	__end_of_fixed_addresses
+#else
+	__end_of_fixed_addresses = __end_of_permanent_fixed_addresses
+#endif
+};
+
+extern void __set_fixmap(enum fixed_addresses idx,
+			 unsigned long phys, pgprot_t flags);
+
+#define set_fixmap(idx, phys) \
+		__set_fixmap(idx, phys, PAGE_KERNEL)
+#define clear_fixmap(idx) \
+		__set_fixmap(idx, 0, __pgprot(0))
+
+#define __FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define __FIXADDR_BOOT_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START		(FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE)
+#define FIXADDR_BOOT_START	(FIXADDR_TOP + PAGE_SIZE - __FIXADDR_BOOT_SIZE)
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static __always_inline unsigned long fix_to_virt(const unsigned int idx)
+{
+	/*
+	 * After inlining, the range test below is expected to fold away
+	 * completely.  It only survives when someone uses fixaddr
+	 * indices in an illegal way (such as mixing up address types
+	 * or using out-of-range indices).
+	 *
+	 * A surviving branch calls __this_fixmap_does_not_exist(), which is
+	 * presumably never defined, so the linker complains loudly.
+	 */
+	if (idx >= __end_of_fixed_addresses)
+		__this_fixmap_does_not_exist();
+
+	return __fix_to_virt(idx);
+}
+
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{	/* NOTE(review): vaddr == FIXADDR_TOP (index 0) is rejected; confirm. */
+	BUG_ON(!(vaddr < FIXADDR_TOP && vaddr >= FIXADDR_START));
+	return __virt_to_fix(vaddr);	/* pages below FIXADDR_TOP */
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_FIXMAP_H */
diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h
new file mode 100644
index 00000000..461459b0
--- /dev/null
+++ b/arch/tile/include/asm/ftrace.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_FTRACE_H
+#define _ASM_TILE_FTRACE_H
+
+/* empty */
+
+#endif /* _ASM_TILE_FTRACE_H */
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
new file mode 100644
index 00000000..d03ec124
--- /dev/null
+++ b/arch/tile/include/asm/futex.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * These routines make two important assumptions:
+ *
+ * 1. atomic_t is really an int and can be freely cast back and forth
+ *    (validated in __init_atomic_per_cpu).
+ *
+ * 2. userspace uses sys_cmpxchg() for all atomic operations, thus using
+ *    the same locking convention that all the kernel atomic routines use.
+ */
+
+#ifndef _ASM_TILE_FUTEX_H
+#define _ASM_TILE_FUTEX_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+
+extern struct __get_user futex_set(u32 __user *v, int i);
+extern struct __get_user futex_add(u32 __user *v, int n);
+extern struct __get_user futex_or(u32 __user *v, int n);
+extern struct __get_user futex_andn(u32 __user *v, int n);
+extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n);
+
+#ifndef __tilegx__
+extern struct __get_user futex_xor(u32 __user *v, int n);
+#else
+static inline struct __get_user futex_xor(u32 __user *uaddr, int n)
+{
+	struct __get_user res = __get_user_4(uaddr);	/* initial read */
+	int seen;
+	/* Retry the exchange until it errors out or the word was unchanged. */
+	while (!res.err) {
+		seen = res.val;
+		res = futex_cmpxchg(uaddr, seen, seen ^ n);
+		if (seen == res.val)
+			break;
+	}
+	return res;
+}
+#endif
+
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;	/* bits 28-30: operation */
+	int cmp = (encoded_op >> 24) & 15;	/* bits 24-27: comparison */
+	int oparg = (encoded_op << 8) >> 20;	/* bits 12-23, sign-extended */
+	int cmparg = (encoded_op << 20) >> 20;	/* bits 0-11, sign-extended */
+	int ret;
+	struct __get_user asm_ret;
+
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;	/* NOTE(review): shift count unchecked */
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	pagefault_disable();	/* re-enabled right after the switch */
+	switch (op) {
+	case FUTEX_OP_SET:
+		asm_ret = futex_set(uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		asm_ret = futex_add(uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		asm_ret = futex_or(uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		asm_ret = futex_andn(uaddr, oparg);
+		break;
+	case FUTEX_OP_XOR:
+		asm_ret = futex_xor(uaddr, oparg);
+		break;
+	default:
+		asm_ret.err = -ENOSYS;	/* val stays unset; not read below */
+	}
+	pagefault_enable();
+
+	ret = asm_ret.err;
+
+	if (!ret) {	/* op succeeded: result becomes the comparison outcome */
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ:
+			ret = (asm_ret.val == cmparg);
+			break;
+		case FUTEX_OP_CMP_NE:
+			ret = (asm_ret.val != cmparg);
+			break;
+		case FUTEX_OP_CMP_LT:
+			ret = (asm_ret.val < cmparg);
+			break;
+		case FUTEX_OP_CMP_GE:
+			ret = (asm_ret.val >= cmparg);
+			break;
+		case FUTEX_OP_CMP_LE:
+			ret = (asm_ret.val <= cmparg);
+			break;
+		case FUTEX_OP_CMP_GT:
+			ret = (asm_ret.val > cmparg);
+			break;
+		default:
+			ret = -ENOSYS;
+		}
+	}
+	return ret;	/* op error, -ENOSYS, or the 0/1 comparison result */
+}
+
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+						u32 oldval, u32 newval)
+{
+	struct __get_user outcome;
+
+	if (access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) {
+		outcome = futex_cmpxchg(uaddr, oldval, newval);
+		*uval = outcome.val;	/* written even when err is set */
+		return outcome.err;
+	}
+	return -EFAULT;	/* user word is not writable */
+}
+
+#ifndef __tilegx__
+/* Return failure from the atomic wrappers. */
+struct __get_user __atomic_bad_address(int __user *addr);
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_FUTEX_H */
diff --git a/arch/tile/include/asm/hardirq.h b/arch/tile/include/asm/hardirq.h
new file mode 100644
index 00000000..822390f9
--- /dev/null
+++ b/arch/tile/include/asm/hardirq.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_HARDIRQ_H
+#define _ASM_TILE_HARDIRQ_H
+
+#include <linux/threads.h>
+#include <linux/cache.h>
+
+#include <asm/irq.h>
+
+typedef struct {
+	unsigned int __softirq_pending;
+	long idle_timestamp;
+
+	/* Hard interrupt statistics. */
+	unsigned int irq_timer_count;
+	unsigned int irq_syscall_count;
+	unsigned int irq_resched_count;
+	unsigned int irq_hv_flush_count;
+	unsigned int irq_call_count;
+	unsigned int irq_hv_msg_count;
+	unsigned int irq_dev_intr_count;
+
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+
+#define __ARCH_IRQ_STAT
+#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
+
+#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+
+#define HARDIRQ_BITS	8
+
+#endif /* _ASM_TILE_HARDIRQ_H */
diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h
new file mode 100644
index 00000000..2ac42284
--- /dev/null
+++ b/arch/tile/include/asm/hardwall.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Provide methods for the HARDWALL_FILE for accessing the UDN.
+ */
+
+#ifndef _ASM_TILE_HARDWALL_H
+#define _ASM_TILE_HARDWALL_H
+
+#include <linux/ioctl.h>
+
+#define HARDWALL_IOCTL_BASE 0xa2
+
+/*
+ * The HARDWALL_CREATE() ioctl is a macro with a "size" argument.
+ * The resulting ioctl value is passed to the kernel in conjunction
+ * with a pointer to a little-endian bitmask of cpus, which must be
+ * physically in a rectangular configuration on the chip.
+ * The "size" is the number of bytes of cpu mask data.
+ */
+#define _HARDWALL_CREATE 1
+#define HARDWALL_CREATE(size) \
+  _IOC(_IOC_READ, HARDWALL_IOCTL_BASE, _HARDWALL_CREATE, (size))
+
+#define _HARDWALL_ACTIVATE 2
+#define HARDWALL_ACTIVATE \
+  _IO(HARDWALL_IOCTL_BASE, _HARDWALL_ACTIVATE)
+
+#define _HARDWALL_DEACTIVATE 3
+#define HARDWALL_DEACTIVATE \
+ _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE)
+
+#define _HARDWALL_GET_ID 4
+#define HARDWALL_GET_ID \
+ _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID)
+
+#ifndef __KERNEL__
+
+/* This is the canonical name expected by userspace. */
+#define HARDWALL_FILE "/dev/hardwall"
+
+#else
+
+/* /proc hooks for hardwall. */
+struct proc_dir_entry;
+#ifdef CONFIG_HARDWALL
+void proc_tile_hardwall_init(struct proc_dir_entry *root);
+int proc_pid_hardwall(struct task_struct *task, char *buffer);
+#else
+static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {}
+#endif
+
+#endif
+
+#endif /* _ASM_TILE_HARDWALL_H */
diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h
new file mode 100644
index 00000000..fc8429a3
--- /dev/null
+++ b/arch/tile/include/asm/highmem.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 1999 Gerhard Wichert, Siemens AG
+ *                   Gerhard.Wichert@pdb.siemens.de
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Used in CONFIG_HIGHMEM systems for memory pages which
+ * are not addressable by direct kernel virtual addresses.
+ *
+ */
+
+#ifndef _ASM_TILE_HIGHMEM_H
+#define _ASM_TILE_HIGHMEM_H
+
+#include <linux/interrupt.h>
+#include <linux/threads.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *pkmap_page_table;
+
+/*
+ * Ordering is:
+ *
+ * FIXADDR_TOP
+ *			fixed_addresses
+ * FIXADDR_START
+ *			temp fixed addresses
+ * FIXADDR_BOOT_START
+ *			Persistent kmap area
+ * PKMAP_BASE
+ * VMALLOC_END
+ *			Vmalloc area
+ * VMALLOC_START
+ * high_memory
+ */
+#define LAST_PKMAP_MASK (LAST_PKMAP-1)
+#define PKMAP_NR(virt)  (((virt) - PKMAP_BASE) >> PAGE_SHIFT) /* addr -> nr */
+#define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT)) /* nr -> addr */
+
+void *kmap_high(struct page *page);
+void kunmap_high(struct page *page);
+void *kmap(struct page *page);
+void kunmap(struct page *page);
+void *kmap_fix_kpte(struct page *page, int finished);
+
+/* This macro is used only in map_new_virtual() to map "page". */
+#define kmap_prot page_to_kpgprot(page)
+
+void *kmap_atomic(struct page *page);
+void __kunmap_atomic(void *kvaddr);
+void *kmap_atomic_pfn(unsigned long pfn);
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
+struct page *kmap_atomic_to_page(void *ptr);
+void *kmap_atomic_prot(struct page *page, pgprot_t prot);
+void kmap_atomic_fix_kpte(struct page *page, int finished);
+
+#define flush_cache_kmaps()	do { } while (0)
+
+#endif /* _ASM_TILE_HIGHMEM_H */
diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h
new file mode 100644
index 00000000..a8243865
--- /dev/null
+++ b/arch/tile/include/asm/homecache.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Handle issues around the Tile "home cache" model of coherence.
+ */
+
+#ifndef _ASM_TILE_HOMECACHE_H
+#define _ASM_TILE_HOMECACHE_H
+
+#include <asm/page.h>
+#include <linux/cpumask.h>
+
+struct page;
+struct task_struct;
+struct vm_area_struct;
+struct zone;
+
+/*
+ * Coherence point for the page is its memory controller.
+ * It is not present in any cache (L1 or L2).
+ */
+#define PAGE_HOME_UNCACHED -1
+
+/*
+ * Is this page immutable (unwritable) and thus able to be cached more
+ * widely than would otherwise be possible?  On tile64 this means we
+ * mark the PTE to cache locally; on tilepro it means we have "nc" set.
+ */
+#define PAGE_HOME_IMMUTABLE -2
+
+/*
+ * Each cpu considers its own cache to be the home for the page,
+ * which makes it incoherent.
+ */
+#define PAGE_HOME_INCOHERENT -3
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+/* Home for the page is distributed via hash-for-home. */
+#define PAGE_HOME_HASH -4
+#endif
+
+/* Homing is unknown or unspecified.  Not valid for page_home(). */
+#define PAGE_HOME_UNKNOWN -5
+
+/* Home on the current cpu.  Not valid for page_home(). */
+#define PAGE_HOME_HERE -6
+
+/* Support wrapper to use instead of explicit hv_flush_remote(). */
+extern void flush_remote(unsigned long cache_pfn, unsigned long cache_length,
+			 const struct cpumask *cache_cpumask,
+			 HV_VirtAddr tlb_va, unsigned long tlb_length,
+			 unsigned long tlb_pgsize,
+			 const struct cpumask *tlb_cpumask,
+			 HV_Remote_ASID *asids, int asidcount);
+
+/* Set homing-related bits in a PTE (can also pass a pgprot_t). */
+extern pte_t pte_set_home(pte_t pte, int home);
+
+/* Do a cache eviction on the specified cpus. */
+extern void homecache_evict(const struct cpumask *mask);
+
+/*
+ * Change a kernel page's homecache.  It must not be mapped in user space.
+ * If !CONFIG_HOMECACHE, only usable on LOWMEM, and can only be called when
+ * no other cpu can reference the page, and causes a full-chip cache/TLB flush.
+ */
+extern void homecache_change_page_home(struct page *, int order, int home);
+
+/*
+ * Flush a page out of whatever cache(s) it is in.
+ * This is more than just finv, since it properly handles waiting
+ * for the data to reach memory on tilepro, but it can be quite
+ * heavyweight, particularly on hash-for-home memory.
+ */
+extern void homecache_flush_cache(struct page *, int order);
+
+/*
+ * Allocate a page with the given GFP flags, home, and optionally
+ * node.  These routines are actually just wrappers around the normal
+ * alloc_pages() / alloc_pages_node() functions, which set and clear
+ * a per-cpu variable to communicate with homecache_new_kernel_page().
+ * If !CONFIG_HOMECACHE, uses homecache_change_page_home().
+ */
+extern struct page *homecache_alloc_pages(gfp_t gfp_mask,
+					  unsigned int order, int home);
+extern struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
+					       unsigned int order, int home);
+#define homecache_alloc_page(gfp_mask, home) \
+  homecache_alloc_pages(gfp_mask, 0, home)
+
+/*
+ * These routines are just pass-throughs to free_pages() when
+ * we support full homecaching.  If !CONFIG_HOMECACHE, then these
+ * routines use homecache_change_page_home() to reset the home
+ * back to the default before returning the page to the allocator.
+ */
+void homecache_free_pages(unsigned long addr, unsigned int order);
+#define homecache_free_page(addr) /* order-0 form; takes an address */ \
+  homecache_free_pages((addr), 0)
+
+
+
+/*
+ * Report the page home for LOWMEM pages by examining their kernel PTE,
+ * or for highmem pages as the default home.
+ */
+extern int page_home(struct page *);
+
+#define homecache_migrate_kthread() do {} while (0)
+
+#define homecache_kpte_lock() 0
+#define homecache_kpte_unlock(flags) do {} while (0)
+
+
+#endif /* _ASM_TILE_HOMECACHE_H */
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
new file mode 100644
index 00000000..d396d180
--- /dev/null
+++ b/arch/tile/include/asm/hugetlb.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_HUGETLB_H
+#define _ASM_TILE_HUGETLB_H
+
+#include <asm/page.h>
+
+/* Always reports that the range is not a hugepage-only range. */
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr, unsigned long len)
+{
+	return 0;
+}
+
+/*
+ * Fallback range check used when the arch supplies nothing else:
+ * the range is accepted (0) only if neither addr nor len has a bit
+ * set outside huge_page_mask() for the file's hstate; otherwise
+ * -EINVAL is returned.
+ */
+static inline int prepare_hugepage_range(struct file *file,
+					 unsigned long addr, unsigned long len)
+{
+	struct hstate *hs = hstate_file(file);
+
+	/* One test covers both values: any stray bit in either fails. */
+	return ((len | addr) & ~huge_page_mask(hs)) ? -EINVAL : 0;
+}
+
+/* Empty hook: this header adds no prefault work. */
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+/* Delegates to free_pgd_range() with the same arguments. */
+static inline void
+hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
+		       unsigned long end, unsigned long floor,
+		       unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+/* Installs the huge PTE with set_pte(); mm and addr are not used. */
+static inline void
+set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+		pte_t pte)
+{
+	set_pte(ptep, pte);
+}
+
+/* Delegates to ptep_get_and_clear() and returns its result. */
+static inline pte_t
+huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+/* Delegates to ptep_clear_flush() with the same arguments. */
+static inline void
+huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr,
+		      pte_t *ptep)
+{
+	ptep_clear_flush(vma, addr, ptep);
+}
+
+/* A huge PTE is "none" exactly when pte_none() says so. */
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+/* Returns pte_wrprotect() of the given huge PTE value. */
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+/* Delegates to ptep_set_wrprotect() with the same arguments. */
+static inline void
+huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+/* Delegates to ptep_set_access_flags() and returns its result. */
+static inline int
+huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr,
+			   pte_t *ptep, pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+/* Reads the huge PTE with a plain dereference. */
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
+/* No per-page preparation is done here; always returns 0. */
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+/* Counterpart of arch_prepare_hugepage(); likewise does nothing. */
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+#endif /* _ASM_TILE_HUGETLB_H */
diff --git a/arch/tile/include/asm/hv_driver.h b/arch/tile/include/asm/hv_driver.h
new file mode 100644
index 00000000..ad614de8
--- /dev/null
+++ b/arch/tile/include/asm/hv_driver.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * This header defines a wrapper interface for managing hypervisor
+ * device calls that will result in an interrupt at some later time.
+ * In particular, this provides wrappers for hv_dev_preada() and
+ * hv_dev_pwritea().
+ */
+
+#ifndef _ASM_TILE_HV_DRIVER_H
+#define _ASM_TILE_HV_DRIVER_H
+
+#include <hv/hypervisor.h>
+
+struct hv_driver_cb;
+
+/*
+ * A callback to be invoked when an operation completes.  It is handed
+ * the hv_driver_cb describing the call and an __hv32 result value.
+ */
+typedef void hv_driver_callback_t(struct hv_driver_cb *cb, __hv32 result);
+
+/*
+ * A structure to hold information about an outstanding call.
+ * The driver must allocate a separate structure for each call.
+ * The tile_hv_dev_preada()/tile_hv_dev_pwritea() wrappers in this
+ * header pass a pointer to this structure to the hypervisor call,
+ * cast to HV_IntArg.
+ */
+struct hv_driver_cb {
+	hv_driver_callback_t *callback;  /* Function to call on interrupt. */
+	void *dev;                       /* Driver-specific state variable. */
+};
+
+/*
+ * Wrapper for invoking hv_dev_preada().  All arguments are forwarded
+ * unchanged except the hv_driver_cb pointer, which is cast to
+ * HV_IntArg and passed as the final argument.  The return value of
+ * hv_dev_preada() is returned as-is.
+ */
+static inline int
+tile_hv_dev_preada(int devhdl, __hv32 flags, __hv32 sgl_len,
+		   HV_SGL sgl[/* sgl_len */], __hv64 offset,
+		   struct hv_driver_cb *callback)
+{
+	return hv_dev_preada(devhdl, flags, sgl_len, sgl,
+			     offset, (HV_IntArg)callback);
+}
+
+/*
+ * Wrapper for invoking hv_dev_pwritea().  All arguments are forwarded
+ * unchanged except the hv_driver_cb pointer, which is cast to
+ * HV_IntArg and passed as the final argument.  The return value of
+ * hv_dev_pwritea() is returned as-is.
+ */
+static inline int
+tile_hv_dev_pwritea(int devhdl, __hv32 flags, __hv32 sgl_len,
+		    HV_SGL sgl[/* sgl_len */], __hv64 offset,
+		    struct hv_driver_cb *callback)
+{
+	return hv_dev_pwritea(devhdl, flags, sgl_len, sgl,
+			      offset, (HV_IntArg)callback);
+}
+
+
+#endif /* _ASM_TILE_HV_DRIVER_H */
diff --git a/arch/tile/include/asm/hw_irq.h b/arch/tile/include/asm/hw_irq.h
new file mode 100644
index 00000000..4fac5fbf
--- /dev/null
+++ b/arch/tile/include/asm/hw_irq.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_HW_IRQ_H
+#define _ASM_TILE_HW_IRQ_H
+
+#endif /* _ASM_TILE_HW_IRQ_H */
diff --git a/arch/tile/include/asm/ide.h b/arch/tile/include/asm/ide.h
new file mode 100644
index 00000000..3c6f2ed8
--- /dev/null
+++ b/arch/tile/include/asm/ide.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_IDE_H
+#define _ASM_TILE_IDE_H
+
+/* For IDE on PCI */
+#define MAX_HWIFS       10
+
+#define ide_default_io_ctl(base)	(0)	/* always 0; base is ignored */
+
+#include <asm-generic/ide_iops.h>
+
+#endif /* _ASM_TILE_IDE_H */
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
new file mode 100644
index 00000000..d2152deb
--- /dev/null
+++ b/arch/tile/include/asm/io.h
@@ -0,0 +1,305 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_IO_H
+#define _ASM_TILE_IO_H
+
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <asm/page.h>
+
+#define IO_SPACE_LIMIT 0xfffffffful
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access.
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer.
+ * Here this is the identity: the pointer is returned unchanged.
+ * The argument is parenthesized so that the expansion stays a single
+ * operand whatever expression the caller passes in.
+ */
+#define xlate_dev_kmem_ptr(p)	(p)
+
+/*
+ * Change "struct page" to physical address.  The pfn is cast to
+ * dma_addr_t first, so the shift by PAGE_SHIFT is done in that type.
+ */
+#define page_to_phys(page)    ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
+
+/*
+ * Some places try to pass in an loff_t for PHYSADDR (?!), so we cast it to
+ * long before casting it to a pointer to avoid compiler warnings.
+ * That double cast is what the macro form of ioremap() does when
+ * CHIP_HAS_MMIO() is false; in that case iounmap() is a no-op.
+ * When CHIP_HAS_MMIO() is true, ioremap(), ioremap_prot() and
+ * iounmap() are ordinary functions that are only declared here.
+ */
+#if CHIP_HAS_MMIO()
+extern void __iomem *ioremap(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
+	pgprot_t pgprot);
+extern void iounmap(volatile void __iomem *addr);
+#else
+#define ioremap(physaddr, size)	((void __iomem *)(unsigned long)(physaddr))
+#define iounmap(addr)		((void)0)
+#endif
+
+#define ioremap_nocache(physaddr, size)		ioremap(physaddr, size)
+#define ioremap_wc(physaddr, size)		ioremap(physaddr, size)
+#define ioremap_writethrough(physaddr, size)	ioremap(physaddr, size)
+#define ioremap_fullcache(physaddr, size)	ioremap(physaddr, size)
+
+#define mmiowb()	/* expands to nothing */
+
+/*
+ * Conversion between virtual and physical mappings: thin casts around
+ * phys_to_virt() and virt_to_phys().
+ */
+#define mm_ptov(addr)		((void *)phys_to_virt(addr))
+#define mm_vtop(addr)		((unsigned long)virt_to_phys(addr))
+
+#ifdef CONFIG_PCI
+
+extern u8 _tile_readb(unsigned long addr);
+extern u16 _tile_readw(unsigned long addr);
+extern u32 _tile_readl(unsigned long addr);
+extern u64 _tile_readq(unsigned long addr);
+extern void _tile_writeb(u8  val, unsigned long addr);
+extern void _tile_writew(u16 val, unsigned long addr);
+extern void _tile_writel(u32 val, unsigned long addr);
+extern void _tile_writeq(u64 val, unsigned long addr);
+
+#else
+
+/*
+ * The Tile architecture does not support IOMEM unless PCI is enabled.
+ * Unfortunately we can't yet simply not declare these methods,
+ * since some generic code that compiles into the kernel, but
+ * we never run, uses them unconditionally.
+ *
+ * Without CONFIG_PCI every _tile_read and _tile_write helper below is
+ * an inline stub that calls iomem_panic().  The readers return its
+ * int result (0) converted to their own return type; presumably that
+ * value is never actually seen because panic() does not return.
+ */
+
+static inline int iomem_panic(void)
+{
+	panic("readb/writeb and friends do not exist on tile without PCI");
+	return 0;
+}
+
+static inline u8 _tile_readb(unsigned long addr)
+{
+	return iomem_panic();
+}
+
+static inline u16 _tile_readw(unsigned long addr)
+{
+	return iomem_panic();
+}
+
+static inline u32 _tile_readl(unsigned long addr)
+{
+	return iomem_panic();
+}
+
+static inline u64 _tile_readq(unsigned long addr)
+{
+	return iomem_panic();
+}
+
+static inline void _tile_writeb(u8  val, unsigned long addr)
+{
+	iomem_panic();
+}
+
+static inline void _tile_writew(u16 val, unsigned long addr)
+{
+	iomem_panic();
+}
+
+static inline void _tile_writel(u32 val, unsigned long addr)
+{
+	iomem_panic();
+}
+
+static inline void _tile_writeq(u64 val, unsigned long addr)
+{
+	iomem_panic();
+}
+
+#endif
+
+/*
+ * MMIO accessors.  Each one casts the address expression to unsigned
+ * long and hands it to the matching _tile_ helper.  The macro
+ * arguments are parenthesized so that the cast applies to the whole
+ * address expression (e.g. "regs + 1" with a typed pointer) rather
+ * than only to its first operand, and so that a value expression
+ * cannot be split by operator precedence.
+ */
+#define readb(addr) _tile_readb((unsigned long)(addr))
+#define readw(addr) _tile_readw((unsigned long)(addr))
+#define readl(addr) _tile_readl((unsigned long)(addr))
+#define readq(addr) _tile_readq((unsigned long)(addr))
+#define writeb(val, addr) _tile_writeb((val), (unsigned long)(addr))
+#define writew(val, addr) _tile_writew((val), (unsigned long)(addr))
+#define writel(val, addr) _tile_writel((val), (unsigned long)(addr))
+#define writeq(val, addr) _tile_writeq((val), (unsigned long)(addr))
+
+#define __raw_readb readb	/* __raw_ forms alias the plain accessors */
+#define __raw_readw readw
+#define __raw_readl readl
+#define __raw_readq readq
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+#define __raw_writeq writeq
+
+#define readb_relaxed readb	/* _relaxed forms alias them as well */
+#define readw_relaxed readw
+#define readl_relaxed readl
+#define readq_relaxed readq
+
+#define ioread8 readb		/* ioreadN and iowriteN map onto MMIO accessors */
+#define ioread16 readw
+#define ioread32 readl
+#define ioread64 readq
+#define iowrite8 writeb
+#define iowrite16 writew
+#define iowrite32 writel
+#define iowrite64 writeq
+
+/*
+ * Fill I/O memory with the low byte of val, one 32-bit writel() at a
+ * time.  dst must be 4-byte aligned (enforced with BUG_ON).  Whole
+ * words are written, so a len that is not a multiple of 4 is
+ * effectively rounded up to the next word.
+ */
+static inline void memset_io(void *dst, int val, size_t len)
+{
+	/* Same type as len: no signed/unsigned compare, no int overflow. */
+	size_t x;
+	/* Replicate the byte in unsigned arithmetic so 0x80..0xff is safe. */
+	u32 pattern = (val & 0xff) * 0x01010101u;
+
+	BUG_ON((unsigned long)dst & 0x3);
+	for (x = 0; x < len; x += 4)
+		writel(pattern, dst + x);
+}
+
+/*
+ * Copy from I/O memory to ordinary memory, one 32-bit readl() at a
+ * time.  src must be 4-byte aligned (enforced with BUG_ON).  Whole
+ * words are copied, so a len that is not a multiple of 4 is
+ * effectively rounded up to the next word.
+ */
+static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
+				 size_t len)
+{
+	/* Same type as len: no signed/unsigned compare, no int overflow. */
+	size_t x;
+
+	BUG_ON((unsigned long)src & 0x3);
+	for (x = 0; x < len; x += 4)
+		*(u32 *)(dst + x) = readl(src + x);
+}
+
+/*
+ * Copy from ordinary memory to I/O memory, one 32-bit writel() at a
+ * time.  dst must be 4-byte aligned (enforced with BUG_ON).  Whole
+ * words are copied, so a len that is not a multiple of 4 is
+ * effectively rounded up to the next word.
+ */
+static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
+				size_t len)
+{
+	/* Same type as len: no signed/unsigned compare, no int overflow. */
+	size_t x;
+
+	BUG_ON((unsigned long)dst & 0x3);
+	for (x = 0; x < len; x += 4)
+		writel(*(u32 *)(src + x), dst + x);
+}
+
+/*
+ * The Tile architecture does not support IOPORT, even with PCI.
+ * Unfortunately we can't yet simply not declare these methods,
+ * since some generic code that compiles into the kernel, but
+ * we never run, uses them unconditionally.
+ *
+ * Every port accessor below therefore just calls ioport_panic().
+ * The one exception is ioport_map(), which logs a message and
+ * returns NULL instead of panicking.  The _p macro variants simply
+ * forward to the plain accessors.
+ */
+
+static inline long ioport_panic(void)
+{
+	panic("inb/outb and friends do not exist on tile");
+	return 0;
+}
+
+static inline void __iomem *ioport_map(unsigned long port, unsigned int len)
+{
+	pr_info("ioport_map: mapping IO resources is unsupported on tile.\n");
+	return NULL;
+}
+
+static inline void ioport_unmap(void __iomem *addr)
+{
+	ioport_panic();
+}
+
+static inline u8 inb(unsigned long addr)
+{
+	return ioport_panic();
+}
+
+static inline u16 inw(unsigned long addr)
+{
+	return ioport_panic();
+}
+
+static inline u32 inl(unsigned long addr)
+{
+	return ioport_panic();
+}
+
+static inline void outb(u8 b, unsigned long addr)
+{
+	ioport_panic();
+}
+
+static inline void outw(u16 b, unsigned long addr)
+{
+	ioport_panic();
+}
+
+static inline void outl(u32 b, unsigned long addr)
+{
+	ioport_panic();
+}
+
+#define inb_p(addr)	inb(addr)
+#define inw_p(addr)	inw(addr)
+#define inl_p(addr)	inl(addr)
+#define outb_p(x, addr)	outb((x), (addr))
+#define outw_p(x, addr)	outw((x), (addr))
+#define outl_p(x, addr)	outl((x), (addr))
+
+static inline void insb(unsigned long addr, void *buffer, int count)
+{
+	ioport_panic();
+}
+
+static inline void insw(unsigned long addr, void *buffer, int count)
+{
+	ioport_panic();
+}
+
+static inline void insl(unsigned long addr, void *buffer, int count)
+{
+	ioport_panic();
+}
+
+static inline void outsb(unsigned long addr, const void *buffer, int count)
+{
+	ioport_panic();
+}
+
+static inline void outsw(unsigned long addr, const void *buffer, int count)
+{
+	ioport_panic();
+}
+
+static inline void outsl(unsigned long addr, const void *buffer, int count)
+{
+	ioport_panic();
+}
+
+/*
+ * Big-endian accessors.  Reads convert the big-endian device value to
+ * CPU byte order; writes convert the CPU-order value to big-endian
+ * before it is stored.  The write side uses the cpu_to_be helpers so
+ * the conversion direction is stated correctly (the byte swap itself
+ * is the same operation in both directions).
+ */
+#define ioread16be(addr)	be16_to_cpu(ioread16(addr))
+#define ioread32be(addr)	be32_to_cpu(ioread32(addr))
+#define iowrite16be(v, addr)	iowrite16(cpu_to_be16(v), (addr))
+#define iowrite32be(v, addr)	iowrite32(cpu_to_be32(v), (addr))
+
+/*
+ * The repeated ioread/iowrite operations are routed to the port-I/O
+ * string functions (insb and friends), with the pointer cast to
+ * unsigned long.
+ */
+#define ioread8_rep(p, dst, count) \
+	insb((unsigned long)(p), (dst), (count))
+#define ioread16_rep(p, dst, count) \
+	insw((unsigned long)(p), (dst), (count))
+#define ioread32_rep(p, dst, count) \
+	insl((unsigned long)(p), (dst), (count))
+
+#define iowrite8_rep(p, src, count) \
+	outsb((unsigned long)(p), (src), (count))
+#define iowrite16_rep(p, src, count) \
+	outsw((unsigned long)(p), (src), (count))
+#define iowrite32_rep(p, src, count) \
+	outsl((unsigned long)(p), (src), (count))
+
+/* Bus addresses are treated as physical addresses. */
+#define virt_to_bus     virt_to_phys
+#define bus_to_virt     phys_to_virt
+
+#endif /* _ASM_TILE_IO_H */
diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h
new file mode 100644
index 00000000..33cff9a3
--- /dev/null
+++ b/arch/tile/include/asm/irq.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_IRQ_H
+#define _ASM_TILE_IRQ_H
+
+#include <linux/hardirq.h>
+
+/* The hypervisor interface provides 32 IRQs. */
+#define NR_IRQS 32
+
+/* IRQ numbers used for linux IPIs. */
+#define IRQ_RESCHEDULE 0
+
+#define irq_canonicalize(irq)   (irq)	/* identity: irq is returned as-is */
+
+void ack_bad_irq(unsigned int irq);
+
+/*
+ * Different ways of handling interrupts.  Tile interrupts are always
+ * per-cpu; there is no global interrupt controller to implement
+ * enable/disable.  Most onboard devices can send their interrupts to
+ * many tiles at the same time, and Tile-specific drivers know how to
+ * deal with this.
+ *
+ * However, generic devices (usually PCIE based, sometimes GPIO)
+ * expect that interrupts will fire on a single core at a time and
+ * that the irq can be enabled or disabled from any core at any time.
+ * We implement this by directing such interrupts to a single core.
+ *
+ * One added wrinkle is that PCI interrupts can be either
+ * hardware-cleared (legacy interrupts) or software cleared (MSI).
+ * Other generic device systems (GPIO) are always software-cleared.
+ *
+ * The enumerators below are used by drivers for onboard devices,
+ * including the internals of PCI root complex and GPIO.  They allow
+ * the driver to tell the generic irq code what kind of interrupt is
+ * mapped to a particular IRQ number.  They are presumably the values
+ * passed as tile_irq_type to tile_irq_activate() -- TODO confirm.
+ */
+enum {
+	/* per-cpu interrupt; use enable/disable_percpu_irq() to mask */
+	TILE_IRQ_PERCPU,
+	/* global interrupt, hardware responsible for clearing. */
+	TILE_IRQ_HW_CLEAR,
+	/* global interrupt, software responsible for clearing. */
+	TILE_IRQ_SW_CLEAR,
+};
+
+
+/*
+ * Paravirtualized drivers should call this when they dynamically
+ * allocate a new IRQ or discover an IRQ that was pre-allocated by the
+ * hypervisor for use with their particular device.  This gives the
+ * IRQ subsystem an opportunity to do interrupt-type-specific
+ * initialization.
+ *
+ * irq is the IRQ number being registered; tile_irq_type is presumably
+ * one of the TILE_IRQ_* enumerators from this header -- TODO confirm
+ * against the implementation.
+ *
+ * ISSUE: We should modify this API so that registering anything
+ * except percpu interrupts also requires providing callback methods
+ * for enabling and disabling the interrupt.  This would allow the
+ * generic IRQ code to proxy enable/disable_irq() calls back into the
+ * PCI subsystem, which in turn could enable or disable the interrupt
+ * at the PCI shim.
+ */
+void tile_irq_activate(unsigned int irq, int tile_irq_type);
+
+void setup_irq_regs(void);	/* NOTE(review): undocumented here; see its definition */
+
+#endif /* _ASM_TILE_IRQ_H */
diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
new file mode 100644
index 00000000..5db0ce54
--- /dev/null
+++ b/arch/tile/include/asm/irqflags.h
@@ -0,0 +1,282 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_IRQFLAGS_H
+#define _ASM_TILE_IRQFLAGS_H
+
+#include <arch/interrupts.h>
+#include <arch/chip.h>
+
+#if !defined(__tilegx__) && defined(__ASSEMBLY__)
+
+/*
+ * The set of interrupts we want to allow when interrupts are nominally
+ * disabled.  The remainder are effectively "NMI" interrupts from
+ * the point of view of the generic Linux code.  Note that synchronous
+ * interrupts (aka "non-queued") are not blocked by the mask in any case.
+ *
+ * Non-tilegx assembly code only gets the high-word form
+ * LINUX_MASKABLE_INTERRUPTS_HI, built with INT_MASK_HI(); every other
+ * build gets the full LINUX_MASKABLE_INTERRUPTS built with INT_MASK().
+ * In both forms the value is the complement of the perf-counter
+ * interrupt bit(s).
+ */
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+       (~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT)))
+#else
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+       (~(INT_MASK_HI(INT_PERF_COUNT)))
+#endif
+
+#else
+
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+#define LINUX_MASKABLE_INTERRUPTS \
+	(~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT)))
+#else
+#define LINUX_MASKABLE_INTERRUPTS \
+	(~(INT_MASK(INT_PERF_COUNT)))
+#endif
+
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* NOTE: we can't include <linux/percpu.h> due to #include dependencies. */
+#include <asm/percpu.h>
+#include <arch/spr_def.h>
+
+/*
+ * Set and clear kernel interrupt masks.
+ *
+ * interrupt_mask_set(n) and interrupt_mask_reset(n) write the bit for
+ * interrupt number n to the mask SET / RESET register,
+ * interrupt_mask_check(n) evaluates to that interrupt's current mask
+ * bit (0 or 1), and the _mask() forms take a whole bitmask.
+ * With a split interrupt mask, interrupts 0-31 use the _K_0 registers
+ * and interrupts 32-63 use the _K_1 registers.
+ */
+#if CHIP_HAS_SPLIT_INTR_MASK()
+#if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 || INT_MEM_ERROR >= 32
+# error Fix assumptions about which word various interrupts are in
+#endif
+/* The bit is built as unsigned long so that bit 31 is a defined shift. */
+#define interrupt_mask_set(n) do { \
+	int __n = (n); \
+	unsigned long __mask = 1UL << (__n & 0x1f); \
+	if (__n < 32) \
+		__insn_mtspr(SPR_INTERRUPT_MASK_SET_K_0, __mask); \
+	else \
+		__insn_mtspr(SPR_INTERRUPT_MASK_SET_K_1, __mask); \
+} while (0)
+#define interrupt_mask_reset(n) do { \
+	int __n = (n); \
+	unsigned long __mask = 1UL << (__n & 0x1f); \
+	if (__n < 32) \
+		__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, __mask); \
+	else \
+		__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, __mask); \
+} while (0)
+#define interrupt_mask_check(n) ({ \
+	int __n = (n); \
+	(((__n < 32) ? \
+	 __insn_mfspr(SPR_INTERRUPT_MASK_K_0) : \
+	 __insn_mfspr(SPR_INTERRUPT_MASK_K_1)) \
+	  >> (__n & 0x1f)) & 1; \
+})
+/* The 64-bit mask is split: low word to _K_0, high word to _K_1. */
+#define interrupt_mask_set_mask(mask) do { \
+	unsigned long long __m = (mask); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_SET_K_0, (unsigned long)(__m)); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_SET_K_1, (unsigned long)(__m>>32)); \
+} while (0)
+#define interrupt_mask_reset_mask(mask) do { \
+	unsigned long long __m = (mask); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, (unsigned long)(__m)); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, (unsigned long)(__m>>32)); \
+} while (0)
+#else
+#define interrupt_mask_set(n) \
+	__insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (1UL << (n)))
+#define interrupt_mask_reset(n) \
+	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (1UL << (n)))
+#define interrupt_mask_check(n) \
+	((__insn_mfspr(SPR_INTERRUPT_MASK_K) >> (n)) & 1)
+#define interrupt_mask_set_mask(mask) \
+	__insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (mask))
+#define interrupt_mask_reset_mask(mask) \
+	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (mask))
+#endif
+
+/*
+ * The set of interrupts we want active if irqs are enabled.
+ * Note that in particular, the tile timer interrupt comes and goes
+ * from this set, since we have no other way to turn off the timer.
+ * Likewise, INTCTRL_K is removed and re-added during device
+ * interrupts, as is the hardwall UDN_FIREWALL interrupt.
+ * We use a low bit (MEM_ERROR) as our sentinel value and make sure it
+ * is always claimed as an "active interrupt" so we can query that bit
+ * to know our current state.
+ */
+DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
+#define INITIAL_INTERRUPTS_ENABLED INT_MASK(INT_MEM_ERROR)
+
+/* Disable interrupts. */
+#define arch_local_irq_disable() \
+	interrupt_mask_set_mask(LINUX_MASKABLE_INTERRUPTS)
+
+/*
+ * Disable all interrupts, including NMIs.
+ * The all-ones mask must be 64 bits wide: with a split interrupt mask,
+ * interrupt_mask_set_mask() writes (mask >> 32) to the high mask word,
+ * and a 32-bit -1UL would be zero-extended and leave that word (and
+ * so interrupts 32-63) unmasked.
+ */
+#define arch_local_irq_disable_all() \
+	interrupt_mask_set_mask(-1ULL)
+
+/*
+ * Re-enable all maskable interrupts, i.e. clear the mask bits for
+ * everything in this cpu's interrupts_enabled_mask.
+ */
+#define arch_local_irq_enable() \
+	interrupt_mask_reset_mask(__get_cpu_var(interrupts_enabled_mask))
+
+/*
+ * Disable or enable interrupts based on flag argument: nonzero means
+ * "disabled", zero means "enabled".
+ */
+#define arch_local_irq_restore(disabled) do { \
+	if (disabled) \
+		arch_local_irq_disable(); \
+	else \
+		arch_local_irq_enable(); \
+} while (0)
+
+/* Return true if "flags" argument means interrupts are disabled. */
+#define arch_irqs_disabled_flags(flags) ((flags) != 0)
+
+/*
+ * Return true if interrupts are currently disabled, i.e. if the
+ * INT_MEM_ERROR sentinel bit is currently set in the interrupt mask.
+ */
+#define arch_irqs_disabled() interrupt_mask_check(INT_MEM_ERROR)
+
+/* Save whether interrupts are currently disabled. */
+#define arch_local_save_flags() arch_irqs_disabled()
+
+/*
+ * Save whether interrupts are currently disabled, then disable them.
+ * Evaluates to the saved state.
+ */
+#define arch_local_irq_save() ({ \
+	unsigned long __flags = arch_local_save_flags(); \
+	arch_local_irq_disable(); \
+	__flags; })
+
+/*
+ * Prevent the given interrupt from being enabled next time we enable irqs.
+ * Only the per-cpu interrupts_enabled_mask is updated here.
+ */
+#define arch_local_irq_mask(interrupt) \
+	(__get_cpu_var(interrupts_enabled_mask) &= ~INT_MASK(interrupt))
+
+/* Prevent the given interrupt from being enabled immediately. */
+#define arch_local_irq_mask_now(interrupt) do { \
+	arch_local_irq_mask(interrupt); \
+	interrupt_mask_set(interrupt); \
+} while (0)
+
+/*
+ * Allow the given interrupt to be enabled next time we enable irqs.
+ * Only the per-cpu interrupts_enabled_mask is updated here.
+ */
+#define arch_local_irq_unmask(interrupt) \
+	(__get_cpu_var(interrupts_enabled_mask) |= INT_MASK(interrupt))
+
+/* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */
+#define arch_local_irq_unmask_now(interrupt) do { \
+	arch_local_irq_unmask(interrupt); \
+	if (!irqs_disabled()) \
+		interrupt_mask_reset(interrupt); \
+} while (0)
+
+#else /* __ASSEMBLY__ */
+
+/* We provide a somewhat more restricted set for assembly. */
+
+#ifdef __tilegx__
+
+#if INT_MEM_ERROR != 0
+# error Fix IRQ_DISABLED() macro
+#endif
+
+/* Return 0 or 1 to indicate whether interrupts are currently disabled. */
+#define IRQS_DISABLED(tmp)					\
+	mfspr   tmp, SPR_INTERRUPT_MASK_K;			\
+	andi    tmp, tmp, 1
+
+/*
+ * Load up a pointer to &interrupts_enabled_mask.
+ * The symbol value is assembled 16 bits at a time and tp is then
+ * added to it (presumably the per-cpu offset -- TODO confirm).
+ */
+#define GET_INTERRUPTS_ENABLED_MASK_PTR(reg)			\
+	moveli reg, hw2_last(interrupts_enabled_mask);		\
+	shl16insli reg, reg, hw1(interrupts_enabled_mask);	\
+	shl16insli reg, reg, hw0(interrupts_enabled_mask);	\
+	add     reg, reg, tp
+
+/*
+ * Disable interrupts: write LINUX_MASKABLE_INTERRUPTS to the mask SET
+ * register.  tmp0 is clobbered; tmp1 is accepted but not used.
+ */
+#define IRQ_DISABLE(tmp0, tmp1)					\
+	moveli  tmp0, hw2_last(LINUX_MASKABLE_INTERRUPTS);	\
+	shl16insli tmp0, tmp0, hw1(LINUX_MASKABLE_INTERRUPTS);	\
+	shl16insli tmp0, tmp0, hw0(LINUX_MASKABLE_INTERRUPTS);	\
+	mtspr   SPR_INTERRUPT_MASK_SET_K, tmp0
+
+/*
+ * Disable ALL synchronous interrupts (used by NMI entry).
+ * Writes all ones to the mask SET register.
+ * NOTE(review): this header states elsewhere that synchronous
+ * interrupts are not blocked by the mask, so "synchronous" here may
+ * be a slip -- confirm.
+ */
+#define IRQ_DISABLE_ALL(tmp)					\
+	movei   tmp, -1;					\
+	mtspr   SPR_INTERRUPT_MASK_SET_K, tmp
+
+/*
+ * Enable interrupts: load interrupts_enabled_mask and write it to the
+ * mask RESET register.  tmp0 is clobbered; tmp1 is accepted but not
+ * used.
+ */
+#define IRQ_ENABLE(tmp0, tmp1)					\
+	GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0);			\
+	ld      tmp0, tmp0;					\
+	mtspr   SPR_INTERRUPT_MASK_RESET_K, tmp0
+
+#else /* !__tilegx__ */
+
+/*
+ * Return 0 or 1 to indicate whether interrupts are currently disabled.
+ * Note that it's important that we use a bit from the "low" mask word,
+ * since when we are enabling, that is the word we write first, so if we
+ * are interrupted after only writing half of the mask, the interrupt
+ * handler will correctly observe that we have interrupts enabled, and
+ * will enable interrupts itself on return from the interrupt handler
+ * (making the original code's write of the "high" mask word idempotent).
+ */
+#define IRQS_DISABLED(tmp)					\
+	mfspr   tmp, SPR_INTERRUPT_MASK_K_0;			\
+	shri    tmp, tmp, INT_MEM_ERROR;			\
+	andi    tmp, tmp, 1
+
+/*
+ * Load up a pointer to &interrupts_enabled_mask.
+ * The symbol value is built from its lo16/ha16 halves and tp is then
+ * added to it (presumably the per-cpu offset -- TODO confirm).
+ */
+#define GET_INTERRUPTS_ENABLED_MASK_PTR(reg)			\
+	moveli  reg, lo16(interrupts_enabled_mask);		\
+	auli    reg, reg, ha16(interrupts_enabled_mask);	\
+	add     reg, reg, tp
+
+/*
+ * Disable interrupts.  The low mask word is set to all ones and the
+ * high mask word to LINUX_MASKABLE_INTERRUPTS_HI.  Both tmp0 and tmp1
+ * are clobbered.
+ */
+#define IRQ_DISABLE(tmp0, tmp1)					\
+	{							\
+	 movei  tmp0, -1;					\
+	 moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI)	\
+	};							\
+	{							\
+	 mtspr  SPR_INTERRUPT_MASK_SET_K_0, tmp0;		\
+	 auli   tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS_HI)	\
+	};							\
+	mtspr   SPR_INTERRUPT_MASK_SET_K_1, tmp1
+
+/*
+ * Disable ALL synchronous interrupts (used by NMI entry).
+ * Writes all ones to both mask SET registers.
+ * NOTE(review): this header states elsewhere that synchronous
+ * interrupts are not blocked by the mask, so "synchronous" here may
+ * be a slip -- confirm.
+ */
+#define IRQ_DISABLE_ALL(tmp)					\
+	movei   tmp, -1;					\
+	mtspr   SPR_INTERRUPT_MASK_SET_K_0, tmp;		\
+	mtspr   SPR_INTERRUPT_MASK_SET_K_1, tmp
+
+/*
+ * Enable interrupts.  The two 32-bit halves of interrupts_enabled_mask
+ * are loaded (offsets 0 and 4) and written to the RESET registers,
+ * low word first -- the order the IRQS_DISABLED() comment relies on.
+ * Both tmp0 and tmp1 are clobbered.
+ */
+#define IRQ_ENABLE(tmp0, tmp1)					\
+	GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0);			\
+	{							\
+	 lw     tmp0, tmp0;					\
+	 addi   tmp1, tmp0, 4					\
+	};							\
+	lw      tmp1, tmp1;					\
+	mtspr   SPR_INTERRUPT_MASK_RESET_K_0, tmp0;		\
+	mtspr   SPR_INTERRUPT_MASK_RESET_K_1, tmp1
+#endif
+
+/*
+ * Do the CPU's IRQ-state tracing from assembly code. We call a
+ * C function, but almost everywhere we do, we don't mind clobbering
+ * all the caller-saved registers.
+ * Without CONFIG_TRACE_IRQFLAGS both macros expand to nothing.
+ */
+#ifdef CONFIG_TRACE_IRQFLAGS
+# define TRACE_IRQS_ON  jal trace_hardirqs_on
+# define TRACE_IRQS_OFF jal trace_hardirqs_off
+#else
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_TILE_IRQFLAGS_H */
diff --git a/arch/tile/include/asm/kexec.h b/arch/tile/include/asm/kexec.h
new file mode 100644
index 00000000..c11a6cc7
--- /dev/null
+++ b/arch/tile/include/asm/kexec.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * based on kexec.h from other architectures in linux-2.6.18
+ */
+
+#ifndef _ASM_TILE_KEXEC_H
+#define _ASM_TILE_KEXEC_H
+
+#include <asm/page.h>
+
+/* Maximum physical address we can use pages from. */
+#define KEXEC_SOURCE_MEMORY_LIMIT TASK_SIZE
+/* Maximum address we can reach in physical address mode. */
+#define KEXEC_DESTINATION_MEMORY_LIMIT TASK_SIZE
+/* Maximum address we can use for the control code buffer. */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_PAGE_SIZE	PAGE_SIZE
+
+/*
+ * We don't bother to provide a unique identifier, since we can only
+ * reboot with a single type of kernel image anyway.
+ */
+#define KEXEC_ARCH KEXEC_ARCH_DEFAULT
+
+/*
+ * Use the tile override for the page allocator.
+ * The name is also defined as a macro expanding to itself, presumably
+ * so other code can detect the override with #ifdef -- TODO confirm.
+ */
+struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order);
+#define kimage_alloc_pages_arch kimage_alloc_pages_arch
+
+#define MAX_NOTE_BYTES 1024
+
+/* Defined in arch/tile/kernel/relocate_kernel.S */
+extern const unsigned char relocate_new_kernel[];
+extern const unsigned long relocate_new_kernel_size;
+extern void relocate_new_kernel_end(void);
+
+/*
+ * Provide a dummy definition to avoid build failures.
+ * The body is empty: neither register set is read or written.
+ */
+static inline void crash_setup_regs(struct pt_regs *n, struct pt_regs *o)
+{
+}
+
+#endif /* _ASM_TILE_KEXEC_H */
diff --git a/arch/tile/include/asm/kmap_types.h b/arch/tile/include/asm/kmap_types.h
new file mode 100644
index 00000000..3d0f2024
--- /dev/null
+++ b/arch/tile/include/asm/kmap_types.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_KMAP_TYPES_H
+#define _ASM_TILE_KMAP_TYPES_H
+
+/*
+ * In 32-bit TILE Linux we have to balance the desire to have a lot of
+ * nested atomic mappings with the fact that large page sizes and many
+ * processors chew up address space quickly.  In a typical
+ * 64-processor, 64KB-page layout build, making KM_TYPE_NR one larger
+ * adds 4MB of required address-space.  For now we leave KM_TYPE_NR
+ * set to depth 8.
+ */
+enum km_type {
+	KM_TYPE_NR = 8
+};
+
+/*
+ * We provide dummy definitions of all the stray values that used to be
+ * required for kmap_atomic() and no longer are.
+ * The enumerators are unrelated to enum km_type above and simply take
+ * consecutive values starting at 0.
+ */
+enum {
+	KM_BOUNCE_READ,
+	KM_SKB_SUNRPC_DATA,
+	KM_SKB_DATA_SOFTIRQ,
+	KM_USER0,
+	KM_USER1,
+	KM_BIO_SRC_IRQ,
+	KM_BIO_DST_IRQ,
+	KM_PTE0,
+	KM_PTE1,
+	KM_IRQ0,
+	KM_IRQ1,
+	KM_SOFTIRQ0,
+	KM_SOFTIRQ1,
+	KM_SYNC_ICACHE,
+	KM_SYNC_DCACHE,
+	KM_UML_USERCOPY,
+	KM_IRQ_PTE,
+	KM_NMI,
+	KM_NMI_PTE,
+	KM_KDB
+};
+
+#endif /* _ASM_TILE_KMAP_TYPES_H */
diff --git a/arch/tile/include/asm/linkage.h b/arch/tile/include/asm/linkage.h
new file mode 100644
index 00000000..e121c397
--- /dev/null
+++ b/arch/tile/include/asm/linkage.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_LINKAGE_H
+#define _ASM_TILE_LINKAGE_H
+
+#include <feedback.h>
+
+#define __ALIGN .align 8
+
+/*
+ * The STD_ENTRY and STD_ENDPROC macros put the function in a
+ * self-named .text.foo section, and if linker feedback collection
+ * is enabled, add a suitable call to the feedback collection code.
+ * STD_ENTRY_SECTION lets you specify a non-standard section name.
+ */
+
+#define STD_ENTRY(name) \
+  .pushsection .text.##name, "ax"; \
+  ENTRY(name); \
+  FEEDBACK_ENTER(name)
+
+#define STD_ENTRY_SECTION(name, section) \
+  .pushsection section, "ax"; \
+  ENTRY(name); \
+  FEEDBACK_ENTER_EXPLICIT(name, section, .Lend_##name - name)
+
+#define STD_ENDPROC(name) \
+  ENDPROC(name); \
+  .Lend_##name:; /* end label; STD_ENTRY_SECTION subtracts name from it */ \
+  .popsection /* pairs with the .pushsection in the STD_ENTRY* macros */
+
+/* Create a file-static function entry set up for feedback gathering. */
+#define STD_ENTRY_LOCAL(name) \
+  .pushsection .text.##name, "ax"; \
+  ALIGN; \
+  name:; /* a bare label here, where STD_ENTRY uses ENTRY(name) */ \
+  FEEDBACK_ENTER(name)
+
+#endif /* _ASM_TILE_LINKAGE_H */
diff --git a/arch/tile/include/asm/memprof.h b/arch/tile/include/asm/memprof.h
new file mode 100644
index 00000000..359949be
--- /dev/null
+++ b/arch/tile/include/asm/memprof.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * The hypervisor's memory controller profiling infrastructure allows
+ * the programmer to find out what fraction of the available memory
+ * bandwidth is being consumed at each memory controller.  The
+ * profiler provides start, stop, and clear operations to allow
+ * profiling over a specific time window, as well as an interface for
+ * reading the most recent profile values.
+ *
+ * This header declares IOCTL codes necessary to control memprof.
+ */
+#ifndef _ASM_TILE_MEMPROF_H
+#define _ASM_TILE_MEMPROF_H
+
+#include <linux/ioctl.h>
+
+#define MEMPROF_IOCTL_TYPE 0xB4
+#define MEMPROF_IOCTL_START _IO(MEMPROF_IOCTL_TYPE, 0)
+#define MEMPROF_IOCTL_STOP _IO(MEMPROF_IOCTL_TYPE, 1)
+#define MEMPROF_IOCTL_CLEAR _IO(MEMPROF_IOCTL_TYPE, 2)
+
+#endif /* _ASM_TILE_MEMPROF_H */
diff --git a/arch/tile/include/asm/mman.h b/arch/tile/include/asm/mman.h
new file mode 100644
index 00000000..81b8fc34
--- /dev/null
+++ b/arch/tile/include/asm/mman.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_MMAN_H
+#define _ASM_TILE_MMAN_H
+
+#include <asm-generic/mman-common.h>
+#include <arch/chip.h>
+
+/* Standard Linux flags */
+
+#define MAP_POPULATE	0x0040		/* populate (prefault) pagetables */
+#define MAP_NONBLOCK	0x0080		/* do not block on IO */
+#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
+#define MAP_STACK	MAP_GROWSDOWN	/* provide convenience alias */
+#define MAP_LOCKED	0x0200		/* pages are locked */
+#define MAP_NORESERVE	0x0400		/* don't check for reservations */
+#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
+#define MAP_HUGETLB	0x4000		/* create a huge page mapping */
+
+
+/*
+ * Flags for mlockall
+ */
+#define MCL_CURRENT	1		/* lock all current mappings */
+#define MCL_FUTURE	2		/* lock all future mappings */
+
+
+#endif /* _ASM_TILE_MMAN_H */
diff --git a/arch/tile/include/asm/mmu.h b/arch/tile/include/asm/mmu.h
new file mode 100644
index 00000000..92f94c77
--- /dev/null
+++ b/arch/tile/include/asm/mmu.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_MMU_H
+#define _ASM_TILE_MMU_H
+
+/* Capture any arch- and mm-specific information. */
+struct mm_context {
+	/*
+	 * Written only with the mmap_sem semaphore held.  Read atomically
+	 * without the semaphore; the value is set conservatively.
+	 */
+	unsigned int priority_cached;
+};
+
+typedef struct mm_context mm_context_t;
+
+void leave_mm(int cpu);
+
+#endif /* _ASM_TILE_MMU_H */
diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h
new file mode 100644
index 00000000..15fb2464
--- /dev/null
+++ b/arch/tile/include/asm/mmu_context.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_MMU_CONTEXT_H
+#define _ASM_TILE_MMU_CONTEXT_H
+
+#include <linux/smp.h>
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
+#include <asm-generic/mm_hooks.h>
+
+static inline int
+init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	return 0;	/* always 0; neither tsk nor mm is examined */
+}
+
+/* Note that arch/tile/kernel/head.S also calls hv_install_context() */
+static inline void __install_page_table(pgd_t *pgdir, int asid, pgprot_t prot)
+{
+	/* FIXME: HV_CTX_DIRECTIO should not always be set. */
+	int rc = hv_install_context(__pa(pgdir), prot, asid, HV_CTX_DIRECTIO);
+	if (rc < 0)	/* a negative return code is fatal: panic with it */
+		panic("hv_install_context failed: %d", rc);
+}
+
+static inline void install_page_table(pgd_t *pgdir, int asid)
+{
+	pte_t *ptep = virt_to_pte(NULL, (unsigned long)pgdir);
+	__install_page_table(pgdir, asid, *ptep);	/* PTE doubles as prot */
+}
+
+/*
+ * "Lazy" TLB mode is entered when we are switching to a kernel task,
+ * which borrows the mm of the previous task.  The goal of this
+ * optimization is to avoid having to install a new page table.  On
+ * early x86 machines (where the concept originated) you couldn't do
+ * anything short of a full page table install for invalidation, so
+ * handling a remote TLB invalidate required doing a page table
+ * re-install.  Someone clearly decided that it was silly to keep
+ * doing this while in "lazy" TLB mode, so the optimization involves
+ * installing the swapper page table instead the first time one
+ * occurs, and clearing the cpu out of cpu_vm_mask, so the cpu running
+ * the kernel task doesn't need to take any more interrupts.  At that
+ * point it's then necessary to explicitly reinstall it when context
+ * switching back to the original mm.
+ *
+ * On Tile, we have to do a page-table install whenever DMA is enabled,
+ * so in that case lazy mode doesn't help anyway.  And more generally,
+ * we have efficient per-page TLB shootdown, and don't expect to spend
+ * that much time in kernel tasks in general, so just leaving the
+ * kernel task borrowing the old page table, but handling TLB
+ * shootdowns, is a reasonable thing to do.  And importantly, this
+ * lets us use the hypervisor's internal APIs for TLB shootdown, which
+ * means we don't have to worry about having TLB shootdowns blocked
+ * when Linux is disabling interrupts; see the page migration code for
+ * an example of where it's important for TLB shootdowns to complete
+ * even when interrupts are disabled at the Linux level.
+ */
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *t)
+{
+#if CHIP_HAS_TILE_DMA()
+	/*
+	 * Reinstall mm->pgd under this cpu's current ASID: an "identity"
+	 * page table switch, done to clear any pending DMA interrupts.
+	 */
+	if (current->thread.tile_dma_state.enabled)
+		install_page_table(mm->pgd, __get_cpu_var(current_asid));
+#endif
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	int cpu, asid;
+
+	/* Nothing to do unless we are really changing to a different mm. */
+	if (unlikely(prev == next))
+		return;
+
+	/* Take the next ASID; past max_asid, wrap to min_asid and flush. */
+	asid = __get_cpu_var(current_asid) + 1;
+	if (asid > max_asid) {
+		asid = min_asid;
+		local_flush_tlb();
+	}
+	__get_cpu_var(current_asid) = asid;
+
+	/* Move this cpu from the old mm's cpumask to the new mm's. */
+	cpu = smp_processor_id();
+	cpumask_clear_cpu(cpu, mm_cpumask(prev));
+	cpumask_set_cpu(cpu, mm_cpumask(next));
+
+	/* Install the new mm's page tables under the ASID chosen above. */
+	install_page_table(next->pgd, asid);
+
+	/* See how we should set the red/black cache info */
+	check_mm_caching(prev, next);
+
+	/*
+	 * The new mm may map a physical page that has since been
+	 * repurposed and holds new code, so flush the icache.
+	 */
+	__flush_icache();
+}
+
+static inline void activate_mm(struct mm_struct *prev_mm,
+			       struct mm_struct *next_mm)
+{
+	switch_mm(prev_mm, next_mm, NULL);	/* same work; no task passed */
+}
+
+#define destroy_context(mm)		do { } while (0)	/* no-op */
+#define deactivate_mm(tsk, mm)          do { } while (0)	/* no-op */
+
+#endif /* _ASM_TILE_MMU_CONTEXT_H */
diff --git a/arch/tile/include/asm/mmzone.h b/arch/tile/include/asm/mmzone.h
new file mode 100644
index 00000000..9d3dbce8
--- /dev/null
+++ b/arch/tile/include/asm/mmzone.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_MMZONE_H
+#define _ASM_TILE_MMZONE_H
+
+extern struct pglist_data node_data[];
+#define NODE_DATA(nid)	(&node_data[nid])
+
+extern void get_memcfg_numa(void);
+
+#ifdef CONFIG_DISCONTIGMEM
+
+#include <asm/page.h>
+
+/*
+ * Generally, memory ranges are always doled out by the hypervisor in
+ * fixed-size, power-of-two increments.  That would make computing the node
+ * very easy.  We could just take a couple high bits of the PA, which
+ * denote the memory shim, and we'd be done.  However, when we're doing
+ * memory striping, this may not be true; PAs with different high bit
+ * values might be in the same node.  Thus, we keep a lookup table to
+ * translate the high bits of the PFN to the node number.
+ */
+extern int highbits_to_node[];
+
+static inline int pfn_to_nid(unsigned long pfn)
+{
+	return highbits_to_node[__pfn_to_highbits(pfn)];
+}
+
+#define kern_addr_valid(kaddr)	virt_addr_valid((void *)(kaddr))
+
+static inline int pfn_valid(unsigned long pfn)
+{
+	/* pfn is unsigned long, like pfn_to_nid()'s argument: no truncation. */
+	int nid = pfn_to_nid(pfn);
+
+	/* Valid if the table names a node (nid >= 0) and pfn is below its end. */
+	return nid >= 0 && pfn < node_end_pfn(nid);
+}
+
+/* Information on the NUMA nodes that we compute early */
+extern unsigned long node_start_pfn[];
+extern unsigned long node_end_pfn[];
+extern unsigned long node_memmap_pfn[];
+extern unsigned long node_percpu_pfn[];
+extern unsigned long node_free_pfn[];
+#ifdef CONFIG_HIGHMEM
+extern unsigned long node_lowmem_end_pfn[];
+#endif
+#ifdef CONFIG_PCI
+extern unsigned long pci_reserve_start_pfn;
+extern unsigned long pci_reserve_end_pfn;
+#endif
+
+#endif /* CONFIG_DISCONTIGMEM */
+
+#endif /* _ASM_TILE_MMZONE_H */
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
new file mode 100644
index 00000000..db93518f
--- /dev/null
+++ b/arch/tile/include/asm/page.h
@@ -0,0 +1,336 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PAGE_H
+#define _ASM_TILE_PAGE_H
+
+#include <linux/const.h>
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+
+/* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
+#define PAGE_SHIFT	HV_LOG2_PAGE_SIZE_SMALL
+#define HPAGE_SHIFT	HV_LOG2_PAGE_SIZE_LARGE
+
+#define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
+#define HPAGE_SIZE	(_AC(1, UL) << HPAGE_SHIFT)
+
+#define PAGE_MASK	(~(PAGE_SIZE - 1))
+#define HPAGE_MASK	(~(HPAGE_SIZE - 1))
+
+/*
+ * If the Kconfig doesn't specify, set a maximum zone order that
+ * is enough so that we can create huge pages from small pages given
+ * the respective sizes of the two page types.  See <linux/mmzone.h>.
+ */
+#ifndef CONFIG_FORCE_MAX_ZONEORDER
+#define CONFIG_FORCE_MAX_ZONEORDER (HPAGE_SHIFT - PAGE_SHIFT + 1)
+#endif
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/string.h>
+
+struct page;
+
+static inline void clear_page(void *page)
+{
+	memset(page, 0, PAGE_SIZE);	/* zero exactly PAGE_SIZE bytes */
+}
+
+static inline void copy_page(void *to, void *from)
+{
+	memcpy(to, from, PAGE_SIZE);	/* memcpy: the pages must not overlap */
+}
+
+static inline void clear_user_page(void *page, unsigned long vaddr,
+				struct page *pg)
+{
+	clear_page(page);	/* vaddr and pg are not used here */
+}
+
+static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
+				struct page *topage)
+{
+	copy_page(to, from);	/* vaddr and topage are not used here */
+}
+
+/*
+ * Hypervisor page tables are made of the same basic structure.
+ */
+
+typedef HV_PTE pte_t;
+typedef HV_PTE pgd_t;
+typedef HV_PTE pgprot_t;
+
+/*
+ * User L2 page tables are managed as one L2 page table per page,
+ * because we use the page allocator for them.  This keeps the allocation
+ * simple and makes it potentially useful to implement HIGHPTE at some point.
+ * However, it's also inefficient, since L2 page tables are much smaller
+ * than pages (currently 2KB vs 64KB).  So we should revisit this.
+ */
+typedef struct page *pgtable_t;
+
+/* Must be a macro since it is used to create constants. */
+#define __pgprot(val) hv_pte(val)
+
+/* Rarely-used initializers, typically with a "zero" value. */
+#define __pte(x) hv_pte(x)
+#define __pgd(x) hv_pte(x)
+
+static inline u64 pgprot_val(pgprot_t pgprot)
+{
+	return hv_pte_val(pgprot);
+}
+
+static inline u64 pte_val(pte_t pte)
+{
+	return hv_pte_val(pte);
+}
+
+static inline u64 pgd_val(pgd_t pgd)
+{
+	return hv_pte_val(pgd);
+}
+
+#ifdef __tilegx__
+
+typedef HV_PTE pmd_t;
+
+#define __pmd(x) hv_pte(x)
+
+static inline u64 pmd_val(pmd_t pmd)
+{
+	return hv_pte_val(pmd);
+}
+
+#endif
+/* Allocation order for size bytes; never evaluates __builtin_clzl(0). */
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+	unsigned long last = (size - 1) >> PAGE_SHIFT;	/* index of last page */
+	return last ? BITS_PER_LONG - __builtin_clzl(last) : 0;
+}
+#endif /* !__ASSEMBLY__ */
+
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+
+#define HUGE_MAX_HSTATE		2
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+
+/* Each memory controller has PAs distinct in their high bits. */
+#define NR_PA_HIGHBIT_SHIFT (CHIP_PA_WIDTH() - CHIP_LOG_NUM_MSHIMS())
+#define NR_PA_HIGHBIT_VALUES (1 << CHIP_LOG_NUM_MSHIMS())
+#define __pa_to_highbits(pa) ((phys_addr_t)(pa) >> NR_PA_HIGHBIT_SHIFT)
+#define __pfn_to_highbits(pfn) ((pfn) >> (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT))
+
+#ifdef __tilegx__
+
+/*
+ * We reserve the lower half of memory for user-space programs, and the
+ * upper half for system code.  We re-map all of physical memory in the
+ * upper half, which takes a quarter of our VA space.  Then we have
+ * the vmalloc regions.  The supervisor code lives at 0xfffffff700000000,
+ * with the hypervisor above that.
+ *
+ * Loadable kernel modules are placed immediately after the static
+ * supervisor code, with each being allocated a 256MB region of
+ * address space, so we don't have to worry about the range of "jal"
+ * and other branch instructions.
+ *
+ * For now we keep life simple and just allocate one pmd (4GB) for vmalloc.
+ * Similarly, for now we don't play any struct page mapping games.
+ */
+
+#if CHIP_PA_WIDTH() + 2 > CHIP_VA_WIDTH()
+# error Too much PA to map with the VA available!
+#endif
+#define HALF_VA_SPACE           (_AC(1, UL) << (CHIP_VA_WIDTH() - 1))
+
+#define MEM_LOW_END		(HALF_VA_SPACE - 1)         /* low half */
+#define MEM_HIGH_START		(-HALF_VA_SPACE)            /* high half */
+#define PAGE_OFFSET		MEM_HIGH_START
+#define _VMALLOC_START		_AC(0xfffffff500000000, UL) /* 4 GB */
+#define HUGE_VMAP_BASE		_AC(0xfffffff600000000, UL) /* 4 GB */
+#define MEM_SV_START		_AC(0xfffffff700000000, UL) /* 256 MB */
+#define MEM_SV_INTRPT		MEM_SV_START
+#define MEM_MODULE_START	_AC(0xfffffff710000000, UL) /* 256 MB */
+#define MEM_MODULE_END		(MEM_MODULE_START + (256*1024*1024))
+#define MEM_HV_START		_AC(0xfffffff800000000, UL) /* 32 GB */
+
+/* Highest DTLB address we will use */
+#define KERNEL_HIGH_VADDR	MEM_SV_START
+
+/* Since we don't currently provide any fixmaps, we use an impossible VA. */
+#define FIXADDR_TOP             MEM_HV_START
+
+#else /* !__tilegx__ */
+
+/*
+ * A PAGE_OFFSET of 0xC0000000 means that the kernel has
+ * a virtual address space of one gigabyte, which limits the
+ * amount of physical memory you can use to about 768MB.
+ * If you want more physical memory than this then see the CONFIG_HIGHMEM
+ * option in the kernel configuration.
+ *
+ * The top 16MB chunk in the table below is unavailable to Linux.  Since
+ * the kernel interrupt vectors must live at either 0xfe000000 or 0xfd000000
+ * (depending on whether the kernel is at PL2 or PL1), we map all of the
+ * bottom of RAM at this address with a huge page table entry to minimize
+ * its ITLB footprint (as well as at PAGE_OFFSET).  The last architected
+ * requirement is that user interrupt vectors live at 0xfc000000, so we
+ * make that range of memory available to user processes.  The remaining
+ * regions are sized as shown; the first four addresses use the PL 1
+ * values, and after that, we show "typical" values, since the actual
+ * addresses depend on kernel #defines.
+ *
+ * MEM_HV_INTRPT                   0xfe000000
+ * MEM_SV_INTRPT (kernel code)     0xfd000000
+ * MEM_USER_INTRPT (user vector)   0xfc000000
+ * FIX_KMAP_xxx                    0xf8000000 (via NR_CPUS * KM_TYPE_NR)
+ * PKMAP_BASE                      0xf7000000 (via LAST_PKMAP)
+ * HUGE_VMAP                       0xf3000000 (via CONFIG_NR_HUGE_VMAPS)
+ * VMALLOC_START                   0xf0000000 (via __VMALLOC_RESERVE)
+ * mapped LOWMEM                   0xc0000000
+ */
+
+#define MEM_USER_INTRPT		_AC(0xfc000000, UL)
+#if CONFIG_KERNEL_PL == 1
+#define MEM_SV_INTRPT		_AC(0xfd000000, UL)
+#define MEM_HV_INTRPT		_AC(0xfe000000, UL)
+#else
+#define MEM_GUEST_INTRPT	_AC(0xfd000000, UL)
+#define MEM_SV_INTRPT		_AC(0xfe000000, UL)
+#define MEM_HV_INTRPT		_AC(0xff000000, UL)
+#endif
+
+#define INTRPT_SIZE		0x4000
+
+/* Tolerate page size larger than the architecture interrupt region size. */
+#if PAGE_SIZE > INTRPT_SIZE
+#undef INTRPT_SIZE
+#define INTRPT_SIZE PAGE_SIZE
+#endif
+
+#define KERNEL_HIGH_VADDR	MEM_USER_INTRPT
+#define FIXADDR_TOP		(KERNEL_HIGH_VADDR - PAGE_SIZE)
+
+#define PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
+
+/* On 32-bit architectures we mix kernel modules in with other vmaps. */
+#define MEM_MODULE_START	VMALLOC_START
+#define MEM_MODULE_END		VMALLOC_END
+
+#endif /* __tilegx__ */
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_HIGHMEM
+
+/* Map kernel virtual addresses to page frames, in HPAGE_SIZE chunks. */
+extern unsigned long pbase_map[];
+extern void *vbase_map[];
+
+static inline unsigned long kaddr_to_pfn(const volatile void *_kaddr)
+{
+	unsigned long kaddr = (unsigned long)_kaddr;
+	return pbase_map[kaddr >> HPAGE_SHIFT] +	/* pfn entry for the chunk */
+		((kaddr & (HPAGE_SIZE - 1)) >> PAGE_SHIFT); /* + pages into it */
+}
+
+static inline void *pfn_to_kaddr(unsigned long pfn)
+{
+	return vbase_map[__pfn_to_highbits(pfn)] + (pfn << PAGE_SHIFT);
+}
+
+static inline phys_addr_t virt_to_phys(const volatile void *kaddr)
+{
+	/* Physical base of the page from its pfn, plus the offset within it. */
+	phys_addr_t base = (phys_addr_t)kaddr_to_pfn(kaddr) << PAGE_SHIFT;
+	return base + ((unsigned long)kaddr & ~PAGE_MASK);
+}
+
+static inline void *phys_to_virt(phys_addr_t paddr)
+{
+	return pfn_to_kaddr(paddr >> PAGE_SHIFT) + (paddr & (PAGE_SIZE-1));
+}
+
+/* With HIGHMEM, we pack PAGE_OFFSET through high_memory with all valid VAs. */
+static inline int virt_addr_valid(const volatile void *kaddr)
+{
+	extern void *high_memory;  /* copied from <linux/mm.h> */
+	return ((unsigned long)kaddr >= PAGE_OFFSET && kaddr < high_memory);
+}
+
+#else /* !CONFIG_HIGHMEM */
+
+static inline unsigned long kaddr_to_pfn(const volatile void *kaddr)
+{
+	return ((unsigned long)kaddr - PAGE_OFFSET) >> PAGE_SHIFT;
+}
+
+static inline void *pfn_to_kaddr(unsigned long pfn)
+{
+	return (void *)((pfn << PAGE_SHIFT) + PAGE_OFFSET);
+}
+
+static inline phys_addr_t virt_to_phys(const volatile void *kaddr)
+{
+	return (phys_addr_t)((unsigned long)kaddr - PAGE_OFFSET);
+}
+
+static inline void *phys_to_virt(phys_addr_t paddr)
+{
+	return (void *)((unsigned long)paddr + PAGE_OFFSET);
+}
+
+/* Check that the given address is within some mapped range of PAs. */
+#define virt_addr_valid(kaddr) pfn_valid(kaddr_to_pfn(kaddr))
+
+#endif /* !CONFIG_HIGHMEM */
+
+/* Callers are inconsistent about argument types, so cast on the way in. */
+#define __pa(kaddr) virt_to_phys((void *)(unsigned long)(kaddr))
+#define __va(paddr) phys_to_virt((phys_addr_t)(paddr))
+
+extern int devmem_is_allowed(unsigned long pagenr);
+
+#ifdef CONFIG_FLATMEM
+static inline int pfn_valid(unsigned long pfn)
+{
+	return pfn < max_mapnr;
+}
+#endif
+
+/* Provide as macros since these require some other headers included. */
+#define page_to_pa(page) ((phys_addr_t)(page_to_pfn(page)) << PAGE_SHIFT)
+#define virt_to_page(kaddr) pfn_to_page(kaddr_to_pfn((void *)(kaddr)))
+#define page_to_virt(page) pfn_to_kaddr(page_to_pfn(page))
+
+struct mm_struct;
+extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
+
+#endif /* !__ASSEMBLY__ */
+
+#define VM_DATA_DEFAULT_FLAGS \
+	(VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/memory_model.h>
+
+#endif /* _ASM_TILE_PAGE_H */
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
new file mode 100644
index 00000000..32e6cbe8
--- /dev/null
+++ b/arch/tile/include/asm/pci.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PCI_H
+#define _ASM_TILE_PCI_H
+
+#include <linux/pci.h>
+#include <asm-generic/pci_iomap.h>
+
+/*
+ * Structure of a PCI controller (host bridge)
+ */
+struct pci_controller {
+	int index;		/* PCI domain number */
+	struct pci_bus *root_bus;
+
+	int first_busno;
+	int last_busno;
+
+	int hv_cfg_fd[2];	/* config{0,1} fds for this PCIe controller */
+	int hv_mem_fd;		/* fd to Hypervisor for MMIO operations */
+
+	struct pci_ops *ops;
+
+	int irq_base;		/* Base IRQ from the Hypervisor	*/
+	int plx_gen1;		/* flag for PLX Gen 1 configuration */
+
+	/* Address ranges that are routed to this controller/bridge. */
+	struct resource mem_resources[3];
+};
+
+/*
+ * The hypervisor maps the entirety of CPA-space as bus addresses, so
+ * bus addresses are physical addresses.  The networking and block
+ * device layers use this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS     1
+
+int __init tile_pci_init(void);
+int __init pcibios_init(void);
+
+static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
+
+void __devinit pcibios_fixup_bus(struct pci_bus *bus);
+
+#define	TILE_NUM_PCIE	2
+
+#define pci_domain_nr(bus) (((struct pci_controller *)(bus)->sysdata)->index)
+
+/*
+ * This decides whether to display the domain number in /proc.
+ */
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+	return 1;
+}
+
+/*
+ * pcibios_assign_all_busses() tells whether or not the bus numbers
+ * should be reassigned, in case the BIOS didn't do it correctly, or
+ * in case we don't have a BIOS and we want to let Linux do it.
+ */
+static inline int pcibios_assign_all_busses(void)
+{
+	return 1;
+}
+
+#define PCIBIOS_MIN_MEM		0
+#define PCIBIOS_MIN_IO		0
+
+/*
+ * This flag tells if the platform is TILEmpower that needs
+ * special configuration for the PLX switch chip.
+ */
+extern int tile_plx_gen1;
+
+/* Use any cpu for PCI. */
+#define cpumask_of_pcibus(bus) cpu_online_mask
+
+/* implement the pci_ DMA API in terms of the generic device dma_ one */
+#include <asm-generic/pci-dma-compat.h>
+
+/* generic pci stuff */
+#include <asm-generic/pci.h>
+
+#endif /* _ASM_TILE_PCI_H */
diff --git a/arch/tile/include/asm/percpu.h b/arch/tile/include/asm/percpu.h
new file mode 100644
index 00000000..63294f5a
--- /dev/null
+++ b/arch/tile/include/asm/percpu.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PERCPU_H
+#define _ASM_TILE_PERCPU_H
+
+register unsigned long __my_cpu_offset __asm__("tp");	/* kept in reg "tp" */
+#define __my_cpu_offset __my_cpu_offset	/* presumably so #ifdef tests see it */
+#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp))
+
+#include <asm-generic/percpu.h>
+
+#endif /* _ASM_TILE_PERCPU_H */
diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h
new file mode 100644
index 00000000..e919c0bd
--- /dev/null
+++ b/arch/tile/include/asm/pgalloc.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PGALLOC_H
+#define _ASM_TILE_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <asm/fixmap.h>
+#include <hv/hypervisor.h>
+
+/* Bits for the size of the second-level page table. */
+#define L2_KERNEL_PGTABLE_SHIFT \
+  (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL + HV_LOG2_PTE_SIZE)
+
+/* We currently allocate user L2 page tables by page (unlike kernel L2s). */
+#if L2_KERNEL_PGTABLE_SHIFT < HV_LOG2_PAGE_SIZE_SMALL
+#define L2_USER_PGTABLE_SHIFT HV_LOG2_PAGE_SIZE_SMALL
+#else
+#define L2_USER_PGTABLE_SHIFT L2_KERNEL_PGTABLE_SHIFT
+#endif
+
+/* How many pages do we need, as an "order", for a user L2 page table? */
+#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - HV_LOG2_PAGE_SIZE_SMALL)
+
+/* How big is a kernel L2 page table? */
+#define L2_KERNEL_PGTABLE_SIZE (1 << L2_KERNEL_PGTABLE_SHIFT)
+
+static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+	set_pte(pmdp, pmd);	/* pmd_t is handed to set_pte() as-is */
+#else
+	set_pte(&pmdp->pud.pgd, pmd.pud.pgd);	/* unwrap pmd -> pud -> pgd */
+#endif
+}
+
+static inline void pmd_populate_kernel(struct mm_struct *mm,
+				       pmd_t *pmd, pte_t *ptep)
+{
+	set_pmd(pmd, ptfn_pmd(__pa(ptep) >> HV_LOG2_PAGE_TABLE_ALIGN,
+			      __pgprot(_PAGE_PRESENT)));
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+				pgtable_t page)
+{
+	set_pmd(pmd, ptfn_pmd(HV_PFN_TO_PTFN(page_to_pfn(page)),
+			      __pgprot(_PAGE_PRESENT)));
+}
+
+/*
+ * Allocate and free page tables.
+ */
+
+extern pgd_t *pgd_alloc(struct mm_struct *mm);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+
+extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
+extern void pte_free(struct mm_struct *mm, struct page *pte);
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+static inline pte_t *
+pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+	struct page *page = pte_alloc_one(mm, address);	/* NULL on failure */
+	return page ? pfn_to_kaddr(page_to_pfn(page)) : NULL;
+}
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	BUG_ON((unsigned long)pte & (PAGE_SIZE-1));	/* must be page-aligned */
+	pte_free(mm, virt_to_page(pte));
+}
+
+extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
+			   unsigned long address);
+
+#define check_pgt_cache()	do { } while (0)
+
+/*
+ * Get the small-page pte_t lowmem entry for a given pfn.
+ * This may or may not be in use, depending on whether the initial
+ * huge-page entry for the page has already been shattered.
+ */
+pte_t *get_prealloc_pte(unsigned long pfn);
+
+/* During init, we can shatter kernel huge pages if needed. */
+void shatter_pmd(pmd_t *pmd);
+
+/* After init, a more complex technique is required. */
+void shatter_huge_page(unsigned long addr);
+
+#ifdef __tilegx__
+/* We share a single page allocator for both L1 and L2 page tables. */
+#if HV_L1_SIZE != HV_L2_SIZE
+# error Rework assumption that L1 and L2 page tables are same size.
+#endif
+#define L1_USER_PGTABLE_ORDER L2_USER_PGTABLE_ORDER
+#define pud_populate(mm, pud, pmd) \
+  pmd_populate_kernel((mm), (pmd_t *)(pud), (pte_t *)(pmd))
+#define pmd_alloc_one(mm, addr) \
+  ((pmd_t *)page_to_virt(pte_alloc_one((mm), (addr))))
+#define pmd_free(mm, pmdp) \
+  pte_free((mm), virt_to_page(pmdp))
+#define __pmd_free_tlb(tlb, pmdp, address) \
+  __pte_free_tlb((tlb), virt_to_page(pmdp), (address))
+#endif
+
+#endif /* _ASM_TILE_PGALLOC_H */
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
new file mode 100644
index 00000000..67490910
--- /dev/null
+++ b/arch/tile/include/asm/pgtable.h
@@ -0,0 +1,465 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * This file contains the functions and defines necessary to modify and use
+ * the TILE page table tree.
+ */
+
+#ifndef _ASM_TILE_PGTABLE_H
+#define _ASM_TILE_PGTABLE_H
+
+#include <hv/hypervisor.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bitops.h>
+#include <linux/threads.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+
+struct mm_struct;
+struct vm_area_struct;
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas, etc.
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+extern pgd_t swapper_pg_dir[];
+extern pgprot_t swapper_pgprot;
+extern struct kmem_cache *pgd_cache;
+extern spinlock_t pgd_lock;
+extern struct list_head pgd_list;
+
+/*
+ * The very last slots in the pgd_t are for addresses unusable by Linux
+ * (pgd_addr_invalid() returns true).  So we use them for the list structure.
+ * The x86 code we are modelled on uses the page->private/index fields
+ * (older 2.6 kernels) or the lru list (newer 2.6 kernels), but since
+ * our pgds are so much smaller than a page, it seems a waste to
+ * spend a whole page on each pgd.
+ */
+#define PGD_LIST_OFFSET \
+  ((PTRS_PER_PGD * sizeof(pgd_t)) - sizeof(struct list_head))
+#define pgd_to_list(pgd) \
+  ((struct list_head *)((char *)(pgd) + PGD_LIST_OFFSET))
+#define list_to_pgd(list) \
+  ((pgd_t *)((char *)(list) - PGD_LIST_OFFSET))
+
+extern void pgtable_cache_init(void);
+extern void paging_init(void);
+extern void set_page_homes(void);
+
+#define FIRST_USER_ADDRESS	0
+
+#define _PAGE_PRESENT           HV_PTE_PRESENT
+#define _PAGE_HUGE_PAGE         HV_PTE_PAGE
+#define _PAGE_READABLE          HV_PTE_READABLE
+#define _PAGE_WRITABLE          HV_PTE_WRITABLE
+#define _PAGE_EXECUTABLE        HV_PTE_EXECUTABLE
+#define _PAGE_ACCESSED          HV_PTE_ACCESSED
+#define _PAGE_DIRTY             HV_PTE_DIRTY
+#define _PAGE_GLOBAL            HV_PTE_GLOBAL
+#define _PAGE_USER              HV_PTE_USER
+
+/*
+ * All the "standard" bits.  Cache-control bits are managed elsewhere.
+ * This is used to test for valid level-2 page table pointers by checking
+ * all the bits, and to mask away the cache control bits for mprotect.
+ */
+#define _PAGE_ALL (\
+  _PAGE_PRESENT | \
+  _PAGE_HUGE_PAGE | \
+  _PAGE_READABLE | \
+  _PAGE_WRITABLE | \
+  _PAGE_EXECUTABLE | \
+  _PAGE_ACCESSED | \
+  _PAGE_DIRTY | \
+  _PAGE_GLOBAL | \
+  _PAGE_USER \
+)
+
+#define PAGE_NONE \
+	__pgprot(_PAGE_PRESENT | _PAGE_ACCESSED)
+#define PAGE_SHARED \
+	__pgprot(_PAGE_PRESENT | _PAGE_READABLE | _PAGE_WRITABLE | \
+		 _PAGE_USER | _PAGE_ACCESSED)
+
+#define PAGE_SHARED_EXEC \
+	__pgprot(_PAGE_PRESENT | _PAGE_READABLE | _PAGE_WRITABLE | \
+		 _PAGE_EXECUTABLE | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY_NOEXEC \
+	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_READABLE)
+#define PAGE_COPY_EXEC \
+	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | \
+		 _PAGE_READABLE | _PAGE_EXECUTABLE)
+#define PAGE_COPY \
+	PAGE_COPY_NOEXEC
+#define PAGE_READONLY \
+	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_READABLE)
+#define PAGE_READONLY_EXEC \
+	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | \
+		 _PAGE_READABLE | _PAGE_EXECUTABLE)
+
+#define _PAGE_KERNEL_RO \
+ (_PAGE_PRESENT | _PAGE_GLOBAL | _PAGE_READABLE | _PAGE_ACCESSED)
+#define _PAGE_KERNEL \
+ (_PAGE_KERNEL_RO | _PAGE_WRITABLE | _PAGE_DIRTY)
+#define _PAGE_KERNEL_EXEC       (_PAGE_KERNEL_RO | _PAGE_EXECUTABLE)
+
+#define PAGE_KERNEL		__pgprot(_PAGE_KERNEL)
+#define PAGE_KERNEL_RO		__pgprot(_PAGE_KERNEL_RO)
+#define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL_EXEC)
+
+#define page_to_kpgprot(p) PAGE_KERNEL
+
+/*
+ * We could tighten these up, but for now writable or executable
+ * implies readable.
+ */
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY
+#define __P010	PAGE_COPY      /* this is write-only, which we won't support */
+#define __P011	PAGE_COPY
+#define __P100	PAGE_READONLY_EXEC
+#define __P101	PAGE_READONLY_EXEC
+#define __P110	PAGE_COPY_EXEC
+#define __P111	PAGE_COPY_EXEC
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY
+#define __S010	PAGE_SHARED
+#define __S011	PAGE_SHARED
+#define __S100	PAGE_READONLY_EXEC
+#define __S101	PAGE_READONLY_EXEC
+#define __S110	PAGE_SHARED_EXEC
+#define __S111	PAGE_SHARED_EXEC
+
+/*
+ * All the normal _PAGE_ALL bits are ignored for PMDs, except PAGE_PRESENT
+ * and PAGE_HUGE_PAGE, which must be one and zero, respectively.
+ * We set the ignored bits to zero.
+ */
+#define _PAGE_TABLE     _PAGE_PRESENT
+
+/* Inherit the caching flags from the old protection bits. */
+#define pgprot_modify(oldprot, newprot) \
+  (pgprot_t) { ((oldprot).val & ~_PAGE_ALL) | (newprot).val }
+
+/* Just setting the PFN to zero suffices. */
+#define pte_pgprot(x) hv_pte_set_pfn((x), 0)
+
+/*
+ * For PTEs and PDEs, we must clear the Present bit first when
+ * clearing a page table entry, so clear the bottom half first and
+ * enforce ordering with a barrier.
+ */
+static inline void __pte_clear(pte_t *ptep)
+{
+#ifdef __tilegx__
+	/* One assignment clears the whole entry. */
+	ptep->val = 0;
+#else
+	/* View the entry as two 32-bit words so each half is stored separately. */
+	u32 *tmp = (u32 *)ptep;
+	/* Word 0 goes first; per the header comment it holds the Present bit. */
+	tmp[0] = 0;
+	/* Compiler barrier: keep the two stores in this order. */
+	barrier();
+	/* Now the remaining word can be cleared. */
+	tmp[1] = 0;
+#endif
+}
+#define pte_clear(mm, addr, ptep) __pte_clear(ptep)
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not.
+ */
+#define pte_present hv_pte_get_present
+#define pte_user hv_pte_get_user
+#define pte_read hv_pte_get_readable
+#define pte_dirty hv_pte_get_dirty
+#define pte_young hv_pte_get_accessed
+#define pte_write hv_pte_get_writable
+#define pte_exec hv_pte_get_executable
+#define pte_huge hv_pte_get_page
+#define pte_rdprotect hv_pte_clear_readable
+#define pte_exprotect hv_pte_clear_executable
+#define pte_mkclean hv_pte_clear_dirty
+#define pte_mkold hv_pte_clear_accessed
+#define pte_wrprotect hv_pte_clear_writable
+#define pte_mksmall hv_pte_clear_page
+#define pte_mkread hv_pte_set_readable
+#define pte_mkexec hv_pte_set_executable
+#define pte_mkdirty hv_pte_set_dirty
+#define pte_mkyoung hv_pte_set_accessed
+#define pte_mkwrite hv_pte_set_writable
+#define pte_mkhuge hv_pte_set_page
+
+#define pte_special(pte) 0
+#define pte_mkspecial(pte) (pte)
+
+/*
+ * Use some spare bits in the PTE for user-caching tags.
+ */
+#define pte_set_forcecache hv_pte_set_client0
+#define pte_get_forcecache hv_pte_get_client0
+#define pte_clear_forcecache hv_pte_clear_client0
+#define pte_set_anyhome hv_pte_set_client1
+#define pte_get_anyhome hv_pte_get_client1
+#define pte_clear_anyhome hv_pte_clear_client1
+
+/*
+ * A migrating PTE has PAGE_PRESENT clear but all the other bits preserved.
+ */
+#define pte_migrating hv_pte_get_migrating
+#define pte_mkmigrate(x) hv_pte_set_migrating(hv_pte_clear_present(x))
+#define pte_donemigrate(x) hv_pte_set_present(hv_pte_clear_migrating(x))
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte 0x%016llx.\n", __FILE__, __LINE__, pte_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd 0x%016llx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/* Return PA and protection info for a given kernel VA. */
+int va_to_cpa_and_pte(void *va, phys_addr_t *cpa, pte_t *pte);
+
+/*
+ * __set_pte() ensures we write the 64-bit PTE with 32-bit words in
+ * the right order on 32-bit platforms and also allows us to write
+ * hooks to check valid PTEs, etc., if we want.
+ */
+void __set_pte(pte_t *ptep, pte_t pte);
+
+/*
+ * set_pte() sets the given PTE and also sanity-checks the
+ * requested PTE against the page homecaching.  Unspecified parts
+ * of the PTE are filled in when it is written to memory, i.e. all
+ * caching attributes if "!forcecache", or the home cpu if "anyhome".
+ */
+extern void set_pte(pte_t *ptep, pte_t pte);
+#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
+#define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval)
+
+#define pte_page(x)		pfn_to_page(pte_pfn(x))
+
+/* A pte_t counts as "none" when its entire value is zero. */
+static inline int pte_none(pte_t pte)
+{
+	return pte.val == 0;
+}
+
+/* Extract the page frame number stored in a pte_t. */
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	unsigned long pfn = hv_pte_get_pfn(pte);
+
+	return pfn;
+}
+
+/* Set or get the remote cache cpu in a pgprot with remote caching. */
+extern pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu);
+extern int get_remote_cache_cpu(pgprot_t prot);
+
+/* Build a pte_t by installing "pfn" into the given protection bits. */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
+{
+	pte_t entry = hv_pte_set_pfn(prot, pfn);
+
+	return entry;
+}
+
+/* Support for priority mappings. */
+extern void start_mm_caching(struct mm_struct *mm);
+extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next);
+
+/*
+ * Support non-linear file mappings (see sys_remap_file_pages).
+ * This is defined by CLIENT1 set but CLIENT0 and _PAGE_PRESENT clear, and the
+ * file offset in the 32 high bits.
+ */
+#define _PAGE_FILE        HV_PTE_CLIENT1
+#define PTE_FILE_MAX_BITS 32
+#define pte_file(pte)     (hv_pte_get_client1(pte) && !hv_pte_get_client0(pte))
+#define pte_to_pgoff(pte) ((pte).val >> 32)
+#define pgoff_to_pte(off) ((pte_t) { (((long long)(off)) << 32) | _PAGE_FILE })
+
+/*
+ * Encode and de-code a swap entry (see <linux/swapops.h>).
+ * We put the swap file type+offset in the 32 high bits;
+ * I believe we can just leave the low bits clear.
+ */
+/* Swap type: the low 5 bits of the swp_entry_t value. */
+#define __swp_type(swp)		((swp).val & 0x1f)
+/* Swap offset: everything above the 5 type bits. */
+#define __swp_offset(swp)	((swp).val >> 5)
+/* Pack a type and an offset; "type" is not masked to 5 bits here. */
+#define __swp_entry(type, off)	((swp_entry_t) { (type) | ((off) << 5) })
+/* The swap entry is read back from the upper 32 bits of the PTE value. */
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { (pte).val >> 32 })
+/* Widen to long long before shifting so the entry lands in the upper half. */
+#define __swp_entry_to_pte(swp)	((pte_t) { (((long long) ((swp).val)) << 32) })
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+
+/*
+ * Used for mprotect(): keep only the PFN of the existing PTE and
+ * rebuild the entry around the new protection value.
+ */
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	unsigned long pfn = hv_pte_get_pfn(pte);
+
+	return pfn_pte(pfn, newprot);
+}
+
+/*
+ * The pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
+ *
+ * This macro returns the index of the entry in the pgd page which would
+ * control the given virtual address.
+ */
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+
+/*
+ * pgd_offset() returns a (pgd_t *)
+ * pgd_index() is used to get the offset into the pgd page's array of pgd_t's.
+ */
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+
+/*
+ * A shortcut which implies the use of the kernel's pgd, instead
+ * of a process's.
+ */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+#if defined(CONFIG_HIGHPTE)
+extern pte_t *pte_offset_map(pmd_t *, unsigned long address);
+#define pte_unmap(pte) kunmap_atomic(pte)
+#else
+#define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
+#define pte_unmap(pte) do { } while (0)
+#endif
+
+/* Clear a non-executable kernel PTE and flush it from the TLB. */
+#define kpte_clear_flush(ptep, vaddr)		\
+do {						\
+	pte_clear(&init_mm, (vaddr), (ptep));	\
+	local_flush_tlb_page(FLUSH_NONEXEC, (vaddr), PAGE_SIZE); \
+} while (0)
+
+/*
+ * The kernel page tables contain what we need, and we flush when we
+ * change specific page table entries.
+ */
+#define update_mmu_cache(vma, address, pte) do { } while (0)
+
+#ifdef CONFIG_FLATMEM
+#define kern_addr_valid(addr)	(1)
+#endif /* CONFIG_FLATMEM */
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
+		remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+extern void vmalloc_sync_all(void);
+
+#endif /* !__ASSEMBLY__ */
+
+#ifdef __tilegx__
+#include <asm/pgtable_64.h>
+#else
+#include <asm/pgtable_32.h>
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* A pmd is "none" when the low word of its value is zero. */
+static inline int pmd_none(pmd_t pmd)
+{
+	/*
+	 * On 32-bit platforms only the low word is examined, because the
+	 * upper half might be out of sync with it.
+	 */
+	unsigned long low_word = (unsigned long)pmd_val(pmd);
+
+	return !low_word;
+}
+
+/*
+ * Return 1 if the pmd has _PAGE_PRESENT set, 0 otherwise.
+ *
+ * The masked value is compared against zero rather than returned
+ * directly, so the result does not depend on the flag surviving the
+ * implicit narrowing of a wide pmd value to int.
+ */
+static inline int pmd_present(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _PAGE_PRESENT) != 0;
+}
+
+/*
+ * A pmd is "bad" unless its standard bits are exactly _PAGE_TABLE.
+ */
+static inline int pmd_bad(pmd_t pmd)
+{
+	if ((pmd_val(pmd) & _PAGE_ALL) == _PAGE_TABLE)
+		return 0;
+	return 1;
+}
+
+/* Convert a page count to megabytes (2^20 bytes), rounding down. */
+static inline unsigned long pages_to_mb(unsigned long npg)
+{
+	const int pages_per_mb_shift = 20 - PAGE_SHIFT;
+
+	return npg >> pages_per_mb_shift;
+}
+
+/*
+ * The pmd can be thought of as an array like this: pmd_t[PTRS_PER_PMD]
+ *
+ * Return the index of the pmd entry that controls the given virtual
+ * address.
+ */
+static inline unsigned long pmd_index(unsigned long address)
+{
+	unsigned long slot = address >> PMD_SHIFT;
+
+	return slot & (PTRS_PER_PMD - 1);
+}
+
+/*
+ * Return the kernel virtual address that a kernel pmd_t points at
+ * (either a kernel huge page or a kernel pte_t table).  Kernel pte_t
+ * tables can be aligned at sub-page granularity, so despite the name
+ * the result is not necessarily page-aligned.
+ */
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+	phys_addr_t pa = pmd_ptfn(pmd);
+
+	/* Scale the page-table frame number up to a physical address. */
+	pa <<= HV_LOG2_PAGE_TABLE_ALIGN;
+	return (unsigned long)__va(pa);
+}
+
+/*
+ * A pmd_t points to the base of a huge page or to a pte_t array.
+ * If a pte_t array, since we can have multiple per page, we don't
+ * have a one-to-one mapping of pmd_t's to pages.  However, this is
+ * OK for pte_lockptr(), since we just end up with potentially one
+ * lock being used for several pte_t arrays.
+ */
+#define pmd_page(pmd) pfn_to_page(HV_PTFN_TO_PFN(pmd_ptfn(pmd)))
+
+/*
+ * The pte page can be thought of as an array like this: pte_t[PTRS_PER_PTE]
+ *
+ * Return the index of the pte entry that controls the given virtual
+ * address.
+ */
+static inline unsigned long pte_index(unsigned long address)
+{
+	unsigned long slot = address >> PAGE_SHIFT;
+
+	return slot & (PTRS_PER_PTE - 1);
+}
+
+/*
+ * Locate the pte_t for "address" inside the pte table that the given
+ * kernel pmd points at.
+ */
+static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
+{
+	pte_t *table = (pte_t *)pmd_page_vaddr(*pmd);
+
+	return &table[pte_index(address)];
+}
+
+/*
+ * Return 1 if the pmd has _PAGE_HUGE_PAGE set, 0 otherwise.
+ *
+ * The masked value is compared against zero rather than returned
+ * directly, so the result does not depend on the flag surviving the
+ * implicit narrowing of a wide pmd value to int.
+ */
+static inline int pmd_huge_page(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _PAGE_HUGE_PAGE) != 0;
+}
+
+#include <asm-generic/pgtable.h>
+
+/* Support /proc/NN/pgtable API. */
+struct seq_file;
+int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm,
+			   unsigned long vaddr, pte_t *ptep, void **datap);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_PGTABLE_H */
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
new file mode 100644
index 00000000..9f985297
--- /dev/null
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ */
+
+#ifndef _ASM_TILE_PGTABLE_32_H
+#define _ASM_TILE_PGTABLE_32_H
+
+/*
+ * The level-1 index is defined by the huge page size.  A PGD is composed
+ * of PTRS_PER_PGD pgd_t's and is the top level of the page table.
+ */
+#define PGDIR_SHIFT	HV_LOG2_PAGE_SIZE_LARGE
+#define PGDIR_SIZE	HV_PAGE_SIZE_LARGE
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD	(1 << (32 - PGDIR_SHIFT))
+#define SIZEOF_PGD	(PTRS_PER_PGD * sizeof(pgd_t))
+
+/*
+ * The level-2 index is defined by the difference between the huge
+ * page size and the normal page size.  A PTE is composed of
+ * PTRS_PER_PTE pte_t's and is the bottom level of the page table.
+ * Note that the hypervisor docs use PTE for what we call pte_t, so
+ * this nomenclature is somewhat confusing.
+ */
+#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
+#define SIZEOF_PTE	(PTRS_PER_PTE * sizeof(pte_t))
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily, subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ *
+ * HOWEVER, if we are using an allocation scheme with slop after the
+ * end of the page table (e.g. where our L2 page tables are 2KB but
+ * our pages are 64KB and we are allocating via the page allocator)
+ * we can't extend it easily.
+ */
+#define LAST_PKMAP PTRS_PER_PTE
+
+#define PKMAP_BASE   ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK)
+
+#ifdef CONFIG_HIGHMEM
+# define __VMAPPING_END	(PKMAP_BASE & ~(HPAGE_SIZE-1))
+#else
+# define __VMAPPING_END	(FIXADDR_START & ~(HPAGE_SIZE-1))
+#endif
+
+#ifdef CONFIG_HUGEVMAP
+#define HUGE_VMAP_END	__VMAPPING_END
+#define HUGE_VMAP_BASE	(HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE)
+#define _VMALLOC_END	HUGE_VMAP_BASE
+#else
+#define _VMALLOC_END	__VMAPPING_END
+#endif
+
+/*
+ * Align the vmalloc area to an L2 page table, and leave a guard page
+ * at the beginning and end.  The vmalloc code also puts in an internal
+ * guard page between each allocation.
+ */
+#define VMALLOC_END	(_VMALLOC_END - PAGE_SIZE)
+extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */;
+#define _VMALLOC_START	(_VMALLOC_END - VMALLOC_RESERVE)
+#define VMALLOC_START	(_VMALLOC_START + PAGE_SIZE)
+
+/* This is the maximum possible amount of lowmem. */
+#define MAXMEM		(_VMALLOC_START - PAGE_OFFSET)
+
+/* We have no pmd or pud since we are strictly a two-level page table */
+#include <asm-generic/pgtable-nopmd.h>
+
+/* No pgds are defined for addresses at or above MEM_HV_INTRPT. */
+static inline int pgd_addr_invalid(unsigned long addr)
+{
+	if (addr < MEM_HV_INTRPT)
+		return 0;
+	return 1;
+}
+
+/*
+ * Provide versions of these routines that can be used safely when
+ * the hypervisor may be asynchronously modifying dirty/accessed bits.
+ * ptep_get_and_clear() matches the generic one but we provide it to
+ * be parallel with the 64-bit code.
+ */
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+
+extern int ptep_test_and_clear_young(struct vm_area_struct *,
+				     unsigned long addr, pte_t *);
+extern void ptep_set_wrprotect(struct mm_struct *,
+			       unsigned long addr, pte_t *);
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+/*
+ * Read the current PTE, clear the slot with pte_clear(), and return
+ * the value that was read.  The read and the clear are two separate
+ * steps in this version.
+ */
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = *ptep;
+	/*
+	 * pte_clear() currently discards its mm argument, but pass the
+	 * real parameter so this still builds if that ever changes.
+	 */
+	pte_clear(mm, addr, ptep);
+	return pte;
+}
+
+/*
+ * Store a pmd value.  With the folded levels used here the actual
+ * entry is the pgd nested inside the pmd, and it is written by set_pte().
+ */
+static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+	pte_t *slot = &pmdp->pud.pgd;
+
+	set_pte(slot, pmdval.pud.pgd);
+}
+
+/* Build a pmd_t from a page-table frame number and protection bits. */
+static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
+{
+	pmd_t pmd = { { hv_pte_set_ptfn(prot, ptfn) } };
+
+	return pmd;
+}
+
+/* Return the page-table frame number (ptfn) that a pmd_t points at. */
+#define pmd_ptfn(pmd) hv_pte_get_ptfn((pmd).pud.pgd)
+
+/* Clear a pmd by clearing the pgd entry nested inside it. */
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	pte_t *slot = &pmdp->pud.pgd;
+
+	__pte_clear(slot);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_TILE_PGTABLE_32_H */
diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h
new file mode 100644
index 00000000..fd803285
--- /dev/null
+++ b/arch/tile/include/asm/pgtable_64.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ */
+
+#ifndef _ASM_TILE_PGTABLE_64_H
+#define _ASM_TILE_PGTABLE_64_H
+
+/* The level-0 page table breaks the address space into 32-bit chunks. */
+#define PGDIR_SHIFT	HV_LOG2_L1_SPAN
+#define PGDIR_SIZE	HV_L1_SPAN
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD	HV_L0_ENTRIES
+#define SIZEOF_PGD	(PTRS_PER_PGD * sizeof(pgd_t))
+
+/*
+ * The level-1 index is defined by the huge page size.  A PMD is composed
+ * of PTRS_PER_PMD pmd_t's and is the middle level of the page table.
+ */
+#define PMD_SHIFT	HV_LOG2_PAGE_SIZE_LARGE
+#define PMD_SIZE	HV_PAGE_SIZE_LARGE
+#define PMD_MASK	(~(PMD_SIZE-1))
+#define PTRS_PER_PMD	(1 << (PGDIR_SHIFT - PMD_SHIFT))
+#define SIZEOF_PMD	(PTRS_PER_PMD * sizeof(pmd_t))
+
+/*
+ * The level-2 index is defined by the difference between the huge
+ * page size and the normal page size.  A PTE is composed of
+ * PTRS_PER_PTE pte_t's and is the bottom level of the page table.
+ * Note that the hypervisor docs use PTE for what we call pte_t, so
+ * this nomenclature is somewhat confusing.
+ */
+#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
+#define SIZEOF_PTE	(PTRS_PER_PTE * sizeof(pte_t))
+
+/*
+ * Align the vmalloc area to an L2 page table, and leave a guard page
+ * at the beginning and end.  The vmalloc code also puts in an internal
+ * guard page between each allocation.
+ */
+#define _VMALLOC_END	HUGE_VMAP_BASE
+#define VMALLOC_END	(_VMALLOC_END - PAGE_SIZE)
+#define VMALLOC_START	(_VMALLOC_START + PAGE_SIZE)
+
+#define HUGE_VMAP_END	(HUGE_VMAP_BASE + PGDIR_SIZE)
+
+#ifndef __ASSEMBLY__
+
+/* We have no pud since we are a three-level page table. */
+#include <asm-generic/pgtable-nopud.h>
+
+/* A pud is "none" when its value is zero. */
+static inline int pud_none(pud_t pud)
+{
+	return !pud_val(pud);
+}
+
+/*
+ * Return 1 if the pud has _PAGE_PRESENT set, 0 otherwise.
+ *
+ * The masked value is compared against zero rather than returned
+ * directly, so the result does not depend on the flag surviving the
+ * implicit narrowing of a wide pud value to int.
+ */
+static inline int pud_present(pud_t pud)
+{
+	return (pud_val(pud) & _PAGE_PRESENT) != 0;
+}
+
+#define pmd_ERROR(e) \
+	pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e))
+
+/* Clear a pud by clearing the pgd entry it wraps. */
+static inline void pud_clear(pud_t *pudp)
+{
+	pte_t *slot = &pudp->pgd;
+
+	__pte_clear(slot);
+}
+
+/*
+ * A pud is "bad" unless its standard bits are exactly _PAGE_TABLE.
+ */
+static inline int pud_bad(pud_t pud)
+{
+	if ((pud_val(pud) & _PAGE_ALL) == _PAGE_TABLE)
+		return 0;
+	return 1;
+}
+
+/* Return the page-table frame number (ptfn) that a pud_t points at. */
+#define pud_ptfn(pud) hv_pte_get_ptfn((pud).pgd)
+
+/*
+ * A given kernel pud_t maps to a kernel pmd_t table at a specific
+ * virtual address.  Since kernel pmd_t tables can be aligned at
+ * sub-page granularity, this macro can return non-page-aligned
+ * pointers, despite its name.
+ */
+#define pud_page_vaddr(pud) \
+	(__va((phys_addr_t)pud_ptfn(pud) << HV_LOG2_PAGE_TABLE_ALIGN))
+
+/*
+ * A pud_t points to a pmd_t array.  Since we can have multiple per
+ * page, we don't have a one-to-one mapping of pud_t's to pages.
+ */
+#define pud_page(pud) pfn_to_page(HV_PTFN_TO_PFN(pud_ptfn(pud)))
+
+/* Return the index of the pud entry that controls the given address. */
+static inline unsigned long pud_index(unsigned long address)
+{
+	unsigned long slot = address >> PUD_SHIFT;
+
+	return slot & (PTRS_PER_PUD - 1);
+}
+
+#define pmd_offset(pud, address) \
+	((pmd_t *)pud_page_vaddr(*(pud)) + pmd_index(address))
+
+/* Store a pmd value; the pmd slot is written directly by set_pte(). */
+static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+	set_pte(pmdp, pmdval);
+}
+
+/* Build a pmd_t from a page-table frame number and protection bits. */
+static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
+{
+	pmd_t pmd = hv_pte_set_ptfn(prot, ptfn);
+
+	return pmd;
+}
+
+/* Extract the page-table frame number (ptfn) stored in a pmd_t. */
+static inline unsigned long pmd_ptfn(pmd_t pmd)
+{
+	unsigned long ptfn = hv_pte_get_ptfn(pmd);
+
+	return ptfn;
+}
+
+/* Clear a pmd entry using the same routine that clears a pte. */
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	__pte_clear(pmdp);
+}
+
+/*
+ * Normalize an address to having the correct high bits set: the bits
+ * above CHIP_VA_WIDTH() are replaced with copies of the top VA bit.
+ */
+#define pgd_addr_normalize pgd_addr_normalize
+static inline unsigned long pgd_addr_normalize(unsigned long addr)
+{
+	const int shift = CHIP_WORD_SIZE() - CHIP_VA_WIDTH();
+
+	/*
+	 * Do the left shift on the unsigned value, where discarding high
+	 * bits is well-defined, and only then convert to signed.  The
+	 * right shift still relies on the compiler sign-extending signed
+	 * values, exactly as before.
+	 */
+	return (long)(addr << shift) >> shift;
+}
+
+/*
+ * No pgds are defined at or above MEM_HV_START, nor strictly between
+ * MEM_LOW_END and MEM_HIGH_START.
+ */
+static inline int pgd_addr_invalid(unsigned long addr)
+{
+	if (addr >= MEM_HV_START)
+		return 1;
+	return addr > MEM_LOW_END && addr < MEM_HIGH_START;
+}
+
+/*
+ * Use atomic instructions to provide atomicity against the hypervisor.
+ */
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+/*
+ * Clear HV_PTE_ACCESSED in *ptep and return a 0/1 value taken from the
+ * result of the fetch-and-AND.  "vma" and "addr" are unused here.
+ * NOTE(review): presumably __insn_fetchand() returns the value held
+ * before the AND, making this the old Accessed bit -- confirm.
+ */
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long addr, pte_t *ptep)
+{
+	/* Shift the Accessed bit down to bit 0 and mask off everything else. */
+	return (__insn_fetchand(&ptep->val, ~HV_PTE_ACCESSED) >>
+		HV_PTE_INDEX_ACCESSED) & 0x1;
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+/*
+ * Clear HV_PTE_WRITABLE in *ptep with a fetch-and-AND; the value the
+ * intrinsic returns is discarded.  "mm" and "addr" are unused here.
+ */
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+				      unsigned long addr, pte_t *ptep)
+{
+	__insn_fetchand(&ptep->val, ~HV_PTE_WRITABLE);
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+/*
+ * Exchange the PTE value with zero and return the exchange result
+ * wrapped as a pte_t.  "mm" and "addr" are unused here.
+ * NOTE(review): presumably __insn_exch() returns the previous contents,
+ * so this yields the old PTE -- confirm.
+ */
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep)
+{
+	return hv_pte(__insn_exch(&ptep->val, 0UL));
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_TILE_PGTABLE_64_H */
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
new file mode 100644
index 00000000..34c1e01f
--- /dev/null
+++ b/arch/tile/include/asm/processor.h
@@ -0,0 +1,357 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PROCESSOR_H
+#define _ASM_TILE_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+/*
+ * NOTE: we don't include <linux/ptrace.h> or <linux/percpu.h> as one
+ * normally would, due to #include dependencies.
+ */
+#include <linux/types.h>
+#include <asm/ptrace.h>
+#include <asm/percpu.h>
+
+#include <arch/chip.h>
+#include <arch/spr_def.h>
+
+struct task_struct;
+struct thread_struct;
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+void *current_text_addr(void);
+
+#if CHIP_HAS_TILE_DMA()
+/* Capture the state of a suspended DMA. */
+struct tile_dma_state {
+	int enabled;
+	unsigned long src;
+	unsigned long dest;
+	unsigned long strides;
+	unsigned long chunk_size;
+	unsigned long src_chunk;
+	unsigned long dest_chunk;
+	unsigned long byte;
+	unsigned long status;
+};
+
+/*
+ * A mask of the DMA status register for selecting only the 'running'
+ * and 'done' bits.
+ */
+#define DMA_STATUS_MASK \
+  (SPR_DMA_STATUS__RUNNING_MASK | SPR_DMA_STATUS__DONE_MASK)
+#endif
+
+/*
+ * Track asynchronous TLB events (faults and access violations)
+ * that occur while we are in kernel mode from DMA or the SN processor.
+ */
+struct async_tlb {
+	short fault_num;         /* original fault number; 0 if none */
+	char is_fault;           /* was it a fault (vs an access violation) */
+	char is_write;           /* for fault: was it caused by a write? */
+	unsigned long address;   /* what address faulted? */
+};
+
+#ifdef CONFIG_HARDWALL
+struct hardwall_info;
+#endif
+
+/*
+ * Per-thread architecture state.  Several members exist only when the
+ * corresponding CHIP_HAS_*() feature test or CONFIG_HARDWALL is enabled.
+ */
+struct thread_struct {
+	/* kernel stack pointer */
+	unsigned long  ksp;
+	/* kernel PC */
+	unsigned long  pc;
+	/* starting user stack pointer (for page migration) */
+	unsigned long  usp0;
+	/* pid of process that created this one */
+	pid_t creator_pid;
+#if CHIP_HAS_TILE_DMA()
+	/* DMA info for suspended threads (byte == 0 means no DMA state) */
+	struct tile_dma_state tile_dma_state;
+#endif
+	/* User EX_CONTEXT registers */
+	unsigned long ex_context[2];
+	/* User SYSTEM_SAVE registers */
+	unsigned long system_save[4];
+	/* User interrupt mask */
+	unsigned long long interrupt_mask;
+	/* User interrupt-control 0 state */
+	unsigned long intctrl_0;
+#if CHIP_HAS_PROC_STATUS_SPR()
+	/* Any other miscellaneous processor state bits */
+	unsigned long proc_status;
+#endif
+#if !CHIP_HAS_FIXED_INTVEC_BASE()
+	/* Interrupt base for PL0 interrupts */
+	unsigned long interrupt_vector_base;
+#endif
+#if CHIP_HAS_TILE_RTF_HWM()
+	/* Tile cache retry fifo high-water mark */
+	unsigned long tile_rtf_hwm;
+#endif
+#if CHIP_HAS_DSTREAM_PF()
+	/* Data stream prefetch control */
+	unsigned long dstream_pf;
+#endif
+#ifdef CONFIG_HARDWALL
+	/* Is this task tied to an activated hardwall? */
+	struct hardwall_info *hardwall;
+	/* Chains this task into the list at hardwall->list. */
+	struct list_head hardwall_list;
+#endif
+#if CHIP_HAS_TILE_DMA()
+	/* Async DMA TLB fault information */
+	struct async_tlb dma_async_tlb;
+#endif
+#if CHIP_HAS_SN_PROC()
+	/* Was the static network processor running when we were switched out? */
+	int sn_proc_running;
+	/* Async SNI TLB fault information */
+	struct async_tlb sn_async_tlb;
+#endif
+};
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Start with "sp" this many bytes below the top of the kernel stack.
+ * This preserves the invariant that a called function may write to *sp.
+ */
+#define STACK_TOP_DELTA 8
+
+/*
+ * When entering the kernel via a fault, start with the top of the
+ * pt_regs structure this many bytes below the top of the page.
+ * This aligns the pt_regs structure optimally for cache-line access.
+ */
+#ifdef __tilegx__
+#define KSTK_PTREGS_GAP  48
+#else
+#define KSTK_PTREGS_GAP  56
+#endif
+
+#ifndef __ASSEMBLY__
+
+#ifdef __tilegx__
+#define TASK_SIZE_MAX		(MEM_LOW_END + 1)
+#else
+#define TASK_SIZE_MAX		PAGE_OFFSET
+#endif
+
+/* TASK_SIZE and related variables are always checked in "current" context. */
+#ifdef CONFIG_COMPAT
+#define COMPAT_TASK_SIZE	(1UL << 31)
+#define TASK_SIZE		((current_thread_info()->status & TS_COMPAT) ?\
+				 COMPAT_TASK_SIZE : TASK_SIZE_MAX)
+#else
+#define TASK_SIZE		TASK_SIZE_MAX
+#endif
+
+/* We provide a minimal "vdso" a la x86; just the sigreturn code for now. */
+#define VDSO_BASE		(TASK_SIZE - PAGE_SIZE)
+
+#define STACK_TOP		VDSO_BASE
+
+/* STACK_TOP_MAX is used temporarily in execve and should not check COMPAT. */
+#define STACK_TOP_MAX		TASK_SIZE_MAX
+
+/*
+ * This decides where the kernel will search for a free chunk of vm
+ * space during mmap's, if it is using bottom-up mapping.
+ */
+#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 3))
+
+#define HAVE_ARCH_PICK_MMAP_LAYOUT
+
+#define INIT_THREAD {                                                   \
+	.ksp = (unsigned long)init_stack + THREAD_SIZE - STACK_TOP_DELTA, \
+	.interrupt_mask = -1ULL                                         \
+}
+
+/* Kernel stack top for the task that first boots on this cpu. */
+DECLARE_PER_CPU(unsigned long, boot_sp);
+
+/* PC to boot from on this cpu. */
+DECLARE_PER_CPU(unsigned long, boot_pc);
+
+/*
+ * Set up the saved registers for a newly executed thread: install the
+ * user stack pointer and the entry PC into pt_regs.
+ */
+static inline void start_thread(struct pt_regs *regs,
+				unsigned long pc, unsigned long usp)
+{
+	regs->sp = usp;
+	regs->pc = pc;
+}
+
+/* Free all resources held by a thread. */
+static inline void release_thread(struct task_struct *dead_task)
+{
+	/* Nothing for now */
+}
+
+/* Prepare to copy thread state - unlazy all lazy status. */
+#define prepare_to_copy(tsk)	do { } while (0)
+
+extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+extern int do_work_pending(struct pt_regs *regs, u32 flags);
+
+
+/*
+ * Return saved (kernel) PC of a blocked thread.
+ * Only used in a printk() in kernel/sched.c, so don't work too hard.
+ */
+#define thread_saved_pc(t)   ((t)->thread.pc)
+
+unsigned long get_wchan(struct task_struct *p);
+
+/* Return initial ksp value for given task. */
+#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE)
+
+/* Return some info about the user process TASK. */
+#define KSTK_TOP(task)	(task_ksp0(task) - STACK_TOP_DELTA)
+#define task_pt_regs(task) \
+  ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
+#define task_sp(task)	(task_pt_regs(task)->sp)
+#define task_pc(task)	(task_pt_regs(task)->pc)
+/* Aliases for pc and sp (used in fs/proc/array.c) */
+#define KSTK_EIP(task)	task_pc(task)
+#define KSTK_ESP(task)	task_sp(task)
+
+/* Standard format for printing registers and other word-size data. */
+#ifdef __tilegx__
+# define REGFMT "0x%016lx"
+#else
+# define REGFMT "0x%08lx"
+#endif
+
+/*
+ * Do some slow action (e.g. read a slow SPR).
+ * Note that this must also have compiler-barrier semantics since
+ * it may be used in a busy loop reading memory.
+ */
+static inline void cpu_relax(void)
+{
+	__insn_mfspr(SPR_PASS);	/* the slow action; the value read is discarded */
+	barrier();		/* provides the compiler-barrier semantics */
+}
+
+/* Info on this processor (see fs/proc/cpuinfo.c) */
+struct seq_operations;
+extern const struct seq_operations cpuinfo_op;
+
+/* Provide information about the chip model. */
+extern char chip_model[64];
+
+/* Data on which physical memory controller corresponds to which NUMA node. */
+extern int node_controller[];
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+/* Does the heap allocator return hash-for-home pages by default? */
+extern int hash_default;
+
+/* Should kernel stack pages be hash-for-home? */
+extern int kstack_hash;
+
+/* Does MAP_ANONYMOUS return hash-for-home pages by default? */
+#define uheap_hash hash_default
+
+#else
+#define hash_default 0
+#define kstack_hash 0
+#define uheap_hash 0
+#endif
+
+/* Are we using huge pages in the TLB for kernel data? */
+extern int kdata_huge;
+
+/* Support standard Linux prefetching. */
+#define ARCH_HAS_PREFETCH
+#define prefetch(x) __builtin_prefetch(x)
+#define PREFETCH_STRIDE CHIP_L2_LINE_SIZE()
+
+/* Bring a value into the L1D, faulting the TLB if necessary. */
+#ifdef __tilegx__
+#define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x))
+#else
+#define prefetch_L1(x) __insn_prefetch_L1((void *)(x))
+#endif
+
+#else /* __ASSEMBLY__ */
+
+/* Do some slow action (e.g. read a slow SPR). */
+#define CPU_RELAX       mfspr zero, SPR_PASS
+
+#endif /* !__ASSEMBLY__ */
+
+/* Assembly code assumes that the PL is in the low bits. */
+#if SPR_EX_CONTEXT_1_1__PL_SHIFT != 0
+# error Fix assembly assumptions about PL
+#endif
+
+/* We sometimes use these macros for EX_CONTEXT_0_1 as well. */
+#if SPR_EX_CONTEXT_1_1__PL_SHIFT != SPR_EX_CONTEXT_0_1__PL_SHIFT || \
+    SPR_EX_CONTEXT_1_1__PL_RMASK != SPR_EX_CONTEXT_0_1__PL_RMASK || \
+    SPR_EX_CONTEXT_1_1__ICS_SHIFT != SPR_EX_CONTEXT_0_1__ICS_SHIFT || \
+    SPR_EX_CONTEXT_1_1__ICS_RMASK != SPR_EX_CONTEXT_0_1__ICS_RMASK
+# error Fix assumptions that EX1 macros work for both PL0 and PL1
+#endif
+
+/* Allow pulling apart and recombining the PL and ICS bits in EX_CONTEXT. */
+#define EX1_PL(ex1) \
+  (((ex1) >> SPR_EX_CONTEXT_1_1__PL_SHIFT) & SPR_EX_CONTEXT_1_1__PL_RMASK)
+#define EX1_ICS(ex1) \
+  (((ex1) >> SPR_EX_CONTEXT_1_1__ICS_SHIFT) & SPR_EX_CONTEXT_1_1__ICS_RMASK)
+#define PL_ICS_EX1(pl, ics) \
+  (((pl) << SPR_EX_CONTEXT_1_1__PL_SHIFT) | \
+   ((ics) << SPR_EX_CONTEXT_1_1__ICS_SHIFT))
+
+/*
+ * Provide symbolic constants for PLs.
+ * Note that assembly code assumes that USER_PL is zero.
+ */
+#define USER_PL 0
+#if CONFIG_KERNEL_PL == 2
+#define GUEST_PL 1
+#endif
+#define KERNEL_PL CONFIG_KERNEL_PL
+
+/* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */
+#define CPU_LOG_MASK_VALUE 12
+#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1)
+#if CONFIG_NR_CPUS > CPU_MASK_VALUE
+# error Too many cpus!
+#endif
+#define raw_smp_processor_id() \
+	((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE)
+#define get_current_ksp0() \
+	(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE)
+#define next_current_ksp0(task) ({ \
+	unsigned long __ksp0 = task_ksp0(task); \
+	int __cpu = raw_smp_processor_id(); \
+	BUG_ON(__ksp0 & CPU_MASK_VALUE); \
+	__ksp0 | __cpu; \
+})
+
+#endif /* _ASM_TILE_PROCESSOR_H */
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
new file mode 100644
index 00000000..c6cddd7e
--- /dev/null
+++ b/arch/tile/include/asm/ptrace.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PTRACE_H
+#define _ASM_TILE_PTRACE_H
+
+#include <arch/chip.h>
+#include <arch/abi.h>
+
+/* These must match struct pt_regs, below. */
+#if CHIP_WORD_SIZE() == 32
+#define PTREGS_OFFSET_REG(n)    ((n)*4)
+#else
+#define PTREGS_OFFSET_REG(n)    ((n)*8)
+#endif
+#define PTREGS_OFFSET_BASE      0
+#define PTREGS_OFFSET_TP        PTREGS_OFFSET_REG(53)
+#define PTREGS_OFFSET_SP        PTREGS_OFFSET_REG(54)
+#define PTREGS_OFFSET_LR        PTREGS_OFFSET_REG(55)
+#define PTREGS_NR_GPRS          56
+#define PTREGS_OFFSET_PC        PTREGS_OFFSET_REG(56)
+#define PTREGS_OFFSET_EX1       PTREGS_OFFSET_REG(57)
+#define PTREGS_OFFSET_FAULTNUM  PTREGS_OFFSET_REG(58)
+#define PTREGS_OFFSET_ORIG_R0   PTREGS_OFFSET_REG(59)
+#define PTREGS_OFFSET_FLAGS     PTREGS_OFFSET_REG(60)
+#if CHIP_HAS_CMPEXCH()
+#define PTREGS_OFFSET_CMPEXCH   PTREGS_OFFSET_REG(61)
+#endif
+#define PTREGS_SIZE             PTREGS_OFFSET_REG(64)
+
+#ifndef __ASSEMBLY__
+
+#ifdef __KERNEL__
+/* Benefit from consistent use of "long" on all chips. */
+typedef unsigned long pt_reg_t;
+#else
+/* Provide appropriate length type to userspace regardless of -m32/-m64. */
+typedef uint_reg_t pt_reg_t;
+#endif
+
+/*
+ * Register layout saved on the stack for a system call or exception; keep in
+ * sync with PTREGS_OFFSET_*.  "struct sigcontext" has the same shape.
+ */
+struct pt_regs {
+	/* Saved main processor registers; 56..63 are special. */
+	/* tp, sp, and lr must immediately follow regs[] for aliasing. */
+	pt_reg_t regs[53];	/* slots 0..52; tp/sp/lr occupy slots 53..55 */
+	pt_reg_t tp;		/* aliases regs[TREG_TP] */
+	pt_reg_t sp;		/* aliases regs[TREG_SP] */
+	pt_reg_t lr;		/* aliases regs[TREG_LR] */
+
+	/* Saved special registers. */
+	pt_reg_t pc;		/* stored in EX_CONTEXT_K_0 */
+	pt_reg_t ex1;		/* stored in EX_CONTEXT_K_1 (PL and ICS bit) */
+	pt_reg_t faultnum;	/* fault number (INT_SWINT_1 for syscall) */
+	pt_reg_t orig_r0;	/* r0 at syscall entry, else zero */
+	pt_reg_t flags;		/* PT_FLAGS_* bits (defined below) */
+#if !CHIP_HAS_CMPEXCH()
+	pt_reg_t pad[3];	/* pads the struct out to 64 slots (PTREGS_SIZE) */
+#else
+	pt_reg_t cmpexch;	/* value of CMPEXCH_VALUE SPR at interrupt */
+	pt_reg_t pad[2];	/* pads the struct out to 64 slots (PTREGS_SIZE) */
+#endif
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#define PTRACE_GETREGS		12
+#define PTRACE_SETREGS		13
+#define PTRACE_GETFPREGS	14
+#define PTRACE_SETFPREGS	15
+
+/* Support TILE-specific ptrace options, with events starting at 16. */
+#define PTRACE_O_TRACEMIGRATE	0x00010000
+#define PTRACE_EVENT_MIGRATE	16
+#ifdef __KERNEL__
+#define PTRACE_O_MASK_TILE	(PTRACE_O_TRACEMIGRATE)
+#define PT_TRACE_MIGRATE	0x00080000
+#define PT_TRACE_MASK_TILE	(PT_TRACE_MIGRATE)
+#endif
+
+#ifdef __KERNEL__
+
+/* Flag bits in pt_regs.flags */
+#define PT_FLAGS_DISABLE_IRQ    1  /* on return to kernel, disable irqs */
+#define PT_FLAGS_CALLER_SAVES   2  /* caller-save registers are valid */
+#define PT_FLAGS_RESTORE_REGS   4  /* restore callee-save regs on return */
+
+#ifndef __ASSEMBLY__
+
+#define instruction_pointer(regs) ((regs)->pc)
+#define profile_pc(regs) instruction_pointer(regs)
+
+/* Does the process account for user or for system time? */
+#define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL)
+
+/* Fill in a struct pt_regs with the current kernel registers. */
+struct pt_regs *get_pt_regs(struct pt_regs *);
+
+/* Trace the current syscall. */
+extern void do_syscall_trace(void);
+
+#define arch_has_single_step()	(1)
+
+/*
+ * A structure for all single-stepper state.
+ *
+ * If the layout changes, also update the SINGLESTEP_STATE_* defines below.
+ */
+struct single_step_state {
+	/* the page to which we will write hacked-up bundles */
+	void __user *buffer;
+
+	union {
+		int flags;	/* overlays the bitfields below */
+		struct {
+			unsigned long is_enabled:1, update:1, update_reg:6;
+		};
+	};
+
+	unsigned long orig_pc;		/* the original PC */
+	unsigned long next_pc;		/* return PC if no branch (PC + 1) */
+	unsigned long branch_next_pc;	/* return PC if we did branch/jump */
+	unsigned long update_value;	/* value to restore (presumably to update_reg) */
+};
+
+/* Single-step the instruction at regs->pc */
+extern void single_step_once(struct pt_regs *regs);
+
+/* Clean up after execve(). */
+extern void single_step_execve(void);
+
+struct task_struct;
+
+extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
+			 int error_code);
+
+#ifdef __tilegx__
+/* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */
+#define __ARCH_WANT_COMPAT_SYS_PTRACE
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#define SINGLESTEP_STATE_MASK_IS_ENABLED      0x1
+#define SINGLESTEP_STATE_MASK_UPDATE          0x2
+#define SINGLESTEP_STATE_TARGET_LB              2
+#define SINGLESTEP_STATE_TARGET_UB              7
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_TILE_PTRACE_H */
diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h
new file mode 100644
index 00000000..d062d463
--- /dev/null
+++ b/arch/tile/include/asm/sections.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SECTIONS_H
+#define _ASM_TILE_SECTIONS_H
+
+#define arch_is_kernel_data arch_is_kernel_data
+
+#include <asm-generic/sections.h>
+
+/* Text and data are at different areas in the kernel VA space. */
+extern char _sinitdata[], _einitdata[];
+
+/* Write-once data is writable only till the end of initialization. */
+extern char __w1data_begin[], __w1data_end[];
+
+
+/* Not exactly sections, but PC comparison points in the code. */
+extern char __rt_sigreturn[], __rt_sigreturn_end[];
+#ifndef __tilegx__
+extern char sys_cmpxchg[], __sys_cmpxchg_end[];
+extern char __sys_cmpxchg_grab_lock[];
+extern char __start_atomic_asm_code[], __end_atomic_asm_code[];
+#endif
+
+/* Handle the _sdata/_stext discontiguity: accept addr in [_sdata, _end). */
+static inline int arch_is_kernel_data(unsigned long addr)
+{
+	return addr >= (unsigned long)_sdata &&
+		addr < (unsigned long)_end;
+}
+
+#endif /* _ASM_TILE_SECTIONS_H */
diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h
new file mode 100644
index 00000000..e58613e0
--- /dev/null
+++ b/arch/tile/include/asm/setup.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SETUP_H
+#define _ASM_TILE_SETUP_H
+
+#define COMMAND_LINE_SIZE	2048
+
+#ifdef __KERNEL__
+
+#include <linux/pfn.h>
+#include <linux/init.h>
+
+/*
+ * Reserved space for vmalloc and iomap - defined in asm/page.h
+ */
+#define MAXMEM_PFN	PFN_DOWN(MAXMEM)
+
+void early_panic(const char *fmt, ...);
+void warn_early_printk(void);
+void __init disable_early_printk(void);
+
+/* Init-time routine to do tile-specific per-cpu setup. */
+void setup_cpu(int boot);
+
+/* User-level DMA management functions */
+void grant_dma_mpls(void);
+void restrict_dma_mpls(void);
+
+#ifdef CONFIG_HARDWALL
+/* User-level network management functions */
+void reset_network_state(void);
+void grant_network_mpls(void);
+void restrict_network_mpls(void);
+struct task_struct;
+int hardwall_deactivate(struct task_struct *task);
+
+/* Hook hardwall code into changes in affinity; "p" is evaluated more than once. */
+#define arch_set_cpus_allowed(p, new_mask) do { \
+	if ((p)->thread.hardwall && !cpumask_equal(&(p)->cpus_allowed, (new_mask))) \
+		hardwall_deactivate(p); \
+} while (0)
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_TILE_SETUP_H */
diff --git a/arch/tile/include/asm/sigcontext.h b/arch/tile/include/asm/sigcontext.h
new file mode 100644
index 00000000..6348e59d
--- /dev/null
+++ b/arch/tile/include/asm/sigcontext.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SIGCONTEXT_H
+#define _ASM_TILE_SIGCONTEXT_H
+
+/* Don't pollute the namespace since <signal.h> includes this file. */
+#define __need_int_reg_t
+#include <arch/abi.h>
+
+/*
+ * struct sigcontext has the same shape as struct pt_regs,
+ * but is simplified since we know the fault is from userspace.
+ */
+struct sigcontext {
+	__uint_reg_t gregs[53];	/* General-purpose registers.  */
+	__uint_reg_t tp;	/* Aliases gregs[TREG_TP].  */
+	__uint_reg_t sp;	/* Aliases gregs[TREG_SP].  */
+	__uint_reg_t lr;	/* Aliases gregs[TREG_LR].  */
+	__uint_reg_t pc;	/* Program counter.  */
+	__uint_reg_t ics;	/* In Interrupt Critical Section?  */
+	__uint_reg_t faultnum;	/* Fault number.  */
+	__uint_reg_t pad[5];	/* Pads to 64 slots, as in pt_regs.  */
+};
+
+#endif /* _ASM_TILE_SIGCONTEXT_H */
diff --git a/arch/tile/include/asm/sigframe.h b/arch/tile/include/asm/sigframe.h
new file mode 100644
index 00000000..994d3d30
--- /dev/null
+++ b/arch/tile/include/asm/sigframe.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SIGFRAME_H
+#define _ASM_TILE_SIGFRAME_H
+
+/* Indicate that syscall return should not examine r0 */
+#define INT_SWINT_1_SIGRETURN (~0)
+
+#ifndef __ASSEMBLY__
+
+#include <arch/abi.h>
+
+struct rt_sigframe {
+	unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */
+	struct siginfo info;
+	struct ucontext uc;
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_SIGFRAME_H */
diff --git a/arch/tile/include/asm/siginfo.h b/arch/tile/include/asm/siginfo.h
new file mode 100644
index 00000000..56d661bb
--- /dev/null
+++ b/arch/tile/include/asm/siginfo.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SIGINFO_H
+#define _ASM_TILE_SIGINFO_H
+
+#define __ARCH_SI_TRAPNO
+
+#ifdef __LP64__
+# define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
+#endif
+
+#include <asm-generic/siginfo.h>
+
+/*
+ * Additional Tile-specific SIGILL si_codes
+ */
+#define ILL_DBLFLT	(__SI_FAULT|9)	/* double fault */
+#define ILL_HARDWALL	(__SI_FAULT|10)	/* user networks hardwall violation */
+#undef NSIGILL
+#define NSIGILL		10
+
+#endif /* _ASM_TILE_SIGINFO_H */
diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h
new file mode 100644
index 00000000..1e5e49aa
--- /dev/null
+++ b/arch/tile/include/asm/signal.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SIGNAL_H
+#define _ASM_TILE_SIGNAL_H
+
+/* Do not notify a ptracer when this signal is handled. */
+#define SA_NOPTRACE 0x02000000u
+
+/* Used in earlier Tilera releases, so keeping for binary compatibility. */
+#define SA_RESTORER 0x04000000u
+
+#include <asm-generic/signal.h>
+
+#if defined(__KERNEL__)
+#if !defined(__ASSEMBLY__)
+struct pt_regs;
+int restore_sigcontext(struct pt_regs *, struct sigcontext __user *);
+int setup_sigcontext(struct sigcontext __user *, struct pt_regs *);
+void do_signal(struct pt_regs *regs);
+void signal_fault(const char *type, struct pt_regs *,
+		  void __user *frame, int sig);
+void trace_unhandled_signal(const char *type, struct pt_regs *regs,
+			    unsigned long address, int signo);
+#endif
+#endif
+
+#endif /* _ASM_TILE_SIGNAL_H */
diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
new file mode 100644
index 00000000..1aa759ae
--- /dev/null
+++ b/arch/tile/include/asm/smp.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SMP_H
+#define _ASM_TILE_SMP_H
+
+#ifdef CONFIG_SMP
+
+#include <asm/processor.h>
+#include <linux/cpumask.h>
+#include <linux/irqreturn.h>
+#include <hv/hypervisor.h>
+
+/* Set up this tile to support receiving hypervisor messages */
+void init_messaging(void);
+
+/* Set up this tile to support receiving device interrupts and IPIs. */
+void init_per_tile_IRQs(void);
+
+/* Send a message to processors specified in mask */
+void send_IPI_many(const struct cpumask *mask, int tag);
+
+/* Send a message to all but the sending processor */
+void send_IPI_allbutself(int tag);
+
+/* Send a message to a specific processor */
+void send_IPI_single(int dest, int tag);
+
+/* Process an IPI message */
+void evaluate_message(int tag);
+
+/* Boot a secondary cpu */
+void online_secondary(void);
+
+/* Topology of the supervisor tile grid, and coordinates of boot processor */
+extern HV_Topology smp_topology;
+
+/* Accessors for grid size */
+#define smp_height		(smp_topology.height)
+#define smp_width		(smp_topology.width)
+
+/* Convert cpu <-> (x, y) grid coords, where cpu == y * smp_width + x. */
+static inline int cpu_x(int cpu)
+{
+	return cpu % smp_width;
+}
+static inline int cpu_y(int cpu)
+{
+	return cpu / smp_width;
+}
+static inline int xy_to_cpu(int x, int y)
+{
+	return y * smp_width + x;
+}
+
+/* Hypervisor message tags sent via the tile send_IPI*() routines. */
+#define MSG_TAG_START_CPU		1
+#define MSG_TAG_STOP_CPU		2
+#define MSG_TAG_CALL_FUNCTION_MANY	3
+#define MSG_TAG_CALL_FUNCTION_SINGLE	4
+
+/* Hook for the generic smp_call_function_many() routine. */
+static inline void arch_send_call_function_ipi_mask(struct cpumask *mask)
+{
+	send_IPI_many(mask, MSG_TAG_CALL_FUNCTION_MANY);
+}
+
+/* Hook for the generic smp_call_function_single() routine. */
+static inline void arch_send_call_function_single_ipi(int cpu)
+{
+	send_IPI_single(cpu, MSG_TAG_CALL_FUNCTION_SINGLE);
+}
+
+/* Print out the boot string describing which cpus were disabled. */
+void print_disabled_cpus(void);
+
+#else /* !CONFIG_SMP */
+
+#define smp_master_cpu		0
+#define smp_height		1
+#define smp_width		1
+#define cpu_x(cpu)		0
+#define cpu_y(cpu)		0
+#define xy_to_cpu(x, y)		0
+
+#endif /* !CONFIG_SMP */
+
+
+/* Which cpus may be used as the lotar in a page table entry. */
+extern struct cpumask cpu_lotar_map;
+#define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map)
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+/* Which processors are used for hash-for-home mapping */
+extern struct cpumask hash_for_home_map;
+#endif
+
+/* Which cpus can have their cache flushed by hv_flush_remote(). */
+extern struct cpumask cpu_cacheable_map;
+#define cpu_cacheable(cpu) cpumask_test_cpu((cpu), &cpu_cacheable_map)
+
+/* Convert an HV_LOTAR value into a cpu. */
+static inline int hv_lotar_to_cpu(HV_LOTAR lotar)
+{
+	return HV_LOTAR_X(lotar) + (HV_LOTAR_Y(lotar) * smp_width);
+}
+
+/*
+ * Extension of <linux/cpumask.h> functionality when you just want
+ * to express a mask or suppression or inclusion region without
+ * being too concerned about exactly which cpus are valid in that region.
+ */
+int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits);
+
+#define cpulist_parse_crop(buf, dst) \
+			__cpulist_parse_crop((buf), (dst), NR_CPUS)
+static inline int __cpulist_parse_crop(const char *buf, struct cpumask *dstp,
+					int nbits)
+{
+	return bitmap_parselist_crop(buf, cpumask_bits(dstp), nbits);
+}
+
+/* Initialize the IPI subsystem. */
+void ipi_init(void);
+
+/* Function for start-cpu message to cause us to jump to. */
+extern unsigned long start_cpu_function_addr;
+
+#endif /* _ASM_TILE_SMP_H */
diff --git a/arch/tile/include/asm/spinlock.h b/arch/tile/include/asm/spinlock.h
new file mode 100644
index 00000000..1a8bd474
--- /dev/null
+++ b/arch/tile/include/asm/spinlock.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SPINLOCK_H
+#define _ASM_TILE_SPINLOCK_H
+
+#ifdef __tilegx__
+#include <asm/spinlock_64.h>
+#else
+#include <asm/spinlock_32.h>
+#endif
+
+#endif /* _ASM_TILE_SPINLOCK_H */
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
new file mode 100644
index 00000000..c0a77b38
--- /dev/null
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * 32-bit SMP spinlocks.
+ */
+
+#ifndef _ASM_TILE_SPINLOCK_32_H
+#define _ASM_TILE_SPINLOCK_32_H
+
+#include <linux/atomic.h>
+#include <asm/page.h>
+#include <linux/compiler.h>
+
+/*
+ * We only use even ticket numbers so the '1' inserted by a tns is
+ * an unambiguous "ticket is busy" flag.
+ */
+#define TICKET_QUANTUM 2
+
+
+/*
+ * SMP ticket spinlocks, allowing only a single CPU anywhere
+ *
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	/*
+	 * Note that even if a new ticket is in the process of being
+	 * acquired, so lock->next_ticket is 1, it's still reasonable
+	 * to claim the lock is held, since it will be momentarily
+	 * if not already.  There's no need to wait for a "valid"
+	 * lock->next_ticket to become available.
+	 */
+	return lock->next_ticket != lock->current_ticket;
+}
+
+void arch_spin_lock(arch_spinlock_t *lock);
+
+/* We cannot take an interrupt after getting a ticket, so don't enable them. */
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+int arch_spin_trylock(arch_spinlock_t *lock);
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	/* For efficiency, overlap fetching the old ticket with the wmb(). */
+	int old_ticket = lock->current_ticket;
+	wmb();  /* guarantee anything modified under the lock is visible */
+	lock->current_ticket = old_ticket + TICKET_QUANTUM;
+}
+
+void arch_spin_unlock_wait(arch_spinlock_t *lock);
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * We use a "tns/store-back" technique on a single word to manage
+ * the lock state, looping around to retry if the tns returns 1.
+ */
+
+/* Internal layout of the word; do not use. */
+#define _WR_NEXT_SHIFT	8
+#define _WR_CURR_SHIFT  16
+#define _WR_WIDTH       8
+#define _RD_COUNT_SHIFT 24
+#define _RD_COUNT_WIDTH 8
+
+/**
+ * arch_read_can_lock() - would read_trylock() succeed?
+ */
+static inline int arch_read_can_lock(arch_rwlock_t *rwlock)
+{
+	return (rwlock->lock << _RD_COUNT_WIDTH) == 0;	/* shift out reader count */
+}
+
+/**
+ * arch_write_can_lock() - would write_trylock() succeed?
+ */
+static inline int arch_write_can_lock(arch_rwlock_t *rwlock)
+{
+	return rwlock->lock == 0;
+}
+
+/**
+ * arch_read_lock() - acquire a read lock.
+ */
+void arch_read_lock(arch_rwlock_t *rwlock);
+
+/**
+ * arch_write_lock() - acquire a write lock.
+ */
+void arch_write_lock(arch_rwlock_t *rwlock);
+
+/**
+ * arch_read_trylock() - try to acquire a read lock.
+ */
+int arch_read_trylock(arch_rwlock_t *rwlock);
+
+/**
+ * arch_write_trylock() - try to acquire a write lock.
+ */
+int arch_write_trylock(arch_rwlock_t *rwlock);
+
+/**
+ * arch_read_unlock() - release a read lock.
+ */
+void arch_read_unlock(arch_rwlock_t *rwlock);
+
+/**
+ * arch_write_unlock() - release a write lock.
+ */
+void arch_write_unlock(arch_rwlock_t *rwlock);
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#endif /* _ASM_TILE_SPINLOCK_32_H */
diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
new file mode 100644
index 00000000..5f8b6a09
--- /dev/null
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * 64-bit SMP ticket spinlocks, allowing only a single CPU anywhere
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+
+#ifndef _ASM_TILE_SPINLOCK_64_H
+#define _ASM_TILE_SPINLOCK_64_H
+
+/* Shifts and masks for the various fields in "lock". */
+#define __ARCH_SPIN_CURRENT_SHIFT	17
+#define __ARCH_SPIN_NEXT_MASK		0x7fff
+#define __ARCH_SPIN_NEXT_OVERFLOW	0x8000
+
+/*
+ * Return the "current" portion of a ticket lock value,
+ * i.e. the number that currently owns the lock.
+ */
+static inline int arch_spin_current(u32 val)
+{
+	return val >> __ARCH_SPIN_CURRENT_SHIFT;
+}
+
+/*
+ * Return the "next" portion of a ticket lock value,
+ * i.e. the number that the next task to try to acquire the lock will get.
+ */
+static inline int arch_spin_next(u32 val)
+{
+	return val & __ARCH_SPIN_NEXT_MASK;
+}
+
+/* The lock is locked if a task would have to wait to get it. */
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	u32 val = lock->lock;
+	return arch_spin_current(val) != arch_spin_next(val);
+}
+
+/* Bump the current ticket so the next task owns the lock. */
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	wmb();  /* guarantee anything modified under the lock is visible */
+	__insn_fetchadd4(&lock->lock, 1U << __ARCH_SPIN_CURRENT_SHIFT);
+}
+
+void arch_spin_unlock_wait(arch_spinlock_t *lock);
+
+void arch_spin_lock_slow(arch_spinlock_t *lock, u32 val);
+
+/* Grab the "next" ticket number and bump it atomically.
+ * If the current ticket is not ours, go to the slow path.
+ * An overflowed "next" keeps its overflow bit in ticket, so it goes slow too.
+ */
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	u32 val = __insn_fetchadd4(&lock->lock, 1);
+	u32 ticket = val & (__ARCH_SPIN_NEXT_MASK | __ARCH_SPIN_NEXT_OVERFLOW);
+	if (unlikely(arch_spin_current(val) != ticket))
+		arch_spin_lock_slow(lock, ticket);
+}
+
+/* Try to get the lock, and return whether we succeeded. */
+int arch_spin_trylock(arch_spinlock_t *lock);
+
+/* We cannot take an interrupt after getting a ticket, so don't enable them. */
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * We use fetchadd() for readers, and fetchor() with the sign bit
+ * for writers.
+ */
+
+#define __WRITE_LOCK_BIT (1U << 31)	/* sign bit of the 32-bit lock word */
+
+static inline int arch_write_val_locked(int val)
+{
+	return val < 0;  /* Optimize "val & __WRITE_LOCK_BIT". */
+}
+
+/**
+ * arch_read_can_lock() - would read_trylock() succeed?
+ * @rw: the rwlock in question.
+ */
+static inline int arch_read_can_lock(arch_rwlock_t *rw)
+{
+	return !arch_write_val_locked(rw->lock);
+}
+
+/**
+ * arch_write_can_lock() - would write_trylock() succeed?
+ * @rw: the rwlock in question.
+ */
+static inline int arch_write_can_lock(arch_rwlock_t *rw)
+{
+	return rw->lock == 0;
+}
+
+extern void __read_lock_failed(arch_rwlock_t *rw);
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	u32 val = __insn_fetchaddgez4(&rw->lock, 1);
+	if (unlikely(arch_write_val_locked(val)))
+		__read_lock_failed(rw);
+}
+
+extern void __write_lock_failed(arch_rwlock_t *rw, u32 val);
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	u32 val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);
+	if (unlikely(val != 0))
+		__write_lock_failed(rw, val);
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	__insn_mf();
+	__insn_fetchadd4(&rw->lock, -1);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	__insn_mf();
+	__insn_exch4(&rw->lock, 0);  /* Avoid waiting in the write buffer. */
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	return !arch_write_val_locked(__insn_fetchaddgez4(&rw->lock, 1));
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	u32 val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);
+	if (likely(val == 0))	/* no readers and no writer: lock is ours */
+		return 1;
+	if (!arch_write_val_locked(val))	/* write bit was clear in val... */
+		__insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT);	/* ...so clear it */
+	return 0;	/* failed: readers or another writer hold the lock */
+}
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#endif /* _ASM_TILE_SPINLOCK_64_H */
diff --git a/arch/tile/include/asm/spinlock_types.h b/arch/tile/include/asm/spinlock_types.h
new file mode 100644
index 00000000..a71f59b4
--- /dev/null
+++ b/arch/tile/include/asm/spinlock_types.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SPINLOCK_TYPES_H
+#define _ASM_TILE_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+#ifdef __tilegx__
+
+/* Low 15 bits are "next"; high 15 bits are "current". */
+typedef struct arch_spinlock {
+	unsigned int lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+
+/* High bit is "writer owns"; low 31 bits are a count of readers. */
+typedef struct arch_rwlock {
+	unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+
+#else /* !__tilegx__ */
+
+typedef struct arch_spinlock {
+	/* Next ticket number to hand out. */
+	int next_ticket;
+	/* The ticket number that currently owns this lock. */
+	int current_ticket;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0, 0 }
+
+/*
+ * Byte 0 for tns (only the low bit is used), byte 1 for ticket-lock "next",
+ * byte 2 for ticket-lock "current", byte 3 for reader count.
+ */
+typedef struct arch_rwlock {
+	unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+
+#endif /* __tilegx__ */
+#endif /* _ASM_TILE_SPINLOCK_TYPES_H */
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
new file mode 100644
index 00000000..0e9d382a
--- /dev/null
+++ b/arch/tile/include/asm/stack.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_STACK_H
+#define _ASM_TILE_STACK_H
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <asm/backtrace.h>
+#include <asm/page.h>
+#include <hv/hypervisor.h>
+
+/* Everything we need to keep track of a backtrace iteration. */
+struct KBacktraceIterator {
+	BacktraceIterator it;         /* wrapped BacktraceIterator state */
+	struct task_struct *task;     /* task we are backtracing */
+	int end;		      /* iteration complete. */
+	int new_context;              /* new context is starting */
+	int profile;                  /* profiling: stop on async interrupt */
+	int verbose;		      /* printk extra info (don't want to
+				       * do this for profiling) */
+	int is_current;               /* backtracing current task */
+};
+
+/* Iteration methods for kernel backtraces */
+
+/*
+ * Initialize a KBacktraceIterator from a task_struct, and optionally from
+ * a set of registers.  If the registers are omitted, the process is
+ * assumed to be descheduled, and registers are read from the process's
+ * thread_struct and stack.  "verbose" means to printk some additional
+ * information about fault handlers as we pass them on the stack.
+ */
+extern void KBacktraceIterator_init(struct KBacktraceIterator *kbt,
+				    struct task_struct *, struct pt_regs *);
+
+/* Initialize iterator based on current stack. */
+extern void KBacktraceIterator_init_current(struct KBacktraceIterator *kbt);
+
+/* Helper method for above. */
+extern void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt,
+				ulong pc, ulong lr, ulong sp, ulong r52);
+
+/* No more frames? */
+extern int KBacktraceIterator_end(struct KBacktraceIterator *kbt);
+
+/* Advance to the next frame. */
+extern void KBacktraceIterator_next(struct KBacktraceIterator *kbt);
+
+/*
+ * Dump stack given complete register info. Use only from the
+ * architecture-specific code; show_stack()
+ * and dump_stack() (in entry.S) are architecture-independent entry points.
+ */
+extern void tile_show_stack(struct KBacktraceIterator *, int headers);
+
+/* Dump stack of current process, with registers to seed the backtrace. */
+extern void dump_stack_regs(struct pt_regs *);
+
+/* Helper method for assembly dump_stack(). */
+extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+
+#endif /* _ASM_TILE_STACK_H */
diff --git a/arch/tile/include/asm/stat.h b/arch/tile/include/asm/stat.h
new file mode 100644
index 00000000..c0db34d5
--- /dev/null
+++ b/arch/tile/include/asm/stat.h
@@ -0,0 +1,4 @@
+#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
+#define __ARCH_WANT_STAT64	/* Used for compat_sys_stat64() etc. */
+#endif
+#include <asm-generic/stat.h>
diff --git a/arch/tile/include/asm/string.h b/arch/tile/include/asm/string.h
new file mode 100644
index 00000000..7535cf1a
--- /dev/null
+++ b/arch/tile/include/asm/string.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_STRING_H
+#define _ASM_TILE_STRING_H
+
+#define __HAVE_ARCH_MEMCHR
+#define __HAVE_ARCH_MEMSET
+#define __HAVE_ARCH_MEMCPY
+#define __HAVE_ARCH_MEMMOVE
+#define __HAVE_ARCH_STRCHR
+#define __HAVE_ARCH_STRLEN
+
+extern __kernel_size_t strlen(const char *);
+extern char *strchr(const char *s, int c);
+extern void *memchr(const void *s, int c, size_t n);
+extern void *memset(void *, int, __kernel_size_t);
+extern void *memcpy(void *, const void *, __kernel_size_t);
+extern void *memmove(void *, const void *, __kernel_size_t);
+
+#endif /* _ASM_TILE_STRING_H */
diff --git a/arch/tile/include/asm/swab.h b/arch/tile/include/asm/swab.h
new file mode 100644
index 00000000..7c37b38f
--- /dev/null
+++ b/arch/tile/include/asm/swab.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SWAB_H
+#define _ASM_TILE_SWAB_H
+
+/* Tile gcc is always >= 4.3.0, so we use the __builtin_bswap family. */
+#define __arch_swab32(x) __builtin_bswap32(x)
+#define __arch_swab64(x) __builtin_bswap64(x)
+#define __arch_swab16(x) (__builtin_bswap32(x) >> 16)	/* low half ends up on top */
+
+#endif /* _ASM_TILE_SWAB_H */
diff --git a/arch/tile/include/asm/switch_to.h b/arch/tile/include/asm/switch_to.h
new file mode 100644
index 00000000..1d48c5fe
--- /dev/null
+++ b/arch/tile/include/asm/switch_to.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SWITCH_TO_H
+#define _ASM_TILE_SWITCH_TO_H
+
+#include <arch/sim_def.h>
+
+/*
+ * switch_to(prev, next, last) switches from task "prev" to task "next"
+ * and sets "last" to the task pointer that _switch_to() returns.
+ * The number of callee-saved registers saved on the kernel stack
+ * is defined here for use in copy_thread() and must agree with __switch_to().
+ */
+#define CALLEE_SAVED_FIRST_REG 30
+#define CALLEE_SAVED_REGS_COUNT 24   /* r30 to r52, plus an empty to align */
+
+#ifndef __ASSEMBLY__
+
+struct task_struct;
+
+/*
+ * Pause the DMA engine and static network before task switching.
+ */
+#define prepare_arch_switch(next) _prepare_arch_switch(next)
+void _prepare_arch_switch(struct task_struct *next);
+
+struct task_struct;
+#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next)))
+extern struct task_struct *_switch_to(struct task_struct *prev,
+				      struct task_struct *next);
+
+/* Helper function for _switch_to(). */
+extern struct task_struct *__switch_to(struct task_struct *prev,
+				       struct task_struct *next,
+				       unsigned long new_system_save_k_0);
+
+/* Address that switched-away from tasks are at. */
+extern unsigned long get_switch_to_pc(void);
+
+/*
+ * First write the OS_EXIT (if prev is dead) and OS_SWITCH events to
+ * SPR_SIM_CONTROL.  Then kernel threads check whether to migrate their
+ * stack; for user threads, we defer until they return to user-space.
+ */
+#define finish_arch_switch(prev) do {                                     \
+	if (unlikely((prev)->state == TASK_DEAD))                         \
+		__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT |       \
+			((prev)->pid << _SIM_CONTROL_OPERATOR_BITS));     \
+	__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH |             \
+		(current->pid << _SIM_CONTROL_OPERATOR_BITS));            \
+	if (current->mm == NULL && !kstack_hash &&                        \
+	    current_thread_info()->homecache_cpu != smp_processor_id())   \
+		homecache_migrate_kthread();                              \
+} while (0)
+
+/* Support function for forking a new task. */
+void ret_from_fork(void);
+
+/* Called from ret_from_fork() when a new process starts up. */
+struct task_struct *sim_notify_fork(struct task_struct *prev);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_SWITCH_TO_H */
diff --git a/arch/tile/include/asm/syscall.h b/arch/tile/include/asm/syscall.h
new file mode 100644
index 00000000..d35e0dcb
--- /dev/null
+++ b/arch/tile/include/asm/syscall.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2008-2009 Red Hat, Inc.  All rights reserved.
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_TILE_SYSCALL_H
+#define _ASM_TILE_SYSCALL_H
+
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <arch/abi.h>
+
+/*
+ * Only the low 32 bits of regs[TREG_SYSCALL_NR] are meaningful, so we
+ * return int.  This importantly ignores the high bits on 64-bit, so
+ * comparisons sign-extend the low 32 bits.
+ */
+static inline int syscall_get_nr(struct task_struct *t, struct pt_regs *regs)
+{
+	return regs->regs[TREG_SYSCALL_NR];
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	regs->regs[0] = regs->orig_r0;	/* put the saved original r0 back */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	unsigned long r0 = regs->regs[0];
+	return IS_ERR_VALUE(r0) ? r0 : 0;	/* 0 unless r0 is an error value */
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->regs[0];	/* r0 carries the syscall result */
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	regs->regs[0] = error ? (long) error : val;	/* non-zero error wins */
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);	/* only argument slots 0..5 may be requested */
+	memcpy(args, &regs->regs[i], n * sizeof(args[0])); /* regs[i..i+n-1] */
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	BUG_ON(i + n > 6);	/* only argument slots 0..5 may be written */
+	memcpy(&regs->regs[i], args, n * sizeof(args[0])); /* regs[i..i+n-1] */
+}
+
+#endif	/* _ASM_TILE_SYSCALL_H */
diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h
new file mode 100644
index 00000000..3b5507c3
--- /dev/null
+++ b/arch/tile/include/asm/syscalls.h
@@ -0,0 +1,76 @@
+/*
+ * syscalls.h - Linux syscall interfaces (arch-specific)
+ *
+ * Copyright (c) 2008 Jaswinder Singh Rajput
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_SYSCALLS_H
+#define _ASM_TILE_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/signal.h>
+#include <linux/types.h>
+#include <linux/compat.h>
+
+/* The array of function pointers for syscalls. */
+extern void *sys_call_table[];
+#ifdef CONFIG_COMPAT
+extern void *compat_sys_call_table[];
+#endif
+
+/*
+ * Note that by convention, any syscall which requires the current
+ * register set takes an additional "struct pt_regs *" pointer; a
+ * _sys_xxx() trampoline in intvec*.S just sets up the pointer and
+ * jumps to sys_xxx().
+ */
+
+/* kernel/sys.c */
+ssize_t sys32_readahead(int fd, u32 offset_lo, u32 offset_hi, u32 count);
+long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi,
+		     u32 len, int advice);
+int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi,
+		       u32 len_lo, u32 len_hi, int advice);
+long sys_flush_cache(void);
+#ifndef __tilegx__  /* No mmap() in the 32-bit kernel. */
+#define sys_mmap sys_mmap
+#endif
+
+#ifndef __tilegx__
+/* mm/fault.c */
+long sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *);
+long _sys_cmpxchg_badaddr(unsigned long address);
+#endif
+
+#ifdef CONFIG_COMPAT
+/* These four are not defined for 64-bit, but serve as "compat" syscalls. */
+long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg);
+long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf);
+long sys_truncate64(const char __user *path, loff_t length);
+long sys_ftruncate64(unsigned int fd, loff_t length);
+#endif
+
+/* These are the intvec*.S trampolines. */
+long _sys_sigaltstack(const stack_t __user *, stack_t __user *);
+long _sys_rt_sigreturn(void);
+long _sys_clone(unsigned long clone_flags, unsigned long newsp,
+		void __user *parent_tid, void __user *child_tid);
+long _sys_execve(const char __user *filename,
+		 const char __user *const __user *argv,
+		 const char __user *const __user *envp);
+
+#include <asm-generic/syscalls.h>
+
+#endif /* _ASM_TILE_SYSCALLS_H */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
new file mode 100644
index 00000000..7594764d
--- /dev/null
+++ b/arch/tile/include/asm/thread_info.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2002  David Howells (dhowells@redhat.com)
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_THREAD_INFO_H
+#define _ASM_TILE_THREAD_INFO_H
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#ifndef __ASSEMBLY__
+
+/*
+ * Low level task data that assembly code needs immediate access to.
+ * The structure is placed at the bottom of the supervisor stack.
+ */
+struct thread_info {
+	struct task_struct	*task;		/* main task structure */
+	struct exec_domain	*exec_domain;	/* execution domain */
+	unsigned long		flags;		/* low level flags (TIF_*) */
+	unsigned long		status;		/* thread-synchronous (TS_*) */
+	__u32			homecache_cpu;	/* CPU we are homecached on */
+	__u32			cpu;		/* current CPU */
+	int			preempt_count;	/* 0 => preemptable,
+						   <0 => BUG */
+
+	mm_segment_t		addr_limit;	/* thread address space
+						   (KERNEL_DS or USER_DS) */
+	struct restart_block	restart_block;	/* syscall restart state */
+	struct single_step_state *step_state;	/* single step state
+						   (if non-zero) */
+};
+
+/*
+ * macros/functions for gaining access to the thread information structure.
+ */
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.exec_domain	= &default_exec_domain,	\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.preempt_count	= INIT_PREEMPT_COUNT,	\
+	.addr_limit	= KERNEL_DS,		\
+	.restart_block	= {			\
+		.fn = do_no_restart_syscall,	\
+	},					\
+	.step_state	= NULL,			\
+}
+
+#define init_thread_info	(init_thread_union.thread_info)
+#define init_stack		(init_thread_union.stack)
+
+#endif /* !__ASSEMBLY__ */
+
+#if PAGE_SIZE < 8192	/* small pages: use enough pages to reach 2^13 bytes */
+#define THREAD_SIZE_ORDER (13 - PAGE_SHIFT)
+#else			/* otherwise a single page holds the stack */
+#define THREAD_SIZE_ORDER (0)
+#endif /* PAGE_SIZE < 8192 */
+#define THREAD_SIZE_PAGES (1 << THREAD_SIZE_ORDER)
+
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define LOG2_THREAD_SIZE (PAGE_SHIFT + THREAD_SIZE_ORDER)
+
+#define STACK_WARN             (THREAD_SIZE/8)
+
+#ifndef __ASSEMBLY__
+
+/* Get thread_info from C: mask sp down to a THREAD_SIZE boundary. */
+register unsigned long stack_pointer __asm__("sp");
+
+#define current_thread_info() \
+  ((struct thread_info *)(stack_pointer & -THREAD_SIZE))
+
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+extern struct thread_info *alloc_thread_info_node(struct task_struct *task, int node);
+extern void free_thread_info(struct thread_info *info);
+
+/* Sit on a nap instruction until interrupted. */
+extern void smp_nap(void);
+
+/* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */
+extern void _cpu_idle(void);
+
+/* Switch boot idle thread to a freshly-allocated stack and free old stack. */
+extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
+				  unsigned long new_sp,
+				  unsigned long new_ss10);
+
+#else /* __ASSEMBLY__ */
+
+/*
+ * How to get the thread information struct from assembly.
+ * Note that we use different macros since different architectures
+ * have different semantics in their "mm" instruction and we would
+ * like to guarantee that the macro expands to exactly one instruction.
+ */
+#ifdef __tilegx__
+#define EXTRACT_THREAD_INFO(reg) mm reg, zero, LOG2_THREAD_SIZE, 63
+#else
+#define GET_THREAD_INFO(reg) mm reg, sp, zero, LOG2_THREAD_SIZE, 31
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#define PREEMPT_ACTIVE		0x10000000
+
+/*
+ * Thread information flags that various assembly files may need to access.
+ * Keep flags accessed frequently in low bits, particularly since it makes
+ * it easier to build constants in assembly.
+ */
+#define TIF_SIGPENDING		0	/* signal pending */
+#define TIF_NEED_RESCHED	1	/* rescheduling necessary */
+#define TIF_SINGLESTEP		2	/* restore singlestep on return to
+					   user mode */
+#define TIF_ASYNC_TLB		3	/* got an async TLB fault in kernel */
+#define TIF_SYSCALL_TRACE	4	/* syscall trace active */
+#define TIF_SYSCALL_AUDIT	5	/* syscall auditing active */
+#define TIF_SECCOMP		6	/* secure computing */
+#define TIF_MEMDIE		7	/* OOM killer at work */
+#define TIF_NOTIFY_RESUME	8	/* callback before returning to user */
+
+#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
+#define _TIF_ASYNC_TLB		(1<<TIF_ASYNC_TLB)
+#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_MEMDIE		(1<<TIF_MEMDIE)
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
+
+/* Work to do on any return to user space. */
+#define _TIF_ALLWORK_MASK \
+  (_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SINGLESTEP|\
+   _TIF_ASYNC_TLB|_TIF_NOTIFY_RESUME)
+
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#ifdef __tilegx__
+#define TS_COMPAT		0x0001	/* 32-bit compatibility mode */
+#endif
+#define TS_POLLING		0x0004	/* in idle loop but not sleeping */
+#define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal */
+
+#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
+
+#ifndef __ASSEMBLY__
+#define HAVE_SET_RESTORE_SIGMASK	1
+static inline void set_restore_sigmask(void)
+{
+	struct thread_info *info = current_thread_info();
+	info->status |= TS_RESTORE_SIGMASK;	/* thread-synchronous: plain RMW */
+	set_bit(TIF_SIGPENDING, &info->flags);	/* flags word: use set_bit() */
+}
+#endif	/* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_THREAD_INFO_H */
diff --git a/arch/tile/include/asm/tile-desc.h b/arch/tile/include/asm/tile-desc.h
new file mode 100644
index 00000000..43849bf7
--- /dev/null
+++ b/arch/tile/include/asm/tile-desc.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef __tilegx__
+#include <asm/tile-desc_32.h>
+#else
+#include <asm/tile-desc_64.h>
+#endif
diff --git a/arch/tile/include/asm/tile-desc_32.h b/arch/tile/include/asm/tile-desc_32.h
new file mode 100644
index 00000000..f09c5c43
--- /dev/null
+++ b/arch/tile/include/asm/tile-desc_32.h
@@ -0,0 +1,553 @@
+/* TILEPro opcode information.
+ *
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ *
+ *
+ *
+ *
+ */
+
+#ifndef opcode_tilepro_h
+#define opcode_tilepro_h
+
+#include <arch/opcode.h>
+
+
+enum
+{
+  TILEPRO_MAX_OPERANDS = 5 /* the maximum is reached by "mm" */
+};
+
+typedef enum  /* one enumerator per TILEPro instruction mnemonic */
+{
+  TILEPRO_OPC_BPT,
+  TILEPRO_OPC_INFO,
+  TILEPRO_OPC_INFOL,
+  TILEPRO_OPC_J,
+  TILEPRO_OPC_JAL,
+  TILEPRO_OPC_MOVE,
+  TILEPRO_OPC_MOVE_SN,
+  TILEPRO_OPC_MOVEI,
+  TILEPRO_OPC_MOVEI_SN,
+  TILEPRO_OPC_MOVELI,
+  TILEPRO_OPC_MOVELI_SN,
+  TILEPRO_OPC_MOVELIS,
+  TILEPRO_OPC_PREFETCH,
+  TILEPRO_OPC_RAISE,
+  TILEPRO_OPC_ADD,
+  TILEPRO_OPC_ADD_SN,
+  TILEPRO_OPC_ADDB,
+  TILEPRO_OPC_ADDB_SN,
+  TILEPRO_OPC_ADDBS_U,
+  TILEPRO_OPC_ADDBS_U_SN,
+  TILEPRO_OPC_ADDH,
+  TILEPRO_OPC_ADDH_SN,
+  TILEPRO_OPC_ADDHS,
+  TILEPRO_OPC_ADDHS_SN,
+  TILEPRO_OPC_ADDI,
+  TILEPRO_OPC_ADDI_SN,
+  TILEPRO_OPC_ADDIB,
+  TILEPRO_OPC_ADDIB_SN,
+  TILEPRO_OPC_ADDIH,
+  TILEPRO_OPC_ADDIH_SN,
+  TILEPRO_OPC_ADDLI,
+  TILEPRO_OPC_ADDLI_SN,
+  TILEPRO_OPC_ADDLIS,
+  TILEPRO_OPC_ADDS,
+  TILEPRO_OPC_ADDS_SN,
+  TILEPRO_OPC_ADIFFB_U,
+  TILEPRO_OPC_ADIFFB_U_SN,
+  TILEPRO_OPC_ADIFFH,
+  TILEPRO_OPC_ADIFFH_SN,
+  TILEPRO_OPC_AND,
+  TILEPRO_OPC_AND_SN,
+  TILEPRO_OPC_ANDI,
+  TILEPRO_OPC_ANDI_SN,
+  TILEPRO_OPC_AULI,
+  TILEPRO_OPC_AVGB_U,
+  TILEPRO_OPC_AVGB_U_SN,
+  TILEPRO_OPC_AVGH,
+  TILEPRO_OPC_AVGH_SN,
+  TILEPRO_OPC_BBNS,
+  TILEPRO_OPC_BBNS_SN,
+  TILEPRO_OPC_BBNST,
+  TILEPRO_OPC_BBNST_SN,
+  TILEPRO_OPC_BBS,
+  TILEPRO_OPC_BBS_SN,
+  TILEPRO_OPC_BBST,
+  TILEPRO_OPC_BBST_SN,
+  TILEPRO_OPC_BGEZ,
+  TILEPRO_OPC_BGEZ_SN,
+  TILEPRO_OPC_BGEZT,
+  TILEPRO_OPC_BGEZT_SN,
+  TILEPRO_OPC_BGZ,
+  TILEPRO_OPC_BGZ_SN,
+  TILEPRO_OPC_BGZT,
+  TILEPRO_OPC_BGZT_SN,
+  TILEPRO_OPC_BITX,
+  TILEPRO_OPC_BITX_SN,
+  TILEPRO_OPC_BLEZ,
+  TILEPRO_OPC_BLEZ_SN,
+  TILEPRO_OPC_BLEZT,
+  TILEPRO_OPC_BLEZT_SN,
+  TILEPRO_OPC_BLZ,
+  TILEPRO_OPC_BLZ_SN,
+  TILEPRO_OPC_BLZT,
+  TILEPRO_OPC_BLZT_SN,
+  TILEPRO_OPC_BNZ,
+  TILEPRO_OPC_BNZ_SN,
+  TILEPRO_OPC_BNZT,
+  TILEPRO_OPC_BNZT_SN,
+  TILEPRO_OPC_BYTEX,
+  TILEPRO_OPC_BYTEX_SN,
+  TILEPRO_OPC_BZ,
+  TILEPRO_OPC_BZ_SN,
+  TILEPRO_OPC_BZT,
+  TILEPRO_OPC_BZT_SN,
+  TILEPRO_OPC_CLZ,
+  TILEPRO_OPC_CLZ_SN,
+  TILEPRO_OPC_CRC32_32,
+  TILEPRO_OPC_CRC32_32_SN,
+  TILEPRO_OPC_CRC32_8,
+  TILEPRO_OPC_CRC32_8_SN,
+  TILEPRO_OPC_CTZ,
+  TILEPRO_OPC_CTZ_SN,
+  TILEPRO_OPC_DRAIN,
+  TILEPRO_OPC_DTLBPR,
+  TILEPRO_OPC_DWORD_ALIGN,
+  TILEPRO_OPC_DWORD_ALIGN_SN,
+  TILEPRO_OPC_FINV,
+  TILEPRO_OPC_FLUSH,
+  TILEPRO_OPC_FNOP,
+  TILEPRO_OPC_ICOH,
+  TILEPRO_OPC_ILL,
+  TILEPRO_OPC_INTHB,
+  TILEPRO_OPC_INTHB_SN,
+  TILEPRO_OPC_INTHH,
+  TILEPRO_OPC_INTHH_SN,
+  TILEPRO_OPC_INTLB,
+  TILEPRO_OPC_INTLB_SN,
+  TILEPRO_OPC_INTLH,
+  TILEPRO_OPC_INTLH_SN,
+  TILEPRO_OPC_INV,
+  TILEPRO_OPC_IRET,
+  TILEPRO_OPC_JALB,
+  TILEPRO_OPC_JALF,
+  TILEPRO_OPC_JALR,
+  TILEPRO_OPC_JALRP,
+  TILEPRO_OPC_JB,
+  TILEPRO_OPC_JF,
+  TILEPRO_OPC_JR,
+  TILEPRO_OPC_JRP,
+  TILEPRO_OPC_LB,
+  TILEPRO_OPC_LB_SN,
+  TILEPRO_OPC_LB_U,
+  TILEPRO_OPC_LB_U_SN,
+  TILEPRO_OPC_LBADD,
+  TILEPRO_OPC_LBADD_SN,
+  TILEPRO_OPC_LBADD_U,
+  TILEPRO_OPC_LBADD_U_SN,
+  TILEPRO_OPC_LH,
+  TILEPRO_OPC_LH_SN,
+  TILEPRO_OPC_LH_U,
+  TILEPRO_OPC_LH_U_SN,
+  TILEPRO_OPC_LHADD,
+  TILEPRO_OPC_LHADD_SN,
+  TILEPRO_OPC_LHADD_U,
+  TILEPRO_OPC_LHADD_U_SN,
+  TILEPRO_OPC_LNK,
+  TILEPRO_OPC_LNK_SN,
+  TILEPRO_OPC_LW,
+  TILEPRO_OPC_LW_SN,
+  TILEPRO_OPC_LW_NA,
+  TILEPRO_OPC_LW_NA_SN,
+  TILEPRO_OPC_LWADD,
+  TILEPRO_OPC_LWADD_SN,
+  TILEPRO_OPC_LWADD_NA,
+  TILEPRO_OPC_LWADD_NA_SN,
+  TILEPRO_OPC_MAXB_U,
+  TILEPRO_OPC_MAXB_U_SN,
+  TILEPRO_OPC_MAXH,
+  TILEPRO_OPC_MAXH_SN,
+  TILEPRO_OPC_MAXIB_U,
+  TILEPRO_OPC_MAXIB_U_SN,
+  TILEPRO_OPC_MAXIH,
+  TILEPRO_OPC_MAXIH_SN,
+  TILEPRO_OPC_MF,
+  TILEPRO_OPC_MFSPR,
+  TILEPRO_OPC_MINB_U,
+  TILEPRO_OPC_MINB_U_SN,
+  TILEPRO_OPC_MINH,
+  TILEPRO_OPC_MINH_SN,
+  TILEPRO_OPC_MINIB_U,
+  TILEPRO_OPC_MINIB_U_SN,
+  TILEPRO_OPC_MINIH,
+  TILEPRO_OPC_MINIH_SN,
+  TILEPRO_OPC_MM,
+  TILEPRO_OPC_MNZ,
+  TILEPRO_OPC_MNZ_SN,
+  TILEPRO_OPC_MNZB,
+  TILEPRO_OPC_MNZB_SN,
+  TILEPRO_OPC_MNZH,
+  TILEPRO_OPC_MNZH_SN,
+  TILEPRO_OPC_MTSPR,
+  TILEPRO_OPC_MULHH_SS,
+  TILEPRO_OPC_MULHH_SS_SN,
+  TILEPRO_OPC_MULHH_SU,
+  TILEPRO_OPC_MULHH_SU_SN,
+  TILEPRO_OPC_MULHH_UU,
+  TILEPRO_OPC_MULHH_UU_SN,
+  TILEPRO_OPC_MULHHA_SS,
+  TILEPRO_OPC_MULHHA_SS_SN,
+  TILEPRO_OPC_MULHHA_SU,
+  TILEPRO_OPC_MULHHA_SU_SN,
+  TILEPRO_OPC_MULHHA_UU,
+  TILEPRO_OPC_MULHHA_UU_SN,
+  TILEPRO_OPC_MULHHSA_UU,
+  TILEPRO_OPC_MULHHSA_UU_SN,
+  TILEPRO_OPC_MULHL_SS,
+  TILEPRO_OPC_MULHL_SS_SN,
+  TILEPRO_OPC_MULHL_SU,
+  TILEPRO_OPC_MULHL_SU_SN,
+  TILEPRO_OPC_MULHL_US,
+  TILEPRO_OPC_MULHL_US_SN,
+  TILEPRO_OPC_MULHL_UU,
+  TILEPRO_OPC_MULHL_UU_SN,
+  TILEPRO_OPC_MULHLA_SS,
+  TILEPRO_OPC_MULHLA_SS_SN,
+  TILEPRO_OPC_MULHLA_SU,
+  TILEPRO_OPC_MULHLA_SU_SN,
+  TILEPRO_OPC_MULHLA_US,
+  TILEPRO_OPC_MULHLA_US_SN,
+  TILEPRO_OPC_MULHLA_UU,
+  TILEPRO_OPC_MULHLA_UU_SN,
+  TILEPRO_OPC_MULHLSA_UU,
+  TILEPRO_OPC_MULHLSA_UU_SN,
+  TILEPRO_OPC_MULLL_SS,
+  TILEPRO_OPC_MULLL_SS_SN,
+  TILEPRO_OPC_MULLL_SU,
+  TILEPRO_OPC_MULLL_SU_SN,
+  TILEPRO_OPC_MULLL_UU,
+  TILEPRO_OPC_MULLL_UU_SN,
+  TILEPRO_OPC_MULLLA_SS,
+  TILEPRO_OPC_MULLLA_SS_SN,
+  TILEPRO_OPC_MULLLA_SU,
+  TILEPRO_OPC_MULLLA_SU_SN,
+  TILEPRO_OPC_MULLLA_UU,
+  TILEPRO_OPC_MULLLA_UU_SN,
+  TILEPRO_OPC_MULLLSA_UU,
+  TILEPRO_OPC_MULLLSA_UU_SN,
+  TILEPRO_OPC_MVNZ,
+  TILEPRO_OPC_MVNZ_SN,
+  TILEPRO_OPC_MVZ,
+  TILEPRO_OPC_MVZ_SN,
+  TILEPRO_OPC_MZ,
+  TILEPRO_OPC_MZ_SN,
+  TILEPRO_OPC_MZB,
+  TILEPRO_OPC_MZB_SN,
+  TILEPRO_OPC_MZH,
+  TILEPRO_OPC_MZH_SN,
+  TILEPRO_OPC_NAP,
+  TILEPRO_OPC_NOP,
+  TILEPRO_OPC_NOR,
+  TILEPRO_OPC_NOR_SN,
+  TILEPRO_OPC_OR,
+  TILEPRO_OPC_OR_SN,
+  TILEPRO_OPC_ORI,
+  TILEPRO_OPC_ORI_SN,
+  TILEPRO_OPC_PACKBS_U,
+  TILEPRO_OPC_PACKBS_U_SN,
+  TILEPRO_OPC_PACKHB,
+  TILEPRO_OPC_PACKHB_SN,
+  TILEPRO_OPC_PACKHS,
+  TILEPRO_OPC_PACKHS_SN,
+  TILEPRO_OPC_PACKLB,
+  TILEPRO_OPC_PACKLB_SN,
+  TILEPRO_OPC_PCNT,
+  TILEPRO_OPC_PCNT_SN,
+  TILEPRO_OPC_RL,
+  TILEPRO_OPC_RL_SN,
+  TILEPRO_OPC_RLI,
+  TILEPRO_OPC_RLI_SN,
+  TILEPRO_OPC_S1A,
+  TILEPRO_OPC_S1A_SN,
+  TILEPRO_OPC_S2A,
+  TILEPRO_OPC_S2A_SN,
+  TILEPRO_OPC_S3A,
+  TILEPRO_OPC_S3A_SN,
+  TILEPRO_OPC_SADAB_U,
+  TILEPRO_OPC_SADAB_U_SN,
+  TILEPRO_OPC_SADAH,
+  TILEPRO_OPC_SADAH_SN,
+  TILEPRO_OPC_SADAH_U,
+  TILEPRO_OPC_SADAH_U_SN,
+  TILEPRO_OPC_SADB_U,
+  TILEPRO_OPC_SADB_U_SN,
+  TILEPRO_OPC_SADH,
+  TILEPRO_OPC_SADH_SN,
+  TILEPRO_OPC_SADH_U,
+  TILEPRO_OPC_SADH_U_SN,
+  TILEPRO_OPC_SB,
+  TILEPRO_OPC_SBADD,
+  TILEPRO_OPC_SEQ,
+  TILEPRO_OPC_SEQ_SN,
+  TILEPRO_OPC_SEQB,
+  TILEPRO_OPC_SEQB_SN,
+  TILEPRO_OPC_SEQH,
+  TILEPRO_OPC_SEQH_SN,
+  TILEPRO_OPC_SEQI,
+  TILEPRO_OPC_SEQI_SN,
+  TILEPRO_OPC_SEQIB,
+  TILEPRO_OPC_SEQIB_SN,
+  TILEPRO_OPC_SEQIH,
+  TILEPRO_OPC_SEQIH_SN,
+  TILEPRO_OPC_SH,
+  TILEPRO_OPC_SHADD,
+  TILEPRO_OPC_SHL,
+  TILEPRO_OPC_SHL_SN,
+  TILEPRO_OPC_SHLB,
+  TILEPRO_OPC_SHLB_SN,
+  TILEPRO_OPC_SHLH,
+  TILEPRO_OPC_SHLH_SN,
+  TILEPRO_OPC_SHLI,
+  TILEPRO_OPC_SHLI_SN,
+  TILEPRO_OPC_SHLIB,
+  TILEPRO_OPC_SHLIB_SN,
+  TILEPRO_OPC_SHLIH,
+  TILEPRO_OPC_SHLIH_SN,
+  TILEPRO_OPC_SHR,
+  TILEPRO_OPC_SHR_SN,
+  TILEPRO_OPC_SHRB,
+  TILEPRO_OPC_SHRB_SN,
+  TILEPRO_OPC_SHRH,
+  TILEPRO_OPC_SHRH_SN,
+  TILEPRO_OPC_SHRI,
+  TILEPRO_OPC_SHRI_SN,
+  TILEPRO_OPC_SHRIB,
+  TILEPRO_OPC_SHRIB_SN,
+  TILEPRO_OPC_SHRIH,
+  TILEPRO_OPC_SHRIH_SN,
+  TILEPRO_OPC_SLT,
+  TILEPRO_OPC_SLT_SN,
+  TILEPRO_OPC_SLT_U,
+  TILEPRO_OPC_SLT_U_SN,
+  TILEPRO_OPC_SLTB,
+  TILEPRO_OPC_SLTB_SN,
+  TILEPRO_OPC_SLTB_U,
+  TILEPRO_OPC_SLTB_U_SN,
+  TILEPRO_OPC_SLTE,
+  TILEPRO_OPC_SLTE_SN,
+  TILEPRO_OPC_SLTE_U,
+  TILEPRO_OPC_SLTE_U_SN,
+  TILEPRO_OPC_SLTEB,
+  TILEPRO_OPC_SLTEB_SN,
+  TILEPRO_OPC_SLTEB_U,
+  TILEPRO_OPC_SLTEB_U_SN,
+  TILEPRO_OPC_SLTEH,
+  TILEPRO_OPC_SLTEH_SN,
+  TILEPRO_OPC_SLTEH_U,
+  TILEPRO_OPC_SLTEH_U_SN,
+  TILEPRO_OPC_SLTH,
+  TILEPRO_OPC_SLTH_SN,
+  TILEPRO_OPC_SLTH_U,
+  TILEPRO_OPC_SLTH_U_SN,
+  TILEPRO_OPC_SLTI,
+  TILEPRO_OPC_SLTI_SN,
+  TILEPRO_OPC_SLTI_U,
+  TILEPRO_OPC_SLTI_U_SN,
+  TILEPRO_OPC_SLTIB,
+  TILEPRO_OPC_SLTIB_SN,
+  TILEPRO_OPC_SLTIB_U,
+  TILEPRO_OPC_SLTIB_U_SN,
+  TILEPRO_OPC_SLTIH,
+  TILEPRO_OPC_SLTIH_SN,
+  TILEPRO_OPC_SLTIH_U,
+  TILEPRO_OPC_SLTIH_U_SN,
+  TILEPRO_OPC_SNE,
+  TILEPRO_OPC_SNE_SN,
+  TILEPRO_OPC_SNEB,
+  TILEPRO_OPC_SNEB_SN,
+  TILEPRO_OPC_SNEH,
+  TILEPRO_OPC_SNEH_SN,
+  TILEPRO_OPC_SRA,
+  TILEPRO_OPC_SRA_SN,
+  TILEPRO_OPC_SRAB,
+  TILEPRO_OPC_SRAB_SN,
+  TILEPRO_OPC_SRAH,
+  TILEPRO_OPC_SRAH_SN,
+  TILEPRO_OPC_SRAI,
+  TILEPRO_OPC_SRAI_SN,
+  TILEPRO_OPC_SRAIB,
+  TILEPRO_OPC_SRAIB_SN,
+  TILEPRO_OPC_SRAIH,
+  TILEPRO_OPC_SRAIH_SN,
+  TILEPRO_OPC_SUB,
+  TILEPRO_OPC_SUB_SN,
+  TILEPRO_OPC_SUBB,
+  TILEPRO_OPC_SUBB_SN,
+  TILEPRO_OPC_SUBBS_U,
+  TILEPRO_OPC_SUBBS_U_SN,
+  TILEPRO_OPC_SUBH,
+  TILEPRO_OPC_SUBH_SN,
+  TILEPRO_OPC_SUBHS,
+  TILEPRO_OPC_SUBHS_SN,
+  TILEPRO_OPC_SUBS,
+  TILEPRO_OPC_SUBS_SN,
+  TILEPRO_OPC_SW,
+  TILEPRO_OPC_SWADD,
+  TILEPRO_OPC_SWINT0,
+  TILEPRO_OPC_SWINT1,
+  TILEPRO_OPC_SWINT2,
+  TILEPRO_OPC_SWINT3,
+  TILEPRO_OPC_TBLIDXB0,
+  TILEPRO_OPC_TBLIDXB0_SN,
+  TILEPRO_OPC_TBLIDXB1,
+  TILEPRO_OPC_TBLIDXB1_SN,
+  TILEPRO_OPC_TBLIDXB2,
+  TILEPRO_OPC_TBLIDXB2_SN,
+  TILEPRO_OPC_TBLIDXB3,
+  TILEPRO_OPC_TBLIDXB3_SN,
+  TILEPRO_OPC_TNS,
+  TILEPRO_OPC_TNS_SN,
+  TILEPRO_OPC_WH64,
+  TILEPRO_OPC_XOR,
+  TILEPRO_OPC_XOR_SN,
+  TILEPRO_OPC_XORI,
+  TILEPRO_OPC_XORI_SN,
+  TILEPRO_OPC_NONE  /* last entry; presumably means "no opcode" -- confirm */
+} tilepro_mnemonic;
+
+
+
+/* The five pipelines an instruction can be encoded for.  The two X
+ * pipes are listed first; tilepro_is_x_pipeline() relies on that
+ * ordering, since it only compares against TILEPRO_PIPELINE_X1. */
+typedef enum
+{
+  TILEPRO_PIPELINE_X0,
+  TILEPRO_PIPELINE_X1,
+  TILEPRO_PIPELINE_Y0,
+  TILEPRO_PIPELINE_Y1,
+  TILEPRO_PIPELINE_Y2,
+} tilepro_pipeline;
+
+#define tilepro_is_x_pipeline(p) ((int)(p) <= (int)TILEPRO_PIPELINE_X1)
+
+typedef enum
+{
+  TILEPRO_OP_TYPE_REGISTER,
+  TILEPRO_OP_TYPE_IMMEDIATE,
+  TILEPRO_OP_TYPE_ADDRESS,
+  TILEPRO_OP_TYPE_SPR
+} tilepro_operand_type;  /* kind tag stored in struct tilepro_operand.type */
+
+struct tilepro_operand  /* encoding description for one operand */
+{
+  /* Is this operand a register, immediate or address?
+   * (tilepro_operand_type additionally has an SPR kind.) */
+  tilepro_operand_type type;
+
+  /* The default relocation type for this operand.  */
+  signed int default_reloc : 16;
+
+  /* How many bits is this value? (used for range checking)
+   * The field is 5 bits wide, so it can hold 0..31. */
+  unsigned int num_bits : 5;
+
+  /* Is the value signed? (used for range checking) */
+  unsigned int is_signed : 1;
+
+  /* Is this operand a source register? */
+  unsigned int is_src_reg : 1;
+
+  /* Is this operand written? (i.e. is it a destination register) */
+  unsigned int is_dest_reg : 1;
+
+  /* Is this operand PC-relative? */
+  unsigned int is_pc_relative : 1;
+
+  /* By how many bits do we right shift the value before inserting?
+   * The field is 2 bits wide, so the shift is 0..3. */
+  unsigned int rightshift : 2;
+
+  /* Return the bits for this operand to be ORed into an existing bundle.
+   * op is presumably the operand value to encode -- confirm against
+   * the generated insert functions. */
+  tilepro_bundle_bits (*insert) (int op);
+
+  /* Extract this operand and return it. */
+  unsigned int (*extract) (tilepro_bundle_bits bundle);
+};
+
+/* Table of operand descriptions.  The operands[][] entries of
+ * struct tilepro_opcode are indexes into this array. */
+extern const struct tilepro_operand tilepro_operands[];
+
+/* One finite-state machine per pipe for rapid instruction decoding.
+ * The array has TILEPRO_NUM_PIPELINE_ENCODINGS entries, each a
+ * pointer to const unsigned short data. */
+extern const unsigned short * const
+tilepro_bundle_decoder_fsms[TILEPRO_NUM_PIPELINE_ENCODINGS];
+
+/* Static description of one instruction; see tilepro_opcodes[]. */
+struct tilepro_opcode
+{
+  /* The opcode mnemonic, e.g. "add" */
+  const char *name;
+
+  /* The enum value for this mnemonic. */
+  tilepro_mnemonic mnemonic;
+
+  /* A bit mask of which of the five pipes this instruction
+     is compatible with:
+     X0  0x01
+     X1  0x02
+     Y0  0x04
+     Y1  0x08
+     Y2  0x10
+     (bit n corresponds to the n-th tilepro_pipeline enumerator) */
+  unsigned char pipes;
+
+  /* How many operands are there? */
+  unsigned char num_operands;
+
+  /* Which register does this write implicitly, or TREG_ZERO if none? */
+  unsigned char implicitly_written_register;
+
+  /* Can this be bundled with other instructions (almost always true). */
+  unsigned char can_bundle;
+
+  /* The description of the operands. Each of these is an
+   * index into the tilepro_operands[] table.  The first dimension
+   * selects the pipeline encoding, the second the operand slot. */
+  unsigned char operands[TILEPRO_NUM_PIPELINE_ENCODINGS][TILEPRO_MAX_OPERANDS];
+
+};
+
+extern const struct tilepro_opcode tilepro_opcodes[];
+
+
+/* Used for non-textual disassembly into structs.
+ * Holds one decoded instruction: its opcode entry plus, for each of
+ * up to TILEPRO_MAX_OPERANDS operand slots, a pointer to the operand
+ * description and an int value. */
+struct tilepro_decoded_instruction
+{
+  const struct tilepro_opcode *opcode;
+  const struct tilepro_operand *operands[TILEPRO_MAX_OPERANDS];
+  int operand_values[TILEPRO_MAX_OPERANDS];
+};
+
+
+/* Disassemble a bundle into a struct for machine processing.
+ * bits is the bundle to decode; pc is presumably the bundle's address
+ * (confirm against the definition).  decoded is declared with
+ * TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE elements.  The meaning of the
+ * int return value is not stated in this header. */
+extern int parse_insn_tilepro(tilepro_bundle_bits bits,
+                              unsigned int pc,
+                              struct tilepro_decoded_instruction
+                              decoded[TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE]);
+
+
+/* Given a set of bundle bits and a specific pipe, returns which
+ * instruction the bundle contains in that pipe.
+ *
+ * NOTE(review): unlike the other declarations in this header, this
+ * external name carries no tilepro_ prefix.
+ */
+extern const struct tilepro_opcode *
+find_opcode(tilepro_bundle_bits bits, tilepro_pipeline pipe);
+
+
+
+#endif /* opcode_tilepro_h */
diff --git a/arch/tile/include/asm/tile-desc_64.h b/arch/tile/include/asm/tile-desc_64.h
new file mode 100644
index 00000000..1819efcb
--- /dev/null
+++ b/arch/tile/include/asm/tile-desc_64.h
@@ -0,0 +1,483 @@
+/* TILE-Gx opcode information.
+ *
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ *
+ *
+ *
+ *
+ */
+
+#ifndef opcode_tile_h
+#define opcode_tile_h
+
+#include <arch/opcode.h>
+
+
+enum
+{
+  TILEGX_MAX_OPERANDS = 4 /* bfexts; sizes the per-operand arrays in the structs in this header */
+};
+
+typedef enum
+{
+  TILEGX_OPC_BPT,
+  TILEGX_OPC_INFO,
+  TILEGX_OPC_INFOL,
+  TILEGX_OPC_MOVE,
+  TILEGX_OPC_MOVEI,
+  TILEGX_OPC_MOVELI,
+  TILEGX_OPC_PREFETCH,
+  TILEGX_OPC_PREFETCH_ADD_L1,
+  TILEGX_OPC_PREFETCH_ADD_L1_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L2,
+  TILEGX_OPC_PREFETCH_ADD_L2_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L3,
+  TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  TILEGX_OPC_PREFETCH_L1,
+  TILEGX_OPC_PREFETCH_L1_FAULT,
+  TILEGX_OPC_PREFETCH_L2,
+  TILEGX_OPC_PREFETCH_L2_FAULT,
+  TILEGX_OPC_PREFETCH_L3,
+  TILEGX_OPC_PREFETCH_L3_FAULT,
+  TILEGX_OPC_RAISE,
+  TILEGX_OPC_ADD,
+  TILEGX_OPC_ADDI,
+  TILEGX_OPC_ADDLI,
+  TILEGX_OPC_ADDX,
+  TILEGX_OPC_ADDXI,
+  TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXSC,
+  TILEGX_OPC_AND,
+  TILEGX_OPC_ANDI,
+  TILEGX_OPC_BEQZ,
+  TILEGX_OPC_BEQZT,
+  TILEGX_OPC_BFEXTS,
+  TILEGX_OPC_BFEXTU,
+  TILEGX_OPC_BFINS,
+  TILEGX_OPC_BGEZ,
+  TILEGX_OPC_BGEZT,
+  TILEGX_OPC_BGTZ,
+  TILEGX_OPC_BGTZT,
+  TILEGX_OPC_BLBC,
+  TILEGX_OPC_BLBCT,
+  TILEGX_OPC_BLBS,
+  TILEGX_OPC_BLBST,
+  TILEGX_OPC_BLEZ,
+  TILEGX_OPC_BLEZT,
+  TILEGX_OPC_BLTZ,
+  TILEGX_OPC_BLTZT,
+  TILEGX_OPC_BNEZ,
+  TILEGX_OPC_BNEZT,
+  TILEGX_OPC_CLZ,
+  TILEGX_OPC_CMOVEQZ,
+  TILEGX_OPC_CMOVNEZ,
+  TILEGX_OPC_CMPEQ,
+  TILEGX_OPC_CMPEQI,
+  TILEGX_OPC_CMPEXCH,
+  TILEGX_OPC_CMPEXCH4,
+  TILEGX_OPC_CMPLES,
+  TILEGX_OPC_CMPLEU,
+  TILEGX_OPC_CMPLTS,
+  TILEGX_OPC_CMPLTSI,
+  TILEGX_OPC_CMPLTU,
+  TILEGX_OPC_CMPLTUI,
+  TILEGX_OPC_CMPNE,
+  TILEGX_OPC_CMUL,
+  TILEGX_OPC_CMULA,
+  TILEGX_OPC_CMULAF,
+  TILEGX_OPC_CMULF,
+  TILEGX_OPC_CMULFR,
+  TILEGX_OPC_CMULH,
+  TILEGX_OPC_CMULHR,
+  TILEGX_OPC_CRC32_32,
+  TILEGX_OPC_CRC32_8,
+  TILEGX_OPC_CTZ,
+  TILEGX_OPC_DBLALIGN,
+  TILEGX_OPC_DBLALIGN2,
+  TILEGX_OPC_DBLALIGN4,
+  TILEGX_OPC_DBLALIGN6,
+  TILEGX_OPC_DRAIN,
+  TILEGX_OPC_DTLBPR,
+  TILEGX_OPC_EXCH,
+  TILEGX_OPC_EXCH4,
+  TILEGX_OPC_FDOUBLE_ADD_FLAGS,
+  TILEGX_OPC_FDOUBLE_ADDSUB,
+  TILEGX_OPC_FDOUBLE_MUL_FLAGS,
+  TILEGX_OPC_FDOUBLE_PACK1,
+  TILEGX_OPC_FDOUBLE_PACK2,
+  TILEGX_OPC_FDOUBLE_SUB_FLAGS,
+  TILEGX_OPC_FDOUBLE_UNPACK_MAX,
+  TILEGX_OPC_FDOUBLE_UNPACK_MIN,
+  TILEGX_OPC_FETCHADD,
+  TILEGX_OPC_FETCHADD4,
+  TILEGX_OPC_FETCHADDGEZ,
+  TILEGX_OPC_FETCHADDGEZ4,
+  TILEGX_OPC_FETCHAND,
+  TILEGX_OPC_FETCHAND4,
+  TILEGX_OPC_FETCHOR,
+  TILEGX_OPC_FETCHOR4,
+  TILEGX_OPC_FINV,
+  TILEGX_OPC_FLUSH,
+  TILEGX_OPC_FLUSHWB,
+  TILEGX_OPC_FNOP,
+  TILEGX_OPC_FSINGLE_ADD1,
+  TILEGX_OPC_FSINGLE_ADDSUB2,
+  TILEGX_OPC_FSINGLE_MUL1,
+  TILEGX_OPC_FSINGLE_MUL2,
+  TILEGX_OPC_FSINGLE_PACK1,
+  TILEGX_OPC_FSINGLE_PACK2,
+  TILEGX_OPC_FSINGLE_SUB1,
+  TILEGX_OPC_ICOH,
+  TILEGX_OPC_ILL,
+  TILEGX_OPC_INV,
+  TILEGX_OPC_IRET,
+  TILEGX_OPC_J,
+  TILEGX_OPC_JAL,
+  TILEGX_OPC_JALR,
+  TILEGX_OPC_JALRP,
+  TILEGX_OPC_JR,
+  TILEGX_OPC_JRP,
+  TILEGX_OPC_LD,
+  TILEGX_OPC_LD1S,
+  TILEGX_OPC_LD1S_ADD,
+  TILEGX_OPC_LD1U,
+  TILEGX_OPC_LD1U_ADD,
+  TILEGX_OPC_LD2S,
+  TILEGX_OPC_LD2S_ADD,
+  TILEGX_OPC_LD2U,
+  TILEGX_OPC_LD2U_ADD,
+  TILEGX_OPC_LD4S,
+  TILEGX_OPC_LD4S_ADD,
+  TILEGX_OPC_LD4U,
+  TILEGX_OPC_LD4U_ADD,
+  TILEGX_OPC_LD_ADD,
+  TILEGX_OPC_LDNA,
+  TILEGX_OPC_LDNA_ADD,
+  TILEGX_OPC_LDNT,
+  TILEGX_OPC_LDNT1S,
+  TILEGX_OPC_LDNT1S_ADD,
+  TILEGX_OPC_LDNT1U,
+  TILEGX_OPC_LDNT1U_ADD,
+  TILEGX_OPC_LDNT2S,
+  TILEGX_OPC_LDNT2S_ADD,
+  TILEGX_OPC_LDNT2U,
+  TILEGX_OPC_LDNT2U_ADD,
+  TILEGX_OPC_LDNT4S,
+  TILEGX_OPC_LDNT4S_ADD,
+  TILEGX_OPC_LDNT4U,
+  TILEGX_OPC_LDNT4U_ADD,
+  TILEGX_OPC_LDNT_ADD,
+  TILEGX_OPC_LNK,
+  TILEGX_OPC_MF,
+  TILEGX_OPC_MFSPR,
+  TILEGX_OPC_MM,
+  TILEGX_OPC_MNZ,
+  TILEGX_OPC_MTSPR,
+  TILEGX_OPC_MUL_HS_HS,
+  TILEGX_OPC_MUL_HS_HU,
+  TILEGX_OPC_MUL_HS_LS,
+  TILEGX_OPC_MUL_HS_LU,
+  TILEGX_OPC_MUL_HU_HU,
+  TILEGX_OPC_MUL_HU_LS,
+  TILEGX_OPC_MUL_HU_LU,
+  TILEGX_OPC_MUL_LS_LS,
+  TILEGX_OPC_MUL_LS_LU,
+  TILEGX_OPC_MUL_LU_LU,
+  TILEGX_OPC_MULA_HS_HS,
+  TILEGX_OPC_MULA_HS_HU,
+  TILEGX_OPC_MULA_HS_LS,
+  TILEGX_OPC_MULA_HS_LU,
+  TILEGX_OPC_MULA_HU_HU,
+  TILEGX_OPC_MULA_HU_LS,
+  TILEGX_OPC_MULA_HU_LU,
+  TILEGX_OPC_MULA_LS_LS,
+  TILEGX_OPC_MULA_LS_LU,
+  TILEGX_OPC_MULA_LU_LU,
+  TILEGX_OPC_MULAX,
+  TILEGX_OPC_MULX,
+  TILEGX_OPC_MZ,
+  TILEGX_OPC_NAP,
+  TILEGX_OPC_NOP,
+  TILEGX_OPC_NOR,
+  TILEGX_OPC_OR,
+  TILEGX_OPC_ORI,
+  TILEGX_OPC_PCNT,
+  TILEGX_OPC_REVBITS,
+  TILEGX_OPC_REVBYTES,
+  TILEGX_OPC_ROTL,
+  TILEGX_OPC_ROTLI,
+  TILEGX_OPC_SHL,
+  TILEGX_OPC_SHL16INSLI,
+  TILEGX_OPC_SHL1ADD,
+  TILEGX_OPC_SHL1ADDX,
+  TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL2ADDX,
+  TILEGX_OPC_SHL3ADD,
+  TILEGX_OPC_SHL3ADDX,
+  TILEGX_OPC_SHLI,
+  TILEGX_OPC_SHLX,
+  TILEGX_OPC_SHLXI,
+  TILEGX_OPC_SHRS,
+  TILEGX_OPC_SHRSI,
+  TILEGX_OPC_SHRU,
+  TILEGX_OPC_SHRUI,
+  TILEGX_OPC_SHRUX,
+  TILEGX_OPC_SHRUXI,
+  TILEGX_OPC_SHUFFLEBYTES,
+  TILEGX_OPC_ST,
+  TILEGX_OPC_ST1,
+  TILEGX_OPC_ST1_ADD,
+  TILEGX_OPC_ST2,
+  TILEGX_OPC_ST2_ADD,
+  TILEGX_OPC_ST4,
+  TILEGX_OPC_ST4_ADD,
+  TILEGX_OPC_ST_ADD,
+  TILEGX_OPC_STNT,
+  TILEGX_OPC_STNT1,
+  TILEGX_OPC_STNT1_ADD,
+  TILEGX_OPC_STNT2,
+  TILEGX_OPC_STNT2_ADD,
+  TILEGX_OPC_STNT4,
+  TILEGX_OPC_STNT4_ADD,
+  TILEGX_OPC_STNT_ADD,
+  TILEGX_OPC_SUB,
+  TILEGX_OPC_SUBX,
+  TILEGX_OPC_SUBXSC,
+  TILEGX_OPC_SWINT0,
+  TILEGX_OPC_SWINT1,
+  TILEGX_OPC_SWINT2,
+  TILEGX_OPC_SWINT3,
+  TILEGX_OPC_TBLIDXB0,
+  TILEGX_OPC_TBLIDXB1,
+  TILEGX_OPC_TBLIDXB2,
+  TILEGX_OPC_TBLIDXB3,
+  TILEGX_OPC_V1ADD,
+  TILEGX_OPC_V1ADDI,
+  TILEGX_OPC_V1ADDUC,
+  TILEGX_OPC_V1ADIFFU,
+  TILEGX_OPC_V1AVGU,
+  TILEGX_OPC_V1CMPEQ,
+  TILEGX_OPC_V1CMPEQI,
+  TILEGX_OPC_V1CMPLES,
+  TILEGX_OPC_V1CMPLEU,
+  TILEGX_OPC_V1CMPLTS,
+  TILEGX_OPC_V1CMPLTSI,
+  TILEGX_OPC_V1CMPLTU,
+  TILEGX_OPC_V1CMPLTUI,
+  TILEGX_OPC_V1CMPNE,
+  TILEGX_OPC_V1DDOTPU,
+  TILEGX_OPC_V1DDOTPUA,
+  TILEGX_OPC_V1DDOTPUS,
+  TILEGX_OPC_V1DDOTPUSA,
+  TILEGX_OPC_V1DOTP,
+  TILEGX_OPC_V1DOTPA,
+  TILEGX_OPC_V1DOTPU,
+  TILEGX_OPC_V1DOTPUA,
+  TILEGX_OPC_V1DOTPUS,
+  TILEGX_OPC_V1DOTPUSA,
+  TILEGX_OPC_V1INT_H,
+  TILEGX_OPC_V1INT_L,
+  TILEGX_OPC_V1MAXU,
+  TILEGX_OPC_V1MAXUI,
+  TILEGX_OPC_V1MINU,
+  TILEGX_OPC_V1MINUI,
+  TILEGX_OPC_V1MNZ,
+  TILEGX_OPC_V1MULTU,
+  TILEGX_OPC_V1MULU,
+  TILEGX_OPC_V1MULUS,
+  TILEGX_OPC_V1MZ,
+  TILEGX_OPC_V1SADAU,
+  TILEGX_OPC_V1SADU,
+  TILEGX_OPC_V1SHL,
+  TILEGX_OPC_V1SHLI,
+  TILEGX_OPC_V1SHRS,
+  TILEGX_OPC_V1SHRSI,
+  TILEGX_OPC_V1SHRU,
+  TILEGX_OPC_V1SHRUI,
+  TILEGX_OPC_V1SUB,
+  TILEGX_OPC_V1SUBUC,
+  TILEGX_OPC_V2ADD,
+  TILEGX_OPC_V2ADDI,
+  TILEGX_OPC_V2ADDSC,
+  TILEGX_OPC_V2ADIFFS,
+  TILEGX_OPC_V2AVGS,
+  TILEGX_OPC_V2CMPEQ,
+  TILEGX_OPC_V2CMPEQI,
+  TILEGX_OPC_V2CMPLES,
+  TILEGX_OPC_V2CMPLEU,
+  TILEGX_OPC_V2CMPLTS,
+  TILEGX_OPC_V2CMPLTSI,
+  TILEGX_OPC_V2CMPLTU,
+  TILEGX_OPC_V2CMPLTUI,
+  TILEGX_OPC_V2CMPNE,
+  TILEGX_OPC_V2DOTP,
+  TILEGX_OPC_V2DOTPA,
+  TILEGX_OPC_V2INT_H,
+  TILEGX_OPC_V2INT_L,
+  TILEGX_OPC_V2MAXS,
+  TILEGX_OPC_V2MAXSI,
+  TILEGX_OPC_V2MINS,
+  TILEGX_OPC_V2MINSI,
+  TILEGX_OPC_V2MNZ,
+  TILEGX_OPC_V2MULFSC,
+  TILEGX_OPC_V2MULS,
+  TILEGX_OPC_V2MULTS,
+  TILEGX_OPC_V2MZ,
+  TILEGX_OPC_V2PACKH,
+  TILEGX_OPC_V2PACKL,
+  TILEGX_OPC_V2PACKUC,
+  TILEGX_OPC_V2SADAS,
+  TILEGX_OPC_V2SADAU,
+  TILEGX_OPC_V2SADS,
+  TILEGX_OPC_V2SADU,
+  TILEGX_OPC_V2SHL,
+  TILEGX_OPC_V2SHLI,
+  TILEGX_OPC_V2SHLSC,
+  TILEGX_OPC_V2SHRS,
+  TILEGX_OPC_V2SHRSI,
+  TILEGX_OPC_V2SHRU,
+  TILEGX_OPC_V2SHRUI,
+  TILEGX_OPC_V2SUB,
+  TILEGX_OPC_V2SUBSC,
+  TILEGX_OPC_V4ADD,
+  TILEGX_OPC_V4ADDSC,
+  TILEGX_OPC_V4INT_H,
+  TILEGX_OPC_V4INT_L,
+  TILEGX_OPC_V4PACKSC,
+  TILEGX_OPC_V4SHL,
+  TILEGX_OPC_V4SHLSC,
+  TILEGX_OPC_V4SHRS,
+  TILEGX_OPC_V4SHRU,
+  TILEGX_OPC_V4SUB,
+  TILEGX_OPC_V4SUBSC,
+  TILEGX_OPC_WH64,
+  TILEGX_OPC_XOR,
+  TILEGX_OPC_XORI,
+  TILEGX_OPC_NONE  /* last enumerator: its value equals the number of mnemonics above */
+} tilegx_mnemonic;
+
+
+/* The five pipelines an instruction can be encoded for.  The two X
+ * pipes are listed first; tilegx_is_x_pipeline() relies on that
+ * ordering, since it only compares against TILEGX_PIPELINE_X1. */
+typedef enum
+{
+  TILEGX_PIPELINE_X0,
+  TILEGX_PIPELINE_X1,
+  TILEGX_PIPELINE_Y0,
+  TILEGX_PIPELINE_Y1,
+  TILEGX_PIPELINE_Y2,
+} tilegx_pipeline;
+
+#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1)
+
+typedef enum
+{
+  TILEGX_OP_TYPE_REGISTER,
+  TILEGX_OP_TYPE_IMMEDIATE,
+  TILEGX_OP_TYPE_ADDRESS,
+  TILEGX_OP_TYPE_SPR
+} tilegx_operand_type;  /* kind tag stored in struct tilegx_operand.type */
+
+struct tilegx_operand  /* encoding description for one operand */
+{
+  /* Is this operand a register, immediate or address?
+   * (tilegx_operand_type additionally has an SPR kind.) */
+  tilegx_operand_type type;
+
+  /* The default relocation type for this operand.  */
+  signed int default_reloc : 16;
+
+  /* How many bits is this value? (used for range checking)
+   * The field is 5 bits wide, so it can hold 0..31. */
+  unsigned int num_bits : 5;
+
+  /* Is the value signed? (used for range checking) */
+  unsigned int is_signed : 1;
+
+  /* Is this operand a source register? */
+  unsigned int is_src_reg : 1;
+
+  /* Is this operand written? (i.e. is it a destination register) */
+  unsigned int is_dest_reg : 1;
+
+  /* Is this operand PC-relative? */
+  unsigned int is_pc_relative : 1;
+
+  /* By how many bits do we right shift the value before inserting?
+   * The field is 2 bits wide, so the shift is 0..3. */
+  unsigned int rightshift : 2;
+
+  /* Return the bits for this operand to be ORed into an existing bundle.
+   * op is presumably the operand value to encode -- confirm against
+   * the generated insert functions. */
+  tilegx_bundle_bits (*insert) (int op);
+
+  /* Extract this operand and return it. */
+  unsigned int (*extract) (tilegx_bundle_bits bundle);
+};
+
+/* Table of operand descriptions.  The operands[][] entries of
+ * struct tilegx_opcode are indexes into this array. */
+extern const struct tilegx_operand tilegx_operands[];
+
+/* One finite-state machine per pipe for rapid instruction decoding.
+ * The array has TILEGX_NUM_PIPELINE_ENCODINGS entries, each a
+ * pointer to const unsigned short data. */
+extern const unsigned short * const
+tilegx_bundle_decoder_fsms[TILEGX_NUM_PIPELINE_ENCODINGS];
+
+/* Static description of one instruction; see tilegx_opcodes[]. */
+struct tilegx_opcode
+{
+  /* The opcode mnemonic, e.g. "add" */
+  const char *name;
+
+  /* The enum value for this mnemonic. */
+  tilegx_mnemonic mnemonic;
+
+  /* A bit mask of which of the five pipes this instruction
+     is compatible with:
+     X0  0x01
+     X1  0x02
+     Y0  0x04
+     Y1  0x08
+     Y2  0x10
+     (bit n corresponds to the n-th tilegx_pipeline enumerator) */
+  unsigned char pipes;
+
+  /* How many operands are there? */
+  unsigned char num_operands;
+
+  /* Which register does this write implicitly, or TREG_ZERO if none? */
+  unsigned char implicitly_written_register;
+
+  /* Can this be bundled with other instructions (almost always true). */
+  unsigned char can_bundle;
+
+  /* The description of the operands. Each of these is an
+   * index into the tilegx_operands[] table.  The first dimension
+   * selects the pipeline encoding, the second the operand slot. */
+  unsigned char operands[TILEGX_NUM_PIPELINE_ENCODINGS][TILEGX_MAX_OPERANDS];
+
+};
+
+extern const struct tilegx_opcode tilegx_opcodes[];
+
+/* Used for non-textual disassembly into structs.
+ * Holds one decoded instruction: its opcode entry plus, for each of
+ * up to TILEGX_MAX_OPERANDS operand slots, a pointer to the operand
+ * description and a long long value. */
+struct tilegx_decoded_instruction
+{
+  const struct tilegx_opcode *opcode;
+  const struct tilegx_operand *operands[TILEGX_MAX_OPERANDS];
+  long long operand_values[TILEGX_MAX_OPERANDS];
+};
+
+
+/* Disassemble a bundle into a struct for machine processing.
+ * bits is the bundle to decode; pc is presumably the bundle's address
+ * (confirm against the definition).  decoded is declared with
+ * TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE elements.  The meaning of the
+ * int return value is not stated in this header. */
+extern int parse_insn_tilegx(tilegx_bundle_bits bits,
+                             unsigned long long pc,
+                             struct tilegx_decoded_instruction
+                             decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]);
+
+
+
+#endif /* opcode_tile_h */
diff --git a/arch/tile/include/asm/timex.h b/arch/tile/include/asm/timex.h
new file mode 100644
index 00000000..dc987d53
--- /dev/null
+++ b/arch/tile/include/asm/timex.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_TIMEX_H
+#define _ASM_TILE_TIMEX_H
+
+/*
+ * This rate should be a multiple of the possible HZ values (100, 250, 1000)
+ * and a fraction of the possible hardware timer frequencies.  Our timer
+ * frequency is highly tunable but also quite precise, so for the primary use
+ * of this value (setting ACT_HZ from HZ) we just pick a value that causes
+ * ACT_HZ to be set to HZ.  We make the value somewhat large just to be
+ * more robust in case someone tries out a new value of HZ.
+ */
+#define CLOCK_TICK_RATE	1000000
+
+typedef unsigned long long cycles_t;
+
+#if CHIP_HAS_SPLIT_CYCLE()
+cycles_t get_cycles(void);  /* only declared here in this configuration */
+#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW)
+#else /* !CHIP_HAS_SPLIT_CYCLE() */
+static inline cycles_t get_cycles(void)
+{
+	return __insn_mfspr(SPR_CYCLE);
+}
+#define get_cycles_low() __insn_mfspr(SPR_CYCLE)   /* just get all 64 bits */
+#endif /* CHIP_HAS_SPLIT_CYCLE() */
+
+cycles_t get_clock_rate(void);
+
+/* Convert nanoseconds to core clock cycles. */
+cycles_t ns2cycles(unsigned long nsecs);
+
+/* Called at cpu initialization to set some low-level constants. */
+void setup_clock(void);
+
+/* Called at cpu initialization to start the tile-timer clock device. */
+void setup_tile_timer(void);
+
+#endif /* _ASM_TILE_TIMEX_H */
diff --git a/arch/tile/include/asm/tlb.h b/arch/tile/include/asm/tlb.h
new file mode 100644
index 00000000..4a891a1a
--- /dev/null
+++ b/arch/tile/include/asm/tlb.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_TLB_H
+#define _ASM_TILE_TLB_H
+
+#define tlb_start_vma(tlb, vma) do { } while (0) /* no-op */
+#define tlb_end_vma(tlb, vma) do { } while (0) /* no-op */
+#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) /* no-op */
+#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) /* forwards to flush_tlb_mm() */
+
+#include <asm-generic/tlb.h>
+
+#endif /* _ASM_TILE_TLB_H */
diff --git a/arch/tile/include/asm/tlbflush.h b/arch/tile/include/asm/tlbflush.h
new file mode 100644
index 00000000..96199d21
--- /dev/null
+++ b/arch/tile/include/asm/tlbflush.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_TLBFLUSH_H
+#define _ASM_TILE_TLBFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+#include <hv/hypervisor.h>
+
+/*
+ * Rather than associating each mm with its own ASID, we just use
+ * ASIDs to allow us to lazily flush the TLB when we switch mms.
+ * This way we only have to do an actual TLB flush on mm switch
+ * every time we wrap ASIDs, not every single time we switch.
+ *
+ * FIXME: We might improve performance by keeping ASIDs around
+ * properly, though since the hypervisor direct-maps VAs to TSB
+ * entries, we're likely to have lost at least the executable page
+ * mappings by the time we switch back to the original mm.
+ */
+DECLARE_PER_CPU(int, current_asid);
+
+/* The hypervisor tells us what ASIDs are available to us. */
+extern int min_asid, max_asid;
+
+/*
+ * Pick the page size for a vma: HPAGE_SIZE when VM_HUGETLB is set in
+ * its vm_flags, PAGE_SIZE otherwise.
+ */
+static inline unsigned long hv_page_size(const struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_HUGETLB)
+		return HPAGE_SIZE;
+	return PAGE_SIZE;
+}
+
+/* Pass as vma pointer for non-executable mapping, if no vma available. */
+#define FLUSH_NONEXEC ((const struct vm_area_struct *)-1UL)
+
+/*
+ * Flush a single user page on this cpu.
+ *
+ * Panics if hv_flush_page() reports an error (negative return).  The
+ * icache is flushed as well when no vma is supplied (NULL) or when the
+ * vma is marked VM_EXEC; passing FLUSH_NONEXEC skips the icache flush.
+ */
+static inline void local_flush_tlb_page(const struct vm_area_struct *vma,
+					unsigned long addr,
+					unsigned long page_size)
+{
+	int rc;
+
+	rc = hv_flush_page(addr, page_size);
+	if (rc < 0)
+		panic("hv_flush_page(%#lx,%#lx) failed: %d",
+		      addr, page_size, rc);
+
+	/* The sentinel is not a real vma, so test it before dereferencing. */
+	if (vma == FLUSH_NONEXEC)
+		return;
+	if (!vma || (vma->vm_flags & VM_EXEC))
+		__flush_icache();
+}
+
+/*
+ * Flush a range of user pages on this cpu.
+ *
+ * Panics if hv_flush_pages() reports an error (negative return).  The
+ * icache is flushed as well when no vma is supplied (NULL) or when the
+ * vma is marked VM_EXEC; passing FLUSH_NONEXEC skips the icache flush.
+ */
+static inline void local_flush_tlb_pages(const struct vm_area_struct *vma,
+					 unsigned long addr,
+					 unsigned long page_size,
+					 unsigned long len)
+{
+	int rc;
+
+	rc = hv_flush_pages(addr, page_size, len);
+	if (rc < 0)
+		panic("hv_flush_pages(%#lx,%#lx,%#lx) failed: %d",
+		      addr, page_size, len, rc);
+
+	/* The sentinel is not a real vma, so test it before dereferencing. */
+	if (vma == FLUSH_NONEXEC)
+		return;
+	if (!vma || (vma->vm_flags & VM_EXEC))
+		__flush_icache();
+}
+
+/*
+ * Flush all user pages on this cpu, then flush the icache.
+ * Panics if hv_flush_all() reports an error (negative return).
+ */
+static inline void local_flush_tlb(void)
+{
+	int rc;
+
+	/* An argument of 1 asks for global mappings to be preserved. */
+	rc = hv_flush_all(1);
+	if (rc < 0)
+		panic("hv_flush_all(1) failed: %d", rc);
+
+	__flush_icache();
+}
+
+/*
+ * Global pages have to be flushed a bit differently: walk the virtual
+ * address ranges reported by hv_inquire_virtual() until one of size
+ * zero is returned, flushing each range once per page size.  Not a
+ * real performance problem because this does not happen often.
+ */
+static inline void local_flush_tlb_all(void)
+{
+	int idx = 0;
+
+	for (;;) {
+		HV_VirtAddrRange range = hv_inquire_virtual(idx++);
+
+		if (range.size == 0)
+			return;
+		local_flush_tlb_pages(NULL, range.start, PAGE_SIZE, range.size);
+		local_flush_tlb_pages(NULL, range.start, HPAGE_SIZE, range.size);
+	}
+}
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes' TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context's TLBs
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ *  - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus
+ *
+ * Here (as in vm_area_struct), "end" means the first byte after
+ * our end address.
+ */
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_page(const struct vm_area_struct *, unsigned long);
+extern void flush_tlb_page_mm(const struct vm_area_struct *,
+			      struct mm_struct *, unsigned long);
+extern void flush_tlb_range(const struct vm_area_struct *,
+			    unsigned long start, unsigned long end);
+
+#define flush_tlb()     flush_tlb_current_task()
+
+#endif /* _ASM_TILE_TLBFLUSH_H */
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h
new file mode 100644
index 00000000..6fdd0c86
--- /dev/null
+++ b/arch/tile/include/asm/topology.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_TOPOLOGY_H
+#define _ASM_TILE_TOPOLOGY_H
+
+#ifdef CONFIG_NUMA
+
+#include <linux/cpumask.h>
+
+/* Mappings between logical cpu number and node number. */
+extern struct cpumask node_2_cpu_mask[];
+extern char cpu_2_node[];
+
+/* Look up, in cpu_2_node[], the node number recorded for CPU 'cpu'. */
+static inline int cpu_to_node(int cpu)
+{
+	int node = cpu_2_node[cpu];
+
+	return node;
+}
+
+/*
+ * Returns the number of the node containing Node 'node'.
+ * This architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
+/* Returns a pointer to the node_2_cpu_mask[] entry (CPUs on Node 'node'). */
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+	return node_2_cpu_mask + node;
+}
+
+/* For now, use numa node -1 for global allocation. */
+#define pcibus_to_node(bus)		((void)(bus), -1)
+
+/*
+ * TILE architecture has many cores integrated in one processor, so we need
+ * to set up a bigger balance_interval for both CPU/NODE scheduling domains
+ * to reduce process scheduling costs.
+ */
+
+/* sched_domains SD_CPU_INIT for TILE architecture */
+#define SD_CPU_INIT (struct sched_domain) {				\
+	.min_interval		= 4,					\
+	.max_interval		= 128,					\
+	.busy_factor		= 64,					\
+	.imbalance_pct		= 125,					\
+	.cache_nice_tries	= 1,					\
+	.busy_idx		= 2,					\
+	.idle_idx		= 1,					\
+	.newidle_idx		= 0,					\
+	.wake_idx		= 0,					\
+	.forkexec_idx		= 0,					\
+									\
+	.flags			= 1*SD_LOAD_BALANCE			\
+				| 1*SD_BALANCE_NEWIDLE			\
+				| 1*SD_BALANCE_EXEC			\
+				| 1*SD_BALANCE_FORK			\
+				| 0*SD_BALANCE_WAKE			\
+				| 0*SD_WAKE_AFFINE			\
+				| 0*SD_PREFER_LOCAL			\
+				| 0*SD_SHARE_CPUPOWER			\
+				| 0*SD_SHARE_PKG_RESOURCES		\
+				| 0*SD_SERIALIZE			\
+				,					\
+	.last_balance		= jiffies,				\
+	.balance_interval	= 32,					\
+}
+
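+/*
+ * sched_domains SD_NODE_INIT for TILE architecture.  Unlike SD_CPU_INIT,
+ * it does not name .forkexec_idx, so that field is left zero-initialized.
+ */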
+#define SD_NODE_INIT (struct sched_domain) {				\
+	.min_interval		= 16,					\
+	.max_interval		= 512,					\
+	.busy_factor		= 32,					\
+	.imbalance_pct		= 125,					\
+	.cache_nice_tries	= 1,					\
+	.busy_idx		= 3,					\
+	.idle_idx		= 1,					\
+	.newidle_idx		= 2,					\
+	.wake_idx		= 1,					\
+	.flags			= 1*SD_LOAD_BALANCE			\
+				| 1*SD_BALANCE_NEWIDLE			\
+				| 1*SD_BALANCE_EXEC			\
+				| 1*SD_BALANCE_FORK			\
+				| 0*SD_BALANCE_WAKE			\
+				| 0*SD_WAKE_AFFINE			\
+				| 0*SD_PREFER_LOCAL			\
+				| 0*SD_SHARE_CPUPOWER			\
+				| 0*SD_SHARE_PKG_RESOURCES		\
+				| 1*SD_SERIALIZE			\
+				,					\
+	.last_balance		= jiffies,				\
+	.balance_interval	= 128,					\
+}
+
+/* By definition, we create nodes based on online memory. */
+#define node_has_online_mem(nid) 1
+
+#endif /* CONFIG_NUMA */
+
+#include <asm-generic/topology.h>
+
+#ifdef CONFIG_SMP
+#define topology_physical_package_id(cpu)       ((void)(cpu), 0)
+#define topology_core_id(cpu)                   (cpu)
+#define topology_core_cpumask(cpu)              ((void)(cpu), cpu_online_mask)
+#define topology_thread_cpumask(cpu)            cpumask_of(cpu)
+
+/* indicates that pointers to the topology struct cpumask maps are valid */
+#define arch_provides_topology_pointers         yes
+#endif /* CONFIG_SMP */
+
+#endif /* _ASM_TILE_TOPOLOGY_H */
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
new file mode 100644
index 00000000..e28c3df4
--- /dev/null
+++ b/arch/tile/include/asm/traps.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_TRAPS_H
+#define _ASM_TILE_TRAPS_H
+
+#include <arch/chip.h>
+
+/* mm/fault.c */
+void do_page_fault(struct pt_regs *, int fault_num,
+		   unsigned long address, unsigned long write);
+#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+void do_async_page_fault(struct pt_regs *);
+#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */
+
+#ifndef __tilegx__
+/*
+ * We return this structure in registers to avoid having to write
+ * additional save/restore code in the intvec.S caller.
+ */
+struct intvec_state {
+	void *handler;
+	unsigned long vecnum;
+	unsigned long fault_num;
+	unsigned long info;
+	unsigned long retval;
+};
+struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
+				      unsigned long address,
+				      unsigned long info);
+#endif /* !__tilegx__ */
+
+/* kernel/traps.c */
+void do_trap(struct pt_regs *, int fault_num, unsigned long reason);
+void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+
+/* kernel/time.c */
+void do_timer_interrupt(struct pt_regs *, int fault_num);
+
+/* kernel/messaging.c */
+void hv_message_intr(struct pt_regs *, int intnum);
+
+/* kernel/irq.c */
+void tile_dev_intr(struct pt_regs *, int intnum);
+
+#ifdef CONFIG_HARDWALL
+/* kernel/hardwall.c */
+void do_hardwall_trap(struct pt_regs *, int fault_num);
+#endif /* CONFIG_HARDWALL */
+
+/* kernel/ptrace.c */
+void do_breakpoint(struct pt_regs *, int fault_num);
+
+
+#ifdef __tilegx__
+/* kernel/single_step.c */
+void gx_singlestep_handle(struct pt_regs *, int fault_num);
+
+/* kernel/intvec_64.S */
+void fill_ra_stack(void);
+#endif /* __tilegx__ */
+
+#endif /* _ASM_TILE_TRAPS_H */
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
new file mode 100644
index 00000000..ef34d2ca
--- /dev/null
+++ b/arch/tile/include/asm/uaccess.h
@@ -0,0 +1,580 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_UACCESS_H
+#define _ASM_TILE_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm-generic/uaccess-unaligned.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+
+#define VERIFY_READ	0
+#define VERIFY_WRITE	1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+#define MAKE_MM_SEG(a)  ((mm_segment_t) { (a) })
+
+#define KERNEL_DS	MAKE_MM_SEG(-1UL)
+#define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds()	(KERNEL_DS)
+#define get_fs()	(current_thread_info()->addr_limit)
+#define set_fs(x)	(current_thread_info()->addr_limit = (x))
+
+#define segment_eq(a, b) ((a).seg == (b).seg)
+
+#ifndef __tilegx__
+/*
+ * We could allow mapping all 16 MB at 0xfc000000, but we set up a
+ * special hack in arch_setup_additional_pages() to auto-create a mapping
+ * for the first 16 KB, and it would seem strange to have different
+ * user-accessible semantics for memory at 0xfc000000 and above 0xfc004000.
+ */
+static inline int is_arch_mappable_range(unsigned long addr,
+					 unsigned long size)
+{
+	const unsigned long end = MEM_USER_INTRPT + INTRPT_SIZE;
+
+	return addr >= MEM_USER_INTRPT && addr < end && size <= end - addr;
+}
+#define is_arch_mappable_range is_arch_mappable_range
+#else
+#define is_arch_mappable_range(addr, size) 0
+#endif
+
+/*
+ * Test whether a block of memory is a valid user space address.
+ * Returns 0 if the range is valid, nonzero otherwise.
+ */
+int __range_ok(unsigned long addr, unsigned long size);
+
+/**
+ * access_ok: - Checks if a user space pointer is valid
+ * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE.  Note that
+ *        %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
+ *        to write to a block, it is always safe to read from it.
+ * @addr: User space pointer to start of block to check
+ * @size: Size of block to check
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Checks if a pointer to a block of memory in user space is valid.
+ *
+ * Returns true (nonzero) if the memory block may be valid, false (zero)
+ * if it is definitely invalid.
+ *
+ * Note that, depending on architecture, this function probably just
+ * checks that the pointer is in the user space range - after calling
+ * this function, memory access functions may still return -EFAULT.
+ */
+#define access_ok(type, addr, size) ({ \
+	__chk_user_ptr(addr); \
+	likely(__range_ok((unsigned long)(addr), (size)) == 0);	\
+})
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry {
+	unsigned long insn, fixup;
+};
+
+extern int fixup_exception(struct pt_regs *regs);
+
+/*
+ * We return the __get_user_N function results in a structure,
+ * thus in r0 and r1.  If "err" is zero, "val" is the result
+ * of the read; otherwise, "err" is -EFAULT.
+ *
+ * We rarely need 8-byte values on a 32-bit architecture, but
+ * we size the structure to accommodate.  In practice, for the
+ * smaller reads, we can zero the high word for free, and
+ * the caller will ignore it by virtue of casting anyway.
+ */
+struct __get_user {
+	unsigned long long val;
+	int err;
+};
+
+/*
+ * FIXME: we should express these as inline extended assembler, since
+ * they're fundamentally just a variable dereference and some
+ * supporting exception_table gunk.  Note that (a la i386) we can
+ * extend the copy_to_user and copy_from_user routines to call into
+ * such extended assembler routines, though we will have to use a
+ * different return code in that case (1, 2, or 4, rather than -EFAULT).
+ */
+extern struct __get_user __get_user_1(const void __user *);
+extern struct __get_user __get_user_2(const void __user *);
+extern struct __get_user __get_user_4(const void __user *);
+extern struct __get_user __get_user_8(const void __user *);
+extern int __put_user_1(long, void __user *);
+extern int __put_user_2(long, void __user *);
+extern int __put_user_4(long, void __user *);
+extern int __put_user_8(long long, void __user *);
+
+/* Unimplemented routines to cause linker failures */
+extern struct __get_user __get_user_bad(void);
+extern int __put_user_bad(void);
+
+/*
+ * Careful: we have to cast the result to the type of the pointer
+ * for sign reasons.
+ */
+/**
+ * __get_user: - Get a simple variable from user space, with less checking.
+ * @x:   Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ */
+#define __get_user(x, ptr)						\
+({	struct __get_user __ret;					\
+	__typeof__(*(ptr)) const __user *__gu_addr = (ptr);		\
+	__chk_user_ptr(__gu_addr);					\
+	switch (sizeof(*(__gu_addr))) {					\
+	case 1:								\
+		__ret = __get_user_1(__gu_addr);			\
+		break;							\
+	case 2:								\
+		__ret = __get_user_2(__gu_addr);			\
+		break;							\
+	case 4:								\
+		__ret = __get_user_4(__gu_addr);			\
+		break;							\
+	case 8:								\
+		__ret = __get_user_8(__gu_addr);			\
+		break;							\
+	default:							\
+		__ret = __get_user_bad();				\
+		break;							\
+	}								\
+	(x) = (__typeof__(*__gu_addr)) (__typeof__(*__gu_addr - *__gu_addr)) \
+	  __ret.val;			                                \
+	__ret.err;							\
+})
+
+/**
+ * __put_user: - Write a simple value into user space, with less checking.
+ * @x:   Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space.  It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ *
+ * Implementation note: The "case 8" logic of casting to the type of
+ * the result of subtracting the value from itself is basically a way
+ * of keeping all integer types the same, but casting any pointers to
+ * ptrdiff_t, i.e. also an integer type.  This way there are no
+ * questionable casts seen by the compiler on an ILP32 platform.
+ */
+#define __put_user(x, ptr)						\
+({									\
+	int __pu_err = 0;						\
+	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
+	typeof(*__pu_addr) __pu_val = (x);				\
+	__chk_user_ptr(__pu_addr);					\
+	switch (sizeof(__pu_val)) {					\
+	case 1:								\
+		__pu_err = __put_user_1((long)__pu_val, __pu_addr);	\
+		break;							\
+	case 2:								\
+		__pu_err = __put_user_2((long)__pu_val, __pu_addr);	\
+		break;							\
+	case 4:								\
+		__pu_err = __put_user_4((long)__pu_val, __pu_addr);	\
+		break;							\
+	case 8:								\
+		__pu_err =						\
+		  __put_user_8((__typeof__(__pu_val - __pu_val))__pu_val,\
+			__pu_addr);					\
+		break;							\
+	default:							\
+		__pu_err = __put_user_bad();				\
+		break;							\
+	}								\
+	__pu_err;							\
+})
+
+/*
+ * The versions of get_user and put_user without initial underscores
+ * check the address of their arguments to make sure they are not
+ * in kernel space.
+ */
+#define put_user(x, ptr)						\
+({									\
+	__typeof__(*(ptr)) __user *__Pu_addr = (ptr);			\
+	access_ok(VERIFY_WRITE, (__Pu_addr), sizeof(*(__Pu_addr))) ?	\
+		__put_user((x), (__Pu_addr)) :				\
+		-EFAULT;						\
+})
+
+#define get_user(x, ptr)						\
+({									\
+	__typeof__(*(ptr)) const __user *__Gu_addr = (ptr);		\
+	access_ok(VERIFY_READ, (__Gu_addr), sizeof(*(__Gu_addr))) ?	\
+		__get_user((x), (__Gu_addr)) :				\
+		((x) = 0, -EFAULT);					\
+})
+
+/**
+ * __copy_to_user() - copy data into user space, with less checking.
+ * @to:   Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Copy data from kernel space to user space.  Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * An alternate version - __copy_to_user_inatomic() - is designed
+ * to be called from atomic context, typically bracketed by calls
+ * to pagefault_disable() and pagefault_enable().
+ */
+extern unsigned long __must_check __copy_to_user_inatomic(
+	void __user *to, const void *from, unsigned long n);
+
+static inline unsigned long __must_check
+__copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	might_fault();
+	return __copy_to_user_inatomic(to, from, n);
+}
+
+static inline unsigned long __must_check
+copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	if (!access_ok(VERIFY_WRITE, to, n))
+		return n;	/* range rejected: nothing was copied */
+	return __copy_to_user(to, from, n);
+}
+
+/**
+ * __copy_from_user() - copy data from user space, with less checking.
+ * @to:   Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Copy data from user space to kernel space.  Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ *
+ * An alternate version - __copy_from_user_inatomic() - is designed
+ * to be called from atomic context, typically bracketed by calls
+ * to pagefault_disable() and pagefault_enable().  This version
+ * does *NOT* pad with zeros.
+ */
+extern unsigned long __must_check __copy_from_user_inatomic(
+	void *to, const void __user *from, unsigned long n);
+extern unsigned long __must_check __copy_from_user_zeroing(
+	void *to, const void __user *from, unsigned long n);
+
+static inline unsigned long __must_check
+__copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	might_fault();
+	return __copy_from_user_zeroing(to, from, n);
+}
+
+static inline unsigned long __must_check
+_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	if (!access_ok(VERIFY_READ, from, n)) {
+		memset(to, 0, n);	/* rejected range: zero-fill @to */
+		return n;
+	}
+	return __copy_from_user(to, from, n);
+}
+
+#ifdef CONFIG_DEBUG_COPY_FROM_USER
+extern void copy_from_user_overflow(void)
+	__compiletime_warning("copy_from_user() size is not provably correct");
+
+static inline unsigned long __must_check copy_from_user(void *to,
+					  const void __user *from,
+					  unsigned long n)
+{
+	int sz = __compiletime_object_size(to);
+
+	if (likely(sz == -1 || sz >= n))
+		n = _copy_from_user(to, from, n);
+	else
+		copy_from_user_overflow();
+
+	return n;
+}
+#else
+#define copy_from_user _copy_from_user
+#endif
+
+#ifdef __tilegx__
+/**
+ * __copy_in_user() - copy data within user space, with less checking.
+ * @to:   Destination address, in user space.
+ * @from: Source address, in user space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Copy data from user space to user space.  Caller must check
+ * the specified blocks with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+extern unsigned long __copy_in_user_inatomic(
+	void __user *to, const void __user *from, unsigned long n);
+
+static inline unsigned long __must_check
+__copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+	might_fault();	/* as in the other fault-capable copy helpers here */
+	return __copy_in_user_inatomic(to, from, n);
+}
+
+static inline unsigned long __must_check
+copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n) && access_ok(VERIFY_READ, from, n))
+		n = __copy_in_user(to, from, n);
+	return n;
+}
+#endif
+
+
+/**
+ * strlen_user: - Get the size of a string in user space.
+ * @str: The string to measure.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * On exception, returns 0.
+ *
+ * If there is a limit on the length of a valid string, you may wish to
+ * consider using strnlen_user() instead.
+ */
+extern long strnlen_user_asm(const char __user *str, long n);
+static inline long __must_check strnlen_user(const char __user *str, long n)
+{
+	might_fault();
+	return strnlen_user_asm(str, n);
+}
+#define strlen_user(str) strnlen_user(str, LONG_MAX)
+
+/**
+ * strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
+ * @dst:   Destination address, in kernel space.  This buffer must be at
+ *         least @count bytes long.
+ * @src:   Source address, in user space.
+ * @count: Maximum number of bytes to copy, including the trailing NUL.
+ *
+ * Copies a NUL-terminated string from userspace to kernel space.
+ * Caller must check the specified block with access_ok() before calling
+ * this function.
+ *
+ * On success, returns the length of the string (not including the trailing
+ * NUL).
+ *
+ * If access to userspace fails, returns -EFAULT (some data may have been
+ * copied).
+ *
+ * If @count is smaller than the length of the string, copies @count bytes
+ * and returns @count.
+ */
+extern long strncpy_from_user_asm(char *dst, const char __user *src, long);
+static inline long __must_check __strncpy_from_user(
+	char *dst, const char __user *src, long count)
+{
+	might_fault();
+	return strncpy_from_user_asm(dst, src, count);
+}
+static inline long __must_check strncpy_from_user(
+	char *dst, const char __user *src, long count)
+{
+	if (!access_ok(VERIFY_READ, src, 1))
+		return -EFAULT;	/* first byte of @src is not accessible */
+	return __strncpy_from_user(dst, src, count);
+}
+
+/**
+ * clear_user: - Zero a block of memory in user space.
+ * @mem:   Destination address, in user space.
+ * @len:   Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+extern unsigned long clear_user_asm(void __user *mem, unsigned long len);
+static inline unsigned long __must_check __clear_user(
+	void __user *mem, unsigned long len)
+{
+	might_fault();
+	return clear_user_asm(mem, len);
+}
+static inline unsigned long __must_check clear_user(
+	void __user *mem, unsigned long len)
+{
+	if (!access_ok(VERIFY_WRITE, mem, len))
+		return len;	/* range rejected: nothing was cleared */
+	return __clear_user(mem, len);
+}
+
+/**
+ * flush_user: - Flush a block of memory in user space from cache.
+ * @mem:   Destination address, in user space.
+ * @len:   Number of bytes to flush.
+ *
+ * Returns number of bytes that could not be flushed.
+ * On success, this will be zero.
+ */
+extern unsigned long flush_user_asm(void __user *mem, unsigned long len);
+static inline unsigned long __must_check __flush_user(
+	void __user *mem, unsigned long len)
+{
+	unsigned long retval;	/* same width as flush_user_asm()'s result */
+
+	might_fault();
+	retval = flush_user_asm(mem, len);
+	mb_incoherent();
+	return retval;
+}
+
+static inline unsigned long __must_check flush_user(
+	void __user *mem, unsigned long len)
+{
+	if (!access_ok(VERIFY_WRITE, mem, len))
+		return len;	/* range rejected: nothing was flushed */
+	return __flush_user(mem, len);
+}
+
+/**
+ * inv_user: - Invalidate a block of memory in user space from cache.
+ * @mem:   Destination address, in user space.
+ * @len:   Number of bytes to invalidate.
+ *
+ * Returns number of bytes that could not be invalidated.
+ * On success, this will be zero.
+ *
+ * Note that on Tile64, the "inv" operation is in fact a
+ * "flush and invalidate", so cache write-backs will occur prior
+ * to the cache being marked invalid.
+ */
+extern unsigned long inv_user_asm(void __user *mem, unsigned long len);
+static inline unsigned long __must_check __inv_user(
+	void __user *mem, unsigned long len)
+{
+	unsigned long retval;	/* same width as inv_user_asm()'s result */
+
+	might_fault();
+	retval = inv_user_asm(mem, len);
+	mb_incoherent();
+	return retval;
+}
+static inline unsigned long __must_check inv_user(
+	void __user *mem, unsigned long len)
+{
+	if (!access_ok(VERIFY_WRITE, mem, len))
+		return len;	/* range rejected: nothing was invalidated */
+	return __inv_user(mem, len);
+}
+
+/**
+ * finv_user: - Flush-inval a block of memory in user space from cache.
+ * @mem:   Destination address, in user space.
+ * @len:   Number of bytes to invalidate.
+ *
+ * Returns number of bytes that could not be flush-invalidated.
+ * On success, this will be zero.
+ */
+extern unsigned long finv_user_asm(void __user *mem, unsigned long len);
+static inline unsigned long __must_check __finv_user(
+	void __user *mem, unsigned long len)
+{
+	unsigned long retval;	/* same width as finv_user_asm()'s result */
+
+	might_fault();
+	retval = finv_user_asm(mem, len);
+	mb_incoherent();
+	return retval;
+}
+static inline unsigned long __must_check finv_user(
+	void __user *mem, unsigned long len)
+{
+	if (!access_ok(VERIFY_WRITE, mem, len))
+		return len;	/* range rejected: nothing was flush-invalidated */
+	return __finv_user(mem, len);
+}
+
+#endif /* _ASM_TILE_UACCESS_H */
diff --git a/arch/tile/include/asm/unaligned.h b/arch/tile/include/asm/unaligned.h
new file mode 100644
index 00000000..37dfbe59
--- /dev/null
+++ b/arch/tile/include/asm/unaligned.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_UNALIGNED_H
+#define _ASM_TILE_UNALIGNED_H
+
+#include <linux/unaligned/le_struct.h>
+#include <linux/unaligned/be_byteshift.h>
+#include <linux/unaligned/generic.h>
+#define get_unaligned	__get_unaligned_le
+#define put_unaligned	__put_unaligned_le
+
+/*
+ * Is the kernel doing fixups of unaligned accesses?  If <0, no kernel
+ * intervention occurs and SIGBUS is delivered with no data address
+ * info.  If 0, the kernel single-steps the instruction to discover
+ * the data address to provide with the SIGBUS.  If 1, the kernel does
+ * a fixup.
+ */
+extern int unaligned_fixup;
+
+/* Is the kernel printing on each unaligned fixup? */
+extern int unaligned_printk;
+
+/* Number of unaligned fixups performed */
+extern unsigned int unaligned_fixup_count;
+
+#endif /* _ASM_TILE_UNALIGNED_H */
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
new file mode 100644
index 00000000..f70bf1c5
--- /dev/null
+++ b/arch/tile/include/asm/unistd.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#if !defined(_ASM_TILE_UNISTD_H) || defined(__SYSCALL)
+#define _ASM_TILE_UNISTD_H
+
+#if !defined(__LP64__) || defined(__SYSCALL_COMPAT)
+/* Use the flavor of this syscall that matches the 32-bit API better. */
+#define __ARCH_WANT_SYNC_FILE_RANGE2
+#endif
+
+/* Use the standard ABI for syscalls. */
+#include <asm-generic/unistd.h>
+
+/* Additional Tilera-specific syscalls. */
+#define __NR_flush_cache	(__NR_arch_specific_syscall + 1)
+__SYSCALL(__NR_flush_cache, sys_flush_cache)
+
+#ifndef __tilegx__
+/* "Fast" syscalls provide atomic support for 32-bit chips. */
+#define __NR_FAST_cmpxchg	-1
+#define __NR_FAST_atomic_update	-2
+#define __NR_FAST_cmpxchg64	-3
+#define __NR_cmpxchg_badaddr	(__NR_arch_specific_syscall + 0)
+__SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr)
+#endif
+
+#ifdef __KERNEL__
+/* In compat mode, we use sys_llseek() for compat_sys_llseek(). */
+#ifdef CONFIG_COMPAT
+#define __ARCH_WANT_SYS_LLSEEK
+#endif
+#define __ARCH_WANT_SYS_NEWFSTATAT
+#endif
+
+#endif /* _ASM_TILE_UNISTD_H */
diff --git a/arch/tile/include/asm/user.h b/arch/tile/include/asm/user.h
new file mode 100644
index 00000000..cbc8b4d5
--- /dev/null
+++ b/arch/tile/include/asm/user.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ */
+
+#ifndef _ASM_TILE_USER_H
+#define _ASM_TILE_USER_H
+
+/* This header is for a.out file formats, which TILE does not support. */
+
+#endif /* _ASM_TILE_USER_H */
diff --git a/arch/tile/include/asm/vga.h b/arch/tile/include/asm/vga.h
new file mode 100644
index 00000000..7b46e754
--- /dev/null
+++ b/arch/tile/include/asm/vga.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Access to VGA videoram.
+ */
+
+#ifndef _ASM_TILE_VGA_H
+#define _ASM_TILE_VGA_H
+
+#include <asm/io.h>
+
+#define VT_BUF_HAVE_RW
+
+static inline void scr_writew(u16 val, volatile u16 *addr)
+{
+	__raw_writew(val, (volatile u16 __iomem *) addr);
+}
+
+static inline u16 scr_readw(volatile const u16 *addr)
+{
+	return __raw_readw((volatile const u16 __iomem *) addr);
+}
+
+#define vga_readb(a)	readb((u8 __iomem *)(a))
+#define vga_writeb(v,a)	writeb(v, (u8 __iomem *)(a))
+
+#define VGA_MAP_MEM(x,s)	((unsigned long) ioremap(x, s))
+
+#endif
diff --git a/arch/tile/include/hv/drv_mshim_intf.h b/arch/tile/include/hv/drv_mshim_intf.h
new file mode 100644
index 00000000..c6ef3bdc
--- /dev/null
+++ b/arch/tile/include/hv/drv_mshim_intf.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file drv_mshim_intf.h
+ * Interface definitions for the Linux EDAC memory controller driver.
+ */
+
+#ifndef _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H
+#define _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H
+
+/** Number of memory controllers in the public API. */
+#define TILE_MAX_MSHIMS 4
+
+/** Memory info under each memory controller. */
+struct mshim_mem_info
+{
+  uint64_t mem_size;     /**< Total memory size in bytes. */
+  uint8_t mem_type;      /**< Memory type, DDR2 or DDR3. */
+  uint8_t mem_ecc;       /**< Memory supports ECC. */
+};
+
+/**
+ * DIMM error structure.
+ * For now, only correctable errors are counted and the mshim doesn't record
+ * the error PA. The HV panics upon uncorrectable errors.
+ */
+struct mshim_mem_error
+{
+  uint32_t sbe_count;     /**< Number of single-bit errors. */
+};
+
+/** Read this offset to get the memory info per mshim. */
+#define MSHIM_MEM_INFO_OFF 0x100
+
+/** Read this offset to check DIMM error. */
+#define MSHIM_MEM_ERROR_OFF 0x200
+
+#endif /* _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H */
diff --git a/arch/tile/include/hv/drv_pcie_rc_intf.h b/arch/tile/include/hv/drv_pcie_rc_intf.h
new file mode 100644
index 00000000..9bd2243b
--- /dev/null
+++ b/arch/tile/include/hv/drv_pcie_rc_intf.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file drv_pcie_rc_intf.h
+ * Interface definitions for the PCIE Root Complex.
+ */
+
+#ifndef _SYS_HV_DRV_PCIE_RC_INTF_H
+#define _SYS_HV_DRV_PCIE_RC_INTF_H
+
+/** File offset for reading the interrupt base number used for PCIE legacy
+    interrupts and PLX Gen 1 requirement flag */
+#define PCIE_RC_CONFIG_MASK_OFF 0
+
+
+/**
+ * Structure used for obtaining PCIe config information, read from the PCIE
+ * subsystem /ctl file at initialization
+ */
+typedef struct pcie_rc_config
+{
+  int intr;                     /**< interrupt number used for downcall */
+  int plx_gen1;                 /**< flag for PLX Gen 1 configuration */
+} pcie_rc_config_t;
+
+#endif  /* _SYS_HV_DRV_PCIE_RC_INTF_H */
diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h
new file mode 100644
index 00000000..6395faa6
--- /dev/null
+++ b/arch/tile/include/hv/drv_srom_intf.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file drv_srom_intf.h
+ * Interface definitions for the SPI Flash ROM driver.
+ */
+
+#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H
+#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H
+
+/** Read this offset to get the total device size. */
+#define SROM_TOTAL_SIZE_OFF   0xF0000000
+
+/** Read this offset to get the device sector size. */
+#define SROM_SECTOR_SIZE_OFF  0xF0000004
+
+/** Read this offset to get the device page size. */
+#define SROM_PAGE_SIZE_OFF    0xF0000008
+
+/** Write this offset to flush any pending writes. */
+#define SROM_FLUSH_OFF        0xF1000000
+
+/** Write this offset, plus the byte offset of the start of a sector, to
+ *  erase a sector.  Any write data is ignored, but there must be at least
+ *  one byte of write data.  Only applies when the driver is in MTD mode.
+ */
+#define SROM_ERASE_OFF        0xF2000000
+
+#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */
diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h
new file mode 100644
index 00000000..3a73b2b4
--- /dev/null
+++ b/arch/tile/include/hv/drv_xgbe_impl.h
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file drv_xgbe_impl.h
+ * Implementation details for the NetIO library.
+ */
+
+#ifndef __DRV_XGBE_IMPL_H__
+#define __DRV_XGBE_IMPL_H__
+
+#include <hv/netio_errors.h>
+#include <hv/netio_intf.h>
+#include <hv/drv_xgbe_intf.h>
+
+
+/** How many groups we have (log2). */
+#define LOG2_NUM_GROUPS (12)
+/** How many groups we have. */
+#define NUM_GROUPS (1 << LOG2_NUM_GROUPS)
+
+/** Number of output requests we'll buffer per tile. */
+#define EPP_REQS_PER_TILE (32)
+
+/** Words used in an eDMA command without checksum acceleration. */
+#define EDMA_WDS_NO_CSUM      8
+/** Words used in an eDMA command with checksum acceleration. */
+#define EDMA_WDS_CSUM        10
+/** Total available words in the eDMA command FIFO. */
+#define EDMA_WDS_TOTAL      128
+
+
+/*
+ * FIXME: These definitions are internal and should have underscores!
+ * NOTE: The actual numeric values here are intentional and allow us to
+ * optimize the concept "if small ... else if large ... else ...", by
+ * checking for the low bit being set, and then for non-zero.
+ * These are used as array indices, so they must have the values (0, 1, 2)
+ * in some order.
+ */
+#define SIZE_SMALL (1)       /**< Small packet queue. */
+#define SIZE_LARGE (2)       /**< Large packet queue. */
+#define SIZE_JUMBO (0)       /**< Jumbo packet queue. */
+
+/** The number of "SIZE_xxx" values. */
+#define NETIO_NUM_SIZES 3
+
+
+/*
+ * Default numbers of packets for IPP drivers.  These values are chosen
+ * such that CIPP1 will not overflow its L2 cache.
+ */
+
+/** The default number of small packets. */
+#define NETIO_DEFAULT_SMALL_PACKETS 2750
+/** The default number of large packets. */
+#define NETIO_DEFAULT_LARGE_PACKETS 2500
+/** The default number of jumbo packets. */
+#define NETIO_DEFAULT_JUMBO_PACKETS 250
+
+
+/** Log2 of the size of a memory arena. */
+#define NETIO_ARENA_SHIFT      24      /* 16 MB */
+/** Size of a memory arena. */
+#define NETIO_ARENA_SIZE       (1 << NETIO_ARENA_SHIFT)
+
+
+/** A queue of packets.
+ *
+ * This structure partially defines a queue of packets waiting to be
+ * processed.  The queue as a whole is written to by an interrupt handler and
+ * read by non-interrupt code; this data structure is what's touched by the
+ * interrupt handler.  The other part of the queue state, the read offset, is
+ * kept in user space, not in hypervisor space, so it is in a separate data
+ * structure.
+ *
+ * The read offset (__packet_receive_read in the user part of the queue
+ * structure) points to the next packet to be read. When the read offset is
+ * equal to the write offset, the queue is empty; therefore the queue must
+ * contain one more slot than the required maximum queue size.
+ *
+ * Here's an example of all 3 state variables and what they mean.  All
+ * pointers move left to right.
+ *
+ * @code
+ *   I   I   V   V   V   V   I   I   I   I
+ *   0   1   2   3   4   5   6   7   8   9  10
+ *           ^               ^               ^
+ *           |               |               |
+ *           |               |               __last_packet_plus_one
+ *           |               __packet_write
+ *           __packet_receive_read
+ * @endcode
+ *
+ * This queue has 10 slots, and thus can hold 9 packets (__last_packet_plus_one
+ * = 10).  The read pointer is at 2, and the write pointer is at 6; thus,
+ * there are valid, unread packets in slots 2, 3, 4, and 5.  The remaining
+ * slots are invalid (do not contain a packet).
+ */
+typedef struct {
+  /** Byte offset of the next notify packet to be written: zero for the first
+   *  packet on the queue, sizeof (netio_pkt_t) for the second packet on the
+   *  queue, etc. */
+  volatile uint32_t __packet_write;
+
+  /** Offset of the packet after the last valid packet (i.e., when any
+   *  pointer is incremented to this value, it wraps back to zero). */
+  uint32_t __last_packet_plus_one;
+}
+__netio_packet_queue_t;
+
+
+/** A queue of buffers.
+ *
+ * This structure partially defines a queue of empty buffers which have been
+ * obtained via requests to the IPP.  (The elements of the queue are packet
+ * handles, which are transformed into a full netio_pkt_t when the buffer is
+ * retrieved.)  The queue as a whole is written to by an interrupt handler and
+ * read by non-interrupt code; this data structure is what's touched by the
+ * interrupt handler.  The other parts of the queue state, the read offset and
+ * requested write offset, are kept in user space, not in hypervisor space, so
+ * they are in a separate data structure.
+ *
+ * The read offset (__buffer_read in the user part of the queue structure)
+ * points to the next buffer to be read. When the read offset is equal to the
+ * write offset, the queue is empty; therefore the queue must contain one more
+ * slot than the required maximum queue size.
+ *
+ * The requested write offset (__buffer_requested_write in the user part of
+ * the queue structure) points to the slot which will hold the next buffer we
+ * request from the IPP, once we get around to sending such a request.  When
+ * the requested write offset is equal to the write offset, no requests for
+ * new buffers are outstanding; when the requested write offset is one greater
+ * than the read offset, no more requests may be sent.
+ *
+ * Note that, unlike the packet_queue, the buffer_queue places incoming
+ * buffers at decreasing addresses.  This makes the check for "is it time to
+ * wrap the buffer pointer" cheaper in the assembly code which receives new
+ * buffers, and means that the value which defines the queue size,
+ * __last_buffer, is different than in the packet queue.  Also, the offset
+ * used in the packet_queue is already scaled by the size of a packet; here we
+ * use unscaled slot indices for the offsets.  (These differences are
+ * historical, and in the future it's possible that the packet_queue will look
+ * more like this queue.)
+ *
+ * Here's an example of all 4 state variables and what they mean.  Remember:
+ * all pointers move right to left.
+ *
+ * @code
+ *   V   V   V   I   I   R   R   V   V   V
+ *   0   1   2   3   4   5   6   7   8   9
+ *           ^       ^       ^           ^
+ *           |       |       |           |
+ *           |       |       |           __last_buffer
+ *           |       |       __buffer_write
+ *           |       __buffer_requested_write
+ *           __buffer_read
+ * @endcode
+ *
+ * This queue has 10 slots, and thus can hold 9 buffers (__last_buffer = 9).
+ * The read pointer is at 2, and the write pointer is at 6; thus, there are
+ * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7.  The requested write
+ * pointer is at 4; thus, requests have been made to the IPP for buffers which
+ * will be placed in slots 6 and 5 when they arrive.  Finally, the remaining
+ * slots are invalid (do not contain a buffer).
+ */
+typedef struct
+{
+  /** Ordinal number of the next buffer to be written: 0 for the first slot in
+   *  the queue, 1 for the second slot in the queue, etc. */
+  volatile uint32_t __buffer_write;
+
+  /** Ordinal number of the last buffer (i.e., when any pointer is decremented
+   *  below zero, it is reloaded with this value). */
+  uint32_t __last_buffer;
+}
+__netio_buffer_queue_t;
+
+
+/**
+ * An object for providing Ethernet packets to a process.
+ */
+typedef struct __netio_queue_impl_t
+{
+  /** The queue of packets waiting to be received. */
+  __netio_packet_queue_t __packet_receive_queue;
+  /** The intr bit mask that IDs this device. */
+  unsigned int __intr_id;
+  /** Offset to queues of empty buffers, one per size. */
+  uint32_t __buffer_queue[NETIO_NUM_SIZES];
+  /** The address of the first EPP tile, or -1 if no EPP. */
+  /* ISSUE: Actually this is always "0" or "~0". */
+  uint32_t __epp_location;
+  /** The queue ID that this queue represents. */
+  unsigned int __queue_id;
+  /** Number of acknowledgements received. */
+  volatile uint32_t __acks_received;
+  /** Last completion number received for packet_sendv. */
+  volatile uint32_t __last_completion_rcv;
+  /** Number of packets allowed to be outstanding. */
+  uint32_t __max_outstanding;
+  /** First VA available for packets. */
+  void* __va_0;
+  /** First VA in second range available for packets. */
+  void* __va_1;
+  /** Padding to align the "__packets" field to the size of a netio_pkt_t. */
+  uint32_t __padding[3];
+  /** The packets themselves. */
+  netio_pkt_t __packets[0];
+}
+netio_queue_impl_t;
+
+
+/**
+ * An object for managing the user end of a NetIO queue.
+ */
+typedef struct __netio_queue_user_impl_t
+{
+  /** The next incoming packet to be read. */
+  uint32_t __packet_receive_read;
+  /** The next empty buffers to be read, one index per size. */
+  uint8_t __buffer_read[NETIO_NUM_SIZES];
+  /** Where the empty buffer we next request from the IPP will go, one index
+   * per size. */
+  uint8_t __buffer_requested_write[NETIO_NUM_SIZES];
+  /** PCIe interface flag. */
+  uint8_t __pcie;
+  /** Number of packets left to be received before we send a credit update. */
+  uint32_t __receive_credit_remaining;
+  /** Value placed in __receive_credit_remaining when it reaches zero. */
+  uint32_t __receive_credit_interval;
+  /** First fast I/O routine index. */
+  uint32_t __fastio_index;
+  /** Number of acknowledgements expected. */
+  uint32_t __acks_outstanding;
+  /** Last completion number requested. */
+  uint32_t __last_completion_req;
+  /** File descriptor for driver. */
+  int __fd;
+}
+netio_queue_user_impl_t;
+
+
+#define NETIO_GROUP_CHUNK_SIZE   64   /**< Max # groups in one IPP request */
+#define NETIO_BUCKET_CHUNK_SIZE  64   /**< Max # buckets in one IPP request */
+
+
+/** Internal structure used to convey packet send information to the
+ * hypervisor.  FIXME: Actually, it's not used for that anymore, but
+ * netio_packet_send() still uses it internally.
+ */
+typedef struct
+{
+  uint16_t flags;              /**< Packet flags (__NETIO_SEND_FLG_xxx) */
+  uint16_t transfer_size;      /**< Size of packet */
+  uint32_t va;                 /**< VA of start of packet */
+  __netio_pkt_handle_t handle; /**< Packet handle */
+  uint32_t csum0;              /**< First checksum word */
+  uint32_t csum1;              /**< Second checksum word */
+}
+__netio_send_cmd_t;
+
+
+/** Flags used in two contexts:
+ *  - As the "flags" member in the __netio_send_cmd_t, above; used only
+ *    for netio_pkt_send_{prepare,commit}.
+ *  - As part of the flags passed to the various send packet fast I/O calls.
+ */
+
+/** Need acknowledgement on this packet.  Note that some code in the
+ *  normal send_pkt fast I/O handler assumes that this is equal to 1. */
+#define __NETIO_SEND_FLG_ACK    0x1
+
+/** Do checksum on this packet.  (Only used with the __netio_send_cmd_t;
+ *  normal packet sends use a special fast I/O index to denote checksumming,
+ *  and multi-segment sends test the checksum descriptor.) */
+#define __NETIO_SEND_FLG_CSUM   0x2
+
+/** Get a completion on this packet.  Only used with multi-segment sends.  */
+#define __NETIO_SEND_FLG_COMPLETION 0x4
+
+/** Position of the number-of-extra-segments value in the flags word.
+    Only used with multi-segment sends. */
+#define __NETIO_SEND_FLG_XSEG_SHIFT 3
+
+/** Width of the number-of-extra-segments value in the flags word. */
+#define __NETIO_SEND_FLG_XSEG_WIDTH 2
+
+#endif /* __DRV_XGBE_IMPL_H__ */
diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h
new file mode 100644
index 00000000..f13188ac
--- /dev/null
+++ b/arch/tile/include/hv/drv_xgbe_intf.h
@@ -0,0 +1,615 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file drv_xgbe_intf.h
+ * Interface to the hypervisor XGBE driver.
+ */
+
+#ifndef __DRV_XGBE_INTF_H__
+#define __DRV_XGBE_INTF_H__
+
+/**
+ * An object for forwarding VAs and PAs to the hypervisor.
+ * @ingroup types
+ *
+ * This allows the supervisor to specify a number of areas of memory to
+ * store packet buffers.
+ */
+typedef struct
+{
+  /** The physical address of the memory. */
+  HV_PhysAddr pa;
+  /** Page table entry for the memory.  This is only used to derive the
+   *  memory's caching mode; the PA bits are ignored. */
+  HV_PTE pte;
+  /** The virtual address of the memory. */
+  HV_VirtAddr va;
+  /** Size (in bytes) of the memory area. */
+  int size;
+
+}
+netio_ipp_address_t;
+
+/** The various pread/pwrite offsets into the hypervisor-level driver.
+ * @ingroup types
+ */
+typedef enum
+{
+  /** Inform the Linux driver of the address of the NetIO arena memory.
+   *  This offset is actually only used to convey information from netio
+   *  to the Linux driver; it never makes it from there to the hypervisor.
+   *  Write-only; takes a uint32_t specifying the VA address. */
+  NETIO_FIXED_ADDR               = 0x5000000000000000ULL,
+
+  /** Inform the Linux driver of the size of the NetIO arena memory.
+   *  This offset is actually only used to convey information from netio
+   *  to the Linux driver; it never makes it from there to the hypervisor.
+   *  Write-only; takes a uint32_t specifying the VA size. */
+  NETIO_FIXED_SIZE               = 0x5100000000000000ULL,
+
+  /** Register current tile with IPP.  Write then read: write, takes a
+   *  netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */
+  NETIO_IPP_INPUT_REGISTER_OFF   = 0x6000000000000000ULL,
+
+  /** Unregister current tile from IPP.  Write-only, takes a dummy argument. */
+  NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL,
+
+  /** Start packets flowing.  Write-only, takes a dummy argument. */
+  NETIO_IPP_INPUT_INIT_OFF       = 0x6200000000000000ULL,
+
+  /** Stop packets flowing.  Write-only, takes a dummy argument. */
+  NETIO_IPP_INPUT_UNINIT_OFF     = 0x6300000000000000ULL,
+
+  /** Configure group (typically we group on VLAN).  Write-only: takes an
+   *  array of netio_group_t's, low 24 bits of the offset is the base group
+   *  number times the size of a netio_group_t. */
+  NETIO_IPP_INPUT_GROUP_CFG_OFF  = 0x6400000000000000ULL,
+
+  /** Configure bucket.  Write-only: takes an array of netio_bucket_t's, low
+   *  24 bits of the offset is the base bucket number times the size of a
+   *  netio_bucket_t. */
+  NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL,
+
+  /** Get/set a parameter.  Read or write: read or write data is the parameter
+   *  value, low 32 bits of the offset is a __netio_getset_offset_t. */
+  NETIO_IPP_PARAM_OFF            = 0x6600000000000000ULL,
+
+  /** Get fast I/O index.  Read-only; returns a 4-byte base index value. */
+  NETIO_IPP_GET_FASTIO_OFF       = 0x6700000000000000ULL,
+
+  /** Configure hijack IP address.  Packets with this IPv4 dest address
+   *  go to bucket NETIO_NUM_BUCKETS - 1.  Write-only: takes an IP address
+   *  in some standard form.  FIXME: Define the form! */
+  NETIO_IPP_INPUT_HIJACK_CFG_OFF  = 0x6800000000000000ULL,
+
+  /**
+   * Offsets beyond this point are reserved for the supervisor (although that
+   * enforcement must be done by the supervisor driver itself).
+   */
+  NETIO_IPP_USER_MAX_OFF         = 0x6FFFFFFFFFFFFFFFULL,
+
+  /** Register I/O memory.  Write-only, takes a netio_ipp_address_t. */
+  NETIO_IPP_IOMEM_REGISTER_OFF   = 0x7000000000000000ULL,
+
+  /** Unregister I/O memory.  Write-only, takes a netio_ipp_address_t. */
+  NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL,
+
+  /* Offsets greater than 0x7FFFFFFF can't be used directly from Linux
+   * userspace code due to limitations in the pread/pwrite syscalls. */
+
+  /** Drain LIPP buffers. */
+  NETIO_IPP_DRAIN_OFF              = 0xFA00000000000000ULL,
+
+  /** Supply a netio_ipp_address_t to be used as shared memory for the
+   *  LEPP command queue. */
+  NETIO_EPP_SHM_OFF              = 0xFB00000000000000ULL,
+
+  /* 0xFC... is currently unused. */
+
+  /** Stop IPP/EPP tiles.  Write-only, takes a dummy argument.  */
+  NETIO_IPP_STOP_SHIM_OFF        = 0xFD00000000000000ULL,
+
+  /** Start IPP/EPP tiles.  Write-only, takes a dummy argument.  */
+  NETIO_IPP_START_SHIM_OFF       = 0xFE00000000000000ULL,
+
+  /** Supply packet arena.  Write-only, takes an array of
+    * netio_ipp_address_t values. */
+  NETIO_IPP_ADDRESS_OFF          = 0xFF00000000000000ULL,
+} netio_hv_offset_t;
+
+/** Extract the base offset from an offset */
+#define NETIO_BASE_OFFSET(off)    ((off) & 0xFF00000000000000ULL)
+/** Extract the local offset from an offset */
+#define NETIO_LOCAL_OFFSET(off)   ((off) & 0x00FFFFFFFFFFFFFFULL)
+
+
+/**
+ * Get/set offset.
+ */
+typedef union
+{
+  struct
+  {
+    uint64_t addr:48;        /**< Class-specific address */
+    unsigned int class:8;    /**< Class (e.g., NETIO_PARAM) */
+    unsigned int opcode:8;   /**< High 8 bits of NETIO_IPP_PARAM_OFF */
+  }
+  bits;                      /**< Bitfields */
+  uint64_t word;             /**< Aggregated value to use as the offset */
+}
+__netio_getset_offset_t;
+
+/**
+ * Fast I/O index offsets (must be contiguous).
+ */
+typedef enum
+{
+  NETIO_FASTIO_ALLOCATE         = 0, /**< Get empty packet buffer */
+  NETIO_FASTIO_FREE_BUFFER      = 1, /**< Give buffer back to IPP */
+  NETIO_FASTIO_RETURN_CREDITS   = 2, /**< Give credits to IPP */
+  NETIO_FASTIO_SEND_PKT_NOCK    = 3, /**< Send a packet, no checksum */
+  NETIO_FASTIO_SEND_PKT_CK      = 4, /**< Send a packet, with checksum */
+  NETIO_FASTIO_SEND_PKT_VEC     = 5, /**< Send a vector of packets */
+  NETIO_FASTIO_SENDV_PKT        = 6, /**< Sendv one packet */
+  NETIO_FASTIO_NUM_INDEX        = 7, /**< Total number of fast I/O indices */
+} netio_fastio_index_t;
+
+/** 3-word return type for Fast I/O call. */
+typedef struct
+{
+  int err;            /**< Error code. */
+  uint32_t val0;      /**< Value.  Meaning depends upon the specific call. */
+  uint32_t val1;      /**< Value.  Meaning depends upon the specific call. */
+} netio_fastio_rv3_t;
+
+/** 0-argument fast I/O call */
+int __netio_fastio0(uint32_t fastio_index);
+/** 1-argument fast I/O call */
+int __netio_fastio1(uint32_t fastio_index, uint32_t arg0);
+/** 3-argument fast I/O call, 3-word return value */
+netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0,
+                                       uint32_t arg1, uint32_t arg2);
+/** 4-argument fast I/O call */
+int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
+                    uint32_t arg2, uint32_t arg3);
+/** 6-argument fast I/O call */
+int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
+                    uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5);
+/** 9-argument fast I/O call */
+int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
+                    uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5,
+                    uint32_t arg6, uint32_t arg7, uint32_t arg8);
+
+/** Allocate an empty packet.
+ * @param fastio_index Fast I/O index.
+ * @param size Size of the packet to allocate.
+ */
+#define __netio_fastio_allocate(fastio_index, size) \
+  __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size)
+
+/** Free a buffer.
+ * @param fastio_index Fast I/O index.
+ * @param handle Handle for the packet to free.
+ */
+#define __netio_fastio_free_buffer(fastio_index, handle) \
+  __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle)
+
+/** Increment our receive credits.
+ * @param fastio_index Fast I/O index.
+ * @param credits Number of credits to add.
+ */
+#define __netio_fastio_return_credits(fastio_index, credits) \
+  __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits)
+
+/** Send packet, no checksum.
+ * @param fastio_index Fast I/O index.
+ * @param ackflag Nonzero if we want an ack.
+ * @param size Size of the packet.
+ * @param va Virtual address of start of packet.
+ * @param handle Packet handle.
+ */
+#define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \
+  __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \
+                  size, va, handle)
+
+/** Send packet, calculate checksum.
+ * @param fastio_index Fast I/O index.
+ * @param ackflag Nonzero if we want an ack.
+ * @param size Size of the packet.
+ * @param va Virtual address of start of packet.
+ * @param handle Packet handle.
+ * @param csum0 Shim checksum header.
+ * @param csum1 Checksum seed.
+ */
+#define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \
+                                   csum0, csum1) \
+  __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \
+                  size, va, handle, csum0, csum1)
+
+
+/** Format for the "csum0" argument to the __netio_fastio_send routines
+ * and LEPP.  Note that this is currently exactly identical to the
+ * ShimProtocolOffloadHeader.
+ */
+typedef union
+{
+  struct
+  {
+    unsigned int start_byte:7;       /**< The first byte to be checksummed */
+    unsigned int count:14;           /**< Number of bytes to be checksummed. */
+    unsigned int destination_byte:7; /**< The byte to write the checksum to. */
+    unsigned int reserved:4;         /**< Reserved. */
+  } bits;                            /**< Decomposed method of access. */
+  unsigned int word;                 /**< To send out the IDN. */
+} __netio_checksum_header_t;
+
+
+/** Sendv packet with 1 or 2 segments.
+ * @param fastio_index Fast I/O index.
+ * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
+ *        1 in next 2 bits; expected checksum in high 16 bits.
+ * @param confno Confirmation number to request, if notify flag set.
+ * @param csum0 Checksum descriptor; if zero, no checksum.
+ * @param va_F Virtual address of first segment.
+ * @param va_L Virtual address of last segment, if 2 segments.
+ * @param len_F_L Length of first segment in low 16 bits; length of last
+ *        segment, if 2 segments, in high 16 bits.
+ */
+#define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \
+                                     va_F, va_L, len_F_L) \
+  __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \
+                  csum0, va_F, va_L, len_F_L)
+
+/** Send packet on PCIe interface.
+ * @param fastio_index Fast I/O index.
+ * @param flags Ack/csum/notify flags in low 3 bits.
+ * @param confno Confirmation number to request, if notify flag set.
+ * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe.
+ * @param va_F Virtual address of the packet buffer.
+ * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0.
+ * @param len_F_L Length of the packet buffer in low 16 bits.
+ */
+#define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \
+                                     va_F, va_L, len_F_L) \
+  __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \
+                  csum0, va_F, va_L, len_F_L)
+
+/** Sendv packet with 3 or 4 segments.
+ * @param fastio_index Fast I/O index.
+ * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
+ *        1 in next 2 bits; expected checksum in high 16 bits.
+ * @param confno Confirmation number to request, if notify flag set.
+ * @param csum0 Checksum descriptor; if zero, no checksum.
+ * @param va_F Virtual address of first segment.
+ * @param va_L Virtual address of last segment (third segment if 3 segments,
+ *        fourth segment if 4 segments).
+ * @param len_F_L Length of first segment in low 16 bits; length of last
+ *        segment in high 16 bits.
+ * @param va_M0 Virtual address of "middle 0" segment; this segment is sent
+ *        second when there are three segments, and third if there are four.
+ * @param va_M1 Virtual address of "middle 1" segment; this segment is sent
+ *        second when there are four segments.
+ * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle
+ *        1 segment, if 4 segments, in high 16 bits.
+ */
+#define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \
+                                     va_L, len_F_L, va_M0, va_M1, len_M0_M1) \
+  __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \
+                  csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1)
+
+/** Send vector of packets.
+ * @param fastio_index Fast I/O index.
+ * @param seqno Number of packets transmitted so far on this interface;
+ *        used to decide which packets should be acknowledged.
+ * @param nentries Number of entries in vector.
+ * @param va Virtual address of start of vector entry array.
+ * @return 3-word netio_fastio_rv3_t structure.  The structure's err member
+ *         is an error code, or zero if no error.  The val0 member is the
+ *         updated value of seqno; it has been incremented by 1 for each
+ *         packet sent.  That increment may be less than nentries if an
+ *         error occurred, or if some of the entries in the vector contain
+ *         handles equal to NETIO_PKT_HANDLE_NONE.  The val1 member is the
+ *         updated value of nentries; it has been decremented by 1 for each
+ *         vector entry processed.  Again, that decrement may be less than
+ *         nentries (leaving the returned value positive) if an error
+ *         occurred.
+ */
+#define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \
+  __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \
+                      nentries, va)
+
+
+/** An egress DMA command for LEPP. */
+typedef struct
+{
+  /** Is this a TSO transfer?
+   *
+   * NOTE: This field is always 0, to distinguish it from
+   * lepp_tso_cmd_t.  It must come first!
+   */
+  uint8_t tso               : 1;
+
+  /** Unused padding bits. */
+  uint8_t _unused           : 3;
+
+  /** Should this packet be sent directly from caches instead of DRAM,
+   * using hash-for-home to locate the packet data?
+   */
+  uint8_t hash_for_home     : 1;
+
+  /** Should we compute a checksum? */
+  uint8_t compute_checksum  : 1;
+
+  /** Is this the final buffer for this packet?
+   *
+   * A single packet can be split over several input buffers (a "gather"
+   * operation).  This flag indicates that this is the last buffer
+   * in a packet.
+   */
+  uint8_t end_of_packet     : 1;
+
+  /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */
+  uint8_t send_completion   : 1;
+
+  /** High bits of Client Physical Address of the start of the buffer
+   *  to be egressed.
+   *
+   *  NOTE: Only 6 bits are actually needed here, as CPAs are
+   *  currently 38 bits.  So two bits could be scavenged from this.
+   */
+  uint8_t cpa_hi;
+
+  /** The number of bytes to be egressed. */
+  uint16_t length;
+
+  /** Low 32 bits of Client Physical Address of the start of the buffer
+   *  to be egressed.
+   */
+  uint32_t cpa_lo;
+
+  /** Checksum information (only used if 'compute_checksum'). */
+  __netio_checksum_header_t checksum_data;
+
+} lepp_cmd_t;
+
+
+/** A chunk of physical memory for a TSO egress. */
+typedef struct
+{
+  /** The low bits of the CPA. */
+  uint32_t cpa_lo;
+  /** The high bits of the CPA. */
+  uint16_t cpa_hi		: 15;
+  /** Should this packet be sent directly from caches instead of DRAM,
+   *  using hash-for-home to locate the packet data?
+   */
+  uint16_t hash_for_home	: 1;
+  /** The length in bytes. */
+  uint16_t length;
+} lepp_frag_t;
+
+
+/** An LEPP command that handles TSO. */
+typedef struct
+{
+  /** Is this a TSO transfer?
+   *
+   *  NOTE: This field is always 1, to distinguish it from
+   *  lepp_cmd_t.  It must come first!
+   */
+  uint8_t tso             : 1;
+
+  /** Unused padding bits. */
+  uint8_t _unused         : 7;
+
+  /** Size of the header[] array in bytes.  It must be in the range
+   *  [40, 127], which are the smallest header for a TCP packet over
+   *  Ethernet and the maximum possible prepend size supported by
+   *  hardware, respectively.  Note that the array storage must be
+   *  padded out to a multiple of four bytes so that the following
+   *  LEPP command is aligned properly.
+   */
+  uint8_t header_size;
+
+  /** Byte offset of the IP header in header[]. */
+  uint8_t ip_offset;
+
+  /** Byte offset of the TCP header in header[]. */
+  uint8_t tcp_offset;
+
+  /** The number of bytes to use for the payload of each packet,
+   *  except of course the last one, which may not have enough bytes.
+   *  This means that each Ethernet packet except the last will have a
+   *  size of header_size + payload_size.
+   */
+  uint16_t payload_size;
+
+  /** The length of the 'frags' array that follows this struct. */
+  uint16_t num_frags;
+
+  /** The actual frags. */
+  lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */];
+
+  /*
+   * The packet header template logically follows frags[],
+   * but you can't declare that in C.
+   *
+   * uint32_t header[header_size_in_words_rounded_up];
+   */
+
+} lepp_tso_cmd_t;
+
+
+/** An LEPP completion ring entry. */
+typedef void* lepp_comp_t;
+
+
+/** Maximum number of frags for one TSO command.  This is adapted from
+ *  linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for
+ *  our page size of exactly 65536.  We add one for a "body" fragment.
+ */
+#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1)
+
+/** Total number of bytes needed for an lepp_tso_cmd_t. */
+#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \
+  (sizeof(lepp_tso_cmd_t) + \
+   (num_frags) * sizeof(lepp_frag_t) + \
+   (((header_size) + 3) & -4))
+
+/** The size of the lepp "cmd" queue. */
+#define LEPP_CMD_QUEUE_BYTES \
+ (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \
+  (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t))
+
+/** The largest possible command that can go in lepp_queue_t::cmds[]. */
+#define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128)
+
+/** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive).
+ */
+#define LEPP_CMD_LIMIT \
+  (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE)
+
+/** The maximum number of completions in an LEPP queue. */
+#define LEPP_COMP_QUEUE_SIZE \
+  ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t))
+
+/** Increment an index modulo the queue size. */
+#define LEPP_QINC(var) \
+  (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1))
+
+/** A queue used to convey egress commands from the client to LEPP. */
+typedef struct
+{
+  /** Index of first completion not yet processed by user code.
+   *  If this is equal to comp_busy, there are no such completions.
+   *
+   *  NOTE: This is only read/written by the user.
+   */
+  unsigned int comp_head;
+
+  /** Index of first completion record not yet completed.
+   *  If this is equal to comp_tail, there are no such completions.
+   *  This index gets advanced (modulo LEPP_COMP_QUEUE_SIZE) whenever
+   *  a command with the 'send_completion' bit set is finished.
+   *
+   *  NOTE: This is only written by LEPP, only read by the user.
+   */
+  volatile unsigned int comp_busy;
+
+  /** Index of the first empty slot in the completion ring.
+   *  Entries from this up to but not including comp_head (in ring order)
+   *  can be filled in with completion data.
+   *
+   *  NOTE: This is only read/written by the user.
+   */
+  unsigned int comp_tail;
+
+  /** Byte index of first command enqueued for LEPP but not yet processed.
+   *
+   *  This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
+   *
+   *  NOTE: LEPP advances this counter as soon as it no longer needs
+   *  the cmds[] storage for this entry, but the transfer is not actually
+   *  complete (i.e. the buffer pointed to by the command is no longer
+   *  needed) until comp_busy advances.
+   *
+   *  If this is equal to cmd_tail, the ring is empty.
+   *
+   *  NOTE: This is only written by LEPP, only read by the user.
+   */
+  volatile unsigned int cmd_head;
+
+  /** Byte index of first empty slot in the command ring.  This field can
+   *  be incremented up to but not equal to cmd_head (because that would
+   *  mean the ring is empty).
+   *
+   *  This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
+   *
+   *  NOTE: This is read/written by the user, only read by LEPP.
+   */
+  volatile unsigned int cmd_tail;
+
+  /** A ring of variable-sized egress DMA commands.
+   *
+   *  NOTE: Only written by the user, only read by LEPP.
+   */
+  char cmds[LEPP_CMD_QUEUE_BYTES]
+    __attribute__((aligned(CHIP_L2_LINE_SIZE())));
+
+  /** A ring of user completion data.
+   *  NOTE: Only read/written by the user.
+   */
+  lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE]
+    __attribute__((aligned(CHIP_L2_LINE_SIZE())));
+} lepp_queue_t;
+
+
+/** An internal helper function for determining the number of entries
+ *  available in a ring buffer, given that there is one sentinel.
+ */
+static inline unsigned int
+_lepp_num_free_slots(unsigned int head, unsigned int tail)
+{
+  /*
+   * One entry is reserved for use as a sentinel, to distinguish
+   * "empty" from "full".  So we compute (head - tail - 1) modulo
+   * LEPP_COMP_QUEUE_SIZE, but without using a slow % operation.
+   */
+  return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0);
+}
+
+
+/** Returns how many new comp entries can be enqueued. */
+static inline unsigned int
+lepp_num_free_comp_slots(const lepp_queue_t* q)
+{
+  return _lepp_num_free_slots(q->comp_head, q->comp_tail);
+}
+
+static inline int
+lepp_qsub(int v1, int v2)
+{
+  int delta = v1 - v2;
+  return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE);
+}
+
+
+/** FIXME: Check this from linux, via a new "pwrite()" call. */
+#define LIPP_VERSION 1
+
+
+/** We use exactly two bytes of alignment padding. */
+#define LIPP_PACKET_PADDING 2
+
+/** The minimum size of a "small" buffer (including the padding). */
+#define LIPP_SMALL_PACKET_SIZE 128
+
+/*
+ * NOTE: The following two values should total to less than around
+ * 13582, to keep the total size used for "lipp_state_t" below 64K.
+ */
+
+/** The maximum number of "small" buffers.
+ *  This is enough for 53 network cpus with 128 credits.  Note that
+ *  if these are exhausted, we will fall back to using large buffers.
+ */
+#define LIPP_SMALL_BUFFERS 6785
+
+/** The maximum number of "large" buffers.
+ *  This is enough for 53 network cpus with 128 credits.
+ */
+#define LIPP_LARGE_BUFFERS 6785
+
+#endif /* __DRV_XGBE_INTF_H__ */
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
new file mode 100644
index 00000000..72ec1e97
--- /dev/null
+++ b/arch/tile/include/hv/hypervisor.h
@@ -0,0 +1,2427 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file hypervisor.h
+ * The hypervisor's public API.
+ */
+
+#ifndef _TILE_HV_H
+#define _TILE_HV_H
+
+#include <arch/chip.h>
+
+/* Linux builds want unsigned long constants, but assembler wants numbers */
+#ifdef __ASSEMBLER__
+/** One, for assembler */
+#define __HV_SIZE_ONE 1
+#elif !defined(__tile__) && CHIP_VA_WIDTH() > 32
+/** One, for 64-bit on host */
+#define __HV_SIZE_ONE 1ULL
+#else
+/** One, for Linux */
+#define __HV_SIZE_ONE 1UL
+#endif
+
+/** The log2 of the span of a level-1 page table, in bytes.
+ */
+#define HV_LOG2_L1_SPAN 32
+
+/** The span of a level-1 page table, in bytes.  NOTE(review): shifts by 32;
+ *  undefined if __HV_SIZE_ONE (1UL) is only 32 bits wide -- confirm usage. */
+#define HV_L1_SPAN (__HV_SIZE_ONE << HV_LOG2_L1_SPAN)
+
+/** The log2 of the size of small pages, in bytes. This value should
+ * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL).
+ */
+#define HV_LOG2_PAGE_SIZE_SMALL 16
+
+/** The size of small pages, in bytes. This value should be verified
+ * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL).
+ */
+#define HV_PAGE_SIZE_SMALL (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_SMALL)
+
+/** The log2 of the size of large pages, in bytes. This value should be
+ * verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE).
+ */
+#define HV_LOG2_PAGE_SIZE_LARGE 24
+
+/** The size of large pages, in bytes. This value should be verified
+ * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE).
+ */
+#define HV_PAGE_SIZE_LARGE (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_LARGE)
+
+/** The log2 of the granularity at which page tables must be aligned;
+ *  in other words, the CPA for a page table must have this many zero
+ *  bits at the bottom of the address.
+ */
+#define HV_LOG2_PAGE_TABLE_ALIGN 11
+
+/** The granularity at which page tables must be aligned.
+ */
+#define HV_PAGE_TABLE_ALIGN (__HV_SIZE_ONE << HV_LOG2_PAGE_TABLE_ALIGN)
+
+/** Normal start of hypervisor glue in client physical memory. */
+#define HV_GLUE_START_CPA 0x10000
+
+/** This much space is reserved at HV_GLUE_START_CPA
+ * for the hypervisor glue. The client program must start at
+ * some address higher than this, and in particular the address of
+ * its text section should be equal to zero modulo HV_PAGE_SIZE_LARGE
+ * so that relative offsets to the HV glue are correct.
+ */
+#define HV_GLUE_RESERVED_SIZE 0x10000
+
+/** Each entry in the hv dispatch array takes this many bytes. */
+#define HV_DISPATCH_ENTRY_SIZE 32
+
+/** Version of the hypervisor interface defined by this file */
+#define _HV_VERSION 11
+
+/* Index into hypervisor interface dispatch code blocks.
+ *
+ * Hypervisor calls are invoked from user space by calling code
+ * at an address HV_BASE_ADDRESS + (index) * HV_DISPATCH_ENTRY_SIZE,
+ * where index is one of these enum values.
+ *
+ * Normally a supervisor is expected to produce a set of symbols
+ * starting at HV_BASE_ADDRESS that obey this convention, but a user
+ * program could call directly through function pointers if desired.
+ *
+ * These numbers are part of the binary API and will not be changed
+ * without updating HV_VERSION, which should be a rare event.
+ */
+
+/** reserved. */
+#define _HV_DISPATCH_RESERVED                     0
+
+/** hv_init  */
+#define HV_DISPATCH_INIT                          1
+
+/** hv_install_context */
+#define HV_DISPATCH_INSTALL_CONTEXT               2
+
+/** hv_sysconf */
+#define HV_DISPATCH_SYSCONF                       3
+
+/** hv_get_rtc */
+#define HV_DISPATCH_GET_RTC                       4
+
+/** hv_set_rtc */
+#define HV_DISPATCH_SET_RTC                       5
+
+/** hv_flush_asid */
+#define HV_DISPATCH_FLUSH_ASID                    6
+
+/** hv_flush_page */
+#define HV_DISPATCH_FLUSH_PAGE                    7
+
+/** hv_flush_pages */
+#define HV_DISPATCH_FLUSH_PAGES                   8
+
+/** hv_restart */
+#define HV_DISPATCH_RESTART                       9
+
+/** hv_halt */
+#define HV_DISPATCH_HALT                          10
+
+/** hv_power_off */
+#define HV_DISPATCH_POWER_OFF                     11
+
+/** hv_inquire_physical */
+#define HV_DISPATCH_INQUIRE_PHYSICAL              12
+
+/** hv_inquire_memory_controller */
+#define HV_DISPATCH_INQUIRE_MEMORY_CONTROLLER     13
+
+/** hv_inquire_virtual */
+#define HV_DISPATCH_INQUIRE_VIRTUAL               14
+
+/** hv_inquire_asid */
+#define HV_DISPATCH_INQUIRE_ASID                  15
+
+/** hv_nanosleep */
+#define HV_DISPATCH_NANOSLEEP                     16
+
+/** hv_console_read_if_ready */
+#define HV_DISPATCH_CONSOLE_READ_IF_READY         17
+
+/** hv_console_write */
+#define HV_DISPATCH_CONSOLE_WRITE                 18
+
+/** hv_downcall_dispatch */
+#define HV_DISPATCH_DOWNCALL_DISPATCH             19
+
+/** hv_inquire_topology */
+#define HV_DISPATCH_INQUIRE_TOPOLOGY              20
+
+/** hv_fs_findfile */
+#define HV_DISPATCH_FS_FINDFILE                   21
+
+/** hv_fs_fstat */
+#define HV_DISPATCH_FS_FSTAT                      22
+
+/** hv_fs_pread */
+#define HV_DISPATCH_FS_PREAD                      23
+
+/** hv_physaddr_read64 */
+#define HV_DISPATCH_PHYSADDR_READ64               24
+
+/** hv_physaddr_write64 */
+#define HV_DISPATCH_PHYSADDR_WRITE64              25
+
+/** hv_get_command_line */
+#define HV_DISPATCH_GET_COMMAND_LINE              26
+
+/** hv_set_caching */
+#define HV_DISPATCH_SET_CACHING                   27
+
+/** hv_bzero_page */
+#define HV_DISPATCH_BZERO_PAGE                    28
+
+/** hv_register_message_state */
+#define HV_DISPATCH_REGISTER_MESSAGE_STATE        29
+
+/** hv_send_message */
+#define HV_DISPATCH_SEND_MESSAGE                  30
+
+/** hv_receive_message */
+#define HV_DISPATCH_RECEIVE_MESSAGE               31
+
+/** hv_inquire_context */
+#define HV_DISPATCH_INQUIRE_CONTEXT               32
+
+/** hv_start_all_tiles */
+#define HV_DISPATCH_START_ALL_TILES               33
+
+/** hv_dev_open */
+#define HV_DISPATCH_DEV_OPEN                      34
+
+/** hv_dev_close */
+#define HV_DISPATCH_DEV_CLOSE                     35
+
+/** hv_dev_pread */
+#define HV_DISPATCH_DEV_PREAD                     36
+
+/** hv_dev_pwrite */
+#define HV_DISPATCH_DEV_PWRITE                    37
+
+/** hv_dev_poll */
+#define HV_DISPATCH_DEV_POLL                      38
+
+/** hv_dev_poll_cancel */
+#define HV_DISPATCH_DEV_POLL_CANCEL               39
+
+/** hv_dev_preada */
+#define HV_DISPATCH_DEV_PREADA                    40
+
+/** hv_dev_pwritea */
+#define HV_DISPATCH_DEV_PWRITEA                   41
+
+/** hv_flush_remote */
+#define HV_DISPATCH_FLUSH_REMOTE                  42
+
+/** hv_console_putc */
+#define HV_DISPATCH_CONSOLE_PUTC                  43
+
+/** hv_inquire_tiles */
+#define HV_DISPATCH_INQUIRE_TILES                 44
+
+/** hv_confstr */
+#define HV_DISPATCH_CONFSTR                       45
+
+/** hv_reexec */
+#define HV_DISPATCH_REEXEC                        46
+
+/** hv_set_command_line */
+#define HV_DISPATCH_SET_COMMAND_LINE              47
+
+#if !CHIP_HAS_IPI()
+
+/** hv_clear_intr */
+#define HV_DISPATCH_CLEAR_INTR                    48
+
+/** hv_enable_intr */
+#define HV_DISPATCH_ENABLE_INTR                   49
+
+/** hv_disable_intr */
+#define HV_DISPATCH_DISABLE_INTR                  50
+
+/** hv_raise_intr */
+#define HV_DISPATCH_RAISE_INTR                    51
+
+/** hv_trigger_ipi */
+#define HV_DISPATCH_TRIGGER_IPI                   52
+
+#endif /* !CHIP_HAS_IPI() */
+
+/** hv_store_mapping */
+#define HV_DISPATCH_STORE_MAPPING                 53
+
+/** hv_inquire_realpa */
+#define HV_DISPATCH_INQUIRE_REALPA                54
+
+/** hv_flush_all */
+#define HV_DISPATCH_FLUSH_ALL                     55
+
+#if CHIP_HAS_IPI()
+/** hv_get_ipi_pte */
+#define HV_DISPATCH_GET_IPI_PTE                   56
+#endif
+
+/** One more than the largest dispatch value */
+#define _HV_DISPATCH_END                          57
+
+
+#ifndef __ASSEMBLER__
+
+#ifdef __KERNEL__
+#include <asm/types.h>
+typedef u32 __hv32;        /**< 32-bit value */
+typedef u64 __hv64;        /**< 64-bit value */
+#else
+#include <stdint.h>
+typedef uint32_t __hv32;   /**< 32-bit value */
+typedef uint64_t __hv64;   /**< 64-bit value */
+#endif
+
+
+/** Hypervisor physical address. */
+typedef __hv64 HV_PhysAddr;
+
+#if CHIP_VA_WIDTH() > 32
+/** Hypervisor virtual address. */
+typedef __hv64 HV_VirtAddr;
+#else
+/** Hypervisor virtual address. */
+typedef __hv32 HV_VirtAddr;
+#endif /* CHIP_VA_WIDTH() > 32 */
+
+/** Hypervisor ASID. */
+typedef unsigned int HV_ASID;
+
+/** Hypervisor tile location for a memory access
+ * ("location overridden target").
+ */
+typedef unsigned int HV_LOTAR;
+
+/** Hypervisor size of a page. */
+typedef unsigned long HV_PageSize;
+
+/** A page table entry.
+ */
+typedef struct
+{
+  __hv64 val;                /**< Value of PTE */
+} HV_PTE;
+
+/** Hypervisor error code. */
+typedef int HV_Errno;
+
+#endif /* !__ASSEMBLER__ */
+
+#define HV_OK           0    /**< No error */
+#define HV_EINVAL      -801  /**< Invalid argument */
+#define HV_ENODEV      -802  /**< No such device */
+#define HV_ENOENT      -803  /**< No such file or directory */
+#define HV_EBADF       -804  /**< Bad file number */
+#define HV_EFAULT      -805  /**< Bad address */
+#define HV_ERECIP      -806  /**< Bad recipients */
+#define HV_E2BIG       -807  /**< Message too big */
+#define HV_ENOTSUP     -808  /**< Service not supported */
+#define HV_EBUSY       -809  /**< Device busy */
+#define HV_ENOSYS      -810  /**< Invalid syscall */
+#define HV_EPERM       -811  /**< No permission */
+#define HV_ENOTREADY   -812  /**< Device not ready */
+#define HV_EIO         -813  /**< I/O error */
+#define HV_ENOMEM      -814  /**< Out of memory */
+#define HV_EAGAIN      -815  /**< Try again */
+
+#define HV_ERR_MAX     -801  /**< Largest HV error code */
+#define HV_ERR_MIN     -815  /**< Smallest HV error code */
+
+#ifndef __ASSEMBLER__
+
+/** Pass HV_VERSION to hv_init to request this version of the interface. */
+typedef enum { HV_VERSION = _HV_VERSION } HV_VersionNumber;
+
+/** Initializes the hypervisor.
+ *
+ * @param interface_version_number The version of the hypervisor interface
+ * that this program expects, typically HV_VERSION.
+ * @param chip_num Architecture number of the chip the client was built for.
+ * @param chip_rev_num Revision number of the chip the client was built for.
+ */
+void hv_init(HV_VersionNumber interface_version_number,
+             int chip_num, int chip_rev_num);
+
+
+/** Queries we can make for hv_sysconf().
+ *
+ * These numbers are part of the binary API and guaranteed not to change.
+ */
+typedef enum {
+  /** An invalid value; do not use. */
+  _HV_SYSCONF_RESERVED       = 0,
+
+  /** The length of the glue section containing the hv_ procs, in bytes. */
+  HV_SYSCONF_GLUE_SIZE       = 1,
+
+  /** The size of small pages, in bytes. */
+  HV_SYSCONF_PAGE_SIZE_SMALL = 2,
+
+  /** The size of large pages, in bytes. */
+  HV_SYSCONF_PAGE_SIZE_LARGE = 3,
+
+  /** Processor clock speed, in hertz. */
+  HV_SYSCONF_CPU_SPEED       = 4,
+
+  /** Processor temperature, in degrees Kelvin.  The value
+   *  HV_SYSCONF_TEMP_KTOC may be subtracted from this to get degrees
+   *  Celsius.  If that Celsius value is HV_SYSCONF_OVERTEMP, this indicates
+   *  that the temperature has hit an upper limit and is no longer being
+   *  accurately tracked.
+   */
+  HV_SYSCONF_CPU_TEMP        = 5,
+
+  /** Board temperature, in degrees Kelvin.  The value
+   *  HV_SYSCONF_TEMP_KTOC may be subtracted from this to get degrees
+   *  Celsius.  If that Celsius value is HV_SYSCONF_OVERTEMP, this indicates
+   *  that the temperature has hit an upper limit and is no longer being
+   *  accurately tracked.
+   */
+  HV_SYSCONF_BOARD_TEMP      = 6
+
+} HV_SysconfQuery;
+
+/** Offset to subtract from returned Kelvin temperature to get degrees
+    Celsius. */
+#define HV_SYSCONF_TEMP_KTOC 273
+
+/** Pseudo-temperature value indicating that the temperature has
+ *  pegged at its upper limit and is no longer accurate; note that this is
+ *  the value after subtracting HV_SYSCONF_TEMP_KTOC. */
+#define HV_SYSCONF_OVERTEMP 999
+
+/** Query a configuration value from the hypervisor.
+ * @param query Which value is requested (HV_SYSCONF_xxx).
+ * @return The requested value, or -1 if the requested value is illegal or
+ *         unavailable.
+ */
+long hv_sysconf(HV_SysconfQuery query);
+
+
+/** Queries we can make for hv_confstr().
+ *
+ * These numbers are part of the binary API and guaranteed not to change.
+ */
+typedef enum {
+  /** An invalid value; do not use. */
+  _HV_CONFSTR_RESERVED        = 0,
+
+  /** Board part number. */
+  HV_CONFSTR_BOARD_PART_NUM   = 1,
+
+  /** Board serial number. */
+  HV_CONFSTR_BOARD_SERIAL_NUM = 2,
+
+  /** Chip serial number. */
+  HV_CONFSTR_CHIP_SERIAL_NUM  = 3,
+
+  /** Board revision level. */
+  HV_CONFSTR_BOARD_REV        = 4,
+
+  /** Hypervisor software version. */
+  HV_CONFSTR_HV_SW_VER        = 5,
+
+  /** The name for this chip model. */
+  HV_CONFSTR_CHIP_MODEL       = 6,
+
+  /** Human-readable board description. */
+  HV_CONFSTR_BOARD_DESC       = 7,
+
+  /** Human-readable description of the hypervisor configuration. */
+  HV_CONFSTR_HV_CONFIG        = 8,
+
+  /** Human-readable version string for the boot image (for instance,
+   *  who built it and when, what configuration file was used). */
+  HV_CONFSTR_HV_CONFIG_VER    = 9,
+
+  /** Mezzanine part number. */
+  HV_CONFSTR_MEZZ_PART_NUM   = 10,
+
+  /** Mezzanine serial number. */
+  HV_CONFSTR_MEZZ_SERIAL_NUM = 11,
+
+  /** Mezzanine revision level. */
+  HV_CONFSTR_MEZZ_REV        = 12,
+
+  /** Human-readable mezzanine description. */
+  HV_CONFSTR_MEZZ_DESC       = 13,
+
+  /** Control path for the onboard network switch. */
+  HV_CONFSTR_SWITCH_CONTROL  = 14,
+
+  /** Chip revision level. */
+  HV_CONFSTR_CHIP_REV        = 15
+
+} HV_ConfstrQuery;
+
+/** Query a configuration string from the hypervisor.
+ *
+ * @param query Identifier for the specific string to be retrieved
+ *        (HV_CONFSTR_xxx).
+ * @param buf Buffer in which to place the string.
+ * @param len Length of the buffer.
+ * @return If query is valid, then the length of the corresponding string,
+ *        including the trailing null; if this is greater than len, the string
+ *        was truncated.  If query is invalid, HV_EINVAL.  If the specified
+ *        buffer is not writable by the client, HV_EFAULT.
+ */
+int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len);
+
+/** Tile coordinate */
+typedef struct
+{
+  /** X coordinate, relative to supervisor's top-left coordinate */
+  int x;
+
+  /** Y coordinate, relative to supervisor's top-left coordinate */
+  int y;
+} HV_Coord;
+
+
+#if CHIP_HAS_IPI()
+
+/** Get the PTE for sending an IPI to a particular tile.
+ *
+ * @param tile Tile which will receive the IPI.
+ * @param pl Indicates which IPI registers: 0 = IPI_0, 1 = IPI_1.
+ * @param pte Filled with resulting PTE.
+ * @result Zero if no error, non-zero for invalid parameters.
+ */
+int hv_get_ipi_pte(HV_Coord tile, int pl, HV_PTE* pte);
+
+#else /* !CHIP_HAS_IPI() */
+
+/** A set of interrupts. */
+typedef __hv32 HV_IntrMask;
+
+/** The low interrupt numbers are reserved for use by the client in
+ *  delivering IPIs.  Any interrupt numbers higher than this value are
+ *  reserved for use by HV device drivers. */
+#define HV_MAX_IPI_INTERRUPT 7
+
+/** Enable a set of device interrupts.
+ *
+ * @param enab_mask Bitmap of interrupts to enable.
+ */
+void hv_enable_intr(HV_IntrMask enab_mask);
+
+/** Disable a set of device interrupts.
+ *
+ * @param disab_mask Bitmap of interrupts to disable.
+ */
+void hv_disable_intr(HV_IntrMask disab_mask);
+
+/** Clear a set of device interrupts.
+ *
+ * @param clear_mask Bitmap of interrupts to clear.
+ */
+void hv_clear_intr(HV_IntrMask clear_mask);
+
+/** Raise a set of device interrupts.
+ *
+ * @param raise_mask Bitmap of interrupts to raise.
+ */
+void hv_raise_intr(HV_IntrMask raise_mask);
+
+/** Trigger a one-shot interrupt on some tile
+ *
+ * @param tile Which tile to interrupt.
+ * @param interrupt Interrupt number to trigger; must be between 0 and
+ *        HV_MAX_IPI_INTERRUPT.
+ * @return HV_OK on success, or a hypervisor error code.
+ */
+HV_Errno hv_trigger_ipi(HV_Coord tile, int interrupt);
+
+#endif /* !CHIP_HAS_IPI() */
+
+/** Store a memory mapping in debug memory so that an external debugger can
+ * read it.  A maximum of 16 entries can be stored.
+ *
+ * @param va VA of memory that is mapped.
+ * @param len Length of mapped memory.
+ * @param pa PA of memory that is mapped.
+ * @return 0 on success, -1 if the maximum number of mappings is exceeded.
+ */
+int hv_store_mapping(HV_VirtAddr va, unsigned int len, HV_PhysAddr pa);
+
+/** Given a client PA and a length, return its real (HV) PA.
+ *
+ * @param cpa Client physical address.
+ * @param len Length of mapped memory.
+ * @return physical address, or -1 if cpa or len is not valid.
+ */
+HV_PhysAddr hv_inquire_realpa(HV_PhysAddr cpa, unsigned int len);
+
+/** RTC return flag for no RTC chip present.
+ */
+#define HV_RTC_NO_CHIP     0x1
+
+/** RTC return flag for low-voltage condition, indicating that battery had
+ * died and time read is unreliable.
+ */
+#define HV_RTC_LOW_VOLTAGE 0x2
+
+/** Date/Time of day */
+typedef struct {
+#if CHIP_WORD_SIZE() > 32
+  __hv64 tm_sec;   /**< Seconds, 0-59 */
+  __hv64 tm_min;   /**< Minutes, 0-59 */
+  __hv64 tm_hour;  /**< Hours, 0-23 */
+  __hv64 tm_mday;  /**< Day of month, 0-30 */
+  __hv64 tm_mon;   /**< Month, 0-11 */
+  __hv64 tm_year;  /**< Years since 1900, 0-199 */
+  __hv64 flags;    /**< Return flags, 0 if no error */
+#else
+  __hv32 tm_sec;   /**< Seconds, 0-59 */
+  __hv32 tm_min;   /**< Minutes, 0-59 */
+  __hv32 tm_hour;  /**< Hours, 0-23 */
+  __hv32 tm_mday;  /**< Day of month, 0-30 */
+  __hv32 tm_mon;   /**< Month, 0-11 */
+  __hv32 tm_year;  /**< Years since 1900, 0-199 */
+  __hv32 flags;    /**< Return flags, 0 if no error */
+#endif
+} HV_RTCTime;
+
+/** Read the current time-of-day clock.
+ * @return HV_RTCTime of current time (GMT).
+ */
+HV_RTCTime hv_get_rtc(void);
+
+
+/** Set the current time-of-day clock.
+ * @param time time to reset time-of-day to (GMT).
+ */
+void hv_set_rtc(HV_RTCTime time);
+
+/** Installs a context, comprising a page table and other attributes.
+ *
+ *  Once this service completes, page_table will be used to translate
+ *  subsequent virtual address references to physical memory.
+ *
+ *  Installing a context does not cause an implicit TLB flush.  Before
+ *  reusing an ASID value for a different address space, the client is
+ *  expected to flush old references from the TLB with hv_flush_asid().
+ *  (Alternately, hv_flush_all() may be used to flush many ASIDs at once.)
+ *  After invalidating a page table entry, changing its attributes, or
+ *  changing its target CPA, the client is expected to flush old references
+ *  from the TLB with hv_flush_page() or hv_flush_pages(). Making a
+ *  previously invalid page valid does not require a flush.
+ *
+ *  Specifying an invalid ASID, or an invalid CPA (client physical address)
+ *  (either as page_table, or within the referenced table),
+ *  or another page table data item documented above as illegal may
+ *  lead to client termination; since the validation of the table is
+ *  done as needed, this may happen before the service returns, or at
+ *  some later time, or never, depending upon the client's pattern of
+ *  memory references.  Page table entries which supply translations for
+ *  invalid virtual addresses may result in client termination, or may
+ *  be silently ignored.  "Invalid" in this context means a value which
+ *  was not provided to the client via the appropriate hv_inquire_* routine.
+ *
+ *  To support changing the instruction VAs at the same time as
+ *  installing the new page table, this call explicitly supports
+ *  setting the "lr" register to a different address and then jumping
+ *  directly to the hv_install_context() routine.  In this case, the
+ *  new page table does not need to contain any mapping for the
+ *  hv_install_context address itself.
+ *
+ * @param page_table Root of the page table.
+ * @param access PTE providing info on how to read the page table.  This
+ *   value must be consistent between multiple tiles sharing a page table,
+ *   and must also be consistent with any virtual mappings the client
+ *   may be using to access the page table.
+ * @param asid HV_ASID the page table is to be used for.
+ * @param flags Context flags, denoting attributes or privileges of the
+ *   current context (HV_CTX_xxx).
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid,
+                       __hv32 flags);
+
+#endif /* !__ASSEMBLER__ */
+
+#define HV_CTX_DIRECTIO     0x1   /**< Direct I/O requests are accepted from
+                                       PL0. */
+
+#ifndef __ASSEMBLER__
+
+/** Value returned from hv_inquire_context(). */
+typedef struct
+{
+  /** Physical address of page table */
+  HV_PhysAddr page_table;
+
+  /** PTE which defines access method for top of page table */
+  HV_PTE access;
+
+  /** ASID associated with this page table */
+  HV_ASID asid;
+
+  /** Context flags */
+  __hv32 flags;
+} HV_Context;
+
+/** Retrieve information about the currently installed context.
+ * @return The data passed to the last successful hv_install_context call.
+ */
+HV_Context hv_inquire_context(void);
+
+
+/** Flushes all translations associated with the named address space
+ *  identifier from the TLB and any other hypervisor data structures.
+ *  Translations installed with the "global" bit are not flushed.
+ *
+ *  Specifying an invalid ASID may lead to client termination.  "Invalid"
+ *  in this context means a value which was not provided to the client
+ *  via <tt>hv_inquire_asid()</tt>.
+ *
+ * @param asid HV_ASID whose entries are to be flushed.
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_flush_asid(HV_ASID asid);
+
+
+/** Flushes all translations associated with the named virtual address
+ *  and page size from the TLB and other hypervisor data structures. Only
+ *  pages visible to the current ASID are affected; note that this includes
+ *  global pages in addition to pages specific to the current ASID.
+ *
+ *  The supplied VA need not be aligned; it may be anywhere in the
+ *  subject page.
+ *
+ *  Specifying an invalid virtual address may lead to client termination,
+ *  or may silently succeed.  "Invalid" in this context means a value
+ *  which was not provided to the client via hv_inquire_virtual.
+ *
+ * @param address Address of the page to flush.
+ * @param page_size Size of pages to assume.
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_flush_page(HV_VirtAddr address, HV_PageSize page_size);
+
+
+/** Flushes all translations associated with the named virtual address range
+ *  and page size from the TLB and other hypervisor data structures. Only
+ *  pages visible to the current ASID are affected; note that this includes
+ *  global pages in addition to pages specific to the current ASID.
+ *
+ *  The supplied VA need not be aligned; it may be anywhere in the
+ *  subject page.
+ *
+ *  Specifying an invalid virtual address may lead to client termination,
+ *  or may silently succeed.  "Invalid" in this context means a value
+ *  which was not provided to the client via hv_inquire_virtual.
+ *
+ * @param start Address to flush.
+ * @param page_size Size of pages to assume.
+ * @param size The number of bytes to flush. Any page in the range
+ *        [start, start + size) will be flushed from the TLB.
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_flush_pages(HV_VirtAddr start, HV_PageSize page_size,
+                   unsigned long size);
+
+
+/** Flushes all non-global translations (if preserve_global is true),
+ *  or absolutely all translations (if preserve_global is false).
+ *
+ * @param preserve_global Non-zero if we want to preserve "global" mappings.
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_flush_all(int preserve_global);
+
+
+/** Restart machine with optional restart command and optional args.
+ * @param cmd Const pointer to command to restart with, or NULL
+ * @param args Const pointer to argument string to restart with, or NULL
+ */
+void hv_restart(HV_VirtAddr cmd, HV_VirtAddr args);
+
+
+/** Halt machine. */
+void hv_halt(void);
+
+
+/** Power off machine. */
+void hv_power_off(void);
+
+
+/** Re-enter virtual-is-physical memory translation mode and restart
+ *  execution at a given address.
+ * @param entry Client physical address at which to begin execution.
+ * @return A hypervisor error code on failure; if the operation is
+ *         successful the call does not return.
+ */
+int hv_reexec(HV_PhysAddr entry);
+
+
+/** Chip topology */
+typedef struct
+{
+  /** Relative coordinates of the querying tile */
+  HV_Coord coord;
+
+  /** Width of the querying supervisor's tile rectangle. */
+  int width;
+
+  /** Height of the querying supervisor's tile rectangle. */
+  int height;
+
+} HV_Topology;
+
+/** Returns information about the tile coordinate system.
+ *
+ * Each supervisor is given a rectangle of tiles it potentially controls.
+ * These tiles are labeled using a relative coordinate system with (0,0) as
+ * the upper left tile regardless of their physical location on the chip.
+ *
+ * This call returns both the size of that rectangle and the position
+ * within that rectangle of the querying tile.
+ *
+ * Not all tiles within that rectangle may be available to the supervisor;
+ * to get the precise set of available tiles, you must also call
+ * hv_inquire_tiles(HV_INQ_TILES_AVAIL, ...).
+ **/
+HV_Topology hv_inquire_topology(void);
+
+/** Sets of tiles we can retrieve with hv_inquire_tiles().
+ *
+ * These numbers are part of the binary API and guaranteed not to change.
+ */
+typedef enum {
+  /** An invalid value; do not use. */
+  _HV_INQ_TILES_RESERVED       = 0,
+
+  /** All available tiles within the supervisor's tile rectangle. */
+  HV_INQ_TILES_AVAIL           = 1,
+
+  /** The set of tiles used for hash-for-home caching. */
+  HV_INQ_TILES_HFH_CACHE       = 2,
+
+  /** The set of tiles that can be legally used as a LOTAR for a PTE. */
+  HV_INQ_TILES_LOTAR           = 3
+} HV_InqTileSet;
+
+/** Returns specific information about various sets of tiles within the
+ *  supervisor's tile rectangle.
+ *
+ * @param set Which set of tiles to retrieve.
+ * @param cpumask Pointer to a returned bitmask (in row-major order,
+ *        supervisor-relative) of tiles.  The low bit of the first word
+ *        corresponds to the tile at the upper left-hand corner of the
+ *        supervisor's rectangle.  In order for the supervisor to know the
+ *        buffer length to supply, it should first call hv_inquire_topology.
+ * @param length Number of bytes available for the returned bitmask.
+ **/
+HV_Errno hv_inquire_tiles(HV_InqTileSet set, HV_VirtAddr cpumask, int length);
+
+
+/** An identifier for a memory controller. Multiple memory controllers
+ * may be connected to one chip, and this uniquely identifies each one.
+ */
+typedef int HV_MemoryController;
+
+/** A range of physical memory. */
+typedef struct
+{
+  HV_PhysAddr start;   /**< Starting address. */
+  __hv64 size;         /**< Size in bytes. */
+  HV_MemoryController controller;  /**< Which memory controller owns this. */
+} HV_PhysAddrRange;
+
+/** Returns information about a range of physical memory.
+ *
+ * hv_inquire_physical() returns one of the ranges of client
+ * physical addresses which are available to this client.
+ *
+ * The first range is retrieved by specifying an idx of 0, and
+ * successive ranges are returned with subsequent idx values.  Ranges
+ * are ordered by increasing start address (i.e., as idx increases,
+ * so does start), do not overlap, and do not touch (i.e., the
+ * available memory is described with the fewest possible ranges).
+ *
+ * If an out-of-range idx value is specified, the returned size will be zero.
+ * A client can count the number of ranges by increasing idx until the
+ * returned size is zero. There will always be at least one valid range.
+ *
+ * Some clients might not be prepared to deal with more than one
+ * physical address range; they still ought to call this routine and
+ * issue a warning message if they're given more than one range, on the
+ * theory that whoever configured the hypervisor to provide that memory
+ * should know that it's being wasted.
+ */
+HV_PhysAddrRange hv_inquire_physical(int idx);
+
+/** Possible DIMM types. */
+typedef enum
+{
+  NO_DIMM                    = 0,  /**< No DIMM */
+  DDR2                       = 1,  /**< DDR2 */
+  DDR3                       = 2   /**< DDR3 */
+} HV_DIMM_Type;
+
+#ifdef __tilegx__
+
+/** Log2 of minimum DIMM bytes supported by the memory controller. */
+#define HV_MSH_MIN_DIMM_SIZE_SHIFT 29
+
+/** Max number of DIMMs contained by one memory controller. */
+#define HV_MSH_MAX_DIMMS 8
+
+#else
+
+/** Log2 of minimum DIMM bytes supported by the memory controller. */
+#define HV_MSH_MIN_DIMM_SIZE_SHIFT 26
+
+/** Max number of DIMMs contained by one memory controller. */
+#define HV_MSH_MAX_DIMMS 2
+
+#endif
+
+/** Number of bits to right-shift to get the DIMM type. */
+#define HV_DIMM_TYPE_SHIFT 0
+
+/** Bits to mask to get the DIMM type. */
+#define HV_DIMM_TYPE_MASK 0xf
+
+/** Number of bits to right-shift to get the DIMM size. */
+#define HV_DIMM_SIZE_SHIFT 4
+
+/** Bits to mask to get the DIMM size. */
+#define HV_DIMM_SIZE_MASK 0xf
+
+/** Memory controller information. */
+typedef struct
+{
+  HV_Coord coord;   /**< Relative tile coordinates of the port used by a
+                         specified tile to communicate with this controller. */
+  __hv64 speed;     /**< Speed of this controller in bytes per second. */
+} HV_MemoryControllerInfo;
+
+/** Returns information about a particular memory controller.
+ *
+ *  hv_inquire_memory_controller(coord, controller) returns information
+ *  about a particular controller.  Two pieces of information are returned:
+ *  - The relative coordinates of the port on the controller that the specified
+ *    tile would use to contact it.  The relative coordinates may lie
+ *    outside the supervisor's rectangle, i.e. the controller may not
+ *    be attached to a node managed by the querying node's supervisor.
+ *    In particular note that x or y may be negative.
+ *  - The speed of the memory controller.  (This is a not-to-exceed value
+ *    based on the raw hardware data rate, and may not be achievable in
+ *    practice; it is provided to give clients information on the relative
+ *    performance of the available controllers.)
+ *
+ *  Clients should avoid calling this interface with invalid values.
+ *  A client who does may be terminated.
+ * @param coord Tile for which to calculate the relative port position.
+ * @param controller Index of the controller; identical to value returned
+ *        from other routines like hv_inquire_physical.
+ * @return Information about the controller.
+ */
+HV_MemoryControllerInfo hv_inquire_memory_controller(HV_Coord coord,
+                                                     int controller);
+
+
+/** A range of virtual memory. */
+typedef struct
+{
+  HV_VirtAddr start;   /**< Starting address. */
+  __hv64 size;         /**< Size in bytes. */
+} HV_VirtAddrRange;
+
+/** Returns information about a range of virtual memory.
+ *
+ * hv_inquire_virtual() returns one of the ranges of client
+ * virtual addresses which are available to this client.
+ *
+ * The first range is retrieved by specifying an idx of 0, and
+ * successive ranges are returned with subsequent idx values.  Ranges
+ * are ordered by increasing start address (i.e., as idx increases,
+ * so does start), do not overlap, and do not touch (i.e., the
+ * available memory is described with the fewest possible ranges).
+ *
+ * If an out-of-range idx value is specified, the returned size will be zero.
+ * A client can count the number of ranges by increasing idx until the
+ * returned size is zero. There will always be at least one valid range.
+ *
+ * Some clients may well have various virtual addresses hardwired
+ * into themselves; for instance, their instruction stream may
+ * have been compiled expecting to live at a particular address.
+ * Such clients should use this interface to verify they've been
+ * given the virtual address space they expect, and issue a (potentially
+ * fatal) warning message otherwise.
+ *
+ * Note that the returned size is a __hv64, not a __hv32, so it is
+ * possible to express a single range spanning the entire 32-bit
+ * address space.
+ *
+ * @param idx Zero-based index of the range to retrieve.
+ * @return The requested range; its size is zero if idx is out of range.
+ */
+HV_VirtAddrRange hv_inquire_virtual(int idx);
+
+
+/** A range of ASID values. */
+typedef struct
+{
+  HV_ASID start;        /**< First ASID in the range. */
+  unsigned int size;    /**< Number of ASIDs. Zero for an invalid range. */
+} HV_ASIDRange;
+
+/** Returns information about a range of ASIDs.
+ *
+ * hv_inquire_asid() returns one of the ranges of address
+ * space identifiers which are available to this client.
+ *
+ * The first range is retrieved by specifying an idx of 0, and
+ * successive ranges are returned with subsequent idx values.  Ranges
+ * are ordered by increasing start value (i.e., as idx increases,
+ * so does start), do not overlap, and do not touch (i.e., the
+ * available ASIDs are described with the fewest possible ranges).
+ *
+ * If an out-of-range idx value is specified, the returned size will be zero.
+ * A client can count the number of ranges by increasing idx until the
+ * returned size is zero. There will always be at least one valid range.
+ *
+ * @param idx Zero-based index of the range to retrieve.
+ * @return The requested range; its size is zero if idx is out of range.
+ */
+HV_ASIDRange hv_inquire_asid(int idx);
+
+
+/** Waits for at least the specified number of nanoseconds then returns.
+ *
+ * NOTE: this deprecated function currently assumes a 750 MHz clock,
+ * and is thus not generally suitable for use.  New code should call
+ * hv_sysconf(HV_SYSCONF_CPU_SPEED), compute a cycle count to wait for,
+ * and delay by looping while checking the cycle counter SPR.
+ *
+ * @param nanosecs The number of nanoseconds to sleep.
+ */
+void hv_nanosleep(int nanosecs);
+
+
+/** Reads a character from the console without blocking.
+ *
+ * @return A value from 0-255 indicates the value successfully read.
+ * A negative value means no value was ready.
+ */
+int hv_console_read_if_ready(void);
+
+
+/** Writes a character to the console, blocking if the console is busy.
+ *
+ *  This call cannot fail. If the console is broken for some reason,
+ *  output will simply vanish.
+ * @param byte Character to write.
+ */
+void hv_console_putc(int byte);
+
+
+/** Writes a string to the console, blocking if the console is busy.
+ * @param bytes Pointer to characters to write.
+ * @param len Number of characters to write.
+ * @return Number of characters written, or HV_EFAULT if the buffer is invalid.
+ */
+int hv_console_write(HV_VirtAddr bytes, int len);
+
+
+/** Dispatch the next interrupt from the client downcall mechanism.
+ *
+ *  The hypervisor uses downcalls to notify the client of asynchronous
+ *  events.  Some of these events are hypervisor-created (like incoming
+ *  messages).  Some are regular interrupts which initially occur in
+ *  the hypervisor, and are normally handled directly by the client;
+ *  when these occur in a client's interrupt critical section, they must
+ *  be delivered through the downcall mechanism.
+ *
+ *  A downcall is initially delivered to the client as an INTCTRL_CL
+ *  interrupt, where CL is the client's PL.  Upon entry to the INTCTRL_CL
+ *  vector, the client must immediately invoke the hv_downcall_dispatch
+ *  service.  This service will not return; instead it will cause one of
+ *  the client's actual downcall-handling interrupt vectors to be entered.
+ *  The EX_CONTEXT registers in the client will be set so that when the
+ *  client irets, it will return to the code which was interrupted by the
+ *  INTCTRL_CL interrupt.
+ *
+ *  Under some circumstances, the firing of INTCTRL_CL can race with
+ *  the lowering of a device interrupt.  In such a case, the
+ *  hv_downcall_dispatch service may issue an iret instruction instead
+ *  of entering one of the client's actual downcall-handling interrupt
+ *  vectors.  This will return execution to the location that was
+ *  interrupted by INTCTRL_CL.
+ *
+ *  Any saving of registers should be done by the actual handling
+ *  vectors; no registers should be changed by the INTCTRL_CL handler.
+ *  In particular, the client should not use a jal instruction to invoke
+ *  the hv_downcall_dispatch service, as that would overwrite the client's
+ *  lr register.  Note that the hv_downcall_dispatch service may overwrite
+ *  one or more of the client's system save registers.
+ *
+ *  The client must not modify the INTCTRL_CL_STATUS SPR.  The hypervisor
+ *  will set this register to cause a downcall to happen, and will clear
+ *  it when no further downcalls are pending.
+ *
+ *  When a downcall vector is entered, the INTCTRL_CL interrupt will be
+ *  masked.  When the client is done processing a downcall, and is ready
+ *  to accept another, it must unmask this interrupt; if more downcalls
+ *  are pending, this will cause the INTCTRL_CL vector to be reentered.
+ *  Currently the following interrupt vectors can be entered through a
+ *  downcall:
+ *
+ *  INT_MESSAGE_RCV_DWNCL   (hypervisor message available)
+ *  INT_DEV_INTR_DWNCL      (device interrupt)
+ *  INT_DMATLB_MISS_DWNCL   (DMA TLB miss)
+ *  INT_SNITLB_MISS_DWNCL   (SNI TLB miss)
+ *  INT_DMATLB_ACCESS_DWNCL (DMA TLB access violation)
+ */
+void hv_downcall_dispatch(void);
+
+#endif /* !__ASSEMBLER__ */
+
+/** We use actual interrupt vectors which never occur (they're only there
+ *  to allow setting MPLs for related SPRs) for our downcall vectors.
+ */
+/** Message receive downcall interrupt vector */
+#define INT_MESSAGE_RCV_DWNCL    INT_BOOT_ACCESS
+/** DMA TLB miss downcall interrupt vector */
+#define INT_DMATLB_MISS_DWNCL    INT_DMA_ASID
+/** Static network processor instruction TLB miss downcall interrupt vector */
+#define INT_SNITLB_MISS_DWNCL    INT_SNI_ASID
+/** DMA TLB access violation downcall interrupt vector */
+#define INT_DMATLB_ACCESS_DWNCL  INT_DMA_CPL
+/** Device interrupt downcall interrupt vector */
+#define INT_DEV_INTR_DWNCL       INT_WORLD_ACCESS
+
+#ifndef __ASSEMBLER__
+
+/** Requests the inode for a specific full pathname.
+ *
+ * Performs a lookup in the hypervisor filesystem for a given filename.
+ * Multiple calls with the same filename will always return the same inode.
+ * If there is no such filename, HV_ENOENT is returned.
+ * A bad filename pointer may result in HV_EFAULT instead.
+ *
+ * @param filename Constant pointer to name of requested file
+ * @return Inode of requested file, or an error code (HV_ENOENT, HV_EFAULT)
+ */
+int hv_fs_findfile(HV_VirtAddr filename);
+
+
+/** Data returned from an fstat request.
+ * Note that this structure should be no more than 40 bytes in size so
+ * that it can always be returned completely in registers.
+ */
+typedef struct
+{
+  int size;             /**< Size of file (or HV_Errno on error) */
+  unsigned int flags;   /**< Flags (see HV_FS_FSTAT_FLAGS) */
+} HV_FS_StatInfo;
+
+/** Bitmask flags for fstat request */
+typedef enum
+{
+  HV_FS_ISDIR    = 0x0001   /**< Is the entry a directory? */
+} HV_FS_FSTAT_FLAGS;
+
+/** Get stat information on a given file inode.
+ *
+ * Return information on the file with the given inode.
+ *
+ * If the HV_FS_ISDIR bit is set, the "file" is a directory.  Reading
+ * it will return NUL-separated filenames (no directory part) relative
+ * to the path to the inode of the directory "file".  These can be
+ * appended to the path to the directory "file" after a forward slash
+ * to create additional filenames.  Note that it is not required
+ * that all valid paths be decomposable into valid parent directories;
+ * a filesystem may validly have just a few files, none of which have
+ * HV_FS_ISDIR set.  However, if clients may wish to enumerate the
+ * files in the filesystem, it is recommended to include all the
+ * appropriate parent directory "files" to give a consistent view.
+ *
+ * An invalid file inode will cause an HV_EBADF error to be returned.
+ *
+ * @param inode The inode number of the query
+ * @return An HV_FS_StatInfo structure
+ */
+HV_FS_StatInfo hv_fs_fstat(int inode);
+
+
+/** Read data from a specific hypervisor file.
+ * On error, may return HV_EBADF for a bad inode or HV_EFAULT for a bad buf.
+ * Reads near the end of the file will return fewer bytes than requested.
+ * Reads at or beyond the end of a file will return zero.
+ *
+ * @param inode the hypervisor file to read
+ * @param buf the buffer to read data into
+ * @param length the number of bytes of data to read
+ * @param offset the offset into the file to read the data from
+ * @return number of bytes successfully read, or an HV_Errno code
+ */
+int hv_fs_pread(int inode, HV_VirtAddr buf, int length, int offset);
+
+
+/** Read a 64-bit word from the specified physical address.
+ * The address must be 8-byte aligned.
+ * Specifying an invalid physical address will lead to client termination.
+ * @param addr The physical address to read
+ * @param access The PTE describing how to read the memory
+ * @return The 64-bit value read from the given address
+ */
+unsigned long long hv_physaddr_read64(HV_PhysAddr addr, HV_PTE access);
+
+
+/** Write a 64-bit word to the specified physical address.
+ * The address must be 8-byte aligned.
+ * Specifying an invalid physical address will lead to client termination.
+ * @param addr The physical address to write
+ * @param access The PTE that says how to write the memory
+ * @param val The 64-bit value to write to the given address
+ */
+void hv_physaddr_write64(HV_PhysAddr addr, HV_PTE access,
+                         unsigned long long val);
+
+
+/** Get the value of the command-line for the supervisor, if any.
+ * This will not include the filename of the booted supervisor, but may
+ * include configured-in boot arguments or the hv_restart() arguments.
+ * If the buffer is not long enough the hypervisor will NUL the first
+ * character of the buffer but not write any other data.
+ * @param buf The virtual address to write the command-line string to.
+ * @param length The length of buf, in characters.
+ * @return The actual length of the command line, including the trailing NUL
+ *         (may be larger than "length").
+ */
+int hv_get_command_line(HV_VirtAddr buf, int length);
+
+
+/** Set a new value for the command-line for the supervisor, which will
+ *  be returned from subsequent invocations of hv_get_command_line() on
+ *  this tile.
+ * @param buf The virtual address to read the command-line string from.
+ * @param length The length of buf, in characters; must be no more than
+ *        HV_COMMAND_LINE_LEN.
+ * @return Zero if successful, or a hypervisor error code.
+ */
+HV_Errno hv_set_command_line(HV_VirtAddr buf, int length);
+
+/** Maximum size of a command line passed to hv_set_command_line(); note
+ *  that a line returned from hv_get_command_line() could be larger than
+ *  this.*/
+#define HV_COMMAND_LINE_LEN  256
+
+/** Tell the hypervisor how to cache non-priority pages
+ * (its own as well as pages explicitly represented in page tables).
+ * Normally these will be represented as red/black pages, but
+ * when the supervisor starts to allocate "priority" pages in the PTE
+ * the hypervisor will need to start marking those pages as (e.g.) "red"
+ * and non-priority pages as either "black" (if they cache-alias
+ * with the existing priority pages) or "red/black" (if they don't).
+ * The bitmask provides information on which parts of the cache
+ * have been used for pinned pages so far on this tile; if (1 << N)
+ * appears in the bitmask, that indicates that a page has been marked
+ * "priority" whose PFN equals N, mod 8.
+ * @param bitmask A bitmap of priority page set values
+ */
+void hv_set_caching(unsigned int bitmask);
+
+
+/** Zero out a specified number of pages.
+ * The va and size must both be multiples of 4096.
+ * Caches are bypassed and memory is directly set to zero.
+ * This API is implemented only in the magic hypervisor and is intended
+ * to provide a performance boost to the minimal supervisor by
+ * giving it a fast way to zero memory pages when allocating them.
+ * @param va Virtual address where the page has been mapped
+ * @param size Number of bytes (must be a page size multiple)
+ */
+void hv_bzero_page(HV_VirtAddr va, unsigned int size);
+
+
+/** State object for the hypervisor messaging subsystem. */
+typedef struct
+{
+#if CHIP_VA_WIDTH() > 32
+  __hv64 opaque[2]; /**< No user-serviceable parts inside */
+#else
+  __hv32 opaque[2]; /**< No user-serviceable parts inside */
+#endif
+}
+HV_MsgState;
+
+/** Register to receive incoming messages.
+ *
+ *  This routine configures the current tile so that it can receive
+ *  incoming messages.  It must be called before the client can receive
+ *  messages with the hv_receive_message routine, and must be called on
+ *  each tile which will receive messages.
+ *
+ *  msgstate is the virtual address of a state object of type HV_MsgState.
+ *  Once the state is registered, the client must not read or write the
+ *  state object; doing so will cause undefined results.
+ *
+ *  If this routine is called with msgstate set to 0, the client's message
+ *  state will be freed and it will no longer be able to receive messages.
+ *  Note that this may cause the loss of any as-yet-undelivered messages
+ *  for the client.
+ *
+ *  If another client attempts to send a message to a client which has
+ *  not yet called hv_register_message_state, or which has freed its
+ *  message state, the message will not be delivered, as if the client
+ *  had insufficient buffering.
+ *
+ *  This routine returns HV_OK if the registration was successful, and
+ *  HV_EINVAL if the supplied state object is unsuitable.  Note that some
+ *  errors may not be detected during this routine, but might be detected
+ *  during a subsequent message delivery.
+ * @param msgstate State object to register, or 0 to free the client's
+ *        message state.
+ * @return HV_OK on success, or HV_EINVAL if the state object is unsuitable.
+ **/
+HV_Errno hv_register_message_state(HV_MsgState* msgstate);
+
+/** Possible message recipient states. */
+typedef enum
+{
+  HV_TO_BE_SENT,    /**< Not sent (not attempted, or recipient not ready) */
+  HV_SENT,          /**< Successfully sent */
+  HV_BAD_RECIP      /**< Bad recipient coordinates (permanent error) */
+} HV_Recip_State;
+
+/** Message recipient. */
+typedef struct
+{
+  /** X coordinate, relative to supervisor's top-left coordinate */
+  unsigned int x:11;
+
+  /** Y coordinate, relative to supervisor's top-left coordinate */
+  unsigned int y:11;
+
+  /** Status of this recipient */
+  HV_Recip_State state:10;
+} HV_Recipient;
+
+/** Send a message to a set of recipients.
+ *
+ *  This routine sends a message to a set of recipients.
+ *
+ *  recips is an array of HV_Recipient structures.  Each specifies a tile,
+ *  and a message state; initially, it is expected that the state will
+ *  be set to HV_TO_BE_SENT.  nrecip specifies the number of recipients
+ *  in the recips array.
+ *
+ *  For each recipient whose state is HV_TO_BE_SENT, the hypervisor attempts
+ *  to send that tile the specified message.  In order to successfully
+ *  receive the message, the receiver must be a valid tile to which the
+ *  sender has access, must not be the sending tile itself, and must have
+ *  sufficient free buffer space.  (The hypervisor guarantees that each
+ *  tile which has called hv_register_message_state() will be able to
+ *  buffer one message from every other tile which can legally send to it;
+ *  more space may be provided but is not guaranteed.)  If an invalid tile
+ *  is specified, the recipient's state is set to HV_BAD_RECIP; this is a
+ *  permanent delivery error.  If the message is successfully delivered
+ *  to the recipient's buffer, the recipient's state is set to HV_SENT.
+ *  Otherwise, the recipient's state is unchanged.  Message delivery is
+ *  synchronous; all attempts to send messages are completed before this
+ *  routine returns.
+ *
+ *  If no permanent delivery errors were encountered, the routine returns
+ *  the number of messages successfully sent: that is, the number of
+ *  recipients whose states changed from HV_TO_BE_SENT to HV_SENT during
+ *  this operation.  If any permanent delivery errors were encountered,
+ *  the routine returns HV_ERECIP.  In the event of permanent delivery
+ *  errors, it may be the case that delivery was not attempted to all
+ *  recipients; if any messages were successfully delivered, however,
+ *  recipients' state values will be updated appropriately.
+ *
+ *  It is explicitly legal to specify a recipient structure whose state
+ *  is not HV_TO_BE_SENT; such a recipient is ignored.  One suggested way
+ *  of using hv_send_message to send a message to multiple tiles is to set
+ *  up a list of recipients, and then call the routine repeatedly with the
+ *  same list, each time accumulating the number of messages successfully
+ *  sent, until all messages are sent, a permanent error is encountered,
+ *  or the desired number of attempts have been made.  When used in this
+ *  way, the routine will deliver each message no more than once to each
+ *  recipient.
+ *
+ *  Note that a message being successfully delivered to the recipient's
+ *  buffer space does not guarantee that it is received by the recipient,
+ *  either immediately or at any time in the future; the recipient might
+ *  never call hv_receive_message, or could register a different state
+ *  buffer, losing the message.
+ *
+ *  Specifying the same recipient more than once in the recipient list
+ *  is an error, which will not result in an error return but which may
+ *  or may not result in more than one message being delivered to the
+ *  recipient tile.
+ *
+ *  buf and buflen specify the message to be sent.  buf is a virtual address
+ *  which must be currently mapped in the client's page table; if not, the
+ *  routine returns HV_EFAULT.  buflen must be greater than zero and less
+ *  than or equal to HV_MAX_MESSAGE_SIZE, and nrecip must be less than the
+ *  number of tiles to which the sender has access; if not, the routine
+ *  returns HV_EINVAL.
+ * @param recips List of recipients.
+ * @param nrecip Number of recipients.
+ * @param buf Address of message data.
+ * @param buflen Length of message data.
+ **/
+int hv_send_message(HV_Recipient *recips, int nrecip,
+                    HV_VirtAddr buf, int buflen);
+
+/** Maximum hypervisor message size, in bytes */
+#define HV_MAX_MESSAGE_SIZE 28
+
+
+/** Return value from hv_receive_message() */
+typedef struct
+{
+  int msglen;     /**< Message length in bytes, or an error code */
+  __hv32 source;  /**< Code identifying message sender (HV_MSG_xxx) */
+} HV_RcvMsgInfo;
+
+#define HV_MSG_TILE 0x0         /**< Message source is another tile */
+#define HV_MSG_INTR 0x1         /**< Message source is a driver interrupt */
+
+/** Receive a message.
+ *
+ * This routine retrieves a message from the client's incoming message
+ * buffer.
+ *
+ * Multiple messages sent from a particular sending tile to a particular
+ * receiving tile are received in the order that they were sent; however,
+ * no ordering is guaranteed between messages sent by different tiles.
+ *
+ * Whenever a client's message buffer is empty, the first message
+ * subsequently received will cause the client's MESSAGE_RCV_DWNCL
+ * interrupt vector to be invoked through the interrupt downcall mechanism
+ * (see the description of the hv_downcall_dispatch() routine for details
+ * on downcalls).
+ *
+ * Another message-available downcall will not occur until a call to
+ * this routine is made when the message buffer is empty, and a message
+ * subsequently arrives.  Note that such a downcall could occur while
+ * this routine is executing.  If the calling code does not wish this
+ * to happen, it is recommended that this routine be called with the
+ * INTCTRL_1 interrupt masked, or inside an interrupt critical section.
+ *
+ * msgstate is the value previously passed to hv_register_message_state().
+ * buf is the virtual address of the buffer into which the message will
+ * be written; buflen is the length of the buffer.
+ *
+ * This routine returns an HV_RcvMsgInfo structure.  The msglen member
+ * of that structure is the length of the message received, zero if no
+ * message is available, or HV_E2BIG if the message is too large for the
+ * specified buffer.  If the message is too large, it is not consumed,
+ * and may be retrieved by a subsequent call to this routine specifying
+ * a sufficiently large buffer.  A buffer which is HV_MAX_MESSAGE_SIZE
+ * bytes long is guaranteed to be able to receive any possible message.
+ *
+ * The source member of the HV_RcvMsgInfo structure describes the sender
+ * of the message.  For messages sent by another client tile via an
+ * hv_send_message() call, this value is HV_MSG_TILE; for messages sent
+ * as a result of a device interrupt, this value is HV_MSG_INTR.
+ *
+ * NOTE(review): this prototype takes HV_MsgState by value, whereas
+ * hv_register_message_state() takes an HV_MsgState pointer; confirm
+ * which form callers are expected to pass.
+ *
+ * @param msgstate The value previously passed to
+ *        hv_register_message_state().
+ * @param buf Virtual address of the buffer to receive the message.
+ * @param buflen Length of buf, in bytes.
+ * @return An HV_RcvMsgInfo holding the message length (zero if no message
+ *         is available, or HV_E2BIG if buf is too small) and the sender
+ *         code (HV_MSG_xxx).
+ */
+
+HV_RcvMsgInfo hv_receive_message(HV_MsgState msgstate, HV_VirtAddr buf,
+                                 int buflen);
+
+
+/** Start remaining tiles owned by this supervisor.  Initially, only one tile
+ *  executes the client program; after it calls this service, the other tiles
+ *  are started.  This allows the initial tile to do one-time configuration
+ *  of shared data structures without having to lock them against simultaneous
+ *  access.
+ */
+void hv_start_all_tiles(void);
+
+
+/** Open a hypervisor device.
+ *
+ *  This service initializes an I/O device and its hypervisor driver software,
+ *  and makes it available for use.  The open operation is per-device per-chip;
+ *  once it has been performed, the device handle returned may be used in other
+ *  device services calls made by any tile.
+ *
+ * @param name Name of the device.  A base device name is just a text string
+ *        (say, "pcie").  If there is more than one instance of a device, the
+ *        base name is followed by a slash and a device number (say, "pcie/0").
+ *        Some devices may support further structure beneath those components;
+ *        most notably, devices which require control operations do so by
+ *        supporting reads and/or writes to a control device whose name
+ *        includes a trailing "/ctl" (say, "pcie/0/ctl").
+ * @param flags Flags (HV_DEV_xxx).
+ * @return A positive integer device handle, or a negative error code.
+ */
+int hv_dev_open(HV_VirtAddr name, __hv32 flags);
+
+
+/** Close a hypervisor device.
+ *
+ *  This service uninitializes an I/O device and its hypervisor driver
+ *  software, and makes it unavailable for use.  The close operation is
+ *  per-device per-chip; once it has been performed, the device is no longer
+ *  available.  Normally there is no need to ever call the close service.
+ *
+ * @param devhdl Device handle of the device to be closed.
+ * @return Zero if the close is successful, otherwise, a negative error code.
+ */
+int hv_dev_close(int devhdl);
+
+
+/** Read data from a hypervisor device synchronously.
+ *
+ *  This service transfers data from a hypervisor device to a memory buffer.
+ *  When the service returns, the data has been written to the memory buffer,
+ *  and the buffer will not be further modified by the driver.
+ *
+ *  No ordering is guaranteed between requests issued from different tiles.
+ *
+ *  Devices may choose to support both the synchronous and asynchronous read
+ *  operations, only one of them, or neither of them.
+ *
+ * @param devhdl Device handle of the device to be read from.
+ * @param flags Flags (HV_DEV_xxx).
+ * @param va Virtual address of the target data buffer.  This buffer must
+ *        be mapped in the currently installed page table; if not, HV_EFAULT
+ *        may be returned.
+ * @param len Number of bytes to be transferred.
+ * @param offset Driver-dependent offset.  For a random-access device, this is
+ *        often a byte offset from the beginning of the device; in other cases,
+ *        like on a control device, it may have a different meaning.
+ * @return A non-negative value if the read was at least partially successful;
+ *         otherwise, a negative error code.  The precise interpretation of
+ *         the return value is driver-dependent, but many drivers will return
+ *         the number of bytes successfully transferred.
+ */
+int hv_dev_pread(int devhdl, __hv32 flags, HV_VirtAddr va, __hv32 len,
+                 __hv64 offset);
+
+#define HV_DEV_NB_EMPTY     0x1   /**< Don't block when no bytes of data can
+                                       be transferred. */
+#define HV_DEV_NB_PARTIAL   0x2   /**< Don't block when some bytes, but not all
+                                       of the requested bytes, can be
+                                       transferred. */
+#define HV_DEV_NOCACHE      0x4   /**< The caller warrants that none of the
+                                       cache lines which might contain data
+                                       from the requested buffer are valid.
+                                       Useful with asynchronous operations
+                                       only. */
+
+#define HV_DEV_ALLFLAGS     (HV_DEV_NB_EMPTY | HV_DEV_NB_PARTIAL | \
+                             HV_DEV_NOCACHE)   /**< All HV_DEV_xxx flags */
+
+/** Write data to a hypervisor device synchronously.
+ *
+ *  This service transfers data from a memory buffer to a hypervisor device.
+ *  When the service returns, the data has been read from the memory buffer,
+ *  and the buffer may be overwritten by the client; the data may not
+ *  necessarily have been conveyed to the actual hardware I/O interface.
+ *
+ *  No ordering is guaranteed between requests issued from different tiles.
+ *
+ *  Devices may choose to support both the synchronous and asynchronous write
+ *  operations, only one of them, or neither of them.
+ *
+ * @param devhdl Device handle of the device to be written to.
+ * @param flags Flags (HV_DEV_xxx).
+ * @param va Virtual address of the source data buffer.  This buffer must
+ *        be mapped in the currently installed page table; if not, HV_EFAULT
+ *        may be returned.
+ * @param len Number of bytes to be transferred.
+ * @param offset Driver-dependent offset.  For a random-access device, this is
+ *        often a byte offset from the beginning of the device; in other cases,
+ *        like on a control device, it may have a different meaning.
+ * @return A non-negative value if the write was at least partially successful;
+ *         otherwise, a negative error code.  The precise interpretation of
+ *         the return value is driver-dependent, but many drivers will return
+ *         the number of bytes successfully transferred.
+ */
+int hv_dev_pwrite(int devhdl, __hv32 flags, HV_VirtAddr va, __hv32 len,
+                  __hv64 offset);
+
+
+/** Interrupt arguments, used in the asynchronous I/O interfaces. */
+#if CHIP_VA_WIDTH() > 32
+typedef __hv64 HV_IntArg;
+#else
+typedef __hv32 HV_IntArg;
+#endif
+
+/** Interrupt messages are delivered via the same mechanism as normal
+ *  messages, but have a message source of HV_MSG_INTR.  The message is
+ *  formatted as an HV_IntrMsg structure.
+ */
+
+typedef struct
+{
+  HV_IntArg intarg;  /**< Interrupt argument, passed to the poll/preada/pwritea
+                          services */
+  HV_IntArg intdata; /**< Interrupt-specific interrupt data */
+} HV_IntrMsg;
+
+/** Request an interrupt message when a device condition is satisfied.
+ *
+ *  This service requests that an interrupt message be delivered to the
+ *  requesting tile when a device becomes readable or writable, or when any
+ *  data queued to the device via previous write operations from this tile
+ *  has been actually sent out on the hardware I/O interface.  Devices may
+ *  choose to support any, all, or none of the available conditions.
+ *
+ *  If multiple conditions are specified, only one message will be
+ *  delivered.  If the event mask delivered to that interrupt handler
+ *  indicates that some of the conditions have not yet occurred, the
+ *  client must issue another poll() call if it wishes to wait for those
+ *  conditions.
+ *
+ *  Only one poll may be outstanding per device handle per tile.  If more than
+ *  one tile is polling on the same device and condition, they will all be
+ *  notified when it happens.  Because of this, clients may not assume that
+ *  the condition signaled is necessarily still true when they request a
+ *  subsequent service; for instance, the readable data which caused the
+ *  poll call to interrupt may have been read by another tile in the interim.
+ *
+ *  The notification interrupt message could come directly, or via the
+ *  downcall (intctrl1) method, depending on what the tile is doing
+ *  when the condition is satisfied.  Note that it is possible for the
+ *  requested interrupt to be delivered after this service is called but
+ *  before it returns.
+ *
+ * @param devhdl Device handle of the device to be polled.
+ * @param events Flags denoting the events which will cause the interrupt to
+ *        be delivered (HV_DEVPOLL_xxx).
+ * @param intarg Value which will be delivered as the intarg member of the
+ *        eventual interrupt message; the intdata member will be set to a
+ *        mask of HV_DEVPOLL_xxx values indicating which conditions have been
+ *        satisfied.
+ * @return Zero if the interrupt was successfully scheduled; otherwise, a
+ *         negative error code.
+ */
+int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg);
+
+#define HV_DEVPOLL_READ     0x1   /**< Test device for readability */
+#define HV_DEVPOLL_WRITE    0x2   /**< Test device for writability */
+#define HV_DEVPOLL_FLUSH    0x4   /**< Test device for output drained */
+
+
+/** Cancel a request for an interrupt when a device event occurs.
+ *
+ *  This service requests that no interrupt be delivered when the events
+ *  noted in the last-issued poll() call happen.  Once this service returns,
+ *  the interrupt has been canceled; however, it is possible for the interrupt
+ *  to be delivered after this service is called but before it returns.
+ *
+ * @param devhdl Device handle of the device on which to cancel polling.
+ * @return Zero if the poll was successfully canceled; otherwise, a negative
+ *         error code.
+ */
+int hv_dev_poll_cancel(int devhdl);
+
+
+/** Scatter-gather list for preada/pwritea calls. */
+typedef struct
+#if CHIP_VA_WIDTH() <= 32
+__attribute__ ((packed, aligned(4)))
+#endif
+{
+  HV_PhysAddr pa;  /**< Client physical address of the buffer segment. */
+  HV_PTE pte;      /**< Page table entry describing the caching and location
+                        override characteristics of the buffer segment.  Some
+                        drivers ignore this element and will require that
+                        the NOCACHE flag be set on their requests. */
+  __hv32 len;      /**< Length of the buffer segment. */
+} HV_SGL;
+
+#define HV_SGL_MAXLEN 16  /**< Maximum number of entries in a scatter-gather
+                               list */
+
+/** Read data from a hypervisor device asynchronously.
+ *
+ *  This service transfers data from a hypervisor device to a memory buffer.
+ *  When the service returns, the read has been scheduled.  When the read
+ *  completes, an interrupt message will be delivered, and the buffer will
+ *  not be further modified by the driver.
+ *
+ *  The number of possible outstanding asynchronous requests is defined by
+ *  each driver, but it is recommended that it be at least two requests
+ *  per tile per device.
+ *
+ *  No ordering is guaranteed between synchronous and asynchronous requests,
+ *  even those issued on the same tile.
+ *
+ *  The completion interrupt message could come directly, or via the downcall
+ *  (intctrl1) method, depending on what the tile is doing when the read
+ *  completes.  Interrupts do not coalesce; one is delivered for each
+ *  asynchronous I/O request.  Note that it is possible for the requested
+ *  interrupt to be delivered after this service is called but before it
+ *  returns.
+ *
+ *  Devices may choose to support both the synchronous and asynchronous read
+ *  operations, only one of them, or neither of them.
+ *
+ * @param devhdl Device handle of the device to be read from.
+ * @param flags Flags (HV_DEV_xxx).
+ * @param sgl_len Number of elements in the scatter-gather list.
+ * @param sgl Scatter-gather list describing the memory to which data will be
+ *        written.
+ * @param offset Driver-dependent offset.  For a random-access device, this is
+ *        often a byte offset from the beginning of the device; in other cases,
+ *        like on a control device, it may have a different meaning.
+ * @param intarg Value which will be delivered as the intarg member of the
+ *        eventual interrupt message; the intdata member will be set to the
+ *        normal return value from the read request.
+ * @return Zero if the read was successfully scheduled; otherwise, a negative
+ *         error code.  Note that some drivers may choose to pre-validate
+ *         their arguments, and may thus detect certain device error
+ *         conditions at this time rather than when the completion notification
+ *         occurs, but this is not required.
+ */
+int hv_dev_preada(int devhdl, __hv32 flags, __hv32 sgl_len,
+                  HV_SGL sgl[/* sgl_len */], __hv64 offset, HV_IntArg intarg);
+
+
+/** Write data to a hypervisor device asynchronously.
+ *
+ *  This service transfers data from a memory buffer to a hypervisor
+ *  device.  When the service returns, the write has been scheduled.
+ *  When the write completes, an interrupt message will be delivered,
+ *  and the buffer may be overwritten by the client; the data may not
+ *  necessarily have been conveyed to the actual hardware I/O interface.
+ *
+ *  The number of possible outstanding asynchronous requests is defined by
+ *  each driver, but it is recommended that it be at least two requests
+ *  per tile per device.
+ *
+ *  No ordering is guaranteed between synchronous and asynchronous requests,
+ *  even those issued on the same tile.
+ *
+ *  The completion interrupt message could come directly, or via the downcall
+ *  (intctrl1) method, depending on what the tile is doing when the write
+ *  completes.  Interrupts do not coalesce; one is delivered for each
+ *  asynchronous I/O request.  Note that it is possible for the requested
+ *  interrupt to be delivered after this service is called but before it
+ *  returns.
+ *
+ *  Devices may choose to support both the synchronous and asynchronous write
+ *  operations, only one of them, or neither of them.
+ *
+ * @param devhdl Device handle of the device to be written to.
+ * @param flags Flags (HV_DEV_xxx).
+ * @param sgl_len Number of elements in the scatter-gather list.
+ * @param sgl Scatter-gather list describing the memory from which data will be
+ *        read.
+ * @param offset Driver-dependent offset.  For a random-access device, this is
+ *        often a byte offset from the beginning of the device; in other cases,
+ *        like on a control device, it may have a different meaning.
+ * @param intarg Value which will be delivered as the intarg member of the
+ *        eventual interrupt message; the intdata member will be set to the
+ *        normal return value from the write request.
+ * @return Zero if the write was successfully scheduled; otherwise, a negative
+ *         error code.  Note that some drivers may choose to pre-validate
+ *         their arguments, and may thus detect certain device error
+ *         conditions at this time rather than when the completion notification
+ *         occurs, but this is not required.
+ */
+int hv_dev_pwritea(int devhdl, __hv32 flags, __hv32 sgl_len,
+                   HV_SGL sgl[/* sgl_len */], __hv64 offset, HV_IntArg intarg);
+
+
+/** Define a pair of tile and ASID to identify a user process context. */
+typedef struct
+{
+  /** X coordinate, relative to supervisor's top-left coordinate */
+  unsigned int x:11;
+
+  /** Y coordinate, relative to supervisor's top-left coordinate */
+  unsigned int y:11;
+
+  /** ASID of the process on this x,y tile */
+  HV_ASID asid:10;
+} HV_Remote_ASID;
+
+/** Flush cache and/or TLB state on remote tiles.
+ *
+ * @param cache_pa Client physical address to flush from cache (ignored if
+ *        the length encoded in cache_control is zero, or if
+ *        HV_FLUSH_EVICT_L2 is set, or if cache_cpumask is NULL).
+ * @param cache_control This argument allows you to specify a length of
+ *        physical address space to flush (maximum HV_FLUSH_MAX_CACHE_LEN).
+ *        You can "or" in HV_FLUSH_EVICT_L2 to flush the whole L2 cache.
+ *        You can "or" in HV_FLUSH_EVICT_L1I to flush the whole L1I cache.
+ *        HV_FLUSH_ALL flushes all caches.
+ * @param cache_cpumask Bitmask (in row-major order, supervisor-relative) of
+ *        tile indices to perform cache flush on.  The low bit of the first
+ *        word corresponds to the tile at the upper left-hand corner of the
+ *        supervisor's rectangle.  If passed as a NULL pointer, equivalent
+ *        to an empty bitmask.  On chips which support hash-for-home caching,
+ *        if passed as -1, equivalent to a mask containing tiles which could
+ *        be doing hash-for-home caching.
+ * @param tlb_va Virtual address to flush from TLB (ignored if
+ *        tlb_length is zero or tlb_cpumask is NULL).
+ * @param tlb_length Number of bytes of data to flush from the TLB.
+ * @param tlb_pgsize Page size to use for TLB flushes.
+ *        tlb_va and tlb_length need not be aligned to this size.
+ * @param tlb_cpumask Bitmask for tlb flush, like cache_cpumask.
+ *        If passed as a NULL pointer, equivalent to an empty bitmask.
+ * @param asids Pointer to an HV_Remote_ASID array of tile/ASID pairs to flush.
+ * @param asidcount Number of HV_Remote_ASID entries in asids[].
+ * @return Zero for success, or else HV_EINVAL or HV_EFAULT for errors that
+ *        are detected while parsing the arguments.
+ */
+int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
+                    unsigned long* cache_cpumask,
+                    HV_VirtAddr tlb_va, unsigned long tlb_length,
+                    unsigned long tlb_pgsize, unsigned long* tlb_cpumask,
+                    HV_Remote_ASID* asids, int asidcount);
+
+/** Include in cache_control to ensure a flush of the entire L2. */
+#define HV_FLUSH_EVICT_L2 (1UL << 31)
+
+/** Include in cache_control to ensure a flush of the entire L1I. */
+#define HV_FLUSH_EVICT_L1I (1UL << 30)
+
+/** Maximum legal size to use for the "length" component of cache_control. */
+#define HV_FLUSH_MAX_CACHE_LEN ((1UL << 30) - 1)
+
+/** Use for cache_control to ensure a flush of all caches (all bits set; parenthesized so it expands safely in any expression). */
+#define HV_FLUSH_ALL (-1UL)
+
+#else   /* __ASSEMBLER__ */
+
+/** Include in cache_control to ensure a flush of the entire L2. */
+#define HV_FLUSH_EVICT_L2 (1 << 31)
+
+/** Include in cache_control to ensure a flush of the entire L1I. */
+#define HV_FLUSH_EVICT_L1I (1 << 30)
+
+/** Maximum legal size to use for the "length" component of cache_control. */
+#define HV_FLUSH_MAX_CACHE_LEN ((1 << 30) - 1)
+
+/** Use for cache_control to ensure a flush of all caches. */
+#define HV_FLUSH_ALL -1
+
+#endif  /* __ASSEMBLER__ */
+
+#ifndef __ASSEMBLER__
+
+/** Return a 64-bit value corresponding to the PTE if needed */
+#define hv_pte_val(pte) ((pte).val)
+
+/** Cast a 64-bit value to an HV_PTE */
+#define hv_pte(val) ((HV_PTE) { val })
+
+#endif  /* !__ASSEMBLER__ */
+
+
+/** Bits in the size of an HV_PTE */
+#define HV_LOG2_PTE_SIZE 3
+
+/** Size of an HV_PTE */
+#define HV_PTE_SIZE (1 << HV_LOG2_PTE_SIZE)
+
+
+/* Bits in HV_PTE's low word. */
+#define HV_PTE_INDEX_PRESENT          0  /**< PTE is valid */
+#define HV_PTE_INDEX_MIGRATING        1  /**< Page is migrating */
+#define HV_PTE_INDEX_CLIENT0          2  /**< Page client state 0 */
+#define HV_PTE_INDEX_CLIENT1          3  /**< Page client state 1 */
+#define HV_PTE_INDEX_NC               4  /**< L1$/L2$ incoherent with L3$ */
+#define HV_PTE_INDEX_NO_ALLOC_L1      5  /**< Page is uncached in local L1$ */
+#define HV_PTE_INDEX_NO_ALLOC_L2      6  /**< Page is uncached in local L2$ */
+#define HV_PTE_INDEX_CACHED_PRIORITY  7  /**< Page is priority cached */
+#define HV_PTE_INDEX_PAGE             8  /**< PTE describes a page */
+#define HV_PTE_INDEX_GLOBAL           9  /**< Page is global */
+#define HV_PTE_INDEX_USER            10  /**< Page is user-accessible */
+#define HV_PTE_INDEX_ACCESSED        11  /**< Page has been accessed */
+#define HV_PTE_INDEX_DIRTY           12  /**< Page has been written */
+                                         /*   Bits 13-15 are reserved for
+                                              future use. */
+#define HV_PTE_INDEX_MODE            16  /**< Page mode; see HV_PTE_MODE_xxx */
+#define HV_PTE_MODE_BITS              3  /**< Number of bits in mode */
+                                         /*   Bit 19 is reserved for
+                                              future use. */
+#define HV_PTE_INDEX_LOTAR           20  /**< Page's LOTAR; must be high bits
+                                              of word */
+#define HV_PTE_LOTAR_BITS            12  /**< Number of bits in a LOTAR */
+
+/* Bits in HV_PTE's high word. */
+#define HV_PTE_INDEX_READABLE        32  /**< Page is readable */
+#define HV_PTE_INDEX_WRITABLE        33  /**< Page is writable */
+#define HV_PTE_INDEX_EXECUTABLE      34  /**< Page is executable */
+#define HV_PTE_INDEX_PTFN            35  /**< Page's PTFN; must be high bits
+                                              of word */
+#define HV_PTE_PTFN_BITS             29  /**< Number of bits in a PTFN */
+
+/** Position of the PFN field within the PTE (subset of the PTFN). */
+#define HV_PTE_INDEX_PFN (HV_PTE_INDEX_PTFN + (HV_LOG2_PAGE_SIZE_SMALL - \
+                                               HV_LOG2_PAGE_TABLE_ALIGN))
+
+/** Length of the PFN field within the PTE (subset of the PTFN): PTFN width minus the low PTFN bits that lie below the PFN. */
+#define HV_PTE_INDEX_PFN_BITS (HV_PTE_PTFN_BITS - \
+                               (HV_LOG2_PAGE_SIZE_SMALL - \
+                                HV_LOG2_PAGE_TABLE_ALIGN))
+
+/*
+ * Legal values for the PTE's mode field
+ */
+/** Data is not resident in any caches; loads and stores access memory
+ *  directly.
+ */
+#define HV_PTE_MODE_UNCACHED          1
+
+/** Data is resident in the tile's local L1 and/or L2 caches; if a load
+ *  or store misses there, it goes to memory.
+ *
+ *  The copy in the local L1$/L2$ is not invalidated when the copy in
+ *  memory is changed.
+ */
+#define HV_PTE_MODE_CACHE_NO_L3       2
+
+/** Data is resident in the tile's local L1 and/or L2 caches.  If a load
+ *  or store misses there, it goes to an L3 cache in a designated tile;
+ *  if it misses there, it goes to memory.
+ *
+ *  If the NC bit is not set, the copy in the local L1$/L2$ is invalidated
+ *  when the copy in the remote L3$ is changed.  Otherwise, such
+ *  invalidation will not occur.
+ *
+ *  Chips for which CHIP_HAS_COHERENT_LOCAL_CACHE() is 0 do not support
+ *  invalidation from an L3$ to another tile's L1$/L2$.  If the NC bit is
+ *  clear on such a chip, no copy is kept in the local L1$/L2$ in this mode.
+ */
+#define HV_PTE_MODE_CACHE_TILE_L3     3
+
+/** Data is resident in the tile's local L1 and/or L2 caches.  If a load
+ *  or store misses there, it goes to an L3 cache in one of a set of
+ *  designated tiles; if it misses there, it goes to memory.  Which tile
+ *  is chosen from the set depends upon a hash function applied to the
+ *  physical address.  This mode is not supported on chips for which
+ *  CHIP_HAS_CBOX_HOME_MAP() is 0.
+ *
+ *  If the NC bit is not set, the copy in the local L1$/L2$ is invalidated
+ *  when the copy in the remote L3$ is changed.  Otherwise, such
+ *  invalidation will not occur.
+ *
+ *  Chips for which CHIP_HAS_COHERENT_LOCAL_CACHE() is 0 do not support
+ *  invalidation from an L3$ to another tile's L1$/L2$.  If the NC bit is
+ *  clear on such a chip, no copy is kept in the local L1$/L2$ in this mode.
+ */
+#define HV_PTE_MODE_CACHE_HASH_L3     4
+
+/** Data is not resident in memory; accesses are instead made to an I/O
+ *  device, whose tile coordinates are given by the PTE's LOTAR field.
+ *  This mode is only supported on chips for which CHIP_HAS_MMIO() is 1.
+ *  The EXECUTABLE bit may not be set in an MMIO PTE.
+ */
+#define HV_PTE_MODE_MMIO              5
+
+
+/* C wants 1ULL so it is typed as __hv64, but the assembler needs just numbers.
+ * The assembler can't handle shifts greater than 31, but treats them
+ * as shifts mod 32, so assembler code must be aware of which word
+ * the bit belongs in when using these macros.
+ */
+#ifdef __ASSEMBLER__
+#define __HV_PTE_ONE 1        /**< One, for assembler */
+#else
+#define __HV_PTE_ONE 1ULL     /**< One, for C */
+#endif
+
+/** Is this PTE present?
+ *
+ * If this bit is set, this PTE represents a valid translation or level-2
+ * page table pointer.  Otherwise, the page table does not contain a
+ * translation for the subject virtual pages.
+ *
+ * If this bit is not set, the other bits in the PTE are not
+ * interpreted by the hypervisor, and may contain any value.
+ */
+#define HV_PTE_PRESENT               (__HV_PTE_ONE << HV_PTE_INDEX_PRESENT)
+
+/** Does this PTE map a page?
+ *
+ * If this bit is set in the level-1 page table, the entry should be
+ * interpreted as a level-2 page table entry mapping a large page.
+ *
+ * This bit should not be modified by the client while PRESENT is set, as
+ * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits.
+ *
+ * In a level-2 page table, this bit is ignored and must be zero.
+ */
+#define HV_PTE_PAGE                  (__HV_PTE_ONE << HV_PTE_INDEX_PAGE)
+
+/** Is this a global (non-ASID) mapping?
+ *
+ * If this bit is set, the translations established by this PTE will
+ * not be flushed from the TLB by the hv_flush_asid() service; they
+ * will be flushed by the hv_flush_page() or hv_flush_pages() services.
+ *
+ * Setting this bit for translations which are identical in all page
+ * tables (for instance, code and data belonging to a client OS) can
+ * be very beneficial, as it will reduce the number of TLB misses.
+ * Note that, while it is not an error which will be detected by the
+ * hypervisor, it is an extremely bad idea to set this bit for
+ * translations which are _not_ identical in all page tables.
+ *
+ * This bit should not be modified by the client while PRESENT is set, as
+ * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_GLOBAL                (__HV_PTE_ONE << HV_PTE_INDEX_GLOBAL)
+
+/** Is this mapping accessible to users?
+ *
+ * If this bit is set, code running at any PL will be permitted to
+ * access the virtual addresses mapped by this PTE.  Otherwise, only
+ * code running at PL 1 or above will be allowed to do so.
+ *
+ * This bit should not be modified by the client while PRESENT is set, as
+ * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_USER                  (__HV_PTE_ONE << HV_PTE_INDEX_USER)
+
+/** Has this mapping been accessed?
+ *
+ * This bit is set by the hypervisor when the memory described by the
+ * translation is accessed for the first time.  It is never cleared by
+ * the hypervisor, but may be cleared by the client.  After the bit
+ * has been cleared, subsequent references are not guaranteed to set
+ * it again until the translation has been flushed from the TLB.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_ACCESSED              (__HV_PTE_ONE << HV_PTE_INDEX_ACCESSED)
+
+/** Is this mapping dirty?
+ *
+ * This bit is set by the hypervisor when the memory described by the
+ * translation is written for the first time.  It is never cleared by
+ * the hypervisor, but may be cleared by the client.  After the bit
+ * has been cleared, subsequent references are not guaranteed to set
+ * it again until the translation has been flushed from the TLB.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_DIRTY                 (__HV_PTE_ONE << HV_PTE_INDEX_DIRTY)
+
+/** Migrating bit in PTE.
+ *
+ * This bit is guaranteed not to be inspected or modified by the
+ * hypervisor.  The name is indicative of the suggested use by the client
+ * to tag pages whose L3 cache is being migrated from one cpu to another.
+ */
+#define HV_PTE_MIGRATING             (__HV_PTE_ONE << HV_PTE_INDEX_MIGRATING)
+
+/** Client-private bit in PTE.
+ *
+ * This bit is guaranteed not to be inspected or modified by the
+ * hypervisor.
+ */
+#define HV_PTE_CLIENT0               (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT0)
+
+/** Client-private bit in PTE.
+ *
+ * This bit is guaranteed not to be inspected or modified by the
+ * hypervisor.
+ */
+#define HV_PTE_CLIENT1               (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT1)
+
+/** Non-coherent (NC) bit in PTE.
+ *
+ * If this bit is set, the mapping that is set up will be non-coherent
+ * (also known as non-inclusive).  This means that changes to the L3
+ * cache will not cause a local copy to be invalidated.  It is generally
+ * recommended only for read-only mappings.
+ *
+ * In level-1 PTEs, if the Page bit is clear, this bit determines how the
+ * level-2 page table is accessed.
+ */
+#define HV_PTE_NC                    (__HV_PTE_ONE << HV_PTE_INDEX_NC)
+
+/** Is this page prevented from filling the L1$?
+ *
+ * If this bit is set, the page described by the PTE will not be cached
+ * in the local cpu's L1 cache.
+ *
+ * If CHIP_HAS_NC_AND_NOALLOC_BITS() is not true in <chip.h> for this chip,
+ * it is illegal to use this attribute, and may cause client termination.
+ *
+ * In level-1 PTEs, if the Page bit is clear, this bit
+ * determines how the level-2 page table is accessed.
+ */
+#define HV_PTE_NO_ALLOC_L1           (__HV_PTE_ONE << HV_PTE_INDEX_NO_ALLOC_L1)
+
+/** Is this page prevented from filling the L2$?
+ *
+ * If this bit is set, the page described by the PTE will not be cached
+ * in the local cpu's L2 cache.
+ *
+ * If CHIP_HAS_NC_AND_NOALLOC_BITS() is not true in <chip.h> for this chip,
+ * it is illegal to use this attribute, and may cause client termination.
+ *
+ * In level-1 PTEs, if the Page bit is clear, this bit determines how the
+ * level-2 page table is accessed.
+ */
+#define HV_PTE_NO_ALLOC_L2           (__HV_PTE_ONE << HV_PTE_INDEX_NO_ALLOC_L2)
+
+/** Is this a priority page?
+ *
+ * If this bit is set, the page described by the PTE will be given
+ * priority in the cache.  Normally this translates into allowing the
+ * page to use only the "red" half of the cache.  The client may wish to
+ * then use the hv_set_caching service to specify that other pages which
+ * alias this page will use only the "black" half of the cache.
+ *
+ * If the Cached Priority bit is clear, the hypervisor uses the
+ * current hv_set_caching() value to choose how to cache the page.
+ *
+ * It is illegal to set the Cached Priority bit if the Non-Cached bit
+ * is set and the Cached Remotely bit is clear, i.e. if requests to
+ * the page map directly to memory.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_CACHED_PRIORITY       (__HV_PTE_ONE << \
+                                      HV_PTE_INDEX_CACHED_PRIORITY)
+
+/** Is this a readable mapping?
+ *
+ * If this bit is set, code will be permitted to read from (e.g.,
+ * issue load instructions against) the virtual addresses mapped by
+ * this PTE.
+ *
+ * It is illegal for this bit to be clear if the Writable bit is set.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_READABLE              (__HV_PTE_ONE << HV_PTE_INDEX_READABLE)
+
+/** Is this a writable mapping?
+ *
+ * If this bit is set, code will be permitted to write to (e.g., issue
+ * store instructions against) the virtual addresses mapped by this
+ * PTE.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_WRITABLE              (__HV_PTE_ONE << HV_PTE_INDEX_WRITABLE)
+
+/** Is this an executable mapping?
+ *
+ * If this bit is set, code will be permitted to execute from
+ * (e.g., jump to) the virtual addresses mapped by this PTE.
+ *
+ * This bit applies to any processor on the tile, if there are more
+ * than one.
+ *
+ * This bit is ignored in level-1 PTEs unless the Page bit is set.
+ */
+#define HV_PTE_EXECUTABLE            (__HV_PTE_ONE << HV_PTE_INDEX_EXECUTABLE)
+
+/** The width of a LOTAR's x or y bitfield. */
+#define HV_LOTAR_WIDTH 11
+
+/** Converts an x,y pair to a LOTAR value. */
+#define HV_XY_TO_LOTAR(x, y) ((HV_LOTAR)(((x) << HV_LOTAR_WIDTH) | (y)))
+
+/** Extracts the X component of a lotar. */
+#define HV_LOTAR_X(lotar) ((lotar) >> HV_LOTAR_WIDTH)
+
+/** Extracts the Y component of a lotar. */
+#define HV_LOTAR_Y(lotar) ((lotar) & ((1 << HV_LOTAR_WIDTH) - 1))
+
+#ifndef __ASSEMBLER__
+
+/** Define accessor functions for a PTE bit. */
+#define _HV_BIT(name, bit)                                      \
+static __inline int                                             \
+hv_pte_get_##name(HV_PTE pte)                                   \
+{                                                               \
+  return (pte.val >> HV_PTE_INDEX_##bit) & 1;                   \
+}                                                               \
+                                                                \
+static __inline HV_PTE                                          \
+hv_pte_set_##name(HV_PTE pte)                                   \
+{                                                               \
+  pte.val |= 1ULL << HV_PTE_INDEX_##bit;                        \
+  return pte;                                                   \
+}                                                               \
+                                                                \
+static __inline HV_PTE                                          \
+hv_pte_clear_##name(HV_PTE pte)                                 \
+{                                                               \
+  pte.val &= ~(1ULL << HV_PTE_INDEX_##bit);                     \
+  return pte;                                                   \
+}
+
+/* Generate accessors to get, set, and clear various PTE flags.
+ */
+_HV_BIT(present,         PRESENT)
+_HV_BIT(page,            PAGE)
+_HV_BIT(client0,         CLIENT0)
+_HV_BIT(client1,         CLIENT1)
+_HV_BIT(migrating,       MIGRATING)
+_HV_BIT(nc,              NC)
+_HV_BIT(readable,        READABLE)
+_HV_BIT(writable,        WRITABLE)
+_HV_BIT(executable,      EXECUTABLE)
+_HV_BIT(accessed,        ACCESSED)
+_HV_BIT(dirty,           DIRTY)
+_HV_BIT(no_alloc_l1,     NO_ALLOC_L1)
+_HV_BIT(no_alloc_l2,     NO_ALLOC_L2)
+_HV_BIT(cached_priority, CACHED_PRIORITY)
+_HV_BIT(global,          GLOBAL)
+_HV_BIT(user,            USER)
+
+#undef _HV_BIT
+
+/** Get the page mode from the PTE.
+ *
+ * This field generally determines whether and how accesses to the page
+ * are cached; the HV_PTE_MODE_xxx symbols define the legal values for the
+ * page mode.  The NC, NO_ALLOC_L1, and NO_ALLOC_L2 bits modify this
+ * general policy.
+ */
+static __inline unsigned int
+hv_pte_get_mode(const HV_PTE pte)
+{
+  return (((__hv32) pte.val) >> HV_PTE_INDEX_MODE) &
+         ((1 << HV_PTE_MODE_BITS) - 1);
+}
+
+/** Set the page mode into a PTE.  See hv_pte_get_mode. */
+static __inline HV_PTE
+hv_pte_set_mode(HV_PTE pte, unsigned int val)
+{
+  pte.val &= ~(((1ULL << HV_PTE_MODE_BITS) - 1) << HV_PTE_INDEX_MODE);
+  pte.val |= val << HV_PTE_INDEX_MODE;
+  return pte;
+}
+
+/** Get the page frame number from the PTE.
+ *
+ * This field contains the upper bits of the CPA (client physical
+ * address) of the target page; the complete CPA is this field with
+ * HV_LOG2_PAGE_SIZE_SMALL zero bits appended to it.
+ *
+ * For PTEs in a level-1 page table where the Page bit is set, the
+ * CPA must be aligned modulo the large page size.
+ */
+static __inline unsigned int
+hv_pte_get_pfn(const HV_PTE pte)
+{
+  return pte.val >> HV_PTE_INDEX_PFN;
+}
+
+
+/** Set the page frame number into a PTE.  See hv_pte_get_pfn. */
+static __inline HV_PTE
+hv_pte_set_pfn(HV_PTE pte, unsigned int val)
+{
+  /*
+   * Note that the use of "PTFN" in the next line is intentional; we
+   * don't want any garbage lower bits left in that field.
+   */
+  pte.val &= ~(((1ULL << HV_PTE_PTFN_BITS) - 1) << HV_PTE_INDEX_PTFN);
+  pte.val |= (__hv64) val << HV_PTE_INDEX_PFN;
+  return pte;
+}
+
+/** Get the page table frame number from the PTE.
+ *
+ * This field contains the upper bits of the CPA (client physical
+ * address) of the target page table; the complete CPA is this field
+ * with HV_LOG2_PAGE_TABLE_ALIGN zero bits appended to it.
+ *
+ * For PTEs in a level-1 page table when the Page bit is not set, the
+ * CPA must be aligned modulo the stricter of HV_PAGE_TABLE_ALIGN and
+ * the level-2 page table size.
+ */
+static __inline unsigned long
+hv_pte_get_ptfn(const HV_PTE pte)
+{
+  return pte.val >> HV_PTE_INDEX_PTFN;
+}
+
+
+/** Set the page table frame number into a PTE.  See hv_pte_get_ptfn. */
+static __inline HV_PTE
+hv_pte_set_ptfn(HV_PTE pte, unsigned long val)
+{
+  pte.val &= ~(((1ULL << HV_PTE_PTFN_BITS)-1) << HV_PTE_INDEX_PTFN);
+  pte.val |= (__hv64) val << HV_PTE_INDEX_PTFN;
+  return pte;
+}
+
+
+/** Get the remote tile caching this page.
+ *
+ * Specifies the remote tile which is providing the L3 cache for this page.
+ *
+ * This field is ignored unless the page mode is HV_PTE_MODE_CACHE_TILE_L3.
+ *
+ * In level-1 PTEs, if the Page bit is clear, this field determines how the
+ * level-2 page table is accessed.
+ */
+static __inline unsigned int
+hv_pte_get_lotar(const HV_PTE pte)
+{
+  unsigned int lotar = ((__hv32) pte.val) >> HV_PTE_INDEX_LOTAR;
+
+  return HV_XY_TO_LOTAR( (lotar >> (HV_PTE_LOTAR_BITS / 2)),
+                         (lotar & ((1 << (HV_PTE_LOTAR_BITS / 2)) - 1)) );
+}
+
+
+/** Set the remote tile caching a page into a PTE.  See hv_pte_get_lotar. */
+static __inline HV_PTE
+hv_pte_set_lotar(HV_PTE pte, unsigned int val)
+{
+  unsigned int x = HV_LOTAR_X(val);
+  unsigned int y = HV_LOTAR_Y(val);
+
+  pte.val &= ~(((1ULL << HV_PTE_LOTAR_BITS)-1) << HV_PTE_INDEX_LOTAR);
+  pte.val |= (x << (HV_PTE_INDEX_LOTAR + HV_PTE_LOTAR_BITS / 2)) |
+             (y << HV_PTE_INDEX_LOTAR);
+  return pte;
+}
+
+#endif  /* !__ASSEMBLER__ */
+
+/** Converts a client physical address to a pfn. */
+#define HV_CPA_TO_PFN(p) ((p) >> HV_LOG2_PAGE_SIZE_SMALL)
+
+/** Converts a pfn to a client physical address. */
+#define HV_PFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_SIZE_SMALL)
+
+/** Converts a client physical address to a ptfn. */
+#define HV_CPA_TO_PTFN(p) ((p) >> HV_LOG2_PAGE_TABLE_ALIGN)
+
+/** Converts a ptfn to a client physical address. */
+#define HV_PTFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_TABLE_ALIGN)
+
+/** Converts a ptfn to a pfn. */
+#define HV_PTFN_TO_PFN(p) \
+  ((p) >> (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN))
+
+/** Converts a pfn to a ptfn. */
+#define HV_PFN_TO_PTFN(p) \
+  ((p) << (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN))
+
+#if CHIP_VA_WIDTH() > 32
+
+/** Log number of HV_PTE entries in L0 page table */
+#define HV_LOG2_L0_ENTRIES (CHIP_VA_WIDTH() - HV_LOG2_L1_SPAN)
+
+/** Number of HV_PTE entries in L0 page table */
+#define HV_L0_ENTRIES (1 << HV_LOG2_L0_ENTRIES)
+
+/** Log size of L0 page table in bytes */
+#define HV_LOG2_L0_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L0_ENTRIES)
+
+/** Size of L0 page table in bytes */
+#define HV_L0_SIZE (1 << HV_LOG2_L0_SIZE)
+
+#ifdef __ASSEMBLER__
+
+/** Index in L0 for a specific VA */
+#define HV_L0_INDEX(va) \
+  (((va) >> HV_LOG2_L1_SPAN) & (HV_L0_ENTRIES - 1))
+
+#else
+
+/** Index in L0 for a specific VA */
+#define HV_L0_INDEX(va) \
+  (((HV_VirtAddr)(va) >> HV_LOG2_L1_SPAN) & (HV_L0_ENTRIES - 1))
+
+#endif
+
+#endif /* CHIP_VA_WIDTH() > 32 */
+
+/** Log number of HV_PTE entries in L1 page table */
+#define HV_LOG2_L1_ENTRIES (HV_LOG2_L1_SPAN - HV_LOG2_PAGE_SIZE_LARGE)
+
+/** Number of HV_PTE entries in L1 page table */
+#define HV_L1_ENTRIES (1 << HV_LOG2_L1_ENTRIES)
+
+/** Log size of L1 page table in bytes */
+#define HV_LOG2_L1_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L1_ENTRIES)
+
+/** Size of L1 page table in bytes */
+#define HV_L1_SIZE (1 << HV_LOG2_L1_SIZE)
+
+/** Log number of HV_PTE entries in level-2 page table */
+#define HV_LOG2_L2_ENTRIES (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)
+
+/** Number of HV_PTE entries in level-2 page table */
+#define HV_L2_ENTRIES (1 << HV_LOG2_L2_ENTRIES)
+
+/** Log size of level-2 page table in bytes */
+#define HV_LOG2_L2_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L2_ENTRIES)
+
+/** Size of level-2 page table in bytes */
+#define HV_L2_SIZE (1 << HV_LOG2_L2_SIZE)
+
+#ifdef __ASSEMBLER__
+
+#if CHIP_VA_WIDTH() > 32
+
+/** Index in L1 for a specific VA */
+#define HV_L1_INDEX(va) \
+  (((va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1))
+
+#else /* CHIP_VA_WIDTH() > 32 */
+
+/** Index in L1 for a specific VA */
+#define HV_L1_INDEX(va) \
+  (((va) >> HV_LOG2_PAGE_SIZE_LARGE))
+
+#endif /* CHIP_VA_WIDTH() > 32 */
+
+/** Index in level-2 page table for a specific VA */
+#define HV_L2_INDEX(va) \
+  (((va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1))
+
+#else /* __ASSEMBLER__ */
+
+#if CHIP_VA_WIDTH() > 32
+
+/** Index in L1 for a specific VA */
+#define HV_L1_INDEX(va) \
+  (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1))
+
+#else /* CHIP_VA_WIDTH() > 32 */
+
+/** Index in L1 for a specific VA */
+#define HV_L1_INDEX(va) \
+  (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE))
+
+#endif /* CHIP_VA_WIDTH() > 32 */
+
+/** Index in level-2 page table for a specific VA */
+#define HV_L2_INDEX(va) \
+  (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1))
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _TILE_HV_H */
diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h
new file mode 100644
index 00000000..e1591bff
--- /dev/null
+++ b/arch/tile/include/hv/netio_errors.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * Error codes returned from NetIO routines.
+ */
+
+#ifndef __NETIO_ERRORS_H__
+#define __NETIO_ERRORS_H__
+
+/**
+ * @addtogroup error
+ *
+ * @brief The error codes returned by NetIO functions.
+ *
+ * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and
+ * a negative value if an error occurs.
+ *
+ * In cases where a NetIO function failed due to an error reported by
+ * system libraries, the error code will be the negation of the
+ * system errno at the time of failure.  The @ref netio_strerror()
+ * function will deliver error strings for both NetIO and system error
+ * codes.
+ *
+ * @{
+ */
+
+/** The set of all NetIO errors. */
+typedef enum
+{
+  /** Operation successfully completed. */
+  NETIO_NO_ERROR        = 0,
+
+  /** A packet was successfully retrieved from an input queue (also 0). */
+  NETIO_PKT             = 0,
+
+  /** Largest NetIO error number (same value as ::NETIO_NOT_REGISTERED). */
+  NETIO_ERR_MAX         = -701,
+
+  /** The tile is not registered with the IPP. */
+  NETIO_NOT_REGISTERED  = -701,
+
+  /** No packet was available to retrieve from the input queue. */
+  NETIO_NOPKT           = -702,
+
+  /** The requested function is not implemented. */
+  NETIO_NOT_IMPLEMENTED = -703,
+
+  /** On a registration operation, the target queue already has the maximum
+   *  number of tiles registered for it, and no more may be added.  On a
+   *  packet send operation, the output queue is full and nothing more can
+   *  be queued until some of the queued packets are actually transmitted. */
+  NETIO_QUEUE_FULL      = -704,
+
+  /** The calling process or thread is not bound to exactly one CPU. */
+  NETIO_BAD_AFFINITY    = -705,
+
+  /** Cannot allocate memory on requested controllers. */
+  NETIO_CANNOT_HOME     = -706,
+
+  /** On a registration operation, the IPP specified is not configured
+   *  to support the options requested; for instance, the application
+   *  wants a specific type of tagged headers which the configured IPP
+   *  doesn't support.  Or, the supplied configuration information is
+   *  not self-consistent, or is out of range; for instance, specifying
+   *  both NETIO_RECV and NETIO_NO_RECV, or asking for more than
+   *  NETIO_MAX_SEND_BUFFERS to be preallocated.  On a VLAN or bucket
+   *  configure operation, the number of items, or the base item, was
+   *  out of range.
+   */
+  NETIO_BAD_CONFIG      = -707,
+
+  /** Too many tiles have registered to transmit packets. */
+  NETIO_TOOMANY_XMIT    = -708,
+
+  /** Packet transmission was attempted on a queue which was registered
+   *  with transmit disabled. */
+  NETIO_UNREG_XMIT      = -709,
+
+  /** This tile is already registered with the IPP. */
+  NETIO_ALREADY_REGISTERED = -710,
+
+  /** The Ethernet link is down. The application should try again later. */
+  NETIO_LINK_DOWN       = -711,
+
+  /** An invalid memory buffer has been specified.  This may be an unmapped
+   * virtual address, or one which does not meet alignment requirements.
+   * For netio_input_register(), this error may be returned when multiple
+   * processes specify different memory regions to be used for NetIO
+   * buffers.  That can happen if these processes specify explicit memory
+   * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init()
+   * has not been called by a common ancestor of the processes.
+   */
+  NETIO_FAULT           = -712,
+
+  /** Cannot combine user-managed shared memory and cache coherence. */
+  NETIO_BAD_CACHE_CONFIG = -713,
+
+  /** Smallest NetIO error number (same value as ::NETIO_BAD_CACHE_CONFIG). */
+  NETIO_ERR_MIN         = -713,
+
+#ifndef __DOXYGEN__
+  /** Used internally to mean that no response is needed; never returned to
+   *  an application.  Positive, unlike the error codes above. */
+  NETIO_NO_RESPONSE     = 1
+#endif /* !__DOXYGEN__ */
+} netio_error_t;
+
+/** @} */
+
+#endif /* __NETIO_ERRORS_H__ */
diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h
new file mode 100644
index 00000000..8d20972a
--- /dev/null
+++ b/arch/tile/include/hv/netio_intf.h
@@ -0,0 +1,2975 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * NetIO interface structures and macros.
+ */
+
+#ifndef __NETIO_INTF_H__
+#define __NETIO_INTF_H__
+
+#include <hv/netio_errors.h>
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__)
+#include <assert.h>
+#define netio_assert assert  /**< Enable assertions from macros */
+#else /* __HV__, __BOGUX__ or __KERNEL__ build */
+#define netio_assert(...) ((void)(0))  /**< Disable assertions from macros; the arguments are dropped unevaluated */
+#endif /* !__HV__ && !__BOGUX__ && !__KERNEL__ */
+
+/*
+ * If none of these symbols are defined, we're building libnetio in an
+ * environment where we have pthreads, so we'll enable locking.
+ */
+#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \
+    !defined(__NEWLIB__)
+#define _NETIO_PTHREAD       /**< Include a mutex in netio_queue_t below */
+
+/*
+ * If NETIO_UNLOCKED is defined, we don't use per-cpu locks on
+ * per-packet NetIO operations.  We still do pthread locking on things
+ * like netio_input_register, though.  This is used for building
+ * libnetio_unlocked.
+ */
+#ifndef NETIO_UNLOCKED
+
+/* Avoid PLT overhead by using our own inlined per-cpu lock. */
+#include <sched.h>
+typedef int _netio_percpu_mutex_t;  /* Lock word operated on by the routines below. */
+/* Initialize a per-cpu lock by storing 0 in *lock.  Always returns 0. */
+static __inline int
+_netio_percpu_mutex_init(_netio_percpu_mutex_t* lock)
+{
+  *lock = 0;  /* Same value _netio_percpu_mutex_unlock() stores. */
+  return 0;
+}
+
+static __inline int
+_netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock)
+{  /* NOTE(review): __insn_tns() is presumably the TILE test-and-set intrinsic -- confirm. */
+  while (__builtin_expect(__insn_tns(lock), 0))  /* Nonzero result is hinted as unlikely. */
+    sched_yield();  /* Yield the CPU, then retry __insn_tns(). */
+  return 0;  /* Reached only after __insn_tns() has returned zero. */
+}
+
+static __inline int
+_netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock)
+{
+  *lock = 0;  /* Plain store of 0.  NOTE(review): no explicit barrier here -- confirm ordering on TILE. */
+  return 0;   /* Always returns 0. */
+}
+
+#else /* NETIO_UNLOCKED */
+
+/* Don't do any locking for per-packet NetIO operations: all three expand to nothing. */
+typedef int _netio_percpu_mutex_t;
+#define _netio_percpu_mutex_init(L)    /* No int result, unlike the inline version. */
+#define _netio_percpu_mutex_lock(L)    /* Argument L is not evaluated. */
+#define _netio_percpu_mutex_unlock(L)  /* Argument L is not evaluated. */
+
+#endif /* NETIO_UNLOCKED */
+#endif /* !__HV__, !__BOGUX__, !__KERNEL__, !__NEWLIB__ */
+
+/** Maximum number of tiles that can register for a given queue.
+ *  @ingroup setup */
+#define NETIO_MAX_TILES_PER_QUEUE  64
+
+
+/** Largest permissible queue identifier (inclusive).
+ *  @ingroup setup  */
+#define NETIO_MAX_QUEUE_ID        255
+
+
+#ifndef __DOXYGEN__
+
+/* Metadata packet checksum/ethertype flags.  For each field, _SHIFT is its
+ * lowest bit, _RMASK its right-justified mask, and _MASK the mask in place. */
+/** The L4 checksum has not been calculated (bit 0). */
+#define _NETIO_PKT_NO_L4_CSUM_SHIFT           0
+#define _NETIO_PKT_NO_L4_CSUM_RMASK           1
+#define _NETIO_PKT_NO_L4_CSUM_MASK \
+         (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT)
+
+/** The L3 checksum has not been calculated (bit 1). */
+#define _NETIO_PKT_NO_L3_CSUM_SHIFT           1
+#define _NETIO_PKT_NO_L3_CSUM_RMASK           1
+#define _NETIO_PKT_NO_L3_CSUM_MASK \
+         (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT)
+
+/** The L3 checksum is incorrect (or perhaps has not been calculated) (bit 2). */
+#define _NETIO_PKT_BAD_L3_CSUM_SHIFT          2
+#define _NETIO_PKT_BAD_L3_CSUM_RMASK          1
+#define _NETIO_PKT_BAD_L3_CSUM_MASK \
+         (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT)
+
+/** The Ethernet packet type is unrecognized (bit 3). */
+#define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT    3
+#define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK    1
+#define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \
+         (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \
+          _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT)
+
+/* Metadata packet type flags.  The 6-bit TYPE field (bits 4-9) spans the
+ * same bits as the VLAN (4-5), TAG (6-7) and SNAP (8) fields defined below. */
+/** Where the packet type bits are; this field is the index into
+ *  _netio_pkt_info. */
+#define _NETIO_PKT_TYPE_SHIFT        4
+#define _NETIO_PKT_TYPE_RMASK        0x3F
+
+/** How many VLAN tags the packet has, and, if we have two, which one we
+ *  actually grouped on.  A VLAN within a proprietary (Marvell or Broadcom)
+ *  tag is counted here.  Occupies bits 4-5. */
+#define _NETIO_PKT_VLAN_SHIFT        4
+#define _NETIO_PKT_VLAN_RMASK        0x3
+#define _NETIO_PKT_VLAN_MASK \
+         (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT)
+#define _NETIO_PKT_VLAN_NONE         0   /* No VLAN tag. */
+#define _NETIO_PKT_VLAN_ONE          1   /* One VLAN tag. */
+#define _NETIO_PKT_VLAN_TWO_OUTER    2   /* Two VLAN tags, outer one used. */
+#define _NETIO_PKT_VLAN_TWO_INNER    3   /* Two VLAN tags, inner one used. */
+
+/** Which proprietary tags the packet has (bits 6-7). */
+#define _NETIO_PKT_TAG_SHIFT         6
+#define _NETIO_PKT_TAG_RMASK         0x3
+#define _NETIO_PKT_TAG_MASK \
+          (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT)
+#define _NETIO_PKT_TAG_NONE          0   /* No proprietary tags. */
+#define _NETIO_PKT_TAG_MRVL          1   /* Marvell HyperG.Stack tags. */
+#define _NETIO_PKT_TAG_MRVL_EXT      2   /* HyperG.Stack extended tags. */
+#define _NETIO_PKT_TAG_BRCM          3   /* Broadcom HiGig tags. */
+
+/** Whether a packet has an LLC + SNAP header (bit 8). */
+#define _NETIO_PKT_SNAP_SHIFT        8
+#define _NETIO_PKT_SNAP_RMASK        0x1
+#define _NETIO_PKT_SNAP_MASK \
+          (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT)
+
+/* NOTE: Bits 9 and 10 are unused (bit 9 still falls inside the TYPE RMASK). */
+
+/** Length of any custom data before the L2 header, in words (bits 11-15). */
+#define _NETIO_PKT_CUSTOM_LEN_SHIFT  11
+#define _NETIO_PKT_CUSTOM_LEN_RMASK  0x1F
+#define _NETIO_PKT_CUSTOM_LEN_MASK \
+          (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT)
+
+/** The L4 checksum is incorrect (or perhaps has not been calculated) (bit 16). */
+#define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16
+#define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1
+#define _NETIO_PKT_BAD_L4_CSUM_MASK \
+          (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT)
+
+/** Length of the L2 header, in words (bits 17-21). */
+#define _NETIO_PKT_L2_LEN_SHIFT  17
+#define _NETIO_PKT_L2_LEN_RMASK  0x1F
+#define _NETIO_PKT_L2_LEN_MASK \
+          (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT)
+
+
+/* Flags in minimal packet metadata. */
+
+/** We need an eDMA checksum on this packet (bit 0). */
+#define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT            0
+#define _NETIO_PKT_NEED_EDMA_CSUM_RMASK            1
+#define _NETIO_PKT_NEED_EDMA_CSUM_MASK \
+         (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT)
+
+/* Data within the packet information table. */
+
+/* Note that, for efficiency, code which uses these fields assumes that none
+ * of the shift values below are zero.  See uses below for an explanation. */
+
+/** Offset within the L2 header of the innermost ethertype (in halfwords);
+ *  bits 6-10 of a table entry. */
+#define _NETIO_PKT_INFO_ETYPE_SHIFT       6
+#define _NETIO_PKT_INFO_ETYPE_RMASK    0x1F
+/** Offset within the L2 header of the VLAN tag (in halfwords); bits 11-15. */
+#define _NETIO_PKT_INFO_VLAN_SHIFT       11
+#define _NETIO_PKT_INFO_VLAN_RMASK     0x1F
+
+#endif /* !__DOXYGEN__ */
+
+
+/** The size of a memory buffer representing a small packet.
+ *  @ingroup egress */
+#define SMALL_PACKET_SIZE 256
+
+/** The size of a memory buffer representing a large packet.
+ *  @ingroup egress */
+#define LARGE_PACKET_SIZE 2048
+
+/** The size of a memory buffer representing a jumbo packet (12288).
+ *  @ingroup egress */
+#define JUMBO_PACKET_SIZE (12 * 1024)
+
+
+/* Common ethertypes.
+ * @ingroup ingress */
+/** @{ */
+/** The ethertype of IPv4. */
+#define ETHERTYPE_IPv4 (0x0800)
+/** The ethertype of ARP. */
+#define ETHERTYPE_ARP (0x0806)
+/** The ethertype of VLANs. */
+#define ETHERTYPE_VLAN (0x8100)
+/** The ethertype of a Q-in-Q header (NOTE(review): IEEE 802.1ad uses 0x88A8). */
+#define ETHERTYPE_Q_IN_Q (0x9100)
+/** The ethertype of IPv6. */
+#define ETHERTYPE_IPv6 (0x86DD)
+/** The ethertype of MPLS. */
+#define ETHERTYPE_MPLS (0x8847)
+/** @} */
+
+
+/** The possible return values of NETIO_PKT_STATUS.
+ * @ingroup ingress
+ */
+typedef enum
+{
+  /** No problems were detected with this packet (value 0). */
+  NETIO_PKT_STATUS_OK,
+  /** The packet is undersized; this is expected behavior if the packet's
+   *  ethertype is unrecognized, but otherwise the packet is likely corrupt. */
+  NETIO_PKT_STATUS_UNDERSIZE,
+  /** The packet is oversized and some trailing bytes have been discarded.
+   *  This is expected behavior for short packets, since it's impossible to
+   *  precisely determine the amount of padding which may have been added to
+   *  them to make them meet the minimum Ethernet packet size. */
+  NETIO_PKT_STATUS_OVERSIZE,
+  /** The packet was judged to be corrupt by hardware (for instance, it had
+   *  a bad CRC, or part of it was discarded due to lack of buffer space in
+   *  the I/O shim) and should be discarded. */
+  NETIO_PKT_STATUS_BAD
+} netio_pkt_status_t;
+
+
+/** Log2 of how many buckets we have. */
+#define NETIO_LOG2_NUM_BUCKETS (10)
+
+/** How many buckets we have (1 << 10 = 1024).
+ * @ingroup ingress */
+#define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS)
+
+
+/**
+ * @brief A group-to-bucket identifier.
+ *
+ * @ingroup setup
+ *
+ * This tells us what to do with a given group.  The bit-field widths sum to 32.
+ */
+typedef union {
+  /** The header broken down into bits. */
+  struct {
+    /** Whether we should balance on L4, if available */
+    unsigned int __balance_on_l4:1;
+    /** Whether we should balance on L3, if available */
+    unsigned int __balance_on_l3:1;
+    /** Whether we should balance on L2, if available */
+    unsigned int __balance_on_l2:1;
+    /** Reserved for future use */
+    unsigned int __reserved:1;
+    /** The base bucket to use to send traffic */
+    unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS;
+    /** The mask to apply to the balancing value. This must be one less
+     * than a power of two, e.g. 0x3 or 0xFF.
+     */
+    unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS;
+    /** Pad to 32 bits: what is left after the four 1-bit fields and the two bucket fields */
+    unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS);
+  } bits;
+  /** The whole value as one word (overlays @c bits); to send out the IDN. */
+  unsigned int word;
+}
+netio_group_t;
+
+
+/**
+ * @brief A VLAN-to-bucket identifier.
+ *
+ * @ingroup setup
+ *
+ * This tells us what to do with a given VLAN.  Same layout as netio_group_t.
+ */
+typedef netio_group_t netio_vlan_t;
+
+
+/**
+ * A bucket-to-queue mapping (one unsigned byte per bucket).
+ * @ingroup setup
+ */
+typedef unsigned char netio_bucket_t;
+
+
+/**
+ * A packet size can always fit in a netio_size_t (an unsigned int).
+ * @ingroup setup
+ */
+typedef unsigned int netio_size_t;
+
+
+/**
+ * @brief Ethernet standard (ingress) packet metadata.
+ *
+ * @ingroup ingress
+ *
+ * This is additional data associated with each packet.
+ * This structure is opaque and accessed through the @ref ingress.
+ *
+ * Also, the buffer population operation currently assumes that standard
+ * metadata is at least as large as minimal metadata, and will need to be
+ * modified if that is no longer the case.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  /** This structure is opaque (24 bytes: six words, assuming 4-byte unsigned int). */
+  unsigned char opaque[24];
+#else
+  /** The overall ordinal of the packet */
+  unsigned int __packet_ordinal;
+  /** The ordinal of the packet within the group */
+  unsigned int __group_ordinal;
+  /** The best flow hash IPP could compute. */
+  unsigned int __flow_hash;
+  /** Flags pertaining to checksum calculation, packet type, etc.; decoded
+   *  with the _NETIO_PKT_*_SHIFT / _RMASK macros. */
+  unsigned int __flags;
+  /** The first word of "user data". */
+  unsigned int __user_data_0;
+  /** The second word of "user data". */
+  unsigned int __user_data_1;
+#endif /* __DOXYGEN__ */
+}
+netio_pkt_metadata_t;
+
+
+/** To ensure that the L3 header is aligned mod 4, the L2 header should be
+ * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes
+ * long.  The standard way to do this is to simply add 2 bytes of padding
+ * before the L2 header.  This is that padding length, in bytes.
+ */
+#define NETIO_PACKET_PADDING 2
+
+
+
+/**
+ * @brief Ethernet minimal (egress) packet metadata.
+ *
+ * @ingroup egress
+ *
+ * This structure represents information about packets which have
+ * been processed by @ref netio_populate_buffer() or
+ * @ref netio_populate_prepend_buffer().  This structure is opaque
+ * and accessed through the @ref egress.
+ *
+ * @internal This structure is actually copied into the memory used by
+ * standard metadata, which is assumed to be large enough.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  /** This structure is opaque (14 bytes, assuming 2-byte unsigned short). */
+  unsigned char opaque[14];
+#else
+  /** The offset of the L2 header from the start of the packet data. */
+  unsigned short l2_offset;
+  /** The offset of the L3 header from the start of the packet data. */
+  unsigned short l3_offset;
+  /** Where to write the checksum. */
+  unsigned char csum_location;
+  /** Where to start checksumming from. */
+  unsigned char csum_start;
+  /** Flags pertaining to checksum calculation etc. */
+  unsigned short flags;
+  /** The L2 length of the packet. */
+  unsigned short l2_length;
+  /** The checksum with which to seed the checksum generator. */
+  unsigned short csum_seed;
+  /** How much to checksum. */
+  unsigned short csum_length;
+#endif /* __DOXYGEN__ */
+}
+netio_pkt_minimal_metadata_t;
+
+
+#ifndef __DOXYGEN__
+
+/**
+ * @brief An I/O notification header.
+ *
+ * This is the first word of data received from an I/O shim in a notification
+ * packet. It contains framing and status information.  Field widths sum to 32.
+ */
+typedef union
+{
+  unsigned int word; /**< The whole word. */
+  /** The various fields. */
+  struct
+  {
+    unsigned int __channel:7;    /**< Resource channel. */
+    unsigned int __type:4;       /**< Type. */
+    unsigned int __ack:1;        /**< Whether an acknowledgement is needed. */
+    unsigned int __reserved:1;   /**< Reserved. */
+    unsigned int __protocol:1;   /**< A protocol-specific word is added. */
+    unsigned int __status:2;     /**< Status of the transfer. */
+    unsigned int __framing:2;    /**< Framing of the transfer. */
+    unsigned int __transfer_size:14; /**< Transfer size in bytes (total); at most 16383. */
+  } bits;
+}
+__netio_pkt_notif_t;
+
+
+/** Returns the base address of the packet for handle @p p: the handle word
+ * with its low 6 bits cleared (mask 0xFFFFFFC0), cast to a byte pointer.
+ * NOTE(review): assumes pointers fit in 32 bits; mask keeps bits 6-7 too. */
+#define _NETIO_PKT_HANDLE_BASE(p) \
+  ((unsigned char*)((p).word & 0xFFFFFFC0))
+
+/**
+ * Returns the base address of the packet, given a netio_pkt_t pointer @p p.
+ */
+#define _NETIO_PKT_BASE(p) \
+  _NETIO_PKT_HANDLE_BASE((p)->__packet)
+
+/**
+ * @brief An I/O notification packet (second word)
+ *
+ * This is the second word of data received from an I/O shim in a notification
+ * packet.  This is the virtual address of the packet buffer, plus some flag
+ * bits.  (The virtual address of the packet is always 256-byte aligned so we
+ * have room for 8 bits' worth of flags in the low 8 bits.)
+ *
+ * @internal
+ * NOTE: The low two bits must contain "__queue", so the "packet size"
+ * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly.
+ *
+ * If __addr or __offset are moved, _NETIO_PKT_HANDLE_BASE and
+ * _NETIO_PKT_BASE (defined just above this) must be changed.
+ */
+typedef union
+{
+  unsigned int word; /**< The whole word. */
+  /** The various fields (widths sum to 32). */
+  struct
+  {
+    /** Which queue the packet will be returned to once it is sent back to
+        the IPP.  This is one of the SIZE_xxx values. */
+    unsigned int __queue:2;
+
+    /** The IPP handle of the sending IPP. */
+    unsigned int __ipp_handle:2;
+
+    /** Reserved for future use. */
+    unsigned int __reserved:1;
+
+    /** If 1, this packet has minimal (egress) metadata; otherwise, it
+        has standard (ingress) metadata. */
+    unsigned int __minimal:1;
+
+    /** Offset of the metadata within the packet.  This value is multiplied
+     *  by 64 and added to the base packet address to get the metadata
+     *  address.  Note that this field is aligned within the word such that
+     *  you can easily extract the metadata address with a 26-bit mask. */
+    unsigned int __offset:2;
+
+    /** The top 24 bits of the packet's virtual address. */
+    unsigned int __addr:24;
+  } bits;
+}
+__netio_pkt_handle_t;
+
+#endif /* !__DOXYGEN__ */
+
+
+/**
+ * @brief A handle for an I/O packet's storage.
+ * @ingroup ingress
+ *
+ * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its
+ * packet metadata removed.  It is a much smaller type that exists to
+ * facilitate applications where the full ::netio_pkt_t type is too
+ * large, such as those that cache enormous numbers of packets or wish
+ * to transmit packet descriptors over the UDN.
+ *
+ * Because there is no metadata, most ::netio_pkt_t operations cannot be
+ * performed on a netio_pkt_handle_t.  It supports only
+ * netio_free_handle() (to free the buffer) and
+ * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents).
+ * The application must acquire any additional metadata it wants from the
+ * original ::netio_pkt_t and record it separately.
+ *
+ * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling
+ * NETIO_PKT_HANDLE().  An invalid handle (analogous to NULL) can be
+ * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can
+ * be tested for validity with NETIO_PKT_HANDLE_IS_VALID().
+ */
+typedef struct
+{
+  unsigned int word; /**< Opaque bits; NETIO_PKT_HANDLE() copies the packet's handle word here. */
+} netio_pkt_handle_t;
+
+/**
+ * @brief A packet descriptor.
+ *
+ * @ingroup ingress
+ * @ingroup egress
+ *
+ * This data structure represents a packet.  The structure is manipulated
+ * through the @ref ingress and the @ref egress.
+ *
+ * While the contents of a netio_pkt_t are opaque, the structure itself is
+ * portable.  This means that it may be shared between all tiles which have
+ * done a netio_input_register() call for the interface on which the pkt_t
+ * was initially received (via netio_get_packet()) or retrieved (via
+ * netio_get_buffer()).  The contents of a netio_pkt_t can be transmitted to
+ * another tile via shared memory, or via a UDN message, or by other means.
+ * The destination tile may then use the pkt_t as if it had originally been
+ * received locally; it may read or write the packet's data, read its
+ * metadata, free the packet, send the packet, transfer the netio_pkt_t to
+ * yet another tile, and so forth.
+ *
+ * Once a netio_pkt_t has been transferred to a second tile, the first tile
+ * should not reference the original copy; in particular, if more than one
+ * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will
+ * become corrupted.  Note also that each tile which reads or modifies
+ * packet data must obey the memory coherency rules outlined in @ref input.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  /** This structure is opaque (two header words plus 24 bytes of metadata). */
+  unsigned char opaque[32];
+#else
+  /** For an ingress packet (one with standard metadata), this is the
+   *  notification header we got from the I/O shim.  For an egress packet
+   *  (one with minimal metadata), this word is zero if the packet has not
+   *  been populated, and nonzero if it has. */
+  __netio_pkt_notif_t __notif_header;
+
+  /** Virtual address of the packet buffer, plus state flags. */
+  __netio_pkt_handle_t __packet;
+
+  /** Metadata associated with the packet; also reused as minimal metadata. */
+  netio_pkt_metadata_t __metadata;
+#endif /* __DOXYGEN__ */
+}
+netio_pkt_t;
+
+
+#ifndef __DOXYGEN__
+/* Field accessors.  Each _M form takes (mda, pkt) but expands using only one of them. */
+#define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header)
+#define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle)
+#define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue)
+#define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header)
+#define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle)
+#define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal)
+#define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue)
+#define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags)
+
+/* Packet information table, used by the attribute access functions below. */
+extern const uint16_t _netio_pkt_info[];
+
+#endif /* !__DOXYGEN__ */
+
+
+#ifndef __DOXYGEN__
+/* These macros are deprecated and will disappear in a future MDE release;
+ * they simply forward to the NETIO_PKT_L4_CSUM_CORRECT names. */
+#define NETIO_PKT_GOOD_CHECKSUM(pkt) \
+  NETIO_PKT_L4_CSUM_CORRECT(pkt)
+#define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \
+  NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt)
+#endif /* !__DOXYGEN__ */
+
+
+/* Packet attribute access functions. */
+
+/** Return a pointer to the metadata for a packet.
+ * @ingroup ingress
+ *
+ * Calling this function once and passing the result to other retrieval
+ * functions with a "_M" suffix usually improves performance.  This
+ * function must be called on an 'ingress' packet (i.e. one retrieved
+ * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or
+ * @ref netio_populate_prepend_buffer() have not been called). Use of this
+ * function on an 'egress' packet trips netio_assert(), which only fires in
+ * builds where it maps to assert() (not __HV__, __BOGUX__ or __KERNEL__).
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's standard metadata.
+ */
+static __inline netio_pkt_metadata_t*
+NETIO_PKT_METADATA(netio_pkt_t* pkt)
+{
+  netio_assert(!pkt->__packet.bits.__minimal);
+  return &pkt->__metadata;
+}
+
+
+/** Return a pointer to the minimal metadata for a packet.
+ * @ingroup egress
+ *
+ * Calling this function once and passing the result to other retrieval
+ * functions with a "_MM" suffix usually improves performance.  This
+ * function must be called on an 'egress' packet (i.e. one on which
+ * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer()
+ * have been called, or one retrieved by @ref netio_get_buffer()). Use of
+ * this function on an 'ingress' packet trips netio_assert(), which only
+ * fires in builds where it maps to assert().
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's minimal metadata.
+ */
+static __inline netio_pkt_minimal_metadata_t*
+NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+  netio_assert(pkt->__packet.bits.__minimal);
+  return (netio_pkt_minimal_metadata_t*) &pkt->__metadata;  /* Same storage as the standard metadata. */
+}
+
+
+/** Determine whether a packet has 'minimal' metadata.
+ * @ingroup pktfuncs
+ *
+ * This function will return nonzero if the packet is an 'egress'
+ * packet (i.e. one on which @ref netio_populate_buffer() or
+ * @ref netio_populate_prepend_buffer() have been called, or one
+ * retrieved by @ref netio_get_buffer()), and zero if the packet
+ * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(),
+ * which has not been converted into an 'egress' packet).
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero (1) if the packet has minimal metadata, else zero.
+ */
+static __inline unsigned int
+NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt)
+{
+  return pkt->__packet.bits.__minimal;  /* 1-bit field of the handle word. */
+}
+
+
+/** Return a handle for a packet's storage.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A handle for the packet's storage (a copy of its handle word).
+ */
+static __inline netio_pkt_handle_t
+NETIO_PKT_HANDLE(netio_pkt_t* pkt)
+{
+  netio_pkt_handle_t h;
+  h.word = pkt->__packet.word;  /* Address bits and flag bits are copied together. */
+  return h;
+}
+
+
+/** A special reserved value indicating the absence of a packet handle.
+ * It is a compound literal whose @c word member is zero.
+ * @ingroup pktfuncs
+ */
+#define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 })
+
+
+/** Test whether a packet handle is valid.
+ *
+ * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE
+ * to indicate no packet at all.  This function tests to see if a packet
+ * handle is a real handle, not this special reserved value.
+ *
+ * @ingroup pktfuncs
+ *
+ * @param[in] handle Handle on which to operate.
+ * @return One if the packet handle is valid (its word is nonzero), else zero.
+ */
+static __inline unsigned int
+NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle)
+{
+  return handle.word != 0;
+}
+
+
+
+/** Return a pointer to the start of the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup ingress
+ *
+ * @param[in] handle Handle on which to operate.
+ * @return A pointer to the start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle)
+{
+  return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING;  /* Skip the padding bytes at the buffer base. */
+}
+
+
+/** Return the length of the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate (not used by this function).
+ * @return The length of the packet's custom header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /*
+   * Note that we effectively need to extract a quantity from the flags word
+   * which is measured in words, and then turn it into bytes by shifting
+   * it left by 2.  We do this all at once by just shifting right two fewer
+   * bits, and shifting the mask up two bits: (flags >> 9) & 0x7C.
+   */
+  return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) &
+          (_NETIO_PKT_CUSTOM_LEN_RMASK << 2));
+}
+
+
+/** Return the length of the packet, starting with the custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata (not used here).
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (__NETIO_PKT_NOTIF_HEADER(pkt).bits.__transfer_size -
+          NETIO_PACKET_PADDING);  /* Notification transfer size minus the padding bytes. */
+}
+
+
+/** Return a pointer to the start of the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata (not used here).
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt));  /* Same result as the handle-based accessor. */
+}
+
+
+/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate (not used by this function).
+ * @return The length of the packet's L2 header, in bytes (always 4n + 2).
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /*
+   * Note that we effectively need to extract a quantity from the flags word
+   * which is measured in words, and then turn it into bytes by shifting
+   * it left by 2.  We do this all at once by just shifting right two fewer
+   * bits, and shifting the mask up two bits.  We then add two bytes.
+   */
+  return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) &
+          (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2;
+}
+
+
+/** Return the length of the packet, starting with the L2 (Ethernet) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  netio_size_t total_len = NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt);
+
+  /* The L2 length is the whole packet minus the custom header length. */
+  return total_len - NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Return a pointer to the start of the packet's L2 (Ethernet) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  unsigned char* custom_start = NETIO_PKT_CUSTOM_DATA_M(mda, pkt);
+
+  /* Step over the custom header to reach the L2 header. */
+  return custom_start + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Retrieve the length of the packet, starting with the L3 (generally,
+ *  the IP) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Length of the packet's L3 header and data, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  netio_size_t l2_len = NETIO_PKT_L2_LENGTH_M(mda, pkt);
+  netio_size_t l2_hdr_len = NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt);
+
+  /* What remains after the L2 header is the L3 header plus its data. */
+  return l2_len - l2_hdr_len;
+}
+
+
+/** Return a pointer to the packet's L3 (generally, the IP) header.
+ * @ingroup ingress
+ *
+ * Note that we guarantee word alignment of the L3 header.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's L3 header.
+ */
+static __inline unsigned char*
+NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  unsigned char* l2_start = NETIO_PKT_L2_DATA_M(mda, pkt);
+
+  /* The L3 header begins immediately after the L2 header. */
+  return &l2_start[NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt)];
+}
+
+
+/** Return the ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given an ordinal number when it is delivered by the IPP.
+ * In the medium term, the ordinal is unique and monotonically increasing,
+ * being incremented by 1 for each packet; the ordinal of the first packet
+ * delivered after the IPP starts is zero.  (Since the ordinal is of finite
+ * size, given enough input packets, it will eventually wrap around to zero;
+ * in the long term, therefore, ordinals are not unique.)  The ordinals
+ * handed out by different IPPs are not disjoint, so two packets from
+ * different IPPs may have identical ordinals.  Packets dropped by the
+ * IPP or by the I/O shim are not assigned ordinals.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP packet ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Read straight from the metadata; pkt is not referenced. */
+  unsigned int ordinal = mda->__packet_ordinal;
+
+  return ordinal;
+}
+
+
+/** Return the per-group ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given a per-group ordinal number when it is
+ * delivered by the IPP. By default, the group is the packet's VLAN,
+ * although the IPP can be recompiled to use different values.  In
+ * the medium term, the ordinal is unique and monotonically
+ * increasing, being incremented by 1 for each packet; the ordinal of
+ * the first packet distributed to a particular group is zero.
+ * (Since the ordinal is of finite size, given enough input packets,
+ * it will eventually wrap around to zero; in the long term,
+ * therefore, ordinals are not unique.)  The ordinals handed out by
+ * different IPPs are not disjoint, so two packets from different IPPs
+ * may have identical ordinals; similarly, packets distributed to
+ * different groups may have identical ordinals.  Packets dropped by
+ * the IPP or by the I/O shim are not assigned ordinals.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP, per-group ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Read straight from the metadata; pkt is not referenced. */
+  unsigned int group_ordinal = mda->__group_ordinal;
+
+  return group_ordinal;
+}
+
+
+/** Return the VLAN ID assigned to the packet.
+ * @ingroup ingress
+ *
+ * This value is usually contained within the packet header.
+ *
+ * This value will be zero if the packet does not have a VLAN tag, or if
+ * this value was not extracted from the packet.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's VLAN ID.
+ */
+static __inline unsigned short
+NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  unsigned short* l2_words;
+  unsigned short raw_tag;
+  int type_index;
+  int vlan_field =
+    (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK;
+
+  /* The flags say there is no VLAN to report: answer zero without
+   * touching the packet data. */
+  if (vlan_field == _NETIO_PKT_VLAN_NONE)
+    return 0;
+
+  /* View the L2 header as 16-bit words; the info table entry for this
+   * packet type gives the index of the word to read. */
+  l2_words = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt);
+  type_index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK;
+
+  raw_tag =
+    l2_words[(_netio_pkt_info[type_index] >> _NETIO_PKT_INFO_VLAN_SHIFT) &
+             _NETIO_PKT_INFO_VLAN_RMASK];
+
+  /* Swap the two bytes of the 16-bit word (the 32-bit swap leaves them in
+   * the upper half, hence the shift), then keep the low 12 bits. */
+#ifdef __TILECC__
+  return (__insn_bytex(raw_tag) >> 16) & 0xFFF;
+#else
+  return (__builtin_bswap32(raw_tag) >> 16) & 0xFFF;
+#endif
+}
+
+
+/** Return the ethertype of the packet.
+ * @ingroup ingress
+ *
+ * This value is usually contained within the packet header.
+ *
+ * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M()
+ * returns true, and otherwise, may not be well defined.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's ethertype.
+ */
+static __inline unsigned short
+NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  unsigned short* l2_words;
+  unsigned short raw_etype;
+  int type_index;
+
+  /* View the L2 header as 16-bit words; the info table entry for this
+   * packet type gives the index of the word holding the ethertype. */
+  l2_words = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt);
+  type_index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK;
+
+  raw_etype =
+    l2_words[(_netio_pkt_info[type_index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) &
+             _NETIO_PKT_INFO_ETYPE_RMASK];
+
+  /* Swap the two bytes; the 32-bit swap leaves them in the upper half. */
+  return __builtin_bswap32(raw_etype) >> 16;
+}
+
+
+/** Return the flow hash computed on the packet.
+ * @ingroup ingress
+ *
+ * For TCP and UDP packets, this hash is calculated by hashing together
+ * the "5-tuple" values, specifically the source IP address, destination
+ * IP address, protocol type, source port and destination port.
+ * The hash value is intended to be helpful for millions of distinct
+ * flows.
+ *
+ * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is
+ * derived by hashing together the source and destination IP addresses.
+ *
+ * For MPLS-encapsulated packets, the flow hash is derived by hashing
+ * the first MPLS label.
+ *
+ * For all other packets the flow hash is computed from the source
+ * and destination Ethernet addresses.
+ *
+ * The hash is symmetric, meaning it produces the same value if the
+ * source and destination are swapped. The only exceptions are
+ * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple
+ * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32
+ * (Encap Security Payload), which use only the destination address
+ * since the source address is not meaningful.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's 32-bit flow hash.
+ */
+static __inline unsigned int
+NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* The hash is stored in the metadata; pkt is not referenced. */
+  unsigned int flow_hash = mda->__flow_hash;
+
+  return flow_hash;
+}
+
+
+/** Return the first word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first
+ * word of user data contains the least significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_low()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's first word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* First user-data word, copied unchanged from the metadata. */
+  unsigned int word0 = mda->__user_data_0;
+
+  return word0;
+}
+
+
+/** Return the second word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second
+ * word of user data contains the most significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_high()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's second word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Second user-data word, copied unchanged from the metadata. */
+  unsigned int word1 = mda->__user_data_1;
+
+  return word1;
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L4 checksum was calculated.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* True only when the "no L4 checksum" flag bit is clear. */
+  return (mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK) == 0;
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to
+ *  be correct.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* True only when both the "bad" and the "not computed" L4 bits are clear. */
+  return (mda->__flags &
+          (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK)) == 0;
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L3 (IP) checksum was calculated.
+ */
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* True only when the "no L3 checksum" flag bit is clear. */
+  return (mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK) == 0;
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated and found to be
+ *  correct.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* True only when both the "bad" and the "not computed" L3 bits are clear. */
+  return (mda->__flags &
+          (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK)) == 0;
+}
+
+
+/** Determine whether the ethertype was recognized and L3 packet data was
+ *  processed.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the ethertype was recognized and L3 packet data was
+ *   processed.
+ */
+static __inline unsigned int
+NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* True only when the "type unrecognized" flag bit is clear. */
+  return (mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK) == 0;
+}
+
+
+/** Retrieve the status of a packet and any errors that may have occurred
+ * during ingress processing (length mismatches, CRC errors, etc.).
+ * @ingroup ingress
+ *
+ * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
+ * returns zero are always reported as underlength, as there is no a priori
+ * means to determine their length.  Normally, applications should use
+ * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this
+ * function.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's status.
+ */
+static __inline netio_pkt_status_t
+NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* The status lives in the notification header, not in the metadata. */
+  netio_pkt_status_t status =
+    (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status;
+
+  return status;
+}
+
+
+/** Report whether a packet is bad (i.e., was shorter than expected based on
+ *  its headers, or had a bad CRC).
+ * @ingroup ingress
+ *
+ * Note that this function does not verify L3 or L4 checksums.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the packet is bad and should be discarded.
+ */
+static __inline unsigned int
+NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  netio_pkt_status_t status = NETIO_PKT_STATUS_M(mda, pkt);
+
+  /* A clear low status bit means the packet is not reported as bad. */
+  if (!(status & 1))
+    return 0;
+
+  /* With a recognized ethertype, any status with the low bit set is bad. */
+  if (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt))
+    return 1;
+
+  /* Otherwise only an explicit NETIO_PKT_STATUS_BAD marks the packet bad. */
+  return status == NETIO_PKT_STATUS_BAD;
+}
+
+
+/** Return the length of the packet, starting with the L2 (Ethernet) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  /* The L2 length is stored directly in the minimal metadata. */
+  netio_size_t l2_len = mmd->l2_length;
+
+  return l2_len;
+}
+
+
+/** Return the length of the L2 (Ethernet) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's L2 header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
+                              netio_pkt_t* pkt)
+{
+  /* The L2 header spans from the L2 offset up to the L3 offset. */
+  return (netio_size_t) (mmd->l3_offset - mmd->l2_offset);
+}
+
+
+/** Return the length of the packet, starting with the L3 (IP) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Length of the packet's L3 header and data, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  netio_size_t l2_len = NETIO_PKT_L2_LENGTH_MM(mmd, pkt);
+  netio_size_t l2_hdr_len = NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt);
+
+  /* What remains after the L2 header is the L3 header plus its data. */
+  return l2_len - l2_hdr_len;
+}
+
+
+/** Return a pointer to the packet's L3 (generally, the IP) header.
+ * @ingroup egress
+ *
+ * Note that we guarantee word alignment of the L3 header.
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's L3 header.
+ */
+static __inline unsigned char*
+NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  /* Offset from the packet base to the L3 header, per the minimal metadata. */
+  unsigned char* l3_start = _NETIO_PKT_BASE(pkt) + mmd->l3_offset;
+
+  return l3_start;
+}
+
+
+/** Return a pointer to the packet's L2 (Ethernet) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  /* Offset from the packet base to the L2 header, per the minimal metadata. */
+  unsigned char* l2_start = _NETIO_PKT_BASE(pkt) + mmd->l2_offset;
+
+  return l2_start;
+}
+
+
+/** Retrieve the status of a packet and any errors that may have occurred
+ * during ingress processing (length mismatches, CRC errors, etc.).
+ * @ingroup ingress
+ *
+ * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
+ * returns zero are always reported as underlength, as there is no a priori
+ * means to determine their length.  Normally, applications should use
+ * @ref NETIO_PKT_BAD() instead of explicitly checking status with this
+ * function.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's status.
+ */
+static __inline netio_pkt_status_t
+NETIO_PKT_STATUS(netio_pkt_t* pkt)
+{
+  netio_pkt_status_t status;
+
+  /* Callers must not pass a packet whose minimal bit is set. */
+  netio_assert(!pkt->__packet.bits.__minimal);
+
+  status = (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status;
+  return status;
+}
+
+
+/** Report whether a packet is bad (i.e., was shorter than expected based on
+ *  its headers, or had a bad CRC).
+ * @ingroup ingress
+ *
+ * Note that this function does not verify L3 or L4 checksums.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the packet is bad and should be discarded.
+ */
+static __inline unsigned int
+NETIO_PKT_BAD(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_BAD_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the length of the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's custom header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the length of the packet, starting with the custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return  The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_CUSTOM_LENGTH_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return a pointer to the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_CUSTOM_DATA_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's L2 header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt)
+{
+  /* Non-minimal packets are answered from the standard metadata. */
+  if (!NETIO_PKT_IS_MINIMAL(pkt))
+    return NETIO_PKT_L2_HEADER_LENGTH_M(NETIO_PKT_METADATA(pkt), pkt);
+
+  /* Minimal packets are answered from the minimal metadata. */
+  return NETIO_PKT_L2_HEADER_LENGTH_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt);
+}
+
+
+/** Return the length of the packet, starting with the L2 (Ethernet) header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return  The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt)
+{
+  /* Non-minimal packets are answered from the standard metadata. */
+  if (!NETIO_PKT_IS_MINIMAL(pkt))
+    return NETIO_PKT_L2_LENGTH_M(NETIO_PKT_METADATA(pkt), pkt);
+
+  /* Minimal packets are answered from the minimal metadata. */
+  return NETIO_PKT_L2_LENGTH_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt);
+}
+
+
+/** Return a pointer to the packet's L2 (Ethernet) header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_L2_DATA(netio_pkt_t* pkt)
+{
+  /* Non-minimal packets are answered from the standard metadata. */
+  if (!NETIO_PKT_IS_MINIMAL(pkt))
+    return NETIO_PKT_L2_DATA_M(NETIO_PKT_METADATA(pkt), pkt);
+
+  /* Minimal packets are answered from the minimal metadata. */
+  return NETIO_PKT_L2_DATA_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt);
+}
+
+
+/** Retrieve the length of the packet, starting with the L3 (generally, the IP)
+ * header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Length of the packet's L3 header and data, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt)
+{
+  /* Non-minimal packets are answered from the standard metadata. */
+  if (!NETIO_PKT_IS_MINIMAL(pkt))
+    return NETIO_PKT_L3_LENGTH_M(NETIO_PKT_METADATA(pkt), pkt);
+
+  /* Minimal packets are answered from the minimal metadata. */
+  return NETIO_PKT_L3_LENGTH_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt);
+}
+
+
+/** Return a pointer to the packet's L3 (generally, the IP) header.
+ * @ingroup pktfuncs
+ *
+ * Note that we guarantee word alignment of the L3 header.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's L3 header.
+ */
+static __inline unsigned char*
+NETIO_PKT_L3_DATA(netio_pkt_t* pkt)
+{
+  /* Non-minimal packets are answered from the standard metadata. */
+  if (!NETIO_PKT_IS_MINIMAL(pkt))
+    return NETIO_PKT_L3_DATA_M(NETIO_PKT_METADATA(pkt), pkt);
+
+  /* Minimal packets are answered from the minimal metadata. */
+  return NETIO_PKT_L3_DATA_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt);
+}
+
+
+/** Return the ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given an ordinal number when it is delivered by the IPP.
+ * In the medium term, the ordinal is unique and monotonically increasing,
+ * being incremented by 1 for each packet; the ordinal of the first packet
+ * delivered after the IPP starts is zero.  (Since the ordinal is of finite
+ * size, given enough input packets, it will eventually wrap around to zero;
+ * in the long term, therefore, ordinals are not unique.)  The ordinals
+ * handed out by different IPPs are not disjoint, so two packets from
+ * different IPPs may have identical ordinals.  Packets dropped by the
+ * IPP or by the I/O shim are not assigned ordinals.
+ *
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP packet ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_ORDINAL(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_ORDINAL_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the per-group ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given a per-group ordinal number when it is
+ * delivered by the IPP. By default, the group is the packet's VLAN,
+ * although the IPP can be recompiled to use different values.  In
+ * the medium term, the ordinal is unique and monotonically
+ * increasing, being incremented by 1 for each packet; the ordinal of
+ * the first packet distributed to a particular group is zero.
+ * (Since the ordinal is of finite size, given enough input packets,
+ * it will eventually wrap around to zero; in the long term,
+ * therefore, ordinals are not unique.)  The ordinals handed out by
+ * different IPPs are not disjoint, so two packets from different IPPs
+ * may have identical ordinals; similarly, packets distributed to
+ * different groups may have identical ordinals.  Packets dropped by
+ * the IPP or by the I/O shim are not assigned ordinals.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP, per-group ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_GROUP_ORDINAL_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the VLAN ID assigned to the packet.
+ * @ingroup ingress
+ *
+ * This is usually also contained within the packet header.  If the packet
+ * does not have a VLAN tag, the VLAN ID returned by this function is zero.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's VLAN ID.
+ */
+static __inline unsigned short
+NETIO_PKT_VLAN_ID(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_VLAN_ID_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the ethertype of the packet.
+ * @ingroup ingress
+ *
+ * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
+ * returns true, and otherwise, may not be well defined.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's ethertype.
+ */
+static __inline unsigned short
+NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_ETHERTYPE_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the flow hash computed on the packet.
+ * @ingroup ingress
+ *
+ * For TCP and UDP packets, this hash is calculated by hashing together
+ * the "5-tuple" values, specifically the source IP address, destination
+ * IP address, protocol type, source port and destination port.
+ * The hash value is intended to be helpful for millions of distinct
+ * flows.
+ *
+ * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is
+ * derived by hashing together the source and destination IP addresses.
+ *
+ * For MPLS-encapsulated packets, the flow hash is derived by hashing
+ * the first MPLS label.
+ *
+ * For all other packets the flow hash is computed from the source
+ * and destination Ethernet addresses.
+ *
+ * The hash is symmetric, meaning it produces the same value if the
+ * source and destination are swapped. The only exceptions are
+ * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple
+ * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32
+ * (Encap Security Payload), which use only the destination address
+ * since the source address is not meaningful.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's 32-bit flow hash.
+ */
+static __inline unsigned int
+NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_FLOW_HASH_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the first word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first
+ * word of user data contains the least significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_low()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's first word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_USER_DATA_0_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Return the second word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second
+ * word of user data contains the most significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_high()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's second word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_USER_DATA_1_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L4 checksum was calculated.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_L4_CSUM_CALCULATED_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to
+ *  be correct.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_L4_CSUM_CORRECT_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L3 (IP) checksum was calculated.
+ */
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_L3_CSUM_CALCULATED_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated and found to be
+ *  correct.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_L3_CSUM_CORRECT_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Determine whether the Ethertype was recognized and L3 packet data was
+ *  processed.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the Ethertype was recognized and L3 packet data was
+ *   processed.
+ */
+static __inline unsigned int
+NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt)
+{
+  /* Look up the standard metadata and defer to the _M variant. */
+  return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(NETIO_PKT_METADATA(pkt), pkt);
+}
+
+
+/** Set an egress packet's L2 length, using a metadata pointer to speed the
+ * computation.
+ * @ingroup egress
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @param[in] len Packet L2 length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt,
+                           int len)
+{
+  /* Only the minimal metadata is written; pkt is not referenced. */
+  (void) pkt;
+
+  mmd->l2_length = len;
+}
+
+
+/** Set an egress packet's L2 length.
+ * @ingroup egress
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] len Packet L2 length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len)
+{
+  /* Look up the minimal metadata and defer to the _MM variant. */
+  NETIO_PKT_SET_L2_LENGTH_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt, len);
+}
+
+
+/** Set an egress packet's L2 header length, using a metadata pointer to
+ *  speed the computation.
+ * @ingroup egress
+ *
+ * It is not normally necessary to call this routine; only the L2 length,
+ * not the header length, is needed to transmit a packet.  It may be useful if
+ * the egress packet will later be processed by code which expects to use
+ * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @param[in] len Packet L2 header length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
+                                  netio_pkt_t* pkt, int len)
+{
+  /* The header length is not stored directly: it is recorded by placing
+   * the L3 offset len bytes past the L2 offset. */
+  mmd->l3_offset = len + mmd->l2_offset;
+}
+
+
+/** Set an egress packet's L2 header length.
+ * @ingroup egress
+ *
+ * It is not normally necessary to call this routine; only the L2 length,
+ * not the header length, is needed to transmit a packet.  It may be useful if
+ * the egress packet will later be processed by code which expects to use
+ * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] len Packet L2 header length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len)
+{
+  /* Look up the minimal metadata and defer to the _MM variant. */
+  NETIO_PKT_SET_L2_HEADER_LENGTH_MM(NETIO_PKT_MINIMAL_METADATA(pkt), pkt, len);
+}
+
+
+/** Set up an egress packet for hardware checksum computation, using a
+ *  metadata pointer to speed the operation.
+ * @ingroup egress
+ *
+ *  NetIO provides the ability to automatically calculate a standard
+ *  16-bit Internet checksum on transmitted packets.  The application
+ *  may specify the point in the packet where the checksum starts, the
+ *  number of bytes to be checksummed, and the two bytes in the packet
+ *  which will be replaced with the completed checksum.  (If the range
+ *  of bytes to be checksummed includes the bytes to be replaced, the
+ *  initial values of those bytes will be included in the checksum.)
+ *
+ *  For some protocols, the packet checksum covers data which is not present
+ *  in the packet, or is at least not contiguous to the main data payload.
+ *  For instance, the TCP checksum includes a "pseudo-header" which includes
+ *  the source and destination IP addresses of the packet.  To accommodate
+ *  this, the checksum engine may be "seeded" with an initial value, which
+ *  the application would need to compute based on the specific protocol's
+ *  requirements.  Note that the seed is given in host byte order (little-
+ *  endian), not network byte order (big-endian); code written to compute a
+ *  pseudo-header checksum in network byte order will need to byte-swap it
+ *  before use as the seed.
+ *
+ *  Note that the checksum is computed as part of the transmission process,
+ *  so it will not be present in the packet upon completion of this routine.
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate (not used by this routine).
+ * @param[in] start Offset within L2 packet of the first byte to include in
+ *   the checksum.
+ * @param[in] length Number of bytes to include in the checksum, counted
+ *   from @p start.
+ * @param[in] location Offset within L2 packet of the first of the two bytes
+ *   to be replaced with the calculated checksum.
+ * @param[in] seed Initial value of the running checksum before any of the
+ *   packet data is added.
+ */
+static __inline void
+NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd,
+                            netio_pkt_t* pkt, int start, int length,
+                            int location, uint16_t seed)
+{
+  mmd->csum_start = start;
+  mmd->csum_length = length;
+  mmd->csum_location = location;
+  mmd->csum_seed = seed;
+  mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK;
+}
+
+
+/** Set up an egress packet for hardware checksum computation.
+ * @ingroup egress
+ *
+ *  NetIO provides the ability to automatically calculate a standard
+ *  16-bit Internet checksum on transmitted packets.  The application
+ *  may specify the point in the packet where the checksum starts, the
+ *  number of bytes to be checksummed, and the two bytes in the packet
+ *  which will be replaced with the completed checksum.  (If the range
+ *  of bytes to be checksummed includes the bytes to be replaced, the
+ *  initial values of those bytes will be included in the checksum.)
+ *
+ *  For some protocols, the packet checksum covers data which is not present
+ *  in the packet, or is at least not contiguous to the main data payload.
+ *  For instance, the TCP checksum includes a "pseudo-header" which includes
+ *  the source and destination IP addresses of the packet.  To accommodate
+ *  this, the checksum engine may be "seeded" with an initial value, which
+ *  the application would need to compute based on the specific protocol's
+ *  requirements.  Note that the seed is given in host byte order (little-
+ *  endian), not network byte order (big-endian); code written to compute a
+ *  pseudo-header checksum in network byte order will need to byte-swap it
+ *  before use as the seed.
+ *
+ *  Note that the checksum is computed as part of the transmission process,
+ *  so it will not be present in the packet upon completion of this routine.
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] start Offset within L2 packet of the first byte to include in
+ *   the checksum.
+ * @param[in] length Number of bytes to include in the checksum, counted
+ *   from @p start.
+ * @param[in] location Offset within L2 packet of the first of the two bytes
+ *   to be replaced with the calculated checksum.
+ * @param[in] seed Initial value of the running checksum before any of the
+ *   packet data is added.
+ */
+static __inline void
+NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length,
+                         int location, uint16_t seed)
+{
+  netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+  /* Delegate to the variant that takes the metadata pointer directly. */
+  NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, pkt, start, length, location, seed);
+}
+
+
+/** Return the number of bytes which could be prepended to a packet, using a
+ *  metadata pointer to speed the operation.
+ *  See @ref netio_populate_prepend_buffer() for a description of prepending.
+ *
+ * @param[in,out] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Buffer offset field scaled by 64, plus the custom header length.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (pkt->__packet.bits.__offset << 6) +
+         NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Return the number of bytes which could be prepended to a packet, using a
+ *  metadata pointer to speed the operation.
+ *  See @ref netio_populate_prepend_buffer() for a description of prepending.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Buffer offset field scaled by 64, plus the L2 offset from @p mmd.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset;
+}
+
+
+/** Return the number of bytes which could be prepended to a packet.
+ *  See @ref netio_populate_prepend_buffer() for a description of prepending.
+ * @ingroup egress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Result of the _MM variant for minimal packets, else of the _M one.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt)
+{
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+
+    return NETIO_PKT_PREPEND_AVAIL_MM(mmd, pkt);
+  }
+  else
+  {
+    netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+    return NETIO_PKT_PREPEND_AVAIL_M(mda, pkt);
+  }
+}
+
+
+/** Flush a packet's minimal metadata from the cache, using a metadata pointer
+ *  to speed the operation.
+ *  This implementation has an empty body, so the call does nothing.
+ * @ingroup egress
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+                                    netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's minimal metadata from the cache, using a metadata
+ *  pointer to speed the operation.
+ *  This implementation has an empty body, so the call does nothing.
+ * @ingroup egress
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+                                  netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's minimal metadata from the cache,
+ *  using a metadata pointer to speed the operation.
+ *  This implementation has an empty body, so the call does nothing.
+ * @ingroup egress
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+                                        netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush a packet's metadata from the cache, using a metadata pointer
+ *  to speed the operation.
+ *  This implementation has an empty body, so the call does nothing.
+ * @ingroup ingress
+ * @param[in] mda Pointer to packet's metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's metadata from the cache, using a metadata
+ *  pointer to speed the operation.
+ *  This implementation has an empty body, so the call does nothing.
+ * @ingroup ingress
+ * @param[in] mda Pointer to packet's metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's metadata from the cache,
+ *  using a metadata pointer to speed the operation.
+ *  This implementation has an empty body, so the call does nothing.
+ * @ingroup ingress
+ * @param[in] mda Pointer to packet's metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush a packet's minimal metadata from the cache.
+ *  This implementation has an empty body, so the call does nothing.
+ * @ingroup egress
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's minimal metadata from the cache.
+ *  This implementation has an empty body, so the call does nothing.
+ * @ingroup egress
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's minimal metadata from the cache.
+ *  This implementation has an empty body, so the call does nothing.
+ * @ingroup egress
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush a packet's metadata from the cache.
+ *  This implementation has an empty body, so the call does nothing.
+ * @ingroup ingress
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's metadata from the cache.
+ *  This implementation has an empty body, so the call does nothing.
+ * @ingroup ingress
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_METADATA(netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's metadata from the cache.
+ *  This implementation has an empty body, so the call does nothing.
+ * @ingroup ingress
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt)
+{
+}
+
+/** Number of NUMA nodes we can distribute buffers to.
+ * @ingroup setup */
+#define NETIO_NUM_NODE_WEIGHTS  16
+
+/**
+ * @brief An object for specifying the characteristics of a NetIO
+ * communication endpoint.
+ *
+ * @ingroup setup
+ *
+ * The @ref netio_input_register() function uses this structure to define
+ * how an application tile will communicate with an IPP.
+ *
+ *
+ * Future updates to NetIO may add new members to this structure,
+ * which can affect the success of the registration operation.  Thus,
+ * if dynamically initializing the structure, applications are urged to
+ * zero it out first, for example:
+ *
+ * @code
+ * netio_input_config_t config;
+ * memset(&config, 0, sizeof (config));
+ * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE;
+ * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS;
+ * config.queue_id = 0;
+ *     .
+ *     .
+ *     .
+ * @endcode
+ *
+ * since that guarantees that any unused structure members, including
+ * members which did not exist when the application was first developed,
+ * will not have unexpected values.
+ *
+ * If statically initializing the structure, we strongly recommend use of
+ * C99-style named initializers, for example:
+ *
+ * @code
+ * netio_input_config_t config = {
+ *    .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE,
+ *    .num_receive_packets = NETIO_MAX_RECEIVE_PKTS,
+ *    .queue_id = 0,
+ * };
+ * @endcode
+ *
+ * instead of the old-style structure initialization:
+ *
+ * @code
+ * // Bad example! Currently equivalent to the above, but don't do this.
+ * netio_input_config_t config = {
+ *    NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0
+ * };
+ * @endcode
+ *
+ * since the C99 style requires no changes to the code if elements of the
+ * config structure are rearranged.  (It also makes the initialization much
+ * easier to understand.)
+ *
+ * Except for items which address a particular tile's transmit or receive
+ * characteristics, such as the ::NETIO_RECV flag, applications are advised
+ * to specify the same set of configuration data on all registrations.
+ * This prevents differing results if multiple tiles happen to do their
+ * registration operations in a different order on different invocations of
+ * the application.  This is particularly important for things like link
+ * management flags, and buffer size and homing specifications.
+ *
+ * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO
+ * buffer pool is automatically created and mapped into the application's
+ * virtual address space at an address chosen by the operating system,
+ * using the common memory (cmem) facility in the Tilera Multicore
+ * Components library.  The cmem facility allows multiple processes to gain
+ * access to shared memory which is mapped into each process at an
+ * identical virtual address.  In order for this to work, the processes
+ * must have a common ancestor, which must create the common memory using
+ * tmc_cmem_init().
+ *
+ * In programs using the iLib process creation API, or in programs which use
+ * only one process (which include programs using the pthreads library),
+ * tmc_cmem_init() is called automatically.  All other applications
+ * must call it explicitly, before any child processes which might call
+ * netio_input_register() are created.
+ */
+typedef struct
+{
+  /** Registration characteristics.
+
+      This value determines several characteristics of the registration;
+      flags for different types of behavior are ORed together to make the
+      final flag value.  Generally applications should specify exactly
+      one flag from each of the following categories:
+
+      - Whether the application will be receiving packets on this queue
+        (::NETIO_RECV or ::NETIO_NO_RECV).
+
+      - Whether the application will be transmitting packets on this queue,
+        and if so, whether it will request egress checksum calculation
+        (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT).  It is
+        legal to call netio_get_buffer() without one of the XMIT flags,
+        as long as ::NETIO_RECV is specified; in this case, the retrieved
+        buffers must be passed to another tile for transmission.
+
+      - Whether the application expects any vendor-specific tags in
+        its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM,
+        or ::NETIO_TAG_MRVL).  This must match the configuration of the
+        target IPP.
+
+      To accommodate applications written to previous versions of the NetIO
+      interface, none of the flags above are currently required; if omitted,
+      NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM |
+      ::NETIO_TAG_NONE were used.  However, explicit specification of
+      the relevant flags allows NetIO to do a better job of resource
+      allocation, allows earlier detection of certain configuration errors,
+      and may enable advanced features or higher performance in the future,
+      so their use is strongly recommended.
+
+      Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT
+      is a special case, intended primarily for use by programs which
+      retrieve network statistics or do link management operations.
+      When these flags are both specified, the resulting queue may not
+      be used with NetIO routines other than netio_get(), netio_set(),
+      and netio_input_unregister().  See @ref link for more information
+      on link management.
+
+      Other flags are optional; their use is described below.
+  */
+  int flags;
+
+  /** Interface name.  This is a string which identifies the specific
+      Ethernet controller hardware to be used.  The format of the string
+      is a device type and a device index, separated by a slash; so,
+      the first 10 Gigabit Ethernet controller is named "xgbe/0", while
+      the second 10/100/1000 Megabit Ethernet controller is named "gbe/1".
+   */
+  const char* interface;
+
+  /** Receive packet queue size.  This specifies the maximum number
+      of ingress packets that can be received on this queue without
+      being retrieved by @ref netio_get_packet().  If the IPP's distribution
+      algorithm calls for a packet to be sent to this queue, and this
+      number of packets are already pending there, the new packet
+      will either be discarded, or sent to another tile registered
+      for the same queue_id (see @ref drops).  This value must
+      be at least ::NETIO_MIN_RECEIVE_PKTS, can always be as large as
+      ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain
+      interfaces.
+   */
+  int num_receive_packets;
+
+  /** The queue ID being requested.  Legal values for this range from 0
+      to ::NETIO_MAX_QUEUE_ID, inclusive.  ::NETIO_MAX_QUEUE_ID is always
+      greater than or equal to the number of tiles; this allows one queue
+      for each tile, plus at least one additional queue.  Some applications
+      may wish to use the additional queue as a destination for unwanted
+      packets, since packets delivered to queues for which no tiles have
+      registered are discarded.
+   */
+  unsigned int queue_id;
+
+  /** Maximum number of small send buffers to be held in the local empty
+      buffer cache.  This specifies the size of the area which holds
+      empty small egress buffers requested from the IPP but not yet
+      retrieved via @ref netio_get_buffer().  This value must be greater
+      than zero if the application will ever use @ref netio_get_buffer()
+      to allocate empty small egress buffers; it may be no larger than
+      ::NETIO_MAX_SEND_BUFFERS.  See @ref epp for more details on empty
+      buffer caching.
+   */
+  int num_send_buffers_small_total;
+
+  /** Number of small send buffers to be preallocated at registration.
+      If this value is nonzero, the specified number of empty small egress
+      buffers will be requested from the IPP during the netio_input_register
+      operation; this may speed the execution of @ref netio_get_buffer().
+      This may be no larger than @ref num_send_buffers_small_total.  See @ref
+      epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_small_prealloc;
+
+  /** Maximum number of large send buffers to be held in the local empty
+      buffer cache.  This specifies the size of the area which holds empty
+      large egress buffers requested from the IPP but not yet retrieved via
+      @ref netio_get_buffer().  This value must be greater than zero if the
+      application will ever use @ref netio_get_buffer() to allocate empty
+      large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS.
+      See @ref epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_large_total;
+
+  /** Number of large send buffers to be preallocated at registration.
+      If this value is nonzero, the specified number of empty large egress
+      buffers will be requested from the IPP during the netio_input_register
+      operation; this may speed the execution of @ref netio_get_buffer().
+      This may be no larger than @ref num_send_buffers_large_total.  See @ref
+      epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_large_prealloc;
+
+  /** Maximum number of jumbo send buffers to be held in the local empty
+      buffer cache.  This specifies the size of the area which holds empty
+      jumbo egress buffers requested from the IPP but not yet retrieved via
+      @ref netio_get_buffer().  This value must be greater than zero if the
+      application will ever use @ref netio_get_buffer() to allocate empty
+      jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS.
+      See @ref epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_jumbo_total;
+
+  /** Number of jumbo send buffers to be preallocated at registration.
+      If this value is nonzero, the specified number of empty jumbo egress
+      buffers will be requested from the IPP during the netio_input_register
+      operation; this may speed the execution of @ref netio_get_buffer().
+      This may be no larger than @ref num_send_buffers_jumbo_total.  See @ref
+      epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_jumbo_prealloc;
+
+  /** Total packet buffer size.  This determines the total size, in bytes,
+      of the NetIO buffer pool.  Note that the maximum number of available
+      buffers of each size is determined during hypervisor configuration
+      (see the <em>System Programmer's Guide</em> for details); this just
+      influences how much host memory is allocated for those buffers.
+
+      The buffer pool is allocated from common memory, which will be
+      automatically initialized if needed.  If your buffer pool is larger
+      than 240 MB, you might need to explicitly call @c tmc_cmem_init(),
+      as described in the Application Libraries Reference Manual (UG227).
+
+      Packet buffers are currently allocated in chunks of 16 MB; this
+      value will be rounded up to the next larger multiple of 16 MB.
+      If this value is zero, a default of 32 MB will be used; this was
+      the value used by previous versions of NetIO.  Note that taking this
+      default also affects the placement of buffers on Linux NUMA nodes.
+      See @ref buffer_node_weights for an explanation of buffer placement.
+
+      In order to successfully allocate packet buffers, Linux must have
+      available huge pages on the relevant Linux NUMA nodes.  See the
+      <em>System Programmer's Guide</em> for information on configuring
+      huge page support in Linux.
+   */
+  uint64_t total_buffer_size;
+
+  /** Buffer placement weighting factors.
+
+      This array specifies the relative amount of buffering to place
+      on each of the available Linux NUMA nodes.  This array is
+      indexed by the NUMA node, and the values in the array are
+      proportional to the amount of buffer space to allocate on that
+      node.
+
+      If memory striping is enabled in the Hypervisor, then there is
+      only one logical NUMA node (node 0). In that case, NetIO will by
+      default ignore the suggested buffer node weights, and buffers
+      will be striped across the physical memory controllers. See
+      UG209 System Programmer's Guide for a description of the
+      hypervisor option that controls memory striping.
+
+      If memory striping is disabled, then there are up to four NUMA
+      nodes, corresponding to the four DDRAM controllers in the TILE
+      processor architecture.  See UG100 Tile Processor Architecture
+      Overview for a diagram showing the location of each of the DDRAM
+      controllers relative to the tile array.
+
+      For instance, if memory striping is disabled, the following
+      configuration structure:
+
+      @code
+      netio_input_config_t config = {
+            .
+            .
+            .
+        .total_buffer_size = 4 * 16 * 1024 * 1024,
+        .buffer_node_weights = { 1, 0, 1, 0 },
+      };
+      @endcode
+
+      would result in 32 MB of buffers being placed on controller 0, and
+      32 MB on controller 2.  (Since buffers are allocated in units of
+      16 MB, some sets of weights will not be able to be matched exactly.)
+
+      For the weights to be effective, @ref total_buffer_size must be
+      nonzero.  If @ref total_buffer_size is zero, causing the default
+      32 MB of buffer space to be used, then any specified weights will
+      be ignored, and buffers will be positioned as they were in previous
+      versions of NetIO:
+
+      - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1,
+        and the other 16 MB will be placed on controller 2.
+
+      - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2,
+        and the other 16 MB will be placed on controller 3.
+
+      If @ref total_buffer_size is nonzero, but all weights are zero,
+      then all buffer space will be allocated on Linux NUMA node zero.
+
+      By default, the specified buffer placement is treated as a hint;
+      if sufficient free memory is not available on the specified
+      controllers, the buffers will be allocated elsewhere.  However,
+      if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a
+      failure to allocate buffer space exactly as requested will cause the
+      registration operation to fail with an error of ::NETIO_CANNOT_HOME.
+
+      Note that maximal network performance cannot be achieved with
+      only one memory controller.
+   */
+  uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS];
+
+  /** Fixed virtual address for packet buffers.  Only valid when
+      ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the
+      description of that flag for details.
+   */
+  void* fixed_buffer_va;
+
+  /**
+      Maximum number of outstanding send packet requests.  This value is
+      only relevant when an EPP is in use; it determines the number of
+      slots in the EPP's outgoing packet queue which this tile is allowed
+      to consume, and thus the number of packets which may be sent before
+      the sending tile must wait for an acknowledgment from the EPP.
+      Modifying this value is generally only helpful when using @ref
+      netio_send_packet_vector(), where it can help improve performance by
+      allowing a single vector send operation to process more packets.
+      Typically it is not specified, and the default, which divides the
+      outgoing packet slots evenly between all tiles on the chip, is used.
+
+      If a registration asks for more outgoing packet queue slots than are
+      available, ::NETIO_TOOMANY_XMIT will be returned.  The total number
+      of packet queue slots which are available for all tiles for each EPP
+      is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING.
+
+
+      This value is ignored if ::NETIO_XMIT is not specified in flags.
+      If you want to specify a large value here for a specific tile, you are
+      advised to specify ::NETIO_NO_XMIT on other, non-transmitting tiles so
+      that they do not consume a default number of packet slots.  Any tile
+      transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING
+      slots allocated to it; values less than that will be silently
+      increased by the NetIO library.
+   */
+  int num_sends_outstanding;
+}
+netio_input_config_t;
+
+
+/** Registration flags; used in the @ref netio_input_config_t structure.
+ * @addtogroup setup
+ */
+/** @{ */
+
+/** Fail a registration request if we can't put packet buffers
+    on the specified memory controllers. */
+#define NETIO_STRICT_HOMING   0x00000002
+
+/** This application expects no tags on its L2 headers. */
+#define NETIO_TAG_NONE        0x00000004
+
+/** This application expects Marvell extended tags on its L2 headers. */
+#define NETIO_TAG_MRVL        0x00000008
+
+/** This application expects Broadcom tags on its L2 headers. */
+#define NETIO_TAG_BRCM        0x00000010
+
+/** This registration may call routines which receive packets. */
+#define NETIO_RECV            0x00000020
+
+/** This registration may not call routines which receive packets. */
+#define NETIO_NO_RECV         0x00000040
+
+/** This registration may call routines which transmit packets. */
+#define NETIO_XMIT            0x00000080
+
+/** This registration may call routines which transmit packets with
+    checksum acceleration. */
+#define NETIO_XMIT_CSUM       0x00000100
+
+/** This registration may not call routines which transmit packets. */
+#define NETIO_NO_XMIT         0x00000200
+
+/** This registration wants NetIO buffers mapped at an application-specified
+    virtual address.
+
+    NetIO buffers are by default created by the TMC common memory facility,
+    which must be configured by a common ancestor of all processes sharing
+    a network interface.  When this flag is specified, NetIO buffers are
+    instead mapped at an address chosen by the application (and specified
+    in @ref netio_input_config_t::fixed_buffer_va).  This allows multiple
+    unrelated but cooperating processes to share a NetIO interface.
+    All processes sharing the same interface must specify this flag,
+    and all must specify the same fixed virtual address.
+
+    @ref netio_input_config_t::fixed_buffer_va must be a
+    multiple of 16 MB, and the packet buffers will occupy @ref
+    netio_input_config_t::total_buffer_size bytes of virtual address
+    space, beginning at that address.  If any of those virtual addresses
+    are currently occupied by other memory objects, like application or
+    shared library code or data, @ref netio_input_register() will return
+    ::NETIO_FAULT.  While it is impossible to provide a fixed_buffer_va
+    which will work for all applications, a good first guess might be to
+    use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size.
+    If that fails, it might be helpful to consult the running application's
+    virtual address description file (/proc/<em>pid</em>/maps) to see
+    which regions of virtual address space are available.
+ */
+#define NETIO_FIXED_BUFFER_VA 0x00000400
+
+/** This registration call will not complete unless the network link
+    is up.  The process will wait several seconds for this to happen (the
+    precise interval is link-dependent), but if the link does not come up,
+    ::NETIO_LINK_DOWN will be returned.  This flag is the default if
+    ::NETIO_NOREQUIRE_LINK_UP is not specified.  Note that this flag by
+    itself does not request that the link be brought up; that can be done
+    with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the
+    latter is the default if no NETIO_AUTO_LINK_xxx flags are specified),
+    or by explicitly setting the link's desired state via netio_set().
+    If the link is not brought up by one of those methods, and this flag
+    is specified, the registration operation will return ::NETIO_LINK_DOWN.
+    This flag is ignored if it is specified along with ::NETIO_NO_XMIT and
+    ::NETIO_NO_RECV.  See @ref link for more information on link
+    management.
+ */
+#define NETIO_REQUIRE_LINK_UP    0x00000800
+
+/** This registration call will complete even if the network link is not up.
+    Whenever the link is not up, packets will not be sent or received:
+    netio_get_packet() will return ::NETIO_NOPKT once all queued packets
+    have been drained, and netio_send_packet() and similar routines will
+    return ::NETIO_QUEUE_FULL once the outgoing packet queue in the EPP
+    or the I/O shim is full.  See @ref link for more information on link
+    management.
+ */
+#define NETIO_NOREQUIRE_LINK_UP  0x00001000
+
+#ifndef __DOXYGEN__
+/*
+ * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags,
+ * but should not be used directly by applications, and are thus not
+ * documented.
+ */
+#define _NETIO_AUTO_UP        0x00002000
+#define _NETIO_AUTO_DN        0x00004000
+#define _NETIO_AUTO_PRESENT   0x00008000
+#endif
+
+/** Set the desired state of the link to up, allowing any speeds which are
+    supported by the link hardware, as part of this registration operation.
+    Do not take down the link automatically.  This is the default if
+    no other NETIO_AUTO_LINK_xxx flags are specified.  This flag is ignored
+    if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
+    See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_UP     (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP)
+
+/** Set the desired state of the link to up, allowing any speeds which are
+    supported by the link hardware, as part of this registration operation.
+    Set the desired state of the link to down the next time no tiles are
+    registered for packet reception or transmission.  This flag is ignored
+    if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
+    See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_UPDN   (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \
+                                _NETIO_AUTO_DN)
+
+/** Set the desired state of the link to down the next time no tiles are
+    registered for packet reception or transmission.  This flag is ignored
+    if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
+    See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_DN     (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN)
+
+/** Do not bring up the link automatically as part of this registration
+    operation.  Do not take down the link automatically.  This flag
+    is ignored if it is specified along with ::NETIO_NO_XMIT and
+    ::NETIO_NO_RECV.  See @ref link for more information on link management.
+  */
+#define NETIO_AUTO_LINK_NONE   _NETIO_AUTO_PRESENT
+
+
+/** Minimum number of receive packets. */
+#define NETIO_MIN_RECEIVE_PKTS            16
+
+/** Lower bound on the maximum number of receive packets; may be higher
+    than this on some interfaces. */
+#define NETIO_MAX_RECEIVE_PKTS           128
+
+/** Maximum number of send buffers, per packet size. */
+#define NETIO_MAX_SEND_BUFFERS            16
+
+/** Number of EPP queue slots, and thus outstanding sends, per EPP. */
+#define NETIO_TOTAL_SENDS_OUTSTANDING   2015
+
+/** Minimum number of EPP queue slots, and thus outstanding sends, per
+ *  transmitting tile. */
+#define NETIO_MIN_SENDS_OUTSTANDING       16
+
+
+/**@}*/
+
+#ifndef __DOXYGEN__
+
+/**
+ * An object for providing Ethernet packets to a process.
+ */
+struct __netio_queue_impl_t;
+
+/**
+ * An object for managing the user end of a NetIO queue.
+ */
+struct __netio_queue_user_impl_t;
+
+#endif /* !__DOXYGEN__ */
+
+
+/** A netio_queue_t describes a NetIO communications endpoint.
+ * @ingroup setup
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  uint8_t opaque[8];                 /**< This is an opaque structure. */
+#else
+  struct __netio_queue_impl_t* __system_part;    /**< The system part. */
+  struct __netio_queue_user_impl_t* __user_part; /**< The user part. */
+#ifdef _NETIO_PTHREAD
+  _netio_percpu_mutex_t lock;                    /**< Queue lock. */
+#endif /* _NETIO_PTHREAD */
+#endif /* __DOXYGEN__ */
+}
+netio_queue_t;
+
+
+/**
+ * @brief Packet send context.
+ *
+ * @ingroup egress
+ *
+ * Packet send context for use with netio_send_packet_prepare and _commit.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  uint8_t opaque[44];   /**< This is an opaque structure. */
+#else
+  uint8_t flags;        /**< SEND_PKT_CTX_xxx flags, defined below. */
+  uint8_t datalen;      /**< Number of valid words pointed to by data. */
+  uint32_t request[9];  /**< Request to be sent to the EPP or shim.  Note
+                             that this is smaller than the 11-word maximum
+                             request size, since some constant values are
+                             not saved in the context. */
+  uint32_t *data;       /**< Data to be sent to the EPP or shim via IDN. */
+#endif /* __DOXYGEN__ */
+}
+netio_send_pkt_context_t;
+
+
+#ifndef __DOXYGEN__
+#define SEND_PKT_CTX_USE_EPP   1  /**< We're sending to an EPP. */
+#define SEND_PKT_CTX_SEND_CSUM 2  /**< Request includes a checksum. */
+#endif
+
+/**
+ * @brief Packet vector entry.
+ *
+ * @ingroup egress
+ *
+ * This data structure is used with netio_send_packet_vector() to send multiple
+ * packets with one NetIO call.  The structure should be initialized by
+ * calling netio_pkt_vector_set(), rather than by setting the fields
+ * directly.
+ *
+ * This structure is guaranteed to be a power of two in size, no
+ * bigger than one L2 cache line, and to be aligned modulo its size.
+ */
+typedef struct
+#ifndef __DOXYGEN__
+__attribute__((aligned(8)))
+#endif
+{
+  /** Reserved for use by the user application.  When initialized with
+   *  the netio_pkt_vector_set() function, this field is guaranteed
+   *  to be visible to readers only after all other fields are already
+   *  visible.  This way it can be used as a valid flag or generation
+   *  counter. */
+  uint8_t user_data;
+
+  /* Structure members below this point should not be accessed directly by
+   * applications, as they may change in the future. */
+
+  /** Low 8 bits of the packet address to send.  The high bits are
+   *  acquired from the 'handle' field. */
+  uint8_t buffer_address_low;
+
+  /** Number of bytes to transmit. */
+  uint16_t size;
+
+  /** The raw handle from a netio_pkt_t.  If this is NETIO_PKT_HANDLE_NONE,
+   *  this vector entry will be skipped and no packet will be transmitted. */
+  netio_pkt_handle_t handle;
+}
+netio_pkt_vector_entry_t;
+
+
+/**
+ * @brief Initialize fields in a packet vector entry.
+ *
+ * @ingroup egress
+ *
+ * @param[out] v Pointer to the vector entry to be initialized.
+ * @param[in] pkt Packet to be transmitted when the vector entry is passed to
+ *        netio_send_packet_vector().  Note that the packet's attributes
+ *        (e.g., its L2 offset and length) are captured at the time this
+ *        routine is called; subsequent changes in those attributes will not
+ *        be reflected in the packet which is actually transmitted.
+ *        Changes in the packet's contents, however, will be so reflected.
+ *        If this is NULL, no packet will be transmitted.
+ * @param[in] user_data User data to be set in the vector entry.
+ *        This function guarantees that the "user_data" field will become
+ *        visible to a reader only after all other fields have become visible.
+ *        This allows a structure in a ring buffer to be written and read
+ *        by a polling reader without any locks or other synchronization.
+ */
+static __inline void
+netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt,
+                     uint8_t user_data)
+{
+  if (pkt)
+  {
+    if (NETIO_PKT_IS_MINIMAL(pkt))
+    {
+      netio_pkt_minimal_metadata_t* mmd =
+        (netio_pkt_minimal_metadata_t*) &pkt->__metadata;
+      v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF;
+      v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt);
+    }
+    else
+    {
+      netio_pkt_metadata_t* mda = &pkt->__metadata;
+      v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF;
+      v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt);
+    }
+    v->handle.word = pkt->__packet.word;
+  }
+  else
+  {
+    v->handle.word = 0;   /* Set handle to NETIO_PKT_HANDLE_NONE. */
+  }
+
+  __asm__("" : : : "memory");  /* Compiler barrier before the final store. */
+
+  v->user_data = user_data;    /* Stored last so readers can poll on it. */
+}
+
+
+/**
+ * Flags and structures for @ref netio_get() and @ref netio_set().
+ * @ingroup config
+ */
+
+/** @{ */
+/** Parameter class; addr is a NETIO_PARAM_xxx value. */
+#define NETIO_PARAM       0
+/** Interface MAC address. This address is only valid with @ref netio_get().
+ *  The value is a 6-byte MAC address.  Depending upon the overall system
+ *  design, a MAC address may or may not be available for each interface. */
+#define NETIO_PARAM_MAC        0
+
+/** Determine whether to suspend output on the receipt of pause frames.
+ *  If the value is nonzero, the I/O shim will suspend output when a pause
+ *  frame is received.  If the value is zero, pause frames will be ignored. */
+#define NETIO_PARAM_PAUSE_IN   1
+
+/** Determine whether to send pause frames if the I/O shim packet FIFOs are
+ *  nearly full.  If the value is zero, pause frames are not sent.  If
+ *  the value is nonzero, it is the delay value which will be sent in any
+ *  pause frames which are output, in units of 512 bit times. */
+#define NETIO_PARAM_PAUSE_OUT  2
+
+/** Jumbo frame support.  The value is a 4-byte integer.  If the value is
+ *  nonzero, the MAC will accept frames of up to 10240 bytes.  If the value
+ *  is zero, the MAC will only accept frames of up to 1544 bytes. */
+#define NETIO_PARAM_JUMBO      3
+
+/** I/O shim's overflow statistics register.  The value is two 16-bit integers.
+ *  The first 16-bit value (or the low 16 bits, if the value is treated as a
+ *  32-bit number) is the count of packets which were completely dropped and
+ *  not delivered by the shim.  The second 16-bit value (or the high 16 bits,
+ *  if the value is treated as a 32-bit number) is the count of packets
+ *  which were truncated and thus only partially delivered by the shim.  This
+ *  register is automatically reset to zero after it has been read.
+ */
+#define NETIO_PARAM_OVERFLOW   4
+
+/** IPP statistics.  This address is only valid with @ref netio_get().  The
+ *  value is a netio_stat_t structure.  Unlike the I/O shim statistics, the
+ *  IPP statistics are not all reset to zero on read; see the description
+ *  of the netio_stat_t for details. */
+#define NETIO_PARAM_STAT 5
+
+/** Possible link state.  The value is a combination of "NETIO_LINK_xxx"
+ *  flags.  With @ref netio_get(), this will indicate which flags are
+ *  actually supported by the hardware.
+ *
+ *  For historical reasons, specifying this value to netio_set() will have
+ *  the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is
+ *  discouraged.
+ */
+#define NETIO_PARAM_LINK_POSSIBLE_STATE 6
+
+/** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags.
+ *  With @ref netio_set(), this will attempt to immediately bring up the
+ *  link using whichever of the requested flags are supported by the
+ *  hardware, or take down the link if the flags are zero; if this is
+ *  not possible, an error will be returned.  Many programs will want
+ *  to use ::NETIO_PARAM_LINK_DESIRED_STATE instead.
+ *
+ *  For historical reasons, specifying this value to netio_get() will
+ *  have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE,
+ *  but this usage is discouraged.
+ */
+#define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE
+
+/** Current link state. This address is only valid with @ref netio_get().
+ *  The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together.
+ *  If the link is down, the value ANDed with NETIO_LINK_SPEED will be
+ *  zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will
+ *  result in exactly one of the NETIO_LINK_xxx values, indicating the
+ *  current speed. */
+#define NETIO_PARAM_LINK_CURRENT_STATE 7
+
+/** Variant symbol for current state, retained for compatibility with
+ *  pre-MDE-2.1 programs. */
+#define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE
+
+/** Packet Coherence protocol. This address is only valid with @ref netio_get().
+ *  The value is nonzero if the interface is configured for cache-coherent DMA.
+ */
+#define NETIO_PARAM_COHERENT 8
+
+/** Desired link state. The value is a combination of "NETIO_LINK_xxx"
+ *  flags, which specify the desired state for the link.  With @ref
+ *  netio_set(), this will, in the background, attempt to bring up the link
+ *  using whichever of the requested flags are reasonable, or take down the
+ *  link if the flags are zero.  The actual link up or down operation may
+ *  happen after this call completes.  If the link state changes in the
+ *  future, the system will continue to try to get back to the desired link
+ *  state; for instance, if the link is brought up successfully, and then
+ *  the network cable is disconnected, the link will go down.  However, the
+ *  desired state of the link is still up, so if the cable is reconnected,
+ *  the link will be brought up again.
+ *
+ *  With @ref netio_get(), this will indicate the desired state for the
+ *  link, as set with a previous netio_set() call, or implicitly by a
+ *  netio_input_register() or netio_input_unregister() operation.  This may
+ *  not reflect the current state of the link; to get that, use
+ *  ::NETIO_PARAM_LINK_CURRENT_STATE. */
+#define NETIO_PARAM_LINK_DESIRED_STATE 9
+
+/** NetIO statistics structure.  Retrieved using the ::NETIO_PARAM_STAT
+ *  address passed to @ref netio_get(). */
+typedef struct
+{
+  /** Number of packets which have been received by the IPP and forwarded
+   *  to a tile's receive queue for processing.  This value wraps at its
+   *  maximum, and is not cleared upon read. */
+  uint32_t packets_received;
+
+  /** Number of packets which have been dropped by the IPP, because they could
+   *  not be received, or could not be forwarded to a tile.  The former happens
+   *  when the IPP does not have a free packet buffer of suitable size for an
+   *  incoming frame.  The latter happens when all potential destination tiles
+   *  for a packet, as defined by the group, bucket, and queue configuration,
+   *  have full receive queues.   This value wraps at its maximum, and is not
+   *  cleared upon read. */
+  uint32_t packets_dropped;
+
+  /*
+   * Note: the #defines after each of the following four one-byte values
+   * denote their location within the third word of the netio_stat_t.  They
+   * are intended for use only by the IPP implementation and are thus omitted
+   * from the Doxygen output.
+   */
+
+  /** Number of packets dropped because no worker was able to accept a new
+   *  packet.  This value saturates at its maximum, and is cleared upon
+   *  read. */
+  uint8_t drops_no_worker;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_WORKER   0
+#endif /* !__DOXYGEN__ */
+
+  /** Number of packets dropped because no small buffers were available.
+   *  This value saturates at its maximum, and is cleared upon read. */
+  uint8_t drops_no_smallbuf;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_SMALLBUF 1
+#endif /* !__DOXYGEN__ */
+
+  /** Number of packets dropped because no large buffers were available.
+   *  This value saturates at its maximum, and is cleared upon read. */
+  uint8_t drops_no_largebuf;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_LARGEBUF 2
+#endif /* !__DOXYGEN__ */
+
+  /** Number of packets dropped because no jumbo buffers were available.
+   *  This value saturates at its maximum, and is cleared upon read. */
+  uint8_t drops_no_jumbobuf;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_JUMBOBUF 3
+#endif /* !__DOXYGEN__ */
+}
+netio_stat_t;
+
+
+/** Link can run, should run, or is running at 10 Mbps. */
+#define NETIO_LINK_10M         0x01
+
+/** Link can run, should run, or is running at 100 Mbps. */
+#define NETIO_LINK_100M        0x02
+
+/** Link can run, should run, or is running at 1 Gbps. */
+#define NETIO_LINK_1G          0x04
+
+/** Link can run, should run, or is running at 10 Gbps. */
+#define NETIO_LINK_10G         0x08
+
+/** Link should run at the highest speed supported by the link and by
+ *  the device connected to the link.  Only usable as a value for
+ *  the link's desired state; never returned as a value for the current
+ *  or possible states. */
+#define NETIO_LINK_ANYSPEED    0x10
+
+/** All legal link speeds. */
+#define NETIO_LINK_SPEED  (NETIO_LINK_10M  | \
+                           NETIO_LINK_100M | \
+                           NETIO_LINK_1G   | \
+                           NETIO_LINK_10G  | \
+                           NETIO_LINK_ANYSPEED)
+
+
+/** MAC register class.  Addr is a register offset within the MAC.
+ *  Registers within the XGbE and GbE MACs are documented in the Tile
+ *  Processor I/O Device Guide (UG104). MAC registers start at address
+ *  0x4000, and do not include the MAC_INTERFACE registers. */
+#define NETIO_MAC             1
+
+/** MDIO register class (IEEE 802.3 clause 22 format).  Addr is the "addr"
+ *  member of a netio_mdio_addr_t structure. */
+#define NETIO_MDIO            2
+
+/** MDIO register class (IEEE 802.3 clause 45 format).  Addr is the "addr"
+ *  member of a netio_mdio_addr_t structure. */
+#define NETIO_MDIO_CLAUSE45   3
+
+/** NetIO MDIO address type.  Retrieved or provided using the ::NETIO_MDIO
+ *  address passed to @ref netio_get() or @ref netio_set(). */
+typedef union
+{
+  struct
+  {
+    unsigned int reg:16;  /**< MDIO register offset.  For clause 22 access,
+                               must be less than 32. */
+    unsigned int phy:5;   /**< Which MDIO PHY to access. */
+    unsigned int dev:5;   /**< Which MDIO device to access within that PHY.
+                               Applicable for clause 45 access only; ignored
+                               for clause 22 access. */
+  }
+  bits;                   /**< Container for the reg/phy/dev bitfields. */
+  uint64_t addr;          /**< Value to pass to @ref netio_get() or
+                           *   @ref netio_set(). */
+}
+netio_mdio_addr_t;
+
+/** @} */
+
+#endif /* __NETIO_INTF_H__ */
diff --git a/arch/tile/include/hv/syscall_public.h b/arch/tile/include/hv/syscall_public.h
new file mode 100644
index 00000000..9cc0837e
--- /dev/null
+++ b/arch/tile/include/hv/syscall_public.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file syscall_public.h
+ * Indices for the hypervisor system calls that are intended to be called
+ * directly, rather than only through hypervisor-generated "glue" code.
+ */
+
+#ifndef _SYS_HV_INCLUDE_SYSCALL_PUBLIC_H
+#define _SYS_HV_INCLUDE_SYSCALL_PUBLIC_H
+
+/** Fast syscall flag bit location.  When this bit is set, the hypervisor
+ *  handles the syscall specially.
+ */
+#define HV_SYS_FAST_SHIFT                 14
+
+/** Fast syscall flag bit mask. */
+#define HV_SYS_FAST_MASK                  (1 << HV_SYS_FAST_SHIFT)
+
+/** Bit location for flagging fast syscalls that can be called from PL0. */
+#define HV_SYS_FAST_PLO_SHIFT             13
+
+/** Fast syscall allowing PL0 bit mask. */
+#define HV_SYS_FAST_PL0_MASK              (1 << HV_SYS_FAST_PLO_SHIFT)
+
+/** Perform an MF that waits for all victims to reach DRAM. */
+#define HV_SYS_fence_incoherent         (51 | HV_SYS_FAST_MASK \
+                                       | HV_SYS_FAST_PL0_MASK)
+
+#endif /* !_SYS_HV_INCLUDE_SYSCALL_PUBLIC_H */
author	Srikant Patnaik	2015-01-11 12:28:04 +0530
committer	Srikant Patnaik	2015-01-11 12:28:04 +0530
commit	871480933a1c28f8a9fed4c4d34d06c439a7a422 (patch)
tree	8718f573808810c2a1e8cb8fb6ac469093ca2784 /arch/tile/include
parent	9d40ac5867b9aefe0722bc1f110b965ff294d30d (diff)
download	FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.tar.gz FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.tar.bz2 FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.zip