diff options
Diffstat (limited to 'arch/tile/kernel')
42 files changed, 19458 insertions, 0 deletions
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile new file mode 100644 index 00000000..b4dbc057 --- /dev/null +++ b/arch/tile/kernel/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for the Linux/TILE kernel. +# + +extra-y := vmlinux.lds head_$(BITS).o +obj-y := backtrace.o entry.o init_task.o irq.o messaging.o \ + pci-dma.o proc.o process.o ptrace.o reboot.o \ + setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \ + intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o + +obj-$(CONFIG_HARDWALL) += hardwall.o +obj-$(CONFIG_TILEGX) += futex_64.o +obj-$(CONFIG_COMPAT) += compat.o compat_signal.o +obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_PCI) += pci.o diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c new file mode 100644 index 00000000..01ddf19c --- /dev/null +++ b/arch/tile/kernel/asm-offsets.c @@ -0,0 +1,76 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Generates definitions from c-type structures used by assembly sources. + */ + +#include <linux/kbuild.h> +#include <linux/thread_info.h> +#include <linux/sched.h> +#include <linux/hardirq.h> +#include <linux/ptrace.h> +#include <hv/hypervisor.h> + +/* Check for compatible compiler early in the build. */ +#ifdef CONFIG_TILEGX +# ifndef __tilegx__ +# error Can only build TILE-Gx configurations with tilegx compiler +# endif +# ifndef __LP64__ +# error Must not specify -m32 when building the TILE-Gx kernel +# endif +#else +# ifdef __tilegx__ +# error Can not build TILEPro/TILE64 configurations with tilegx compiler +# endif +#endif + +void foo(void) +{ + DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET, \ + offsetof(struct single_step_state, buffer)); + DEFINE(SINGLESTEP_STATE_FLAGS_OFFSET, \ + offsetof(struct single_step_state, flags)); + DEFINE(SINGLESTEP_STATE_ORIG_PC_OFFSET, \ + offsetof(struct single_step_state, orig_pc)); + DEFINE(SINGLESTEP_STATE_NEXT_PC_OFFSET, \ + offsetof(struct single_step_state, next_pc)); + DEFINE(SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET, \ + offsetof(struct single_step_state, branch_next_pc)); + DEFINE(SINGLESTEP_STATE_UPDATE_VALUE_OFFSET, \ + offsetof(struct single_step_state, update_value)); + + DEFINE(THREAD_INFO_TASK_OFFSET, \ + offsetof(struct thread_info, task)); + DEFINE(THREAD_INFO_FLAGS_OFFSET, \ + offsetof(struct thread_info, flags)); + DEFINE(THREAD_INFO_STATUS_OFFSET, \ + offsetof(struct thread_info, status)); + DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET, \ + offsetof(struct thread_info, homecache_cpu)); + DEFINE(THREAD_INFO_STEP_STATE_OFFSET, \ + offsetof(struct thread_info, step_state)); + + DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET, + offsetof(struct task_struct, thread.ksp)); + DEFINE(TASK_STRUCT_THREAD_PC_OFFSET, + offsetof(struct task_struct, thread.pc)); + + DEFINE(HV_TOPOLOGY_WIDTH_OFFSET, \ + offsetof(HV_Topology, width)); + DEFINE(HV_TOPOLOGY_HEIGHT_OFFSET, \ + offsetof(HV_Topology, height)); + + DEFINE(IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET, \ + offsetof(irq_cpustat_t, irq_syscall_count)); +} diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c new file mode 100644 index 00000000..9092ce8a --- /dev/null +++ b/arch/tile/kernel/backtrace.c @@ -0,0 +1,678 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/kernel.h> +#include <linux/string.h> +#include <asm/backtrace.h> +#include <asm/tile-desc.h> +#include <arch/abi.h> + +#ifdef __tilegx__ +#define TILE_MAX_INSTRUCTIONS_PER_BUNDLE TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE +#define tile_decoded_instruction tilegx_decoded_instruction +#define tile_mnemonic tilegx_mnemonic +#define parse_insn_tile parse_insn_tilegx +#define TILE_OPC_IRET TILEGX_OPC_IRET +#define TILE_OPC_ADDI TILEGX_OPC_ADDI +#define TILE_OPC_ADDLI TILEGX_OPC_ADDLI +#define TILE_OPC_INFO TILEGX_OPC_INFO +#define TILE_OPC_INFOL TILEGX_OPC_INFOL +#define TILE_OPC_JRP TILEGX_OPC_JRP +#define TILE_OPC_MOVE TILEGX_OPC_MOVE +#define OPCODE_STORE TILEGX_OPC_ST +typedef long long bt_int_reg_t; +#else +#define TILE_MAX_INSTRUCTIONS_PER_BUNDLE TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE +#define tile_decoded_instruction tilepro_decoded_instruction +#define tile_mnemonic tilepro_mnemonic +#define parse_insn_tile parse_insn_tilepro +#define TILE_OPC_IRET TILEPRO_OPC_IRET +#define TILE_OPC_ADDI TILEPRO_OPC_ADDI +#define TILE_OPC_ADDLI TILEPRO_OPC_ADDLI +#define TILE_OPC_INFO TILEPRO_OPC_INFO +#define TILE_OPC_INFOL TILEPRO_OPC_INFOL +#define TILE_OPC_JRP TILEPRO_OPC_JRP +#define TILE_OPC_MOVE TILEPRO_OPC_MOVE +#define OPCODE_STORE TILEPRO_OPC_SW +typedef int bt_int_reg_t; +#endif + +/* A decoded bundle used for backtracer analysis. */ +struct BacktraceBundle { + tile_bundle_bits bits; + int num_insns; + struct tile_decoded_instruction + insns[TILE_MAX_INSTRUCTIONS_PER_BUNDLE]; +}; + + +/* Locates an instruction inside the given bundle that + * has the specified mnemonic, and whose first 'num_operands_to_match' + * operands exactly match those in 'operand_values'. + */ +static const struct tile_decoded_instruction *find_matching_insn( + const struct BacktraceBundle *bundle, + tile_mnemonic mnemonic, + const int *operand_values, + int num_operands_to_match) +{ + int i, j; + bool match; + + for (i = 0; i < bundle->num_insns; i++) { + const struct tile_decoded_instruction *insn = + &bundle->insns[i]; + + if (insn->opcode->mnemonic != mnemonic) + continue; + + match = true; + for (j = 0; j < num_operands_to_match; j++) { + if (operand_values[j] != insn->operand_values[j]) { + match = false; + break; + } + } + + if (match) + return insn; + } + + return NULL; +} + +/* Does this bundle contain an 'iret' instruction? */ +static inline bool bt_has_iret(const struct BacktraceBundle *bundle) +{ + return find_matching_insn(bundle, TILE_OPC_IRET, NULL, 0) != NULL; +} + +/* Does this bundle contain an 'addi sp, sp, OFFSET' or + * 'addli sp, sp, OFFSET' instruction, and if so, what is OFFSET? + */ +static bool bt_has_addi_sp(const struct BacktraceBundle *bundle, int *adjust) +{ + static const int vals[2] = { TREG_SP, TREG_SP }; + + const struct tile_decoded_instruction *insn = + find_matching_insn(bundle, TILE_OPC_ADDI, vals, 2); + if (insn == NULL) + insn = find_matching_insn(bundle, TILE_OPC_ADDLI, vals, 2); +#ifdef __tilegx__ + if (insn == NULL) + insn = find_matching_insn(bundle, TILEGX_OPC_ADDXLI, vals, 2); + if (insn == NULL) + insn = find_matching_insn(bundle, TILEGX_OPC_ADDXI, vals, 2); +#endif + if (insn == NULL) + return false; + + *adjust = insn->operand_values[2]; + return true; +} + +/* Does this bundle contain any 'info OP' or 'infol OP' + * instruction, and if so, what are their OP? Note that OP is interpreted + * as an unsigned value by this code since that's what the caller wants. + * Returns the number of info ops found. + */ +static int bt_get_info_ops(const struct BacktraceBundle *bundle, + int operands[MAX_INFO_OPS_PER_BUNDLE]) +{ + int num_ops = 0; + int i; + + for (i = 0; i < bundle->num_insns; i++) { + const struct tile_decoded_instruction *insn = + &bundle->insns[i]; + + if (insn->opcode->mnemonic == TILE_OPC_INFO || + insn->opcode->mnemonic == TILE_OPC_INFOL) { + operands[num_ops++] = insn->operand_values[0]; + } + } + + return num_ops; +} + +/* Does this bundle contain a jrp instruction, and if so, to which + * register is it jumping? + */ +static bool bt_has_jrp(const struct BacktraceBundle *bundle, int *target_reg) +{ + const struct tile_decoded_instruction *insn = + find_matching_insn(bundle, TILE_OPC_JRP, NULL, 0); + if (insn == NULL) + return false; + + *target_reg = insn->operand_values[0]; + return true; +} + +/* Does this bundle modify the specified register in any way? */ +static bool bt_modifies_reg(const struct BacktraceBundle *bundle, int reg) +{ + int i, j; + for (i = 0; i < bundle->num_insns; i++) { + const struct tile_decoded_instruction *insn = + &bundle->insns[i]; + + if (insn->opcode->implicitly_written_register == reg) + return true; + + for (j = 0; j < insn->opcode->num_operands; j++) + if (insn->operands[j]->is_dest_reg && + insn->operand_values[j] == reg) + return true; + } + + return false; +} + +/* Does this bundle modify sp? */ +static inline bool bt_modifies_sp(const struct BacktraceBundle *bundle) +{ + return bt_modifies_reg(bundle, TREG_SP); +} + +/* Does this bundle modify lr? */ +static inline bool bt_modifies_lr(const struct BacktraceBundle *bundle) +{ + return bt_modifies_reg(bundle, TREG_LR); +} + +/* Does this bundle contain the instruction 'move fp, sp'? */ +static inline bool bt_has_move_r52_sp(const struct BacktraceBundle *bundle) +{ + static const int vals[2] = { 52, TREG_SP }; + return find_matching_insn(bundle, TILE_OPC_MOVE, vals, 2) != NULL; +} + +/* Does this bundle contain a store of lr to sp? */ +static inline bool bt_has_sw_sp_lr(const struct BacktraceBundle *bundle) +{ + static const int vals[2] = { TREG_SP, TREG_LR }; + return find_matching_insn(bundle, OPCODE_STORE, vals, 2) != NULL; +} + +#ifdef __tilegx__ +/* Track moveli values placed into registers. */ +static inline void bt_update_moveli(const struct BacktraceBundle *bundle, + int moveli_args[]) +{ + int i; + for (i = 0; i < bundle->num_insns; i++) { + const struct tile_decoded_instruction *insn = + &bundle->insns[i]; + + if (insn->opcode->mnemonic == TILEGX_OPC_MOVELI) { + int reg = insn->operand_values[0]; + moveli_args[reg] = insn->operand_values[1]; + } + } +} + +/* Does this bundle contain an 'add sp, sp, reg' instruction + * from a register that we saw a moveli into, and if so, what + * is the value in the register? + */ +static bool bt_has_add_sp(const struct BacktraceBundle *bundle, int *adjust, + int moveli_args[]) +{ + static const int vals[2] = { TREG_SP, TREG_SP }; + + const struct tile_decoded_instruction *insn = + find_matching_insn(bundle, TILEGX_OPC_ADDX, vals, 2); + if (insn) { + int reg = insn->operand_values[2]; + if (moveli_args[reg]) { + *adjust = moveli_args[reg]; + return true; + } + } + return false; +} +#endif + +/* Locates the caller's PC and SP for a program starting at the + * given address. + */ +static void find_caller_pc_and_caller_sp(CallerLocation *location, + const unsigned long start_pc, + BacktraceMemoryReader read_memory_func, + void *read_memory_func_extra) +{ + /* Have we explicitly decided what the sp is, + * rather than just the default? + */ + bool sp_determined = false; + + /* Has any bundle seen so far modified lr? */ + bool lr_modified = false; + + /* Have we seen a move from sp to fp? */ + bool sp_moved_to_r52 = false; + + /* Have we seen a terminating bundle? */ + bool seen_terminating_bundle = false; + + /* Cut down on round-trip reading overhead by reading several + * bundles at a time. + */ + tile_bundle_bits prefetched_bundles[32]; + int num_bundles_prefetched = 0; + int next_bundle = 0; + unsigned long pc; + +#ifdef __tilegx__ + /* Naively try to track moveli values to support addx for -m32. */ + int moveli_args[TILEGX_NUM_REGISTERS] = { 0 }; +#endif + + /* Default to assuming that the caller's sp is the current sp. + * This is necessary to handle the case where we start backtracing + * right at the end of the epilog. + */ + location->sp_location = SP_LOC_OFFSET; + location->sp_offset = 0; + + /* Default to having no idea where the caller PC is. */ + location->pc_location = PC_LOC_UNKNOWN; + + /* Don't even try if the PC is not aligned. */ + if (start_pc % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0) + return; + + for (pc = start_pc;; pc += sizeof(tile_bundle_bits)) { + + struct BacktraceBundle bundle; + int num_info_ops, info_operands[MAX_INFO_OPS_PER_BUNDLE]; + int one_ago, jrp_reg; + bool has_jrp; + + if (next_bundle >= num_bundles_prefetched) { + /* Prefetch some bytes, but don't cross a page + * boundary since that might cause a read failure we + * don't care about if we only need the first few + * bytes. Note: we don't care what the actual page + * size is; using the minimum possible page size will + * prevent any problems. + */ + unsigned int bytes_to_prefetch = 4096 - (pc & 4095); + if (bytes_to_prefetch > sizeof prefetched_bundles) + bytes_to_prefetch = sizeof prefetched_bundles; + + if (!read_memory_func(prefetched_bundles, pc, + bytes_to_prefetch, + read_memory_func_extra)) { + if (pc == start_pc) { + /* The program probably called a bad + * address, such as a NULL pointer. + * So treat this as if we are at the + * start of the function prolog so the + * backtrace will show how we got here. + */ + location->pc_location = PC_LOC_IN_LR; + return; + } + + /* Unreadable address. Give up. */ + break; + } + + next_bundle = 0; + num_bundles_prefetched = + bytes_to_prefetch / sizeof(tile_bundle_bits); + } + + /* Decode the next bundle. */ + bundle.bits = prefetched_bundles[next_bundle++]; + bundle.num_insns = + parse_insn_tile(bundle.bits, pc, bundle.insns); + num_info_ops = bt_get_info_ops(&bundle, info_operands); + + /* First look at any one_ago info ops if they are interesting, + * since they should shadow any non-one-ago info ops. + */ + for (one_ago = (pc != start_pc) ? 1 : 0; + one_ago >= 0; one_ago--) { + int i; + for (i = 0; i < num_info_ops; i++) { + int info_operand = info_operands[i]; + if (info_operand < CALLER_UNKNOWN_BASE) { + /* Weird; reserved value, ignore it. */ + continue; + } + + /* Skip info ops which are not in the + * "one_ago" mode we want right now. + */ + if (((info_operand & ONE_BUNDLE_AGO_FLAG) != 0) + != (one_ago != 0)) + continue; + + /* Clear the flag to make later checking + * easier. */ + info_operand &= ~ONE_BUNDLE_AGO_FLAG; + + /* Default to looking at PC_IN_LR_FLAG. */ + if (info_operand & PC_IN_LR_FLAG) + location->pc_location = + PC_LOC_IN_LR; + else + location->pc_location = + PC_LOC_ON_STACK; + + switch (info_operand) { + case CALLER_UNKNOWN_BASE: + location->pc_location = PC_LOC_UNKNOWN; + location->sp_location = SP_LOC_UNKNOWN; + return; + + case CALLER_SP_IN_R52_BASE: + case CALLER_SP_IN_R52_BASE | PC_IN_LR_FLAG: + location->sp_location = SP_LOC_IN_R52; + return; + + default: + { + const unsigned int val = info_operand + - CALLER_SP_OFFSET_BASE; + const unsigned int sp_offset = + (val >> NUM_INFO_OP_FLAGS) * 8; + if (sp_offset < 32768) { + /* This is a properly encoded + * SP offset. */ + location->sp_location = + SP_LOC_OFFSET; + location->sp_offset = + sp_offset; + return; + } else { + /* This looked like an SP + * offset, but it's outside + * the legal range, so this + * must be an unrecognized + * info operand. Ignore it. + */ + } + } + break; + } + } + } + + if (seen_terminating_bundle) { + /* We saw a terminating bundle during the previous + * iteration, so we were only looking for an info op. + */ + break; + } + + if (bundle.bits == 0) { + /* Wacky terminating bundle. Stop looping, and hope + * we've already seen enough to find the caller. + */ + break; + } + + /* + * Try to determine caller's SP. + */ + + if (!sp_determined) { + int adjust; + if (bt_has_addi_sp(&bundle, &adjust) +#ifdef __tilegx__ + || bt_has_add_sp(&bundle, &adjust, moveli_args) +#endif + ) { + location->sp_location = SP_LOC_OFFSET; + + if (adjust <= 0) { + /* We are in prolog about to adjust + * SP. */ + location->sp_offset = 0; + } else { + /* We are in epilog restoring SP. */ + location->sp_offset = adjust; + } + + sp_determined = true; + } else { + if (bt_has_move_r52_sp(&bundle)) { + /* Maybe in prolog, creating an + * alloca-style frame. But maybe in + * the middle of a fixed-size frame + * clobbering r52 with SP. + */ + sp_moved_to_r52 = true; + } + + if (bt_modifies_sp(&bundle)) { + if (sp_moved_to_r52) { + /* We saw SP get saved into + * r52 earlier (or now), which + * must have been in the + * prolog, so we now know that + * SP is still holding the + * caller's sp value. + */ + location->sp_location = + SP_LOC_OFFSET; + location->sp_offset = 0; + } else { + /* Someone must have saved + * aside the caller's SP value + * into r52, so r52 holds the + * current value. + */ + location->sp_location = + SP_LOC_IN_R52; + } + sp_determined = true; + } + } + +#ifdef __tilegx__ + /* Track moveli arguments for -m32 mode. */ + bt_update_moveli(&bundle, moveli_args); +#endif + } + + if (bt_has_iret(&bundle)) { + /* This is a terminating bundle. */ + seen_terminating_bundle = true; + continue; + } + + /* + * Try to determine caller's PC. + */ + + jrp_reg = -1; + has_jrp = bt_has_jrp(&bundle, &jrp_reg); + if (has_jrp) + seen_terminating_bundle = true; + + if (location->pc_location == PC_LOC_UNKNOWN) { + if (has_jrp) { + if (jrp_reg == TREG_LR && !lr_modified) { + /* Looks like a leaf function, or else + * lr is already restored. */ + location->pc_location = + PC_LOC_IN_LR; + } else { + location->pc_location = + PC_LOC_ON_STACK; + } + } else if (bt_has_sw_sp_lr(&bundle)) { + /* In prolog, spilling initial lr to stack. */ + location->pc_location = PC_LOC_IN_LR; + } else if (bt_modifies_lr(&bundle)) { + lr_modified = true; + } + } + } +} + +/* Initializes a backtracer to start from the given location. + * + * If the frame pointer cannot be determined it is set to -1. + * + * state: The state to be filled in. + * read_memory_func: A callback that reads memory. + * read_memory_func_extra: An arbitrary argument to read_memory_func. + * pc: The current PC. + * lr: The current value of the 'lr' register. + * sp: The current value of the 'sp' register. + * r52: The current value of the 'r52' register. + */ +void backtrace_init(BacktraceIterator *state, + BacktraceMemoryReader read_memory_func, + void *read_memory_func_extra, + unsigned long pc, unsigned long lr, + unsigned long sp, unsigned long r52) +{ + CallerLocation location; + unsigned long fp, initial_frame_caller_pc; + + /* Find out where we are in the initial frame. */ + find_caller_pc_and_caller_sp(&location, pc, + read_memory_func, read_memory_func_extra); + + switch (location.sp_location) { + case SP_LOC_UNKNOWN: + /* Give up. */ + fp = -1; + break; + + case SP_LOC_IN_R52: + fp = r52; + break; + + case SP_LOC_OFFSET: + fp = sp + location.sp_offset; + break; + + default: + /* Give up. */ + fp = -1; + break; + } + + /* If the frame pointer is not aligned to the basic word size + * something terrible happened and we should mark it as invalid. + */ + if (fp % sizeof(bt_int_reg_t) != 0) + fp = -1; + + /* -1 means "don't know initial_frame_caller_pc". */ + initial_frame_caller_pc = -1; + + switch (location.pc_location) { + case PC_LOC_UNKNOWN: + /* Give up. */ + fp = -1; + break; + + case PC_LOC_IN_LR: + if (lr == 0 || lr % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0) { + /* Give up. */ + fp = -1; + } else { + initial_frame_caller_pc = lr; + } + break; + + case PC_LOC_ON_STACK: + /* Leave initial_frame_caller_pc as -1, + * meaning check the stack. + */ + break; + + default: + /* Give up. */ + fp = -1; + break; + } + + state->pc = pc; + state->sp = sp; + state->fp = fp; + state->initial_frame_caller_pc = initial_frame_caller_pc; + state->read_memory_func = read_memory_func; + state->read_memory_func_extra = read_memory_func_extra; +} + +/* Handle the case where the register holds more bits than the VA. */ +static bool valid_addr_reg(bt_int_reg_t reg) +{ + return ((unsigned long)reg == reg); +} + +/* Advances the backtracing state to the calling frame, returning + * true iff successful. + */ +bool backtrace_next(BacktraceIterator *state) +{ + unsigned long next_fp, next_pc; + bt_int_reg_t next_frame[2]; + + if (state->fp == -1) { + /* No parent frame. */ + return false; + } + + /* Try to read the frame linkage data chaining to the next function. */ + if (!state->read_memory_func(&next_frame, state->fp, sizeof next_frame, + state->read_memory_func_extra)) { + return false; + } + + next_fp = next_frame[1]; + if (!valid_addr_reg(next_frame[1]) || + next_fp % sizeof(bt_int_reg_t) != 0) { + /* Caller's frame pointer is suspect, so give up. */ + return false; + } + + if (state->initial_frame_caller_pc != -1) { + /* We must be in the initial stack frame and already know the + * caller PC. + */ + next_pc = state->initial_frame_caller_pc; + + /* Force reading stack next time, in case we were in the + * initial frame. We don't do this above just to paranoidly + * avoid changing the struct at all when we return false. + */ + state->initial_frame_caller_pc = -1; + } else { + /* Get the caller PC from the frame linkage area. */ + next_pc = next_frame[0]; + if (!valid_addr_reg(next_frame[0]) || next_pc == 0 || + next_pc % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0) { + /* The PC is suspect, so give up. */ + return false; + } + } + + /* Update state to become the caller's stack frame. */ + state->pc = next_pc; + state->sp = state->fp; + state->fp = next_fp; + + return true; +} diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c new file mode 100644 index 00000000..d67459b9 --- /dev/null +++ b/arch/tile/kernel/compat.c @@ -0,0 +1,118 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Adjust unistd.h to provide 32-bit numbers and functions. */ +#define __SYSCALL_COMPAT + +#include <linux/compat.h> +#include <linux/syscalls.h> +#include <linux/kdev_t.h> +#include <linux/fs.h> +#include <linux/fcntl.h> +#include <linux/uaccess.h> +#include <linux/signal.h> +#include <asm/syscalls.h> + +/* + * Syscalls that take 64-bit numbers traditionally take them in 32-bit + * "high" and "low" value parts on 32-bit architectures. + * In principle, one could imagine passing some register arguments as + * fully 64-bit on TILE-Gx in 32-bit mode, but it seems easier to + * adapt the usual convention. + */ + +long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high) +{ + return sys_truncate(filename, ((loff_t)high << 32) | low); +} + +long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high) +{ + return sys_ftruncate(fd, ((loff_t)high << 32) | low); +} + +long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count, + u32 dummy, u32 low, u32 high) +{ + return sys_pread64(fd, ubuf, count, ((loff_t)high << 32) | low); +} + +long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count, + u32 dummy, u32 low, u32 high) +{ + return sys_pwrite64(fd, ubuf, count, ((loff_t)high << 32) | low); +} + +long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len) +{ + return sys_lookup_dcookie(((loff_t)high << 32) | low, buf, len); +} + +long compat_sys_sync_file_range2(int fd, unsigned int flags, + u32 offset_lo, u32 offset_hi, + u32 nbytes_lo, u32 nbytes_hi) +{ + return sys_sync_file_range(fd, ((loff_t)offset_hi << 32) | offset_lo, + ((loff_t)nbytes_hi << 32) | nbytes_lo, + flags); +} + +long compat_sys_fallocate(int fd, int mode, + u32 offset_lo, u32 offset_hi, + u32 len_lo, u32 len_hi) +{ + return sys_fallocate(fd, mode, ((loff_t)offset_hi << 32) | offset_lo, + ((loff_t)len_hi << 32) | len_lo); +} + + + +long compat_sys_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval) +{ + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + ret = sys_sched_rr_get_interval(pid, + (struct timespec __force __user *)&t); + set_fs(old_fs); + if (put_compat_timespec(&t, interval)) + return -EFAULT; + return ret; +} + +/* Provide the compat syscall number to call mapping. */ +#undef __SYSCALL +#define __SYSCALL(nr, call) [nr] = (call), + +/* See comments in sys.c */ +#define compat_sys_fadvise64_64 sys32_fadvise64_64 +#define compat_sys_readahead sys32_readahead + +/* Call the trampolines to manage pt_regs where necessary. */ +#define compat_sys_execve _compat_sys_execve +#define compat_sys_sigaltstack _compat_sys_sigaltstack +#define compat_sys_rt_sigreturn _compat_sys_rt_sigreturn +#define sys_clone _sys_clone + +/* + * Note that we can't include <linux/unistd.h> here since the header + * guard will defeat us; <asm/unistd.h> checks for __SYSCALL as well. + */ +void *compat_sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls-1] = sys_ni_syscall, +#include <asm/unistd.h> +}; diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c new file mode 100644 index 00000000..cdef6e5e --- /dev/null +++ b/arch/tile/kernel/compat_signal.c @@ -0,0 +1,431 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/personality.h> +#include <linux/suspend.h> +#include <linux/ptrace.h> +#include <linux/elf.h> +#include <linux/compat.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <asm/processor.h> +#include <asm/ucontext.h> +#include <asm/sigframe.h> +#include <asm/syscalls.h> +#include <arch/interrupts.h> + +struct compat_sigaction { + compat_uptr_t sa_handler; + compat_ulong_t sa_flags; + compat_uptr_t sa_restorer; + sigset_t sa_mask __packed; +}; + +struct compat_sigaltstack { + compat_uptr_t ss_sp; + int ss_flags; + compat_size_t ss_size; +}; + +struct compat_ucontext { + compat_ulong_t uc_flags; + compat_uptr_t uc_link; + struct compat_sigaltstack uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#define COMPAT_SI_PAD_SIZE ((SI_MAX_SIZE - 3 * sizeof(int)) / sizeof(int)) + +struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[COMPAT_SI_PAD_SIZE]; + + /* kill() */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + int _overrun_incr; /* amount to add to overrun */ + } _timer; + + /* POSIX.1b signals */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + unsigned int _addr; /* faulting insn/memory ref. */ +#ifdef __ARCH_SI_TRAPNO + int _trapno; /* TRAP # which caused the signal */ +#endif + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +}; + +struct compat_rt_sigframe { + unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */ + struct compat_siginfo info; + struct compat_ucontext uc; +}; + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act, + struct compat_sigaction __user *oact, + size_t sigsetsize) +{ + struct k_sigaction new_sa, old_sa; + int ret = -EINVAL; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + goto out; + + if (act) { + compat_uptr_t handler, restorer; + + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(handler, &act->sa_handler) || + __get_user(new_sa.sa.sa_flags, &act->sa_flags) || + __get_user(restorer, &act->sa_restorer) || + __copy_from_user(&new_sa.sa.sa_mask, &act->sa_mask, + sizeof(sigset_t))) + return -EFAULT; + new_sa.sa.sa_handler = compat_ptr(handler); + new_sa.sa.sa_restorer = compat_ptr(restorer); + } + + ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL); + + if (!ret && oact) { + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(ptr_to_compat(old_sa.sa.sa_handler), + &oact->sa_handler) || + __put_user(ptr_to_compat(old_sa.sa.sa_restorer), + &oact->sa_restorer) || + __put_user(old_sa.sa.sa_flags, &oact->sa_flags) || + __copy_to_user(&oact->sa_mask, &old_sa.sa.sa_mask, + sizeof(sigset_t))) + return -EFAULT; + } +out: + return ret; +} + +long compat_sys_rt_sigqueueinfo(int pid, int sig, + struct compat_siginfo __user *uinfo) +{ + siginfo_t info; + int ret; + mm_segment_t old_fs = get_fs(); + + if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + set_fs(KERNEL_DS); + ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __force __user *)&info); + set_fs(old_fs); + return ret; +} + +int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from) +{ + int err; + + if (!access_ok(VERIFY_WRITE, to, sizeof(struct compat_siginfo))) + return -EFAULT; + + /* If you change siginfo_t structure, please make sure that + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + + if (from->si_code < 0) { + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); + } else { + /* + * First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) + */ + err |= __put_user(from->_sifields._pad[0], + &to->_sifields._pad[0]); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); + /* FALL THROUGH */ + default: + case __SI_KILL >> 16: + err |= __put_user(from->si_uid, &to->si_uid); + break; + case __SI_POLL >> 16: + err |= __put_user(from->si_fd, &to->si_fd); + break; + case __SI_TIMER >> 16: + err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user(ptr_to_compat(from->si_ptr), + &to->si_ptr); + break; + /* This is not generated by the kernel as of now. */ + case __SI_RT >> 16: + case __SI_MESGQ >> 16: + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(from->si_int, &to->si_int); + break; + } + } + return err; +} + +int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) +{ + int err; + u32 ptr32; + + if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo))) + return -EFAULT; + + err = __get_user(to->si_signo, &from->si_signo); + err |= __get_user(to->si_errno, &from->si_errno); + err |= __get_user(to->si_code, &from->si_code); + + err |= __get_user(to->si_pid, &from->si_pid); + err |= __get_user(to->si_uid, &from->si_uid); + err |= __get_user(ptr32, &from->si_ptr); + to->si_ptr = compat_ptr(ptr32); + + return err; +} + +long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, + struct compat_sigaltstack __user *uoss_ptr, + struct pt_regs *regs) +{ + stack_t uss, uoss; + int ret; + mm_segment_t seg; + + if (uss_ptr) { + u32 ptr; + + memset(&uss, 0, sizeof(stack_t)); + if (!access_ok(VERIFY_READ, uss_ptr, sizeof(*uss_ptr)) || + __get_user(ptr, &uss_ptr->ss_sp) || + __get_user(uss.ss_flags, &uss_ptr->ss_flags) || + __get_user(uss.ss_size, &uss_ptr->ss_size)) + return -EFAULT; + uss.ss_sp = compat_ptr(ptr); + } + seg = get_fs(); + set_fs(KERNEL_DS); + ret = do_sigaltstack(uss_ptr ? (stack_t __user __force *)&uss : NULL, + (stack_t __user __force *)&uoss, + (unsigned long)compat_ptr(regs->sp)); + set_fs(seg); + if (ret >= 0 && uoss_ptr) { + if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(*uoss_ptr)) || + __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || + __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || + __put_user(uoss.ss_size, &uoss_ptr->ss_size)) + ret = -EFAULT; + } + return ret; +} + +/* The assembly shim for this function arranges to ignore the return value. */ +long compat_sys_rt_sigreturn(struct pt_regs *regs) +{ + struct compat_rt_sigframe __user *frame = + (struct compat_rt_sigframe __user *) compat_ptr(regs->sp); + sigset_t set; + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + goto badframe; + + if (compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0) + goto badframe; + + return 0; + +badframe: + signal_fault("bad sigreturn frame", regs, frame, 0); + return 0; +} + +/* + * Determine which stack to use.. + */ +static inline void __user *compat_get_sigframe(struct k_sigaction *ka, + struct pt_regs *regs, + size_t frame_size) +{ + unsigned long sp; + + /* Default to using normal stack */ + sp = (unsigned long)compat_ptr(regs->sp); + + /* + * If we are on the alternate signal stack and would overflow + * it, don't. Return an always-bogus address instead so we + * will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) + return (void __user __force *)-1UL; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(sp) == 0) + sp = current->sas_ss_sp + current->sas_ss_size; + } + + sp -= frame_size; + /* + * Align the stack pointer according to the TILE ABI, + * i.e. so that on function entry (sp & 15) == 0. + */ + sp &= -16UL; + return (void __user *) sp; +} + +int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + unsigned long restorer; + struct compat_rt_sigframe __user *frame; + int err = 0; + int usig; + + frame = compat_get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + usig = current_thread_info()->exec_domain + && current_thread_info()->exec_domain->signal_invmap + && sig < 32 + ? current_thread_info()->exec_domain->signal_invmap[sig] + : sig; + + /* Always write at least the signal number for the stack backtracer. */ + if (ka->sa.sa_flags & SA_SIGINFO) { + /* At sigreturn time, restore the callee-save registers too. */ + err |= copy_siginfo_to_user32(&frame->info, info); + regs->flags |= PT_FLAGS_RESTORE_REGS; + } else { + err |= __put_user(info->si_signo, &frame->info.si_signo); + } + + /* Create the ucontext. */ + err |= __clear_user(&frame->save_area, sizeof(frame->save_area)); + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(ptr_to_compat((void *)(current->sas_ss_sp)), + &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + restorer = VDSO_BASE; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ptr_to_compat_reg(ka->sa.sa_restorer); + + /* + * Set up registers for signal handler. + * Registers that we don't modify keep the value they had from + * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). + */ + regs->pc = ptr_to_compat_reg(ka->sa.sa_handler); + regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */ + regs->sp = ptr_to_compat_reg(frame); + regs->lr = restorer; + regs->regs[0] = (unsigned long) usig; + regs->regs[1] = ptr_to_compat_reg(&frame->info); + regs->regs[2] = ptr_to_compat_reg(&frame->uc); + regs->flags |= PT_FLAGS_CALLER_SAVES; + + /* + * Notify any tracer that was single-stepping it. + * The tracer may want to single-step inside the + * handler too. + */ + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); + + return 0; + +give_sigsegv: + signal_fault("bad setup frame", regs, frame, sig); + return -EFAULT; +} diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c new file mode 100644 index 00000000..afb9c9a0 --- /dev/null +++ b/arch/tile/kernel/early_printk.c @@ -0,0 +1,110 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/console.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/irqflags.h> +#include <asm/setup.h> +#include <hv/hypervisor.h> + +static void early_hv_write(struct console *con, const char *s, unsigned n) +{ + hv_console_write((HV_VirtAddr) s, n); +} + +static struct console early_hv_console = { + .name = "earlyhv", + .write = early_hv_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +/* Direct interface for emergencies */ +static struct console *early_console = &early_hv_console; +static int early_console_initialized; +static int early_console_complete; + +static void early_vprintk(const char *fmt, va_list ap) +{ + char buf[512]; + int n = vscnprintf(buf, sizeof(buf), fmt, ap); + early_console->write(early_console, buf, n); +} + +void early_printk(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + early_vprintk(fmt, ap); + va_end(ap); +} + +void early_panic(const char *fmt, ...) +{ + va_list ap; + arch_local_irq_disable_all(); + va_start(ap, fmt); + early_printk("Kernel panic - not syncing: "); + early_vprintk(fmt, ap); + early_console->write(early_console, "\n", 1); + va_end(ap); + dump_stack(); + hv_halt(); +} + +static int __initdata keep_early; + +static int __init setup_early_printk(char *str) +{ + if (early_console_initialized) + return 1; + + if (str != NULL && strncmp(str, "keep", 4) == 0) + keep_early = 1; + + early_console = &early_hv_console; + early_console_initialized = 1; + register_console(early_console); + + return 0; +} + +void __init disable_early_printk(void) +{ + early_console_complete = 1; + if (!early_console_initialized || !early_console) + return; + if (!keep_early) { + early_printk("disabling early console\n"); + unregister_console(early_console); + early_console_initialized = 0; + } else { + early_printk("keeping early console\n"); + } +} + +void warn_early_printk(void) +{ + if (early_console_complete || early_console_initialized) + return; + early_printk("\ +Machine shutting down before console output is fully initialized.\n\ +You may wish to reboot and add the option 'earlyprintk' to your\n\ +boot command line to see any diagnostic early console output.\n\ +"); +} + +early_param("earlyprintk", setup_early_printk); diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S new file mode 100644 index 00000000..ec91568d --- /dev/null +++ b/arch/tile/kernel/entry.S @@ -0,0 +1,111 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/linkage.h> +#include <linux/unistd.h> +#include <asm/irqflags.h> +#include <asm/processor.h> +#include <arch/abi.h> +#include <arch/spr_def.h> + +#ifdef __tilegx__ +#define bnzt bnezt +#endif + +STD_ENTRY(current_text_addr) + { move r0, lr; jrp lr } + STD_ENDPROC(current_text_addr) + +/* + * Implement execve(). The i386 code has a note that forking from kernel + * space results in no copy on write until the execve, so we should be + * careful not to write to the stack here. + */ +STD_ENTRY(kernel_execve) + moveli TREG_SYSCALL_NR_NAME, __NR_execve + swint1 + jrp lr + STD_ENDPROC(kernel_execve) + +/* + * We don't run this function directly, but instead copy it to a page + * we map into every user process. See vdso_setup(). + * + * Note that libc has a copy of this function that it uses to compare + * against the PC when a stack backtrace ends, so if this code is + * changed, the libc implementation(s) should also be updated. + */ + .pushsection .data +ENTRY(__rt_sigreturn) + moveli TREG_SYSCALL_NR_NAME,__NR_rt_sigreturn + swint1 + ENDPROC(__rt_sigreturn) + ENTRY(__rt_sigreturn_end) + .popsection + +STD_ENTRY(dump_stack) + { move r2, lr; lnk r1 } + { move r4, r52; addli r1, r1, dump_stack - . } + { move r3, sp; j _dump_stack } + jrp lr /* keep backtracer happy */ + STD_ENDPROC(dump_stack) + +STD_ENTRY(KBacktraceIterator_init_current) + { move r2, lr; lnk r1 } + { move r4, r52; addli r1, r1, KBacktraceIterator_init_current - . } + { move r3, sp; j _KBacktraceIterator_init_current } + jrp lr /* keep backtracer happy */ + STD_ENDPROC(KBacktraceIterator_init_current) + +/* + * Reset our stack to r1/r2 (sp and ksp0+cpu respectively), then + * free the old stack (passed in r0) and re-invoke cpu_idle(). + * We update sp and ksp0 simultaneously to avoid backtracer warnings. + */ +STD_ENTRY(cpu_idle_on_new_stack) + { + move sp, r1 + mtspr SPR_SYSTEM_SAVE_K_0, r2 + } + jal free_thread_info + j cpu_idle + STD_ENDPROC(cpu_idle_on_new_stack) + +/* Loop forever on a nap during SMP boot. */ +STD_ENTRY(smp_nap) + nap + nop /* avoid provoking the icache prefetch with a jump */ + j smp_nap /* we are not architecturally guaranteed not to exit nap */ + jrp lr /* clue in the backtracer */ + STD_ENDPROC(smp_nap) + +/* + * Enable interrupts racelessly and then nap until interrupted. + * Architecturally, we are guaranteed that enabling interrupts via + * mtspr to INTERRUPT_CRITICAL_SECTION only interrupts at the next PC. + * This function's _cpu_idle_nap address is special; see intvec.S. + * When interrupted at _cpu_idle_nap, we bump the PC forward 8, and + * as a result return to the function that called _cpu_idle(). + */ +STD_ENTRY(_cpu_idle) + movei r1, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r1 + IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ + mtspr INTERRUPT_CRITICAL_SECTION, zero + .global _cpu_idle_nap +_cpu_idle_nap: + nap + nop /* avoid provoking the icache prefetch with a jump */ + jrp lr + STD_ENDPROC(_cpu_idle) diff --git a/arch/tile/kernel/futex_64.S b/arch/tile/kernel/futex_64.S new file mode 100644 index 00000000..f465d1ed --- /dev/null +++ b/arch/tile/kernel/futex_64.S @@ -0,0 +1,55 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Atomically access user memory, but use MMU to avoid propagating + * kernel exceptions. + */ + +#include <linux/linkage.h> +#include <asm/errno.h> +#include <asm/futex.h> +#include <asm/page.h> +#include <asm/processor.h> + +/* + * Provide a set of atomic memory operations supporting <asm/futex.h>. + * + * r0: user address to manipulate + * r1: new value to write, or for cmpxchg, old value to compare against + * r2: (cmpxchg only) new value to write + * + * Return __get_user struct, r0 with value, r1 with error. + */ +#define FUTEX_OP(name, ...) \ +STD_ENTRY(futex_##name) \ + __VA_ARGS__; \ + { \ + move r1, zero; \ + jrp lr \ + }; \ + STD_ENDPROC(futex_##name); \ + .pushsection __ex_table,"a"; \ + .quad 1b, get_user_fault; \ + .popsection + + .pushsection .fixup,"ax" +get_user_fault: + { movei r1, -EFAULT; jrp lr } + ENDPROC(get_user_fault) + .popsection + +FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2) +FUTEX_OP(set, 1: exch4 r0, r0, r1) +FUTEX_OP(add, 1: fetchadd4 r0, r0, r1) +FUTEX_OP(or, 1: fetchor4 r0, r0, r1) +FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1) diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c new file mode 100644 index 00000000..8c41891a --- /dev/null +++ b/arch/tile/kernel/hardwall.c @@ -0,0 +1,837 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/fs.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/rwsem.h> +#include <linux/kprobes.h> +#include <linux/sched.h> +#include <linux/hardirq.h> +#include <linux/uaccess.h> +#include <linux/smp.h> +#include <linux/cdev.h> +#include <linux/compat.h> +#include <asm/hardwall.h> +#include <asm/traps.h> +#include <asm/siginfo.h> +#include <asm/irq_regs.h> + +#include <arch/interrupts.h> +#include <arch/spr_def.h> + + +/* + * This data structure tracks the rectangle data, etc., associated + * one-to-one with a "struct file *" from opening HARDWALL_FILE. + * Note that the file's private data points back to this structure. + */ +struct hardwall_info { + struct list_head list; /* "rectangles" list */ + struct list_head task_head; /* head of tasks in this hardwall */ + struct cpumask cpumask; /* cpus in the rectangle */ + int ulhc_x; /* upper left hand corner x coord */ + int ulhc_y; /* upper left hand corner y coord */ + int width; /* rectangle width */ + int height; /* rectangle height */ + int id; /* integer id for this hardwall */ + int teardown_in_progress; /* are we tearing this one down? */ +}; + +/* Currently allocated hardwall rectangles */ +static LIST_HEAD(rectangles); + +/* /proc/tile/hardwall */ +static struct proc_dir_entry *hardwall_proc_dir; + +/* Functions to manage files in /proc/tile/hardwall. */ +static void hardwall_add_proc(struct hardwall_info *rect); +static void hardwall_remove_proc(struct hardwall_info *rect); + +/* + * Guard changes to the hardwall data structures. + * This could be finer grained (e.g. one lock for the list of hardwall + * rectangles, then separate embedded locks for each one's list of tasks), + * but there are subtle correctness issues when trying to start with + * a task's "hardwall" pointer and lock the correct rectangle's embedded + * lock in the presence of a simultaneous deactivation, so it seems + * easier to have a single lock, given that none of these data + * structures are touched very frequently during normal operation. + */ +static DEFINE_SPINLOCK(hardwall_lock); + +/* Allow disabling UDN access. */ +static int udn_disabled; +static int __init noudn(char *str) +{ + pr_info("User-space UDN access is disabled\n"); + udn_disabled = 1; + return 0; +} +early_param("noudn", noudn); + + +/* + * Low-level primitives + */ + +/* Set a CPU bit if the CPU is online. */ +#define cpu_online_set(cpu, dst) do { \ + if (cpu_online(cpu)) \ + cpumask_set_cpu(cpu, dst); \ +} while (0) + + +/* Does the given rectangle contain the given x,y coordinate? */ +static int contains(struct hardwall_info *r, int x, int y) +{ + return (x >= r->ulhc_x && x < r->ulhc_x + r->width) && + (y >= r->ulhc_y && y < r->ulhc_y + r->height); +} + +/* Compute the rectangle parameters and validate the cpumask. */ +static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) +{ + int x, y, cpu, ulhc, lrhc; + + /* The first cpu is the ULHC, the last the LRHC. */ + ulhc = find_first_bit(cpumask_bits(mask), nr_cpumask_bits); + lrhc = find_last_bit(cpumask_bits(mask), nr_cpumask_bits); + + /* Compute the rectangle attributes from the cpus. */ + r->ulhc_x = cpu_x(ulhc); + r->ulhc_y = cpu_y(ulhc); + r->width = cpu_x(lrhc) - r->ulhc_x + 1; + r->height = cpu_y(lrhc) - r->ulhc_y + 1; + cpumask_copy(&r->cpumask, mask); + r->id = ulhc; /* The ulhc cpu id can be the hardwall id. */ + + /* Width and height must be positive */ + if (r->width <= 0 || r->height <= 0) + return -EINVAL; + + /* Confirm that the cpumask is exactly the rectangle. */ + for (y = 0, cpu = 0; y < smp_height; ++y) + for (x = 0; x < smp_width; ++x, ++cpu) + if (cpumask_test_cpu(cpu, mask) != contains(r, x, y)) + return -EINVAL; + + /* + * Note that offline cpus can't be drained when this UDN + * rectangle eventually closes. We used to detect this + * situation and print a warning, but it annoyed users and + * they ignored it anyway, so now we just return without a + * warning. + */ + return 0; +} + +/* Do the two given rectangles overlap on any cpu? */ +static int overlaps(struct hardwall_info *a, struct hardwall_info *b) +{ + return a->ulhc_x + a->width > b->ulhc_x && /* A not to the left */ + b->ulhc_x + b->width > a->ulhc_x && /* B not to the left */ + a->ulhc_y + a->height > b->ulhc_y && /* A not above */ + b->ulhc_y + b->height > a->ulhc_y; /* B not above */ +} + + +/* + * Hardware management of hardwall setup, teardown, trapping, + * and enabling/disabling PL0 access to the networks. + */ + +/* Bit field values to mask together for writes to SPR_XDN_DIRECTION_PROTECT */ +enum direction_protect { + N_PROTECT = (1 << 0), + E_PROTECT = (1 << 1), + S_PROTECT = (1 << 2), + W_PROTECT = (1 << 3) +}; + +static void enable_firewall_interrupts(void) +{ + arch_local_irq_unmask_now(INT_UDN_FIREWALL); +} + +static void disable_firewall_interrupts(void) +{ + arch_local_irq_mask_now(INT_UDN_FIREWALL); +} + +/* Set up hardwall on this cpu based on the passed hardwall_info. */ +static void hardwall_setup_ipi_func(void *info) +{ + struct hardwall_info *r = info; + int cpu = smp_processor_id(); + int x = cpu % smp_width; + int y = cpu / smp_width; + int bits = 0; + if (x == r->ulhc_x) + bits |= W_PROTECT; + if (x == r->ulhc_x + r->width - 1) + bits |= E_PROTECT; + if (y == r->ulhc_y) + bits |= N_PROTECT; + if (y == r->ulhc_y + r->height - 1) + bits |= S_PROTECT; + BUG_ON(bits == 0); + __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, bits); + enable_firewall_interrupts(); + +} + +/* Set up all cpus on edge of rectangle to enable/disable hardwall SPRs. */ +static void hardwall_setup(struct hardwall_info *r) +{ + int x, y, cpu, delta; + struct cpumask rect_cpus; + + cpumask_clear(&rect_cpus); + + /* First include the top and bottom edges */ + cpu = r->ulhc_y * smp_width + r->ulhc_x; + delta = (r->height - 1) * smp_width; + for (x = 0; x < r->width; ++x, ++cpu) { + cpu_online_set(cpu, &rect_cpus); + cpu_online_set(cpu + delta, &rect_cpus); + } + + /* Then the left and right edges */ + cpu -= r->width; + delta = r->width - 1; + for (y = 0; y < r->height; ++y, cpu += smp_width) { + cpu_online_set(cpu, &rect_cpus); + cpu_online_set(cpu + delta, &rect_cpus); + } + + /* Then tell all the cpus to set up their protection SPR */ + on_each_cpu_mask(&rect_cpus, hardwall_setup_ipi_func, r, 1); +} + +void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) +{ + struct hardwall_info *rect; + struct task_struct *p; + struct siginfo info; + int x, y; + int cpu = smp_processor_id(); + int found_processes; + unsigned long flags; + + struct pt_regs *old_regs = set_irq_regs(regs); + irq_enter(); + + /* This tile trapped a network access; find the rectangle. */ + x = cpu % smp_width; + y = cpu / smp_width; + spin_lock_irqsave(&hardwall_lock, flags); + list_for_each_entry(rect, &rectangles, list) { + if (contains(rect, x, y)) + break; + } + + /* + * It shouldn't be possible not to find this cpu on the + * rectangle list, since only cpus in rectangles get hardwalled. + * The hardwall is only removed after the UDN is drained. + */ + BUG_ON(&rect->list == &rectangles); + + /* + * If we already started teardown on this hardwall, don't worry; + * the abort signal has been sent and we are just waiting for things + * to quiesce. + */ + if (rect->teardown_in_progress) { + pr_notice("cpu %d: detected hardwall violation %#lx" + " while teardown already in progress\n", + cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT)); + goto done; + } + + /* + * Kill off any process that is activated in this rectangle. + * We bypass security to deliver the signal, since it must be + * one of the activated processes that generated the UDN + * message that caused this trap, and all the activated + * processes shared a single open file so are pretty tightly + * bound together from a security point of view to begin with. + */ + rect->teardown_in_progress = 1; + wmb(); /* Ensure visibility of rectangle before notifying processes. */ + pr_notice("cpu %d: detected hardwall violation %#lx...\n", + cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT)); + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_HARDWALL; + found_processes = 0; + list_for_each_entry(p, &rect->task_head, thread.hardwall_list) { + BUG_ON(p->thread.hardwall != rect); + if (!(p->flags & PF_EXITING)) { + found_processes = 1; + pr_notice("hardwall: killing %d\n", p->pid); + do_send_sig_info(info.si_signo, &info, p, false); + } + } + if (!found_processes) + pr_notice("hardwall: no associated processes!\n"); + + done: + spin_unlock_irqrestore(&hardwall_lock, flags); + + /* + * We have to disable firewall interrupts now, or else when we + * return from this handler, we will simply re-interrupt back to + * it. However, we can't clear the protection bits, since we + * haven't yet drained the network, and that would allow packets + * to cross out of the hardwall region. + */ + disable_firewall_interrupts(); + + irq_exit(); + set_irq_regs(old_regs); +} + +/* Allow access from user space to the UDN. */ +void grant_network_mpls(void) +{ + __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_0, 1); + __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_0, 1); + __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_0, 1); + __insn_mtspr(SPR_MPL_UDN_TIMER_SET_0, 1); +#if !CHIP_HAS_REV1_XDN() + __insn_mtspr(SPR_MPL_UDN_REFILL_SET_0, 1); + __insn_mtspr(SPR_MPL_UDN_CA_SET_0, 1); +#endif +} + +/* Deny access from user space to the UDN. */ +void restrict_network_mpls(void) +{ + __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_1, 1); + __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_1, 1); + __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_1, 1); + __insn_mtspr(SPR_MPL_UDN_TIMER_SET_1, 1); +#if !CHIP_HAS_REV1_XDN() + __insn_mtspr(SPR_MPL_UDN_REFILL_SET_1, 1); + __insn_mtspr(SPR_MPL_UDN_CA_SET_1, 1); +#endif +} + + +/* + * Code to create, activate, deactivate, and destroy hardwall rectangles. + */ + +/* Create a hardwall for the given rectangle */ +static struct hardwall_info *hardwall_create( + size_t size, const unsigned char __user *bits) +{ + struct hardwall_info *iter, *rect; + struct cpumask mask; + unsigned long flags; + int rc; + + /* Reject crazy sizes out of hand, a la sys_mbind(). */ + if (size > PAGE_SIZE) + return ERR_PTR(-EINVAL); + + /* Copy whatever fits into a cpumask. */ + if (copy_from_user(&mask, bits, min(sizeof(struct cpumask), size))) + return ERR_PTR(-EFAULT); + + /* + * If the size was short, clear the rest of the mask; + * otherwise validate that the rest of the user mask was zero + * (we don't try hard to be efficient when validating huge masks). + */ + if (size < sizeof(struct cpumask)) { + memset((char *)&mask + size, 0, sizeof(struct cpumask) - size); + } else if (size > sizeof(struct cpumask)) { + size_t i; + for (i = sizeof(struct cpumask); i < size; ++i) { + char c; + if (get_user(c, &bits[i])) + return ERR_PTR(-EFAULT); + if (c) + return ERR_PTR(-EINVAL); + } + } + + /* Allocate a new rectangle optimistically. */ + rect = kmalloc(sizeof(struct hardwall_info), + GFP_KERNEL | __GFP_ZERO); + if (rect == NULL) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&rect->task_head); + + /* Compute the rectangle size and validate that it's plausible. */ + rc = setup_rectangle(rect, &mask); + if (rc != 0) { + kfree(rect); + return ERR_PTR(rc); + } + + /* Confirm it doesn't overlap and add it to the list. */ + spin_lock_irqsave(&hardwall_lock, flags); + list_for_each_entry(iter, &rectangles, list) { + if (overlaps(iter, rect)) { + spin_unlock_irqrestore(&hardwall_lock, flags); + kfree(rect); + return ERR_PTR(-EBUSY); + } + } + list_add_tail(&rect->list, &rectangles); + spin_unlock_irqrestore(&hardwall_lock, flags); + + /* Set up appropriate hardwalling on all affected cpus. */ + hardwall_setup(rect); + + /* Create a /proc/tile/hardwall entry. */ + hardwall_add_proc(rect); + + return rect; +} + +/* Activate a given hardwall on this cpu for this process. */ +static int hardwall_activate(struct hardwall_info *rect) +{ + int cpu, x, y; + unsigned long flags; + struct task_struct *p = current; + struct thread_struct *ts = &p->thread; + + /* Require a rectangle. */ + if (rect == NULL) + return -ENODATA; + + /* Not allowed to activate a rectangle that is being torn down. */ + if (rect->teardown_in_progress) + return -EINVAL; + + /* + * Get our affinity; if we're not bound to this tile uniquely, + * we can't access the network registers. + */ + if (cpumask_weight(&p->cpus_allowed) != 1) + return -EPERM; + + /* Make sure we are bound to a cpu in this rectangle. */ + cpu = smp_processor_id(); + BUG_ON(cpumask_first(&p->cpus_allowed) != cpu); + x = cpu_x(cpu); + y = cpu_y(cpu); + if (!contains(rect, x, y)) + return -EINVAL; + + /* If we are already bound to this hardwall, it's a no-op. */ + if (ts->hardwall) { + BUG_ON(ts->hardwall != rect); + return 0; + } + + /* Success! This process gets to use the user networks on this cpu. */ + ts->hardwall = rect; + spin_lock_irqsave(&hardwall_lock, flags); + list_add(&ts->hardwall_list, &rect->task_head); + spin_unlock_irqrestore(&hardwall_lock, flags); + grant_network_mpls(); + printk(KERN_DEBUG "Pid %d (%s) activated for hardwall: cpu %d\n", + p->pid, p->comm, cpu); + return 0; +} + +/* + * Deactivate a task's hardwall. Must hold hardwall_lock. + * This method may be called from free_task(), so we don't want to + * rely on too many fields of struct task_struct still being valid. + * We assume the cpus_allowed, pid, and comm fields are still valid. + */ +static void _hardwall_deactivate(struct task_struct *task) +{ + struct thread_struct *ts = &task->thread; + + if (cpumask_weight(&task->cpus_allowed) != 1) { + pr_err("pid %d (%s) releasing networks with" + " an affinity mask containing %d cpus!\n", + task->pid, task->comm, + cpumask_weight(&task->cpus_allowed)); + BUG(); + } + + BUG_ON(ts->hardwall == NULL); + ts->hardwall = NULL; + list_del(&ts->hardwall_list); + if (task == current) + restrict_network_mpls(); +} + +/* Deactivate a task's hardwall. */ +int hardwall_deactivate(struct task_struct *task) +{ + unsigned long flags; + int activated; + + spin_lock_irqsave(&hardwall_lock, flags); + activated = (task->thread.hardwall != NULL); + if (activated) + _hardwall_deactivate(task); + spin_unlock_irqrestore(&hardwall_lock, flags); + + if (!activated) + return -EINVAL; + + printk(KERN_DEBUG "Pid %d (%s) deactivated for hardwall: cpu %d\n", + task->pid, task->comm, smp_processor_id()); + return 0; +} + +/* Stop a UDN switch before draining the network. */ +static void stop_udn_switch(void *ignored) +{ +#if !CHIP_HAS_REV1_XDN() + /* Freeze the switch and the demux. */ + __insn_mtspr(SPR_UDN_SP_FREEZE, + SPR_UDN_SP_FREEZE__SP_FRZ_MASK | + SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK | + SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK); +#endif +} + +/* Drain all the state from a stopped switch. */ +static void drain_udn_switch(void *ignored) +{ +#if !CHIP_HAS_REV1_XDN() + int i; + int from_tile_words, ca_count; + + /* Empty out the 5 switch point fifos. */ + for (i = 0; i < 5; i++) { + int words, j; + __insn_mtspr(SPR_UDN_SP_FIFO_SEL, i); + words = __insn_mfspr(SPR_UDN_SP_STATE) & 0xF; + for (j = 0; j < words; j++) + (void) __insn_mfspr(SPR_UDN_SP_FIFO_DATA); + BUG_ON((__insn_mfspr(SPR_UDN_SP_STATE) & 0xF) != 0); + } + + /* Dump out the 3 word fifo at top. */ + from_tile_words = (__insn_mfspr(SPR_UDN_DEMUX_STATUS) >> 10) & 0x3; + for (i = 0; i < from_tile_words; i++) + (void) __insn_mfspr(SPR_UDN_DEMUX_WRITE_FIFO); + + /* Empty out demuxes. */ + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0)) + (void) __tile_udn0_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1)) + (void) __tile_udn1_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2)) + (void) __tile_udn2_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3)) + (void) __tile_udn3_receive(); + BUG_ON((__insn_mfspr(SPR_UDN_DATA_AVAIL) & 0xF) != 0); + + /* Empty out catch all. */ + ca_count = __insn_mfspr(SPR_UDN_DEMUX_CA_COUNT); + for (i = 0; i < ca_count; i++) + (void) __insn_mfspr(SPR_UDN_CA_DATA); + BUG_ON(__insn_mfspr(SPR_UDN_DEMUX_CA_COUNT) != 0); + + /* Clear demux logic. */ + __insn_mtspr(SPR_UDN_DEMUX_CTL, 1); + + /* + * Write switch state; experimentation indicates that 0xc3000 + * is an idle switch point. + */ + for (i = 0; i < 5; i++) { + __insn_mtspr(SPR_UDN_SP_FIFO_SEL, i); + __insn_mtspr(SPR_UDN_SP_STATE, 0xc3000); + } +#endif +} + +/* Reset random UDN state registers at boot up and during hardwall teardown. */ +void reset_network_state(void) +{ +#if !CHIP_HAS_REV1_XDN() + /* Reset UDN coordinates to their standard value */ + unsigned int cpu = smp_processor_id(); + unsigned int x = cpu % smp_width; + unsigned int y = cpu / smp_width; +#endif + + if (udn_disabled) + return; + +#if !CHIP_HAS_REV1_XDN() + __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7)); + + /* Set demux tags to predefined values and enable them. */ + __insn_mtspr(SPR_UDN_TAG_VALID, 0xf); + __insn_mtspr(SPR_UDN_TAG_0, (1 << 0)); + __insn_mtspr(SPR_UDN_TAG_1, (1 << 1)); + __insn_mtspr(SPR_UDN_TAG_2, (1 << 2)); + __insn_mtspr(SPR_UDN_TAG_3, (1 << 3)); +#endif + + /* Clear out other random registers so we have a clean slate. */ + __insn_mtspr(SPR_UDN_AVAIL_EN, 0); + __insn_mtspr(SPR_UDN_DEADLOCK_TIMEOUT, 0); +#if !CHIP_HAS_REV1_XDN() + __insn_mtspr(SPR_UDN_REFILL_EN, 0); + __insn_mtspr(SPR_UDN_DEMUX_QUEUE_SEL, 0); + __insn_mtspr(SPR_UDN_SP_FIFO_SEL, 0); +#endif + + /* Start the switch and demux. */ +#if !CHIP_HAS_REV1_XDN() + __insn_mtspr(SPR_UDN_SP_FREEZE, 0); +#endif +} + +/* Restart a UDN switch after draining. */ +static void restart_udn_switch(void *ignored) +{ + reset_network_state(); + + /* Disable firewall interrupts. */ + __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, 0); + disable_firewall_interrupts(); +} + +/* Build a struct cpumask containing all valid tiles in bounding rectangle. */ +static void fill_mask(struct hardwall_info *r, struct cpumask *result) +{ + int x, y, cpu; + + cpumask_clear(result); + + cpu = r->ulhc_y * smp_width + r->ulhc_x; + for (y = 0; y < r->height; ++y, cpu += smp_width - r->width) { + for (x = 0; x < r->width; ++x, ++cpu) + cpu_online_set(cpu, result); + } +} + +/* Last reference to a hardwall is gone, so clear the network. */ +static void hardwall_destroy(struct hardwall_info *rect) +{ + struct task_struct *task; + unsigned long flags; + struct cpumask mask; + + /* Make sure this file actually represents a rectangle. */ + if (rect == NULL) + return; + + /* + * Deactivate any remaining tasks. It's possible to race with + * some other thread that is exiting and hasn't yet called + * deactivate (when freeing its thread_info), so we carefully + * deactivate any remaining tasks before freeing the + * hardwall_info object itself. + */ + spin_lock_irqsave(&hardwall_lock, flags); + list_for_each_entry(task, &rect->task_head, thread.hardwall_list) + _hardwall_deactivate(task); + spin_unlock_irqrestore(&hardwall_lock, flags); + + /* Drain the UDN. */ + printk(KERN_DEBUG "Clearing hardwall rectangle %dx%d %d,%d\n", + rect->width, rect->height, rect->ulhc_x, rect->ulhc_y); + fill_mask(rect, &mask); + on_each_cpu_mask(&mask, stop_udn_switch, NULL, 1); + on_each_cpu_mask(&mask, drain_udn_switch, NULL, 1); + + /* Restart switch and disable firewall. */ + on_each_cpu_mask(&mask, restart_udn_switch, NULL, 1); + + /* Remove the /proc/tile/hardwall entry. */ + hardwall_remove_proc(rect); + + /* Now free the rectangle from the list. */ + spin_lock_irqsave(&hardwall_lock, flags); + BUG_ON(!list_empty(&rect->task_head)); + list_del(&rect->list); + spin_unlock_irqrestore(&hardwall_lock, flags); + kfree(rect); +} + + +static int hardwall_proc_show(struct seq_file *sf, void *v) +{ + struct hardwall_info *rect = sf->private; + char buf[256]; + + int rc = cpulist_scnprintf(buf, sizeof(buf), &rect->cpumask); + buf[rc++] = '\n'; + seq_write(sf, buf, rc); + return 0; +} + +static int hardwall_proc_open(struct inode *inode, + struct file *file) +{ + return single_open(file, hardwall_proc_show, PDE(inode)->data); +} + +static const struct file_operations hardwall_proc_fops = { + .open = hardwall_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void hardwall_add_proc(struct hardwall_info *rect) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%d", rect->id); + proc_create_data(buf, 0444, hardwall_proc_dir, + &hardwall_proc_fops, rect); +} + +static void hardwall_remove_proc(struct hardwall_info *rect) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%d", rect->id); + remove_proc_entry(buf, hardwall_proc_dir); +} + +int proc_pid_hardwall(struct task_struct *task, char *buffer) +{ + struct hardwall_info *rect = task->thread.hardwall; + return rect ? sprintf(buffer, "%d\n", rect->id) : 0; +} + +void proc_tile_hardwall_init(struct proc_dir_entry *root) +{ + if (!udn_disabled) + hardwall_proc_dir = proc_mkdir("hardwall", root); +} + + +/* + * Character device support via ioctl/close. + */ + +static long hardwall_ioctl(struct file *file, unsigned int a, unsigned long b) +{ + struct hardwall_info *rect = file->private_data; + + if (_IOC_TYPE(a) != HARDWALL_IOCTL_BASE) + return -EINVAL; + + switch (_IOC_NR(a)) { + case _HARDWALL_CREATE: + if (udn_disabled) + return -ENOSYS; + if (rect != NULL) + return -EALREADY; + rect = hardwall_create(_IOC_SIZE(a), + (const unsigned char __user *)b); + if (IS_ERR(rect)) + return PTR_ERR(rect); + file->private_data = rect; + return 0; + + case _HARDWALL_ACTIVATE: + return hardwall_activate(rect); + + case _HARDWALL_DEACTIVATE: + if (current->thread.hardwall != rect) + return -EINVAL; + return hardwall_deactivate(current); + + case _HARDWALL_GET_ID: + return rect ? rect->id : -EINVAL; + + default: + return -EINVAL; + } +} + +#ifdef CONFIG_COMPAT +static long hardwall_compat_ioctl(struct file *file, + unsigned int a, unsigned long b) +{ + /* Sign-extend the argument so it can be used as a pointer. */ + return hardwall_ioctl(file, a, (unsigned long)compat_ptr(b)); +} +#endif + +/* The user process closed the file; revoke access to user networks. */ +static int hardwall_flush(struct file *file, fl_owner_t owner) +{ + struct hardwall_info *rect = file->private_data; + struct task_struct *task, *tmp; + unsigned long flags; + + if (rect) { + /* + * NOTE: if multiple threads are activated on this hardwall + * file, the other threads will continue having access to the + * UDN until they are context-switched out and back in again. + * + * NOTE: A NULL files pointer means the task is being torn + * down, so in that case we also deactivate it. + */ + spin_lock_irqsave(&hardwall_lock, flags); + list_for_each_entry_safe(task, tmp, &rect->task_head, + thread.hardwall_list) { + if (task->files == owner || task->files == NULL) + _hardwall_deactivate(task); + } + spin_unlock_irqrestore(&hardwall_lock, flags); + } + + return 0; +} + +/* This hardwall is gone, so destroy it. */ +static int hardwall_release(struct inode *inode, struct file *file) +{ + hardwall_destroy(file->private_data); + return 0; +} + +static const struct file_operations dev_hardwall_fops = { + .open = nonseekable_open, + .unlocked_ioctl = hardwall_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = hardwall_compat_ioctl, +#endif + .flush = hardwall_flush, + .release = hardwall_release, +}; + +static struct cdev hardwall_dev; + +static int __init dev_hardwall_init(void) +{ + int rc; + dev_t dev; + + rc = alloc_chrdev_region(&dev, 0, 1, "hardwall"); + if (rc < 0) + return rc; + cdev_init(&hardwall_dev, &dev_hardwall_fops); + rc = cdev_add(&hardwall_dev, dev, 1); + if (rc < 0) + return rc; + + return 0; +} +late_initcall(dev_hardwall_init); diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S new file mode 100644 index 00000000..1a39b7c1 --- /dev/null +++ b/arch/tile/kernel/head_32.S @@ -0,0 +1,184 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE startup code. + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/thread_info.h> +#include <asm/processor.h> +#include <asm/asm-offsets.h> +#include <hv/hypervisor.h> +#include <arch/chip.h> +#include <arch/spr_def.h> + +/* + * This module contains the entry code for kernel images. It performs the + * minimal setup needed to call the generic C routines. + */ + + __HEAD +ENTRY(_start) + /* Notify the hypervisor of what version of the API we want */ + { + movei r1, TILE_CHIP + movei r2, TILE_CHIP_REV + } + { + moveli r0, _HV_VERSION + jal hv_init + } + /* Get a reasonable default ASID in r0 */ + { + move r0, zero + jal hv_inquire_asid + } + /* Install the default page table */ + { + moveli r6, lo16(swapper_pgprot - PAGE_OFFSET) + move r4, r0 /* use starting ASID of range for this page table */ + } + { + moveli r0, lo16(swapper_pg_dir - PAGE_OFFSET) + auli r6, r6, ha16(swapper_pgprot - PAGE_OFFSET) + } + { + lw r2, r6 + addi r6, r6, 4 + } + { + lw r3, r6 + auli r0, r0, ha16(swapper_pg_dir - PAGE_OFFSET) + } + { + inv r6 + move r1, zero /* high 32 bits of CPA is zero */ + } + { + moveli lr, lo16(1f) + move r5, zero + } + { + auli lr, lr, ha16(1f) + j hv_install_context + } +1: + + /* Get our processor number and save it away in SAVE_K_0. */ + jal hv_inquire_topology + mulll_uu r4, r1, r2 /* r1 == y, r2 == width */ + add r4, r4, r0 /* r0 == x, so r4 == cpu == y*width + x */ + +#ifdef CONFIG_SMP + /* + * Load up our per-cpu offset. When the first (master) tile + * boots, this value is still zero, so we will load boot_pc + * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. + * The master tile initializes the per-cpu offset array, so that + * when subsequent (secondary) tiles boot, they will instead load + * from their per-cpu versions of boot_sp and boot_pc. + */ + moveli r5, lo16(__per_cpu_offset) + auli r5, r5, ha16(__per_cpu_offset) + s2a r5, r4, r5 + lw r5, r5 + bnz r5, 1f + + /* + * Save the width and height to the smp_topology variable + * for later use. + */ + moveli r0, lo16(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + auli r0, r0, ha16(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + { + sw r0, r2 + addi r0, r0, (HV_TOPOLOGY_HEIGHT_OFFSET - HV_TOPOLOGY_WIDTH_OFFSET) + } + sw r0, r3 +1: +#else + move r5, zero +#endif + + /* Load and go with the correct pc and sp. */ + { + addli r1, r5, lo16(boot_sp) + addli r0, r5, lo16(boot_pc) + } + { + auli r1, r1, ha16(boot_sp) + auli r0, r0, ha16(boot_pc) + } + lw r0, r0 + lw sp, r1 + or r4, sp, r4 + mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ + addi sp, sp, -STACK_TOP_DELTA + { + move lr, zero /* stop backtraces in the called function */ + jr r0 + } + ENDPROC(_start) + +__PAGE_ALIGNED_BSS + .align PAGE_SIZE +ENTRY(empty_zero_page) + .fill PAGE_SIZE,1,0 + END(empty_zero_page) + + .macro PTE va, cpa, bits1, no_org=0 + .ifeq \no_org + .org swapper_pg_dir + HV_L1_INDEX(\va) * HV_PTE_SIZE + .endif + .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \ + (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) + .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << (HV_PTE_INDEX_PFN - 32)) + .endm + +__PAGE_ALIGNED_DATA + .align PAGE_SIZE +ENTRY(swapper_pg_dir) + /* + * All data pages from PAGE_OFFSET to MEM_USER_INTRPT are mapped as + * VA = PA + PAGE_OFFSET. We remap things with more precise access + * permissions and more respect for size of RAM later. + */ + .set addr, 0 + .rept (MEM_USER_INTRPT - PAGE_OFFSET) >> PGDIR_SHIFT + PTE addr + PAGE_OFFSET, addr, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ + (1 << (HV_PTE_INDEX_WRITABLE - 32)) + .set addr, addr + PGDIR_SIZE + .endr + + /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ + PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ + (1 << (HV_PTE_INDEX_EXECUTABLE - 32)) + .org swapper_pg_dir + HV_L1_SIZE + END(swapper_pg_dir) + + /* + * Isolate swapper_pgprot to its own cache line, since each cpu + * starting up will read it using VA-is-PA and local homing. + * This would otherwise likely conflict with other data on the cache + * line, once we have set its permanent home in the page tables. + */ + __INITDATA + .align CHIP_L2_LINE_SIZE() +ENTRY(swapper_pgprot) + PTE 0, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ + (1 << (HV_PTE_INDEX_WRITABLE - 32)), 1 + .align CHIP_L2_LINE_SIZE() + END(swapper_pgprot) diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S new file mode 100644 index 00000000..6bc3a932 --- /dev/null +++ b/arch/tile/kernel/head_64.S @@ -0,0 +1,269 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE startup code. + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/thread_info.h> +#include <asm/processor.h> +#include <asm/asm-offsets.h> +#include <hv/hypervisor.h> +#include <arch/chip.h> +#include <arch/spr_def.h> + +/* + * This module contains the entry code for kernel images. It performs the + * minimal setup needed to call the generic C routines. + */ + + __HEAD +ENTRY(_start) + /* Notify the hypervisor of what version of the API we want */ + { + movei r1, TILE_CHIP + movei r2, TILE_CHIP_REV + } + { + moveli r0, _HV_VERSION + jal hv_init + } + /* Get a reasonable default ASID in r0 */ + { + move r0, zero + jal hv_inquire_asid + } + + /* + * Install the default page table. The relocation required to + * statically define the table is a bit too complex, so we have + * to plug in the pointer from the L0 to the L1 table by hand. + * We only do this on the first cpu to boot, though, since the + * other CPUs should see a properly-constructed page table. + */ + { + v4int_l r2, zero, r0 /* ASID for hv_install_context */ + moveli r4, hw1_last(swapper_pgprot - PAGE_OFFSET) + } + { + shl16insli r4, r4, hw0(swapper_pgprot - PAGE_OFFSET) + } + { + ld r1, r4 /* access_pte for hv_install_context */ + } + { + moveli r0, hw1_last(.Lsv_data_pmd - PAGE_OFFSET) + moveli r6, hw1_last(temp_data_pmd - PAGE_OFFSET) + } + { + /* After initializing swapper_pgprot, HV_PTE_GLOBAL is set. */ + bfextu r7, r1, HV_PTE_INDEX_GLOBAL, HV_PTE_INDEX_GLOBAL + inv r4 + } + bnez r7, .Lno_write + { + shl16insli r0, r0, hw0(.Lsv_data_pmd - PAGE_OFFSET) + shl16insli r6, r6, hw0(temp_data_pmd - PAGE_OFFSET) + } + { + /* Cut off the low bits of the PT address. */ + shrui r6, r6, HV_LOG2_PAGE_TABLE_ALIGN + /* Start with our access pte. */ + move r5, r1 + } + { + /* Stuff the address into the page table pointer slot of the PTE. */ + bfins r5, r6, HV_PTE_INDEX_PTFN, \ + HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1 + } + { + /* Store the L0 data PTE. */ + st r0, r5 + addli r6, r6, (temp_code_pmd - temp_data_pmd) >> \ + HV_LOG2_PAGE_TABLE_ALIGN + } + { + addli r0, r0, .Lsv_code_pmd - .Lsv_data_pmd + bfins r5, r6, HV_PTE_INDEX_PTFN, \ + HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1 + } + /* Store the L0 code PTE. */ + st r0, r5 + +.Lno_write: + moveli lr, hw2_last(1f) + { + shl16insli lr, lr, hw1(1f) + moveli r0, hw1_last(swapper_pg_dir - PAGE_OFFSET) + } + { + shl16insli lr, lr, hw0(1f) + shl16insli r0, r0, hw0(swapper_pg_dir - PAGE_OFFSET) + } + { + move r3, zero + j hv_install_context + } +1: + + /* Install the interrupt base. */ + moveli r0, hw2_last(MEM_SV_START) + shl16insli r0, r0, hw1(MEM_SV_START) + shl16insli r0, r0, hw0(MEM_SV_START) + mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0 + + /* + * Get our processor number and save it away in SAVE_K_0. + * Extract stuff from the topology structure: r4 = y, r6 = x, + * r5 = width. FIXME: consider whether we want to just make these + * 64-bit values (and if so fix smp_topology write below, too). + */ + jal hv_inquire_topology + { + v4int_l r5, zero, r1 /* r5 = width */ + shrui r4, r0, 32 /* r4 = y */ + } + { + v4int_l r6, zero, r0 /* r6 = x */ + mul_lu_lu r4, r4, r5 + } + { + add r4, r4, r6 /* r4 == cpu == y*width + x */ + } + +#ifdef CONFIG_SMP + /* + * Load up our per-cpu offset. When the first (master) tile + * boots, this value is still zero, so we will load boot_pc + * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. + * The master tile initializes the per-cpu offset array, so that + * when subsequent (secondary) tiles boot, they will instead load + * from their per-cpu versions of boot_sp and boot_pc. + */ + moveli r5, hw2_last(__per_cpu_offset) + shl16insli r5, r5, hw1(__per_cpu_offset) + shl16insli r5, r5, hw0(__per_cpu_offset) + shl3add r5, r4, r5 + ld r5, r5 + bnez r5, 1f + + /* + * Save the width and height to the smp_topology variable + * for later use. + */ + moveli r0, hw2_last(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + shl16insli r0, r0, hw1(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + shl16insli r0, r0, hw0(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + st r0, r1 +1: +#else + move r5, zero +#endif + + /* Load and go with the correct pc and sp. */ + { + moveli r1, hw2_last(boot_sp) + moveli r0, hw2_last(boot_pc) + } + { + shl16insli r1, r1, hw1(boot_sp) + shl16insli r0, r0, hw1(boot_pc) + } + { + shl16insli r1, r1, hw0(boot_sp) + shl16insli r0, r0, hw0(boot_pc) + } + { + add r1, r1, r5 + add r0, r0, r5 + } + ld r0, r0 + ld sp, r1 + or r4, sp, r4 + mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ + addi sp, sp, -STACK_TOP_DELTA + { + move lr, zero /* stop backtraces in the called function */ + jr r0 + } + ENDPROC(_start) + +__PAGE_ALIGNED_BSS + .align PAGE_SIZE +ENTRY(empty_zero_page) + .fill PAGE_SIZE,1,0 + END(empty_zero_page) + + .macro PTE cpa, bits1 + .quad HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |\ + HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |\ + (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN) + .endm + +__PAGE_ALIGNED_DATA + .align PAGE_SIZE +ENTRY(swapper_pg_dir) + .org swapper_pg_dir + HV_L0_INDEX(PAGE_OFFSET) * HV_PTE_SIZE +.Lsv_data_pmd: + .quad 0 /* PTE temp_data_pmd - PAGE_OFFSET, 0 */ + .org swapper_pg_dir + HV_L0_INDEX(MEM_SV_START) * HV_PTE_SIZE +.Lsv_code_pmd: + .quad 0 /* PTE temp_code_pmd - PAGE_OFFSET, 0 */ + .org swapper_pg_dir + HV_L0_SIZE + END(swapper_pg_dir) + + .align HV_PAGE_TABLE_ALIGN +ENTRY(temp_data_pmd) + /* + * We fill the PAGE_OFFSET pmd with huge pages with + * VA = PA + PAGE_OFFSET. We remap things with more precise access + * permissions later. + */ + .set addr, 0 + .rept HV_L1_ENTRIES + PTE addr, HV_PTE_READABLE | HV_PTE_WRITABLE + .set addr, addr + HV_PAGE_SIZE_LARGE + .endr + .org temp_data_pmd + HV_L1_SIZE + END(temp_data_pmd) + + .align HV_PAGE_TABLE_ALIGN +ENTRY(temp_code_pmd) + /* + * We fill the MEM_SV_START pmd with huge pages with + * VA = PA + PAGE_OFFSET. We remap things with more precise access + * permissions later. + */ + .set addr, 0 + .rept HV_L1_ENTRIES + PTE addr, HV_PTE_READABLE | HV_PTE_EXECUTABLE + .set addr, addr + HV_PAGE_SIZE_LARGE + .endr + .org temp_code_pmd + HV_L1_SIZE + END(temp_code_pmd) + + /* + * Isolate swapper_pgprot to its own cache line, since each cpu + * starting up will read it using VA-is-PA and local homing. + * This would otherwise likely conflict with other data on the cache + * line, once we have set its permanent home in the page tables. + */ + __INITDATA + .align CHIP_L2_LINE_SIZE() +ENTRY(swapper_pgprot) + .quad HV_PTE_PRESENT | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) + .align CHIP_L2_LINE_SIZE() + END(swapper_pgprot) diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds new file mode 100644 index 00000000..2b7cd0a6 --- /dev/null +++ b/arch/tile/kernel/hvglue.lds @@ -0,0 +1,58 @@ +/* Hypervisor call vector addresses; see <hv/hypervisor.h> */ +hv_init = TEXT_OFFSET + 0x10020; +hv_install_context = TEXT_OFFSET + 0x10040; +hv_sysconf = TEXT_OFFSET + 0x10060; +hv_get_rtc = TEXT_OFFSET + 0x10080; +hv_set_rtc = TEXT_OFFSET + 0x100a0; +hv_flush_asid = TEXT_OFFSET + 0x100c0; +hv_flush_page = TEXT_OFFSET + 0x100e0; +hv_flush_pages = TEXT_OFFSET + 0x10100; +hv_restart = TEXT_OFFSET + 0x10120; +hv_halt = TEXT_OFFSET + 0x10140; +hv_power_off = TEXT_OFFSET + 0x10160; +hv_inquire_physical = TEXT_OFFSET + 0x10180; +hv_inquire_memory_controller = TEXT_OFFSET + 0x101a0; +hv_inquire_virtual = TEXT_OFFSET + 0x101c0; +hv_inquire_asid = TEXT_OFFSET + 0x101e0; +hv_nanosleep = TEXT_OFFSET + 0x10200; +hv_console_read_if_ready = TEXT_OFFSET + 0x10220; +hv_console_write = TEXT_OFFSET + 0x10240; +hv_downcall_dispatch = TEXT_OFFSET + 0x10260; +hv_inquire_topology = TEXT_OFFSET + 0x10280; +hv_fs_findfile = TEXT_OFFSET + 0x102a0; +hv_fs_fstat = TEXT_OFFSET + 0x102c0; +hv_fs_pread = TEXT_OFFSET + 0x102e0; +hv_physaddr_read64 = TEXT_OFFSET + 0x10300; +hv_physaddr_write64 = TEXT_OFFSET + 0x10320; +hv_get_command_line = TEXT_OFFSET + 0x10340; +hv_set_caching = TEXT_OFFSET + 0x10360; +hv_bzero_page = TEXT_OFFSET + 0x10380; +hv_register_message_state = TEXT_OFFSET + 0x103a0; +hv_send_message = TEXT_OFFSET + 0x103c0; +hv_receive_message = TEXT_OFFSET + 0x103e0; +hv_inquire_context = TEXT_OFFSET + 0x10400; +hv_start_all_tiles = TEXT_OFFSET + 0x10420; +hv_dev_open = TEXT_OFFSET + 0x10440; +hv_dev_close = TEXT_OFFSET + 0x10460; +hv_dev_pread = TEXT_OFFSET + 0x10480; +hv_dev_pwrite = TEXT_OFFSET + 0x104a0; +hv_dev_poll = TEXT_OFFSET + 0x104c0; +hv_dev_poll_cancel = TEXT_OFFSET + 0x104e0; +hv_dev_preada = TEXT_OFFSET + 0x10500; +hv_dev_pwritea = TEXT_OFFSET + 0x10520; +hv_flush_remote = TEXT_OFFSET + 0x10540; +hv_console_putc = TEXT_OFFSET + 0x10560; +hv_inquire_tiles = TEXT_OFFSET + 0x10580; +hv_confstr = TEXT_OFFSET + 0x105a0; +hv_reexec = TEXT_OFFSET + 0x105c0; +hv_set_command_line = TEXT_OFFSET + 0x105e0; +hv_clear_intr = TEXT_OFFSET + 0x10600; +hv_enable_intr = TEXT_OFFSET + 0x10620; +hv_disable_intr = TEXT_OFFSET + 0x10640; +hv_raise_intr = TEXT_OFFSET + 0x10660; +hv_trigger_ipi = TEXT_OFFSET + 0x10680; +hv_store_mapping = TEXT_OFFSET + 0x106a0; +hv_inquire_realpa = TEXT_OFFSET + 0x106c0; +hv_flush_all = TEXT_OFFSET + 0x106e0; +hv_get_ipi_pte = TEXT_OFFSET + 0x10700; +hv_glue_internals = TEXT_OFFSET + 0x10720; diff --git a/arch/tile/kernel/init_task.c b/arch/tile/kernel/init_task.c new file mode 100644 index 00000000..928b3187 --- /dev/null +++ b/arch/tile/kernel/init_task.c @@ -0,0 +1,59 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/init_task.h> +#include <linux/mqueue.h> +#include <linux/module.h> +#include <linux/start_kernel.h> +#include <linux/uaccess.h> + +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); + +/* + * Initial thread structure. + * + * We need to make sure that this is THREAD_SIZE aligned due to the + * way process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union thread_union init_thread_union __init_task_data = { + INIT_THREAD_INFO(init_task) +}; + +/* + * Initial task structure. + * + * All other task structs will be allocated on slabs in fork.c + */ +struct task_struct init_task = INIT_TASK(init_task); +EXPORT_SYMBOL(init_task); + +/* + * per-CPU stack and boot info. + */ +DEFINE_PER_CPU(unsigned long, boot_sp) = + (unsigned long)init_stack + THREAD_SIZE; + +#ifdef CONFIG_SMP +DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel; +#else +/* + * The variable must be __initdata since it references __init code. + * With CONFIG_SMP it is per-cpu data, which is exempt from validation. + */ +unsigned long __initdata boot_pc = (unsigned long)start_kernel; +#endif diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S new file mode 100644 index 00000000..69435151 --- /dev/null +++ b/arch/tile/kernel/intvec_32.S @@ -0,0 +1,1944 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Linux interrupt vectors. + */ + +#include <linux/linkage.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/unistd.h> +#include <asm/ptrace.h> +#include <asm/thread_info.h> +#include <asm/irqflags.h> +#include <asm/atomic_32.h> +#include <asm/asm-offsets.h> +#include <hv/hypervisor.h> +#include <arch/abi.h> +#include <arch/interrupts.h> +#include <arch/spr_def.h> + +#ifdef CONFIG_PREEMPT +# error "No support for kernel preemption currently" +#endif + +#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) + +#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) + +#if !CHIP_HAS_WH64() + /* By making this an empty macro, we can use wh64 in the code. */ + .macro wh64 reg + .endm +#endif + + .macro push_reg reg, ptr=sp, delta=-4 + { + sw \ptr, \reg + addli \ptr, \ptr, \delta + } + .endm + + .macro pop_reg reg, ptr=sp, delta=4 + { + lw \reg, \ptr + addli \ptr, \ptr, \delta + } + .endm + + .macro pop_reg_zero reg, zreg, ptr=sp, delta=4 + { + move \zreg, zero + lw \reg, \ptr + addi \ptr, \ptr, \delta + } + .endm + + .macro push_extra_callee_saves reg + PTREGS_PTR(\reg, PTREGS_OFFSET_REG(51)) + push_reg r51, \reg + push_reg r50, \reg + push_reg r49, \reg + push_reg r48, \reg + push_reg r47, \reg + push_reg r46, \reg + push_reg r45, \reg + push_reg r44, \reg + push_reg r43, \reg + push_reg r42, \reg + push_reg r41, \reg + push_reg r40, \reg + push_reg r39, \reg + push_reg r38, \reg + push_reg r37, \reg + push_reg r36, \reg + push_reg r35, \reg + push_reg r34, \reg, PTREGS_OFFSET_BASE - PTREGS_OFFSET_REG(34) + .endm + + .macro panic str + .pushsection .rodata, "a" +1: + .asciz "\str" + .popsection + { + moveli r0, lo16(1b) + } + { + auli r0, r0, ha16(1b) + jal panic + } + .endm + +#ifdef __COLLECT_LINKER_FEEDBACK__ + .pushsection .text.intvec_feedback,"ax" +intvec_feedback: + .popsection +#endif + + /* + * Default interrupt handler. + * + * vecnum is where we'll put this code. + * c_routine is the C routine we'll call. + * + * The C routine is passed two arguments: + * - A pointer to the pt_regs state. + * - The interrupt vector number. + * + * The "processing" argument specifies the code for processing + * the interrupt. Defaults to "handle_interrupt". + */ + .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt + .org (\vecnum << 8) +intvec_\vecname: + .ifc \vecnum, INT_SWINT_1 + blz TREG_SYSCALL_NR_NAME, sys_cmpxchg + .endif + + /* Temporarily save a register so we have somewhere to work. */ + + mtspr SPR_SYSTEM_SAVE_K_1, r0 + mfspr r0, SPR_EX_CONTEXT_K_1 + + /* The cmpxchg code clears sp to force us to reset it here on fault. */ + { + bz sp, 2f + andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + } + + .ifc \vecnum, INT_DOUBLE_FAULT + /* + * For double-faults from user-space, fall through to the normal + * register save and stack setup path. Otherwise, it's the + * hypervisor giving us one last chance to dump diagnostics, and we + * branch to the kernel_double_fault routine to do so. + */ + bz r0, 1f + j _kernel_double_fault +1: + .else + /* + * If we're coming from user-space, then set sp to the top of + * the kernel stack. Otherwise, assume sp is already valid. + */ + { + bnz r0, 0f + move r0, sp + } + .endif + + .ifc \c_routine, do_page_fault + /* + * The page_fault handler may be downcalled directly by the + * hypervisor even when Linux is running and has ICS set. + * + * In this case the contents of EX_CONTEXT_K_1 reflect the + * previous fault and can't be relied on to choose whether or + * not to reinitialize the stack pointer. So we add a test + * to see whether SYSTEM_SAVE_K_2 has the high bit set, + * and if so we don't reinitialize sp, since we must be coming + * from Linux. (In fact the precise case is !(val & ~1), + * but any Linux PC has to have the high bit set.) + * + * Note that the hypervisor *always* sets SYSTEM_SAVE_K_2 for + * any path that turns into a downcall to one of our TLB handlers. + */ + mfspr r0, SPR_SYSTEM_SAVE_K_2 + { + blz r0, 0f /* high bit in S_S_1_2 is for a PC to use */ + move r0, sp + } + .endif + +2: + /* + * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and + * the current stack top in the higher bits. So we recover + * our stack top by just masking off the low bits, then + * point sp at the top aligned address on the actual stack page. + */ + mfspr r0, SPR_SYSTEM_SAVE_K_0 + mm r0, r0, zero, LOG2_THREAD_SIZE, 31 + +0: + /* + * Align the stack mod 64 so we can properly predict what + * cache lines we need to write-hint to reduce memory fetch + * latency as we enter the kernel. The layout of memory is + * as follows, with cache line 0 at the lowest VA, and cache + * line 4 just below the r0 value this "andi" computes. + * Note that we never write to cache line 4, and we skip + * cache line 1 for syscalls. + * + * cache line 4: ptregs padding (two words) + * cache line 3: r46...lr, pc, ex1, faultnum, orig_r0, flags, pad + * cache line 2: r30...r45 + * cache line 1: r14...r29 + * cache line 0: 2 x frame, r0..r13 + */ + andi r0, r0, -64 + + /* + * Push the first four registers on the stack, so that we can set + * them to vector-unique values before we jump to the common code. + * + * Registers are pushed on the stack as a struct pt_regs, + * with the sp initially just above the struct, and when we're + * done, sp points to the base of the struct, minus + * C_ABI_SAVE_AREA_SIZE, so we can directly jal to C code. + * + * This routine saves just the first four registers, plus the + * stack context so we can do proper backtracing right away, + * and defers to handle_interrupt to save the rest. + * The backtracer needs pc, ex1, lr, sp, r52, and faultnum. + */ + addli r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP) + wh64 r0 /* cache line 3 */ + { + sw r0, lr + addli r0, r0, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR + } + { + sw r0, sp + addli sp, r0, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_SP + } + { + sw sp, r52 + addli sp, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(52) + } + wh64 sp /* cache line 0 */ + { + sw sp, r1 + addli sp, sp, PTREGS_OFFSET_REG(2) - PTREGS_OFFSET_REG(1) + } + { + sw sp, r2 + addli sp, sp, PTREGS_OFFSET_REG(3) - PTREGS_OFFSET_REG(2) + } + { + sw sp, r3 + addli sp, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(3) + } + mfspr r0, SPR_EX_CONTEXT_K_0 + .ifc \processing,handle_syscall + /* + * Bump the saved PC by one bundle so that when we return, we won't + * execute the same swint instruction again. We need to do this while + * we're in the critical section. + */ + addi r0, r0, 8 + .endif + { + sw sp, r0 + addli sp, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC + } + mfspr r0, SPR_EX_CONTEXT_K_1 + { + sw sp, r0 + addi sp, sp, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1 + /* + * Use r0 for syscalls so it's a temporary; use r1 for interrupts + * so that it gets passed through unchanged to the handler routine. + * Note that the .if conditional confusingly spans bundles. + */ + .ifc \processing,handle_syscall + movei r0, \vecnum + } + { + sw sp, r0 + .else + movei r1, \vecnum + } + { + sw sp, r1 + .endif + addli sp, sp, PTREGS_OFFSET_REG(0) - PTREGS_OFFSET_FAULTNUM + } + mfspr r0, SPR_SYSTEM_SAVE_K_1 /* Original r0 */ + { + sw sp, r0 + addi sp, sp, -PTREGS_OFFSET_REG(0) - 4 + } + { + sw sp, zero /* write zero into "Next SP" frame pointer */ + addi sp, sp, -4 /* leave SP pointing at bottom of frame */ + } + .ifc \processing,handle_syscall + j handle_syscall + .else + /* + * Capture per-interrupt SPR context to registers. + * We overload the meaning of r3 on this path such that if its bit 31 + * is set, we have to mask all interrupts including NMIs before + * clearing the interrupt critical section bit. + * See discussion below at "finish_interrupt_save". + */ + .ifc \c_routine, do_page_fault + mfspr r2, SPR_SYSTEM_SAVE_K_3 /* address of page fault */ + mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */ + .else + .ifc \vecnum, INT_DOUBLE_FAULT + { + mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */ + movei r3, 0 + } + .else + .ifc \c_routine, do_trap + { + mfspr r2, GPV_REASON + movei r3, 0 + } + .else + .ifc \c_routine, op_handle_perf_interrupt + { + mfspr r2, PERF_COUNT_STS + movei r3, -1 /* not used, but set for consistency */ + } + .else +#if CHIP_HAS_AUX_PERF_COUNTERS() + .ifc \c_routine, op_handle_aux_perf_interrupt + { + mfspr r2, AUX_PERF_COUNT_STS + movei r3, -1 /* not used, but set for consistency */ + } + .else +#endif + movei r3, 0 +#if CHIP_HAS_AUX_PERF_COUNTERS() + .endif +#endif + .endif + .endif + .endif + .endif + /* Put function pointer in r0 */ + moveli r0, lo16(\c_routine) + { + auli r0, r0, ha16(\c_routine) + j \processing + } + .endif + ENDPROC(intvec_\vecname) + +#ifdef __COLLECT_LINKER_FEEDBACK__ + .pushsection .text.intvec_feedback,"ax" + .org (\vecnum << 5) + FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8) + jrp lr + .popsection +#endif + + .endm + + + /* + * Save the rest of the registers that we didn't save in the actual + * vector itself. We can't use r0-r10 inclusive here. + */ + .macro finish_interrupt_save, function + + /* If it's a syscall, save a proper orig_r0, otherwise just zero. */ + PTREGS_PTR(r52, PTREGS_OFFSET_ORIG_R0) + { + .ifc \function,handle_syscall + sw r52, r0 + .else + sw r52, zero + .endif + PTREGS_PTR(r52, PTREGS_OFFSET_TP) + } + + /* + * For ordinary syscalls, we save neither caller- nor callee- + * save registers, since the syscall invoker doesn't expect the + * caller-saves to be saved, and the called kernel functions will + * take care of saving the callee-saves for us. + * + * For interrupts we save just the caller-save registers. Saving + * them is required (since the "caller" can't save them). Again, + * the called kernel functions will restore the callee-save + * registers for us appropriately. + * + * On return, we normally restore nothing special for syscalls, + * and just the caller-save registers for interrupts. + * + * However, there are some important caveats to all this: + * + * - We always save a few callee-save registers to give us + * some scratchpad registers to carry across function calls. + * + * - fork/vfork/etc require us to save all the callee-save + * registers, which we do in PTREGS_SYSCALL_ALL_REGS, below. + * + * - We always save r0..r5 and r10 for syscalls, since we need + * to reload them a bit later for the actual kernel call, and + * since we might need them for -ERESTARTNOINTR, etc. + * + * - Before invoking a signal handler, we save the unsaved + * callee-save registers so they are visible to the + * signal handler or any ptracer. + * + * - If the unsaved callee-save registers are modified, we set + * a bit in pt_regs so we know to reload them from pt_regs + * and not just rely on the kernel function unwinding. + * (Done for ptrace register writes and SA_SIGINFO handler.) + */ + { + sw r52, tp + PTREGS_PTR(r52, PTREGS_OFFSET_REG(33)) + } + wh64 r52 /* cache line 2 */ + push_reg r33, r52 + push_reg r32, r52 + push_reg r31, r52 + .ifc \function,handle_syscall + push_reg r30, r52, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(30) + push_reg TREG_SYSCALL_NR_NAME, r52, \ + PTREGS_OFFSET_REG(5) - PTREGS_OFFSET_SYSCALL + .else + + push_reg r30, r52, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(30) + wh64 r52 /* cache line 1 */ + push_reg r29, r52 + push_reg r28, r52 + push_reg r27, r52 + push_reg r26, r52 + push_reg r25, r52 + push_reg r24, r52 + push_reg r23, r52 + push_reg r22, r52 + push_reg r21, r52 + push_reg r20, r52 + push_reg r19, r52 + push_reg r18, r52 + push_reg r17, r52 + push_reg r16, r52 + push_reg r15, r52 + push_reg r14, r52 + push_reg r13, r52 + push_reg r12, r52 + push_reg r11, r52 + push_reg r10, r52 + push_reg r9, r52 + push_reg r8, r52 + push_reg r7, r52 + push_reg r6, r52 + + .endif + + push_reg r5, r52 + sw r52, r4 + + /* Load tp with our per-cpu offset. */ +#ifdef CONFIG_SMP + { + mfspr r20, SPR_SYSTEM_SAVE_K_0 + moveli r21, lo16(__per_cpu_offset) + } + { + auli r21, r21, ha16(__per_cpu_offset) + mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1 + } + s2a r20, r20, r21 + lw tp, r20 +#else + move tp, zero +#endif + + /* + * If we will be returning to the kernel, we will need to + * reset the interrupt masks to the state they had before. + * Set DISABLE_IRQ in flags iff we came from PL1 with irqs disabled. + * We load flags in r32 here so we can jump to .Lrestore_regs + * directly after do_page_fault_ics() if necessary. + */ + mfspr r32, SPR_EX_CONTEXT_K_1 + { + andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS) + } + bzt r32, 1f /* zero if from user space */ + IRQS_DISABLED(r32) /* zero if irqs enabled */ +#if PT_FLAGS_DISABLE_IRQ != 1 +# error Value of IRQS_DISABLED used to set PT_FLAGS_DISABLE_IRQ; fix +#endif +1: + .ifnc \function,handle_syscall + /* Record the fact that we saved the caller-save registers above. */ + ori r32, r32, PT_FLAGS_CALLER_SAVES + .endif + sw r21, r32 + +#ifdef __COLLECT_LINKER_FEEDBACK__ + /* + * Notify the feedback routines that we were in the + * appropriate fixed interrupt vector area. Note that we + * still have ICS set at this point, so we can't invoke any + * atomic operations or we will panic. The feedback + * routines internally preserve r0..r10 and r30 up. + */ + .ifnc \function,handle_syscall + shli r20, r1, 5 + .else + moveli r20, INT_SWINT_1 << 5 + .endif + addli r20, r20, lo16(intvec_feedback) + auli r20, r20, ha16(intvec_feedback) + jalr r20 + + /* And now notify the feedback routines that we are here. */ + FEEDBACK_ENTER(\function) +#endif + + /* + * we've captured enough state to the stack (including in + * particular our EX_CONTEXT state) that we can now release + * the interrupt critical section and replace it with our + * standard "interrupts disabled" mask value. This allows + * synchronous interrupts (and profile interrupts) to punch + * through from this point onwards. + * + * If bit 31 of r3 is set during a non-NMI interrupt, we know we + * are on the path where the hypervisor has punched through our + * ICS with a page fault, so we call out to do_page_fault_ics() + * to figure out what to do with it. If the fault was in + * an atomic op, we unlock the atomic lock, adjust the + * saved register state a little, and return "zero" in r4, + * falling through into the normal page-fault interrupt code. + * If the fault was in a kernel-space atomic operation, then + * do_page_fault_ics() resolves it itself, returns "one" in r4, + * and as a result goes directly to restoring registers and iret, + * without trying to adjust the interrupt masks at all. + * The do_page_fault_ics() API involves passing and returning + * a five-word struct (in registers) to avoid writing the + * save and restore code here. + */ + .ifc \function,handle_nmi + IRQ_DISABLE_ALL(r20) + .else + .ifnc \function,handle_syscall + bgezt r3, 1f + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + jal do_page_fault_ics + } + FEEDBACK_REENTER(\function) + bzt r4, 1f + j .Lrestore_regs +1: + .endif + IRQ_DISABLE(r20, r21) + .endif + mtspr INTERRUPT_CRITICAL_SECTION, zero + +#if CHIP_HAS_WH64() + /* + * Prepare the first 256 stack bytes to be rapidly accessible + * without having to fetch the background data. We don't really + * know how far to write-hint, but kernel stacks generally + * aren't that big, and write-hinting here does take some time. + */ + addi r52, sp, -64 + { + wh64 r52 + addi r52, r52, -64 + } + { + wh64 r52 + addi r52, r52, -64 + } + { + wh64 r52 + addi r52, r52, -64 + } + wh64 r52 +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS + .ifnc \function,handle_nmi + /* + * We finally have enough state set up to notify the irq + * tracing code that irqs were disabled on entry to the handler. + * The TRACE_IRQS_OFF call clobbers registers r0-r29. + * For syscalls, we already have the register state saved away + * on the stack, so we don't bother to do any register saves here, + * and later we pop the registers back off the kernel stack. + * For interrupt handlers, save r0-r3 in callee-saved registers. + */ + .ifnc \function,handle_syscall + { move r30, r0; move r31, r1 } + { move r32, r2; move r33, r3 } + .endif + TRACE_IRQS_OFF + .ifnc \function,handle_syscall + { move r0, r30; move r1, r31 } + { move r2, r32; move r3, r33 } + .endif + .endif +#endif + + .endm + + .macro check_single_stepping, kind, not_single_stepping + /* + * Check for single stepping in user-level priv + * kind can be "normal", "ill", or "syscall" + * At end, if fall-thru + * r29: thread_info->step_state + * r28: &pt_regs->pc + * r27: pt_regs->pc + * r26: thread_info->step_state->buffer + */ + + /* Check for single stepping */ + GET_THREAD_INFO(r29) + { + /* Get pointer to field holding step state */ + addi r29, r29, THREAD_INFO_STEP_STATE_OFFSET + + /* Get pointer to EX1 in register state */ + PTREGS_PTR(r27, PTREGS_OFFSET_EX1) + } + { + /* Get pointer to field holding PC */ + PTREGS_PTR(r28, PTREGS_OFFSET_PC) + + /* Load the pointer to the step state */ + lw r29, r29 + } + /* Load EX1 */ + lw r27, r27 + { + /* Points to flags */ + addi r23, r29, SINGLESTEP_STATE_FLAGS_OFFSET + + /* No single stepping if there is no step state structure */ + bzt r29, \not_single_stepping + } + { + /* mask off ICS and any other high bits */ + andi r27, r27, SPR_EX_CONTEXT_1_1__PL_MASK + + /* Load pointer to single step instruction buffer */ + lw r26, r29 + } + /* Check priv state */ + bnz r27, \not_single_stepping + + /* Get flags */ + lw r22, r23 + { + /* Branch if single-step mode not enabled */ + bbnst r22, \not_single_stepping + + /* Clear enabled flag */ + andi r22, r22, ~SINGLESTEP_STATE_MASK_IS_ENABLED + } + .ifc \kind,normal + { + /* Load PC */ + lw r27, r28 + + /* Point to the entry containing the original PC */ + addi r24, r29, SINGLESTEP_STATE_ORIG_PC_OFFSET + } + { + /* Disable single stepping flag */ + sw r23, r22 + } + { + /* Get the original pc */ + lw r24, r24 + + /* See if the PC is at the start of the single step buffer */ + seq r25, r26, r27 + } + /* + * NOTE: it is really expected that the PC be in the single step buffer + * at this point + */ + bzt r25, \not_single_stepping + + /* Restore the original PC */ + sw r28, r24 + .else + .ifc \kind,syscall + { + /* Load PC */ + lw r27, r28 + + /* Point to the entry containing the next PC */ + addi r24, r29, SINGLESTEP_STATE_NEXT_PC_OFFSET + } + { + /* Increment the stopped PC by the bundle size */ + addi r26, r26, 8 + + /* Disable single stepping flag */ + sw r23, r22 + } + { + /* Get the next pc */ + lw r24, r24 + + /* + * See if the PC is one bundle past the start of the + * single step buffer + */ + seq r25, r26, r27 + } + { + /* + * NOTE: it is really expected that the PC be in the + * single step buffer at this point + */ + bzt r25, \not_single_stepping + } + /* Set to the next PC */ + sw r28, r24 + .else + { + /* Point to 3rd bundle in buffer */ + addi r25, r26, 16 + + /* Load PC */ + lw r27, r28 + } + { + /* Disable single stepping flag */ + sw r23, r22 + + /* See if the PC is in the single step buffer */ + slte_u r24, r26, r27 + } + { + slte_u r25, r27, r25 + + /* + * NOTE: it is really expected that the PC be in the + * single step buffer at this point + */ + bzt r24, \not_single_stepping + } + bzt r25, \not_single_stepping + .endif + .endif + .endm + + /* + * Redispatch a downcall. + */ + .macro dc_dispatch vecnum, vecname + .org (\vecnum << 8) +intvec_\vecname: + j hv_downcall_dispatch + ENDPROC(intvec_\vecname) + .endm + + /* + * Common code for most interrupts. The C function we're eventually + * going to is in r0, and the faultnum is in r1; the original + * values for those registers are on the stack. + */ + .pushsection .text.handle_interrupt,"ax" +handle_interrupt: + finish_interrupt_save handle_interrupt + + /* + * Check for if we are single stepping in user level. If so, then + * we need to restore the PC. + */ + + check_single_stepping normal, .Ldispatch_interrupt +.Ldispatch_interrupt: + + /* Jump to the C routine; it should enable irqs as soon as possible. */ + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_interrupt) + { + movei r30, 0 /* not an NMI */ + j interrupt_return + } + STD_ENDPROC(handle_interrupt) + +/* + * This routine takes a boolean in r30 indicating if this is an NMI. + * If so, we also expect a boolean in r31 indicating whether to + * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. + */ +STD_ENTRY(interrupt_return) + /* If we're resuming to kernel space, don't check thread flags. */ + { + bnz r30, .Lrestore_all /* NMIs don't special-case user-space */ + PTREGS_PTR(r29, PTREGS_OFFSET_EX1) + } + lw r29, r29 + andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + { + bzt r29, .Lresume_userspace + PTREGS_PTR(r29, PTREGS_OFFSET_PC) + } + + /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */ + { + lw r28, r29 + moveli r27, lo16(_cpu_idle_nap) + } + { + auli r27, r27, ha16(_cpu_idle_nap) + } + { + seq r27, r27, r28 + } + { + bbns r27, .Lrestore_all + addi r28, r28, 8 + } + sw r29, r28 + j .Lrestore_all + +.Lresume_userspace: + FEEDBACK_REENTER(interrupt_return) + + /* + * Use r33 to hold whether we have already loaded the callee-saves + * into ptregs. We don't want to do it twice in this loop, since + * then we'd clobber whatever changes are made by ptrace, etc. + * Get base of stack in r32. + */ + { + GET_THREAD_INFO(r32) + movei r33, 0 + } + +.Lretry_work_pending: + /* + * Disable interrupts so as to make sure we don't + * miss an interrupt that sets any of the thread flags (like + * need_resched or sigpending) between sampling and the iret. + * Routines like schedule() or do_signal() may re-enable + * interrupts before returning. + */ + IRQ_DISABLE(r20, r21) + TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */ + + + /* Check to see if there is any work to do before returning to user. */ + { + addi r29, r32, THREAD_INFO_FLAGS_OFFSET + moveli r1, lo16(_TIF_ALLWORK_MASK) + } + { + lw r29, r29 + auli r1, r1, ha16(_TIF_ALLWORK_MASK) + } + and r1, r29, r1 + bzt r1, .Lrestore_all + + /* + * Make sure we have all the registers saved for signal + * handling, notify-resume, or single-step. Call out to C + * code to figure out exactly what we need to do for each flag bit, + * then if necessary, reload the flags and recheck. + */ + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + bnz r33, 1f + } + push_extra_callee_saves r0 + movei r33, 1 +1: jal do_work_pending + bnz r0, .Lretry_work_pending + + /* + * In the NMI case we + * omit the call to single_process_check_nohz, which normally checks + * to see if we should start or stop the scheduler tick, because + * we can't call arbitrary Linux code from an NMI context. + * We always call the homecache TLB deferral code to re-trigger + * the deferral mechanism. + * + * The other chunk of responsibility this code has is to reset the + * interrupt masks appropriately to reset irqs and NMIs. We have + * to call TRACE_IRQS_OFF and TRACE_IRQS_ON to support all the + * lockdep-type stuff, but we can't set ICS until afterwards, since + * ICS can only be used in very tight chunks of code to avoid + * tripping over various assertions that it is off. + * + * (There is what looks like a window of vulnerability here since + * we might take a profile interrupt between the two SPR writes + * that set the mask, but since we write the low SPR word first, + * and our interrupt entry code checks the low SPR word, any + * profile interrupt will actually disable interrupts in both SPRs + * before returning, which is OK.) + */ +.Lrestore_all: + PTREGS_PTR(r0, PTREGS_OFFSET_EX1) + { + lw r0, r0 + PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS) + } + { + andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK + lw r32, r32 + } + bnz r0, 1f + j 2f +#if PT_FLAGS_DISABLE_IRQ != 1 +# error Assuming PT_FLAGS_DISABLE_IRQ == 1 so we can use bbnst below +#endif +1: bbnst r32, 2f + IRQ_DISABLE(r20,r21) + TRACE_IRQS_OFF + movei r0, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r0 + bzt r30, .Lrestore_regs + j 3f +2: TRACE_IRQS_ON + movei r0, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r0 + IRQ_ENABLE(r20, r21) + bzt r30, .Lrestore_regs +3: + + + /* + * We now commit to returning from this interrupt, since we will be + * doing things like setting EX_CONTEXT SPRs and unwinding the stack + * frame. No calls should be made to any other code after this point. + * This code should only be entered with ICS set. + * r32 must still be set to ptregs.flags. + * We launch loads to each cache line separately first, so we can + * get some parallelism out of the memory subsystem. + * We start zeroing caller-saved registers throughout, since + * that will save some cycles if this turns out to be a syscall. + */ +.Lrestore_regs: + FEEDBACK_REENTER(interrupt_return) /* called from elsewhere */ + + /* + * Rotate so we have one high bit and one low bit to test. + * - low bit says whether to restore all the callee-saved registers, + * or just r30-r33, and r52 up. + * - high bit (i.e. sign bit) says whether to restore all the + * caller-saved registers, or just r0. + */ +#if PT_FLAGS_CALLER_SAVES != 2 || PT_FLAGS_RESTORE_REGS != 4 +# error Rotate trick does not work :-) +#endif + { + rli r20, r32, 30 + PTREGS_PTR(sp, PTREGS_OFFSET_REG(0)) + } + + /* + * Load cache lines 0, 2, and 3 in that order, then use + * the last loaded value, which makes it likely that the other + * cache lines have also loaded, at which point we should be + * able to safely read all the remaining words on those cache + * lines without waiting for the memory subsystem. + */ + pop_reg_zero r0, r28, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0) + pop_reg_zero r30, r2, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(30) + pop_reg_zero r21, r3, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC + pop_reg_zero lr, r4, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_EX1 + { + mtspr SPR_EX_CONTEXT_K_0, r21 + move r5, zero + } + { + mtspr SPR_EX_CONTEXT_K_1, lr + andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + } + + /* Restore callee-saveds that we actually use. */ + pop_reg_zero r52, r6, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_REG(52) + pop_reg_zero r31, r7 + pop_reg_zero r32, r8 + pop_reg_zero r33, r9, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(33) + + /* + * If we modified other callee-saveds, restore them now. + * This is rare, but could be via ptrace or signal handler. + */ + { + move r10, zero + bbs r20, .Lrestore_callees + } +.Lcontinue_restore_regs: + + /* Check if we're returning from a syscall. */ + { + move r11, zero + blzt r20, 1f /* no, so go restore callee-save registers */ + } + + /* + * Check if we're returning to userspace. + * Note that if we're not, we don't worry about zeroing everything. + */ + { + addli sp, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(29) + bnz lr, .Lkernel_return + } + + /* + * On return from syscall, we've restored r0 from pt_regs, but we + * clear the remainder of the caller-saved registers. We could + * restore the syscall arguments, but there's not much point, + * and it ensures user programs aren't trying to use the + * caller-saves if we clear them, as well as avoiding leaking + * kernel pointers into userspace. + */ + pop_reg_zero lr, r12, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR + pop_reg_zero tp, r13, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP + { + lw sp, sp + move r14, zero + move r15, zero + } + { move r16, zero; move r17, zero } + { move r18, zero; move r19, zero } + { move r20, zero; move r21, zero } + { move r22, zero; move r23, zero } + { move r24, zero; move r25, zero } + { move r26, zero; move r27, zero } + + /* Set r1 to errno if we are returning an error, otherwise zero. */ + { + moveli r29, 4096 + sub r1, zero, r0 + } + slt_u r29, r1, r29 + { + mnz r1, r29, r1 + move r29, zero + } + iret + + /* + * Not a syscall, so restore caller-saved registers. + * First kick off a load for cache line 1, which we're touching + * for the first time here. + */ + .align 64 +1: pop_reg r29, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(29) + pop_reg r1 + pop_reg r2 + pop_reg r3 + pop_reg r4 + pop_reg r5 + pop_reg r6 + pop_reg r7 + pop_reg r8 + pop_reg r9 + pop_reg r10 + pop_reg r11 + pop_reg r12 + pop_reg r13 + pop_reg r14 + pop_reg r15 + pop_reg r16 + pop_reg r17 + pop_reg r18 + pop_reg r19 + pop_reg r20 + pop_reg r21 + pop_reg r22 + pop_reg r23 + pop_reg r24 + pop_reg r25 + pop_reg r26 + pop_reg r27 + pop_reg r28, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(28) + /* r29 already restored above */ + bnz lr, .Lkernel_return + pop_reg lr, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR + pop_reg tp, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP + lw sp, sp + iret + + /* + * We can't restore tp when in kernel mode, since a thread might + * have migrated from another cpu and brought a stale tp value. + */ +.Lkernel_return: + pop_reg lr, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR + lw sp, sp + iret + + /* Restore callee-saved registers from r34 to r51. */ +.Lrestore_callees: + addli sp, sp, PTREGS_OFFSET_REG(34) - PTREGS_OFFSET_REG(29) + pop_reg r34 + pop_reg r35 + pop_reg r36 + pop_reg r37 + pop_reg r38 + pop_reg r39 + pop_reg r40 + pop_reg r41 + pop_reg r42 + pop_reg r43 + pop_reg r44 + pop_reg r45 + pop_reg r46 + pop_reg r47 + pop_reg r48 + pop_reg r49 + pop_reg r50 + pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51) + j .Lcontinue_restore_regs + STD_ENDPROC(interrupt_return) + + /* + * Some interrupts don't check for single stepping + */ + .pushsection .text.handle_interrupt_no_single_step,"ax" +handle_interrupt_no_single_step: + finish_interrupt_save handle_interrupt_no_single_step + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_interrupt_no_single_step) + { + movei r30, 0 /* not an NMI */ + j interrupt_return + } + STD_ENDPROC(handle_interrupt_no_single_step) + + /* + * "NMI" interrupts mask ALL interrupts before calling the + * handler, and don't check thread flags, etc., on the way + * back out. In general, the only things we do here for NMIs + * are the register save/restore, fixing the PC if we were + * doing single step, and the dataplane kernel-TLB management. + * We don't (for example) deal with start/stop of the sched tick. + */ + .pushsection .text.handle_nmi,"ax" +handle_nmi: + finish_interrupt_save handle_nmi + check_single_stepping normal, .Ldispatch_nmi +.Ldispatch_nmi: + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_nmi) + j interrupt_return + STD_ENDPROC(handle_nmi) + + /* + * Parallel code for syscalls to handle_interrupt. + */ + .pushsection .text.handle_syscall,"ax" +handle_syscall: + finish_interrupt_save handle_syscall + + /* + * Check for if we are single stepping in user level. If so, then + * we need to restore the PC. + */ + check_single_stepping syscall, .Ldispatch_syscall +.Ldispatch_syscall: + + /* Enable irqs. */ + TRACE_IRQS_ON + IRQ_ENABLE(r20, r21) + + /* Bump the counter for syscalls made on this tile. */ + moveli r20, lo16(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + auli r20, r20, ha16(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + add r20, r20, tp + lw r21, r20 + addi r21, r21, 1 + { + sw r20, r21 + GET_THREAD_INFO(r31) + } + + /* Trace syscalls, if requested. */ + addi r31, r31, THREAD_INFO_FLAGS_OFFSET + lw r30, r31 + andi r30, r30, _TIF_SYSCALL_TRACE + bzt r30, .Lrestore_syscall_regs + jal do_syscall_trace + FEEDBACK_REENTER(handle_syscall) + + /* + * We always reload our registers from the stack at this + * point. They might be valid, if we didn't build with + * TRACE_IRQFLAGS, and this isn't a dataplane tile, and we're not + * doing syscall tracing, but there are enough cases now that it + * seems simplest just to do the reload unconditionally. + */ +.Lrestore_syscall_regs: + PTREGS_PTR(r11, PTREGS_OFFSET_REG(0)) + pop_reg r0, r11 + pop_reg r1, r11 + pop_reg r2, r11 + pop_reg r3, r11 + pop_reg r4, r11 + pop_reg r5, r11, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(5) + pop_reg TREG_SYSCALL_NR_NAME, r11 + + /* Ensure that the syscall number is within the legal range. */ + moveli r21, __NR_syscalls + { + slt_u r21, TREG_SYSCALL_NR_NAME, r21 + moveli r20, lo16(sys_call_table) + } + { + bbns r21, .Linvalid_syscall + auli r20, r20, ha16(sys_call_table) + } + s2a r20, TREG_SYSCALL_NR_NAME, r20 + lw r20, r20 + + /* Jump to syscall handler. */ + jalr r20 +.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */ + + /* + * Write our r0 onto the stack so it gets restored instead + * of whatever the user had there before. + */ + PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) + sw r29, r0 + +.Lsyscall_sigreturn_skip: + FEEDBACK_REENTER(handle_syscall) + + /* Do syscall trace again, if requested. */ + lw r30, r31 + andi r30, r30, _TIF_SYSCALL_TRACE + bzt r30, 1f + jal do_syscall_trace + FEEDBACK_REENTER(handle_syscall) +1: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + +.Linvalid_syscall: + /* Report an invalid syscall back to the user program */ + { + PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) + movei r28, -ENOSYS + } + sw r29, r28 + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(handle_syscall) + + /* Return the address for oprofile to suppress in backtraces. */ +STD_ENTRY_SECTION(handle_syscall_link_address, .text.handle_syscall) + lnk r0 + { + addli r0, r0, .Lhandle_syscall_link - . + jrp lr + } + STD_ENDPROC(handle_syscall_link_address) + +STD_ENTRY(ret_from_fork) + jal sim_notify_fork + jal schedule_tail + FEEDBACK_REENTER(ret_from_fork) + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(ret_from_fork) + + /* + * Code for ill interrupt. + */ + .pushsection .text.handle_ill,"ax" +handle_ill: + finish_interrupt_save handle_ill + + /* + * Check for if we are single stepping in user level. If so, then + * we need to restore the PC. + */ + check_single_stepping ill, .Ldispatch_normal_ill + + { + /* See if the PC is the 1st bundle in the buffer */ + seq r25, r27, r26 + + /* Point to the 2nd bundle in the buffer */ + addi r26, r26, 8 + } + { + /* Point to the original pc */ + addi r24, r29, SINGLESTEP_STATE_ORIG_PC_OFFSET + + /* Branch if the PC is the 1st bundle in the buffer */ + bnz r25, 3f + } + { + /* See if the PC is the 2nd bundle of the buffer */ + seq r25, r27, r26 + + /* Set PC to next instruction */ + addi r24, r29, SINGLESTEP_STATE_NEXT_PC_OFFSET + } + { + /* Point to flags */ + addi r25, r29, SINGLESTEP_STATE_FLAGS_OFFSET + + /* Branch if PC is in the second bundle */ + bz r25, 2f + } + /* Load flags */ + lw r25, r25 + { + /* + * Get the offset for the register to restore + * Note: the lower bound is 2, so we have implicit scaling by 4. + * No multiplication of the register number by the size of a register + * is needed. + */ + mm r27, r25, zero, SINGLESTEP_STATE_TARGET_LB, \ + SINGLESTEP_STATE_TARGET_UB + + /* Mask Rewrite_LR */ + andi r25, r25, SINGLESTEP_STATE_MASK_UPDATE + } + { + addi r29, r29, SINGLESTEP_STATE_UPDATE_VALUE_OFFSET + + /* Don't rewrite temp register */ + bz r25, 3f + } + { + /* Get the temp value */ + lw r29, r29 + + /* Point to where the register is stored */ + add r27, r27, sp + } + + /* Add in the C ABI save area size to the register offset */ + addi r27, r27, C_ABI_SAVE_AREA_SIZE + + /* Restore the user's register with the temp value */ + sw r27, r29 + j 3f + +2: + /* Must be in the third bundle */ + addi r24, r29, SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET + +3: + /* set PC and continue */ + lw r26, r24 + { + sw r28, r26 + GET_THREAD_INFO(r0) + } + + /* + * Clear TIF_SINGLESTEP to prevent recursion if we execute an ill. + * The normal non-arch flow redundantly clears TIF_SINGLESTEP, but we + * need to clear it here and can't really impose on all other arches. + * So what's another write between friends? + */ + + addi r1, r0, THREAD_INFO_FLAGS_OFFSET + { + lw r2, r1 + addi r0, r0, THREAD_INFO_TASK_OFFSET /* currently a no-op */ + } + andi r2, r2, ~_TIF_SINGLESTEP + sw r1, r2 + + /* Issue a sigtrap */ + { + lw r0, r0 /* indirect thru thread_info to get task_info*/ + addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */ + move r2, zero /* load error code into r2 */ + } + + jal send_sigtrap /* issue a SIGTRAP */ + FEEDBACK_REENTER(handle_ill) + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + +.Ldispatch_normal_ill: + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_ill) + { + movei r30, 0 /* not an NMI */ + j interrupt_return + } + STD_ENDPROC(handle_ill) + +/* Various stub interrupt handlers and syscall handlers */ + +STD_ENTRY_LOCAL(_kernel_double_fault) + mfspr r1, SPR_EX_CONTEXT_K_0 + move r2, lr + move r3, sp + move r4, r52 + addi sp, sp, -C_ABI_SAVE_AREA_SIZE + j kernel_double_fault + STD_ENDPROC(_kernel_double_fault) + +STD_ENTRY_LOCAL(bad_intr) + mfspr r2, SPR_EX_CONTEXT_K_0 + panic "Unhandled interrupt %#x: PC %#lx" + STD_ENDPROC(bad_intr) + +/* Put address of pt_regs in reg and jump. */ +#define PTREGS_SYSCALL(x, reg) \ + STD_ENTRY(_##x); \ + { \ + PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ + j x \ + }; \ + STD_ENDPROC(_##x) + +/* + * Special-case sigreturn to not write r0 to the stack on return. + * This is technically more efficient, but it also avoids difficulties + * in the 64-bit OS when handling 32-bit compat code, since we must not + * sign-extend r0 for the sigreturn return-value case. + */ +#define PTREGS_SYSCALL_SIGRETURN(x, reg) \ + STD_ENTRY(_##x); \ + addli lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \ + { \ + PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ + j x \ + }; \ + STD_ENDPROC(_##x) + +PTREGS_SYSCALL(sys_execve, r3) +PTREGS_SYSCALL(sys_sigaltstack, r2) +PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0) +PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1) + +/* Save additional callee-saves to pt_regs, put address in r4 and jump. */ +STD_ENTRY(_sys_clone) + push_extra_callee_saves r4 + j sys_clone + STD_ENDPROC(_sys_clone) + +/* + * This entrypoint is taken for the cmpxchg and atomic_update fast + * swints. We may wish to generalize it to other fast swints at some + * point, but for now there are just two very similar ones, which + * makes it faster. + * + * The fast swint code is designed to have a small footprint. It does + * not save or restore any GPRs, counting on the caller-save registers + * to be available to it on entry. It does not modify any callee-save + * registers (including "lr"). It does not check what PL it is being + * called at, so you'd better not call it other than at PL0. + * The <atomic.h> wrapper assumes it only clobbers r20-r29, so if + * it ever is necessary to use more registers, be aware. + * + * It does not use the stack, but since it might be re-interrupted by + * a page fault which would assume the stack was valid, it does + * save/restore the stack pointer and zero it out to make sure it gets reset. + * Since we always keep interrupts disabled, the hypervisor won't + * clobber our EX_CONTEXT_K_x registers, so we don't save/restore them + * (other than to advance the PC on return). + * + * We have to manually validate the user vs kernel address range + * (since at PL1 we can read/write both), and for performance reasons + * we don't allow cmpxchg on the fc000000 memory region, since we only + * validate that the user address is below PAGE_OFFSET. + * + * We place it in the __HEAD section to ensure it is relatively + * near to the intvec_SWINT_1 code (reachable by a conditional branch). + * + * Our use of ATOMIC_LOCK_REG here must match do_page_fault_ics(). + * + * As we do in lib/atomic_asm_32.S, we bypass a store if the value we + * would store is the same as the value we just loaded. + */ + __HEAD + .align 64 + /* Align much later jump on the start of a cache line. */ +#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() + nop +#if PAGE_SIZE >= 0x10000 + nop +#endif +#endif +ENTRY(sys_cmpxchg) + + /* + * Save "sp" and set it zero for any possible page fault. + * + * HACK: We want to both zero sp and check r0's alignment, + * so we do both at once. If "sp" becomes nonzero we + * know r0 is unaligned and branch to the error handler that + * restores sp, so this is OK. + * + * ICS is disabled right now so having a garbage but nonzero + * sp is OK, since we won't execute any faulting instructions + * when it is nonzero. + */ + { + move r27, sp + andi sp, r0, 3 + } + + /* + * Get the lock address in ATOMIC_LOCK_REG, and also validate that the + * address is less than PAGE_OFFSET, since that won't trap at PL1. + * We only use bits less than PAGE_SHIFT to avoid having to worry + * about aliasing among multiple mappings of the same physical page, + * and we ignore the low 3 bits so we have one lock that covers + * both a cmpxchg64() and a cmpxchg() on either its low or high word. + * NOTE: this must match __atomic_hashed_lock() in lib/atomic_32.c. + */ + +#if (PAGE_OFFSET & 0xffff) != 0 +# error Code here assumes PAGE_OFFSET can be loaded with just hi16() +#endif + +#if ATOMIC_LOCKS_FOUND_VIA_TABLE() + { + /* Check for unaligned input. */ + bnz sp, .Lcmpxchg_badaddr + mm r25, r0, zero, 3, PAGE_SHIFT-1 + } + { + crc32_32 r25, zero, r25 + moveli r21, lo16(atomic_lock_ptr) + } + { + auli r21, r21, ha16(atomic_lock_ptr) + auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */ + } + { + shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT + slt_u r23, r0, r23 + lw r26, r0 /* see comment in the "#else" for the "lw r26". */ + } + { + s2a r21, r20, r21 + bbns r23, .Lcmpxchg_badaddr + } + { + lw r21, r21 + seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64 + andi r25, r25, ATOMIC_HASH_L2_SIZE - 1 + } + { + /* Branch away at this point if we're doing a 64-bit cmpxchg. */ + bbs r23, .Lcmpxchg64 + andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */ + } + { + s2a ATOMIC_LOCK_REG_NAME, r25, r21 + j .Lcmpxchg32_tns /* see comment in the #else for the jump. */ + } + +#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ + { + /* Check for unaligned input. */ + bnz sp, .Lcmpxchg_badaddr + auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */ + } + { + /* + * Slide bits into position for 'mm'. We want to ignore + * the low 3 bits of r0, and consider only the next + * ATOMIC_HASH_SHIFT bits. + * Because of C pointer arithmetic, we want to compute this: + * + * ((char*)atomic_locks + + * (((r0 >> 3) & (1 << (ATOMIC_HASH_SIZE - 1))) << 2)) + * + * Instead of two shifts we just ">> 1", and use 'mm' + * to ignore the low and high bits we don't want. + */ + shri r25, r0, 1 + + slt_u r23, r0, r23 + + /* + * Ensure that the TLB is loaded before we take out the lock. + * On tilepro, this will start fetching the value all the way + * into our L1 as well (and if it gets modified before we + * grab the lock, it will be invalidated from our cache + * before we reload it). On tile64, we'll start fetching it + * into our L1 if we're the home, and if we're not, we'll + * still at least start fetching it into the home's L2. + */ + lw r26, r0 + } + { + auli r21, zero, ha16(atomic_locks) + + bbns r23, .Lcmpxchg_badaddr + } +#if PAGE_SIZE < 0x10000 + /* atomic_locks is page-aligned so for big pages we don't need this. */ + addli r21, r21, lo16(atomic_locks) +#endif + { + /* + * Insert the hash bits into the page-aligned pointer. + * ATOMIC_HASH_SHIFT is so big that we don't actually hash + * the unmasked address bits, as that may cause unnecessary + * collisions. + */ + mm ATOMIC_LOCK_REG_NAME, r25, r21, 2, (ATOMIC_HASH_SHIFT + 2) - 1 + + seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64 + } + { + /* Branch away at this point if we're doing a 64-bit cmpxchg. */ + bbs r23, .Lcmpxchg64 + andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */ + } + { + /* + * We very carefully align the code that actually runs with + * the lock held (twelve bundles) so that we know it is all in + * the icache when we start. This instruction (the jump) is + * at the start of the first cache line, address zero mod 64; + * we jump to the very end of the second cache line to get that + * line loaded in the icache, then fall through to issue the tns + * in the third cache line, at which point it's all cached. + * Note that is for performance, not correctness. + */ + j .Lcmpxchg32_tns + } + +#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ + +/* Symbol for do_page_fault_ics() to use to compare against the PC. */ +.global __sys_cmpxchg_grab_lock +__sys_cmpxchg_grab_lock: + + /* + * Perform the actual cmpxchg or atomic_update. + */ +.Ldo_cmpxchg32: + { + lw r21, r0 + seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_atomic_update + move r24, r2 + } + { + seq r22, r21, r1 /* See if cmpxchg matches. */ + and r25, r21, r1 /* If atomic_update, compute (*mem & mask) */ + } + { + or r22, r22, r23 /* Skip compare branch for atomic_update. */ + add r25, r25, r2 /* Compute (*mem & mask) + addend. */ + } + { + mvnz r24, r23, r25 /* Use atomic_update value if appropriate. */ + bbns r22, .Lcmpxchg32_nostore + } + seq r22, r24, r21 /* Are we storing the value we loaded? */ + bbs r22, .Lcmpxchg32_nostore + sw r0, r24 + + /* The following instruction is the start of the second cache line. */ + /* Do slow mtspr here so the following "mf" waits less. */ + { + move sp, r27 + mtspr SPR_EX_CONTEXT_K_0, r28 + } + mf + + { + move r0, r21 + sw ATOMIC_LOCK_REG_NAME, zero + } + iret + + /* Duplicated code here in the case where we don't overlap "mf" */ +.Lcmpxchg32_nostore: + { + move r0, r21 + sw ATOMIC_LOCK_REG_NAME, zero + } + { + move sp, r27 + mtspr SPR_EX_CONTEXT_K_0, r28 + } + iret + + /* + * The locking code is the same for 32-bit cmpxchg/atomic_update, + * and for 64-bit cmpxchg. We provide it as a macro and put + * it into both versions. We can't share the code literally + * since it depends on having the right branch-back address. + */ + .macro cmpxchg_lock, bitwidth + + /* Lock; if we succeed, jump back up to the read-modify-write. */ +#ifdef CONFIG_SMP + tns r21, ATOMIC_LOCK_REG_NAME +#else + /* + * Non-SMP preserves all the lock infrastructure, to keep the + * code simpler for the interesting (SMP) case. However, we do + * one small optimization here and in atomic_asm.S, which is + * to fake out acquiring the actual lock in the atomic_lock table. + */ + movei r21, 0 +#endif + + /* Issue the slow SPR here while the tns result is in flight. */ + mfspr r28, SPR_EX_CONTEXT_K_0 + + { + addi r28, r28, 8 /* return to the instruction after the swint1 */ + bzt r21, .Ldo_cmpxchg\bitwidth + } + /* + * The preceding instruction is the last thing that must be + * hot in the icache before we do the "tns" above. + */ + +#ifdef CONFIG_SMP + /* + * We failed to acquire the tns lock on our first try. Now use + * bounded exponential backoff to retry, like __atomic_spinlock(). + */ + { + moveli r23, 2048 /* maximum backoff time in cycles */ + moveli r25, 32 /* starting backoff time in cycles */ + } +1: mfspr r26, CYCLE_LOW /* get start point for this backoff */ +2: mfspr r22, CYCLE_LOW /* test to see if we've backed off enough */ + sub r22, r22, r26 + slt r22, r22, r25 + bbst r22, 2b + { + shli r25, r25, 1 /* double the backoff; retry the tns */ + tns r21, ATOMIC_LOCK_REG_NAME + } + slt r26, r23, r25 /* is the proposed backoff too big? */ + { + mvnz r25, r26, r23 + bzt r21, .Ldo_cmpxchg\bitwidth + } + j 1b +#endif /* CONFIG_SMP */ + .endm + +.Lcmpxchg32_tns: + /* + * This is the last instruction on the second cache line. + * The nop here loads the second line, then we fall through + * to the tns to load the third line before we take the lock. + */ + nop + cmpxchg_lock 32 + + /* + * This code is invoked from sys_cmpxchg after most of the + * preconditions have been checked. We still need to check + * that r0 is 8-byte aligned, since if it's not we won't + * actually be atomic. However, ATOMIC_LOCK_REG has the atomic + * lock pointer and r27/r28 have the saved SP/PC. + * r23 is holding "r0 & 7" so we can test for alignment. + * The compare value is in r2/r3; the new value is in r4/r5. + * On return, we must put the old value in r0/r1. + */ + .align 64 +.Lcmpxchg64: + { +#if ATOMIC_LOCKS_FOUND_VIA_TABLE() + s2a ATOMIC_LOCK_REG_NAME, r25, r21 +#endif + bzt r23, .Lcmpxchg64_tns + } + j .Lcmpxchg_badaddr + +.Ldo_cmpxchg64: + { + lw r21, r0 + addi r25, r0, 4 + } + { + lw r1, r25 + } + seq r26, r21, r2 + { + bz r26, .Lcmpxchg64_mismatch + seq r26, r1, r3 + } + { + bz r26, .Lcmpxchg64_mismatch + } + sw r0, r4 + sw r25, r5 + + /* + * The 32-bit path provides optimized "match" and "mismatch" + * iret paths, but we don't have enough bundles in this cache line + * to do that, so we just make even the "mismatch" path do an "mf". + */ +.Lcmpxchg64_mismatch: + { + move sp, r27 + mtspr SPR_EX_CONTEXT_K_0, r28 + } + mf + { + move r0, r21 + sw ATOMIC_LOCK_REG_NAME, zero + } + iret + +.Lcmpxchg64_tns: + cmpxchg_lock 64 + + + /* + * Reset sp and revector to sys_cmpxchg_badaddr(), which will + * just raise the appropriate signal and exit. Doing it this + * way means we don't have to duplicate the code in intvec.S's + * int_hand macro that locates the top of the stack. + */ +.Lcmpxchg_badaddr: + { + moveli TREG_SYSCALL_NR_NAME, __NR_cmpxchg_badaddr + move sp, r27 + } + j intvec_SWINT_1 + ENDPROC(sys_cmpxchg) + ENTRY(__sys_cmpxchg_end) + + +/* The single-step support may need to read all the registers. */ +int_unalign: + push_extra_callee_saves r0 + j do_trap + +/* Include .intrpt1 array of interrupt vectors */ + .section ".intrpt1", "ax" + +#define op_handle_perf_interrupt bad_intr +#define op_handle_aux_perf_interrupt bad_intr + +#ifndef CONFIG_HARDWALL +#define do_hardwall_trap bad_intr +#endif + + int_hand INT_ITLB_MISS, ITLB_MISS, \ + do_page_fault, handle_interrupt_no_single_step + int_hand INT_MEM_ERROR, MEM_ERROR, bad_intr + int_hand INT_ILL, ILL, do_trap, handle_ill + int_hand INT_GPV, GPV, do_trap + int_hand INT_SN_ACCESS, SN_ACCESS, do_trap + int_hand INT_IDN_ACCESS, IDN_ACCESS, do_trap + int_hand INT_UDN_ACCESS, UDN_ACCESS, do_trap + int_hand INT_IDN_REFILL, IDN_REFILL, bad_intr + int_hand INT_UDN_REFILL, UDN_REFILL, bad_intr + int_hand INT_IDN_COMPLETE, IDN_COMPLETE, bad_intr + int_hand INT_UDN_COMPLETE, UDN_COMPLETE, bad_intr + int_hand INT_SWINT_3, SWINT_3, do_trap + int_hand INT_SWINT_2, SWINT_2, do_trap + int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall + int_hand INT_SWINT_0, SWINT_0, do_trap + int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign + int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault + int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault + int_hand INT_DMATLB_MISS, DMATLB_MISS, do_page_fault + int_hand INT_DMATLB_ACCESS, DMATLB_ACCESS, do_page_fault + int_hand INT_SNITLB_MISS, SNITLB_MISS, do_page_fault + int_hand INT_SN_NOTIFY, SN_NOTIFY, bad_intr + int_hand INT_SN_FIREWALL, SN_FIREWALL, do_hardwall_trap + int_hand INT_IDN_FIREWALL, IDN_FIREWALL, bad_intr + int_hand INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap + int_hand INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt + int_hand INT_IDN_TIMER, IDN_TIMER, bad_intr + int_hand INT_UDN_TIMER, UDN_TIMER, bad_intr + int_hand INT_DMA_NOTIFY, DMA_NOTIFY, bad_intr + int_hand INT_IDN_CA, IDN_CA, bad_intr + int_hand INT_UDN_CA, UDN_CA, bad_intr + int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr + int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr + int_hand INT_PERF_COUNT, PERF_COUNT, \ + op_handle_perf_interrupt, handle_nmi + int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + dc_dispatch INT_INTCTRL_2, INTCTRL_2 + int_hand INT_INTCTRL_1, INTCTRL_1, bad_intr +#else + int_hand INT_INTCTRL_2, INTCTRL_2, bad_intr + dc_dispatch INT_INTCTRL_1, INTCTRL_1 +#endif + int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr + int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ + hv_message_intr + int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \ + tile_dev_intr + int_hand INT_I_ASID, I_ASID, bad_intr + int_hand INT_D_ASID, D_ASID, bad_intr + int_hand INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \ + do_page_fault + int_hand INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \ + do_page_fault + int_hand INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \ + do_page_fault + int_hand INT_SN_CPL, SN_CPL, bad_intr + int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap +#if CHIP_HAS_AUX_PERF_COUNTERS() + int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ + op_handle_aux_perf_interrupt, handle_nmi +#endif + + /* Synthetic interrupt delivered only by the simulator */ + int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S new file mode 100644 index 00000000..30ae76e5 --- /dev/null +++ b/arch/tile/kernel/intvec_64.S @@ -0,0 +1,1289 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Linux interrupt vectors. + */ + +#include <linux/linkage.h> +#include <linux/errno.h> +#include <linux/unistd.h> +#include <asm/ptrace.h> +#include <asm/thread_info.h> +#include <asm/irqflags.h> +#include <asm/asm-offsets.h> +#include <asm/types.h> +#include <asm/signal.h> +#include <hv/hypervisor.h> +#include <arch/abi.h> +#include <arch/interrupts.h> +#include <arch/spr_def.h> + +#ifdef CONFIG_PREEMPT +# error "No support for kernel preemption currently" +#endif + +#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) + +#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) + + + .macro push_reg reg, ptr=sp, delta=-8 + { + st \ptr, \reg + addli \ptr, \ptr, \delta + } + .endm + + .macro pop_reg reg, ptr=sp, delta=8 + { + ld \reg, \ptr + addli \ptr, \ptr, \delta + } + .endm + + .macro pop_reg_zero reg, zreg, ptr=sp, delta=8 + { + move \zreg, zero + ld \reg, \ptr + addi \ptr, \ptr, \delta + } + .endm + + .macro push_extra_callee_saves reg + PTREGS_PTR(\reg, PTREGS_OFFSET_REG(51)) + push_reg r51, \reg + push_reg r50, \reg + push_reg r49, \reg + push_reg r48, \reg + push_reg r47, \reg + push_reg r46, \reg + push_reg r45, \reg + push_reg r44, \reg + push_reg r43, \reg + push_reg r42, \reg + push_reg r41, \reg + push_reg r40, \reg + push_reg r39, \reg + push_reg r38, \reg + push_reg r37, \reg + push_reg r36, \reg + push_reg r35, \reg + push_reg r34, \reg, PTREGS_OFFSET_BASE - PTREGS_OFFSET_REG(34) + .endm + + .macro panic str + .pushsection .rodata, "a" +1: + .asciz "\str" + .popsection + { + moveli r0, hw2_last(1b) + } + { + shl16insli r0, r0, hw1(1b) + } + { + shl16insli r0, r0, hw0(1b) + jal panic + } + .endm + + +#ifdef __COLLECT_LINKER_FEEDBACK__ + .pushsection .text.intvec_feedback,"ax" +intvec_feedback: + .popsection +#endif + + /* + * Default interrupt handler. + * + * vecnum is where we'll put this code. + * c_routine is the C routine we'll call. + * + * The C routine is passed two arguments: + * - A pointer to the pt_regs state. + * - The interrupt vector number. + * + * The "processing" argument specifies the code for processing + * the interrupt. Defaults to "handle_interrupt". + */ + .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt + .org (\vecnum << 8) +intvec_\vecname: + /* Temporarily save a register so we have somewhere to work. */ + + mtspr SPR_SYSTEM_SAVE_K_1, r0 + mfspr r0, SPR_EX_CONTEXT_K_1 + + andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + + .ifc \vecnum, INT_DOUBLE_FAULT + /* + * For double-faults from user-space, fall through to the normal + * register save and stack setup path. Otherwise, it's the + * hypervisor giving us one last chance to dump diagnostics, and we + * branch to the kernel_double_fault routine to do so. + */ + beqz r0, 1f + j _kernel_double_fault +1: + .else + /* + * If we're coming from user-space, then set sp to the top of + * the kernel stack. Otherwise, assume sp is already valid. + */ + { + bnez r0, 0f + move r0, sp + } + .endif + + .ifc \c_routine, do_page_fault + /* + * The page_fault handler may be downcalled directly by the + * hypervisor even when Linux is running and has ICS set. + * + * In this case the contents of EX_CONTEXT_K_1 reflect the + * previous fault and can't be relied on to choose whether or + * not to reinitialize the stack pointer. So we add a test + * to see whether SYSTEM_SAVE_K_2 has the high bit set, + * and if so we don't reinitialize sp, since we must be coming + * from Linux. (In fact the precise case is !(val & ~1), + * but any Linux PC has to have the high bit set.) + * + * Note that the hypervisor *always* sets SYSTEM_SAVE_K_2 for + * any path that turns into a downcall to one of our TLB handlers. + * + * FIXME: if we end up never using this path, perhaps we should + * prevent the hypervisor from generating downcalls in this case. + * The advantage of getting a downcall is we can panic in Linux. + */ + mfspr r0, SPR_SYSTEM_SAVE_K_2 + { + bltz r0, 0f /* high bit in S_S_1_2 is for a PC to use */ + move r0, sp + } + .endif + + + /* + * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and + * the current stack top in the higher bits. So we recover + * our stack top by just masking off the low bits, then + * point sp at the top aligned address on the actual stack page. + */ + mfspr r0, SPR_SYSTEM_SAVE_K_0 + mm r0, zero, LOG2_THREAD_SIZE, 63 + +0: + /* + * Align the stack mod 64 so we can properly predict what + * cache lines we need to write-hint to reduce memory fetch + * latency as we enter the kernel. The layout of memory is + * as follows, with cache line 0 at the lowest VA, and cache + * line 8 just below the r0 value this "andi" computes. + * Note that we never write to cache line 8, and we skip + * cache lines 1-3 for syscalls. + * + * cache line 8: ptregs padding (two words) + * cache line 7: sp, lr, pc, ex1, faultnum, orig_r0, flags, cmpexch + * cache line 6: r46...r53 (tp) + * cache line 5: r38...r45 + * cache line 4: r30...r37 + * cache line 3: r22...r29 + * cache line 2: r14...r21 + * cache line 1: r6...r13 + * cache line 0: 2 x frame, r0..r5 + */ + andi r0, r0, -64 + + /* + * Push the first four registers on the stack, so that we can set + * them to vector-unique values before we jump to the common code. + * + * Registers are pushed on the stack as a struct pt_regs, + * with the sp initially just above the struct, and when we're + * done, sp points to the base of the struct, minus + * C_ABI_SAVE_AREA_SIZE, so we can directly jal to C code. + * + * This routine saves just the first four registers, plus the + * stack context so we can do proper backtracing right away, + * and defers to handle_interrupt to save the rest. + * The backtracer needs pc, ex1, lr, sp, r52, and faultnum. + */ + addli r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP) + wh64 r0 /* cache line 7 */ + { + st r0, lr + addli r0, r0, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR + } + { + st r0, sp + addli sp, r0, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_SP + } + wh64 sp /* cache line 6 */ + { + st sp, r52 + addli sp, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(52) + } + wh64 sp /* cache line 0 */ + { + st sp, r1 + addli sp, sp, PTREGS_OFFSET_REG(2) - PTREGS_OFFSET_REG(1) + } + { + st sp, r2 + addli sp, sp, PTREGS_OFFSET_REG(3) - PTREGS_OFFSET_REG(2) + } + { + st sp, r3 + addli sp, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(3) + } + mfspr r0, SPR_EX_CONTEXT_K_0 + .ifc \processing,handle_syscall + /* + * Bump the saved PC by one bundle so that when we return, we won't + * execute the same swint instruction again. We need to do this while + * we're in the critical section. + */ + addi r0, r0, 8 + .endif + { + st sp, r0 + addli sp, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC + } + mfspr r0, SPR_EX_CONTEXT_K_1 + { + st sp, r0 + addi sp, sp, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1 + /* + * Use r0 for syscalls so it's a temporary; use r1 for interrupts + * so that it gets passed through unchanged to the handler routine. + * Note that the .if conditional confusingly spans bundles. + */ + .ifc \processing,handle_syscall + movei r0, \vecnum + } + { + st sp, r0 + .else + movei r1, \vecnum + } + { + st sp, r1 + .endif + addli sp, sp, PTREGS_OFFSET_REG(0) - PTREGS_OFFSET_FAULTNUM + } + mfspr r0, SPR_SYSTEM_SAVE_K_1 /* Original r0 */ + { + st sp, r0 + addi sp, sp, -PTREGS_OFFSET_REG(0) - 8 + } + { + st sp, zero /* write zero into "Next SP" frame pointer */ + addi sp, sp, -8 /* leave SP pointing at bottom of frame */ + } + .ifc \processing,handle_syscall + j handle_syscall + .else + /* Capture per-interrupt SPR context to registers. */ + .ifc \c_routine, do_page_fault + mfspr r2, SPR_SYSTEM_SAVE_K_3 /* address of page fault */ + mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */ + .else + .ifc \vecnum, INT_ILL_TRANS + mfspr r2, ILL_TRANS_REASON + .else + .ifc \vecnum, INT_DOUBLE_FAULT + mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */ + .else + .ifc \c_routine, do_trap + mfspr r2, GPV_REASON + .else + .ifc \c_routine, op_handle_perf_interrupt + mfspr r2, PERF_COUNT_STS +#if CHIP_HAS_AUX_PERF_COUNTERS() + .else + .ifc \c_routine, op_handle_aux_perf_interrupt + mfspr r2, AUX_PERF_COUNT_STS + .endif +#endif + .endif + .endif + .endif + .endif + .endif + /* Put function pointer in r0 */ + moveli r0, hw2_last(\c_routine) + shl16insli r0, r0, hw1(\c_routine) + { + shl16insli r0, r0, hw0(\c_routine) + j \processing + } + .endif + ENDPROC(intvec_\vecname) + +#ifdef __COLLECT_LINKER_FEEDBACK__ + .pushsection .text.intvec_feedback,"ax" + .org (\vecnum << 5) + FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8) + jrp lr + .popsection +#endif + + .endm + + + /* + * Save the rest of the registers that we didn't save in the actual + * vector itself. We can't use r0-r10 inclusive here. + */ + .macro finish_interrupt_save, function + + /* If it's a syscall, save a proper orig_r0, otherwise just zero. */ + PTREGS_PTR(r52, PTREGS_OFFSET_ORIG_R0) + { + .ifc \function,handle_syscall + st r52, r0 + .else + st r52, zero + .endif + PTREGS_PTR(r52, PTREGS_OFFSET_TP) + } + st r52, tp + { + mfspr tp, CMPEXCH_VALUE + PTREGS_PTR(r52, PTREGS_OFFSET_CMPEXCH) + } + + /* + * For ordinary syscalls, we save neither caller- nor callee- + * save registers, since the syscall invoker doesn't expect the + * caller-saves to be saved, and the called kernel functions will + * take care of saving the callee-saves for us. + * + * For interrupts we save just the caller-save registers. Saving + * them is required (since the "caller" can't save them). Again, + * the called kernel functions will restore the callee-save + * registers for us appropriately. + * + * On return, we normally restore nothing special for syscalls, + * and just the caller-save registers for interrupts. + * + * However, there are some important caveats to all this: + * + * - We always save a few callee-save registers to give us + * some scratchpad registers to carry across function calls. + * + * - fork/vfork/etc require us to save all the callee-save + * registers, which we do in PTREGS_SYSCALL_ALL_REGS, below. + * + * - We always save r0..r5 and r10 for syscalls, since we need + * to reload them a bit later for the actual kernel call, and + * since we might need them for -ERESTARTNOINTR, etc. + * + * - Before invoking a signal handler, we save the unsaved + * callee-save registers so they are visible to the + * signal handler or any ptracer. + * + * - If the unsaved callee-save registers are modified, we set + * a bit in pt_regs so we know to reload them from pt_regs + * and not just rely on the kernel function unwinding. + * (Done for ptrace register writes and SA_SIGINFO handler.) + */ + { + st r52, tp + PTREGS_PTR(r52, PTREGS_OFFSET_REG(33)) + } + wh64 r52 /* cache line 4 */ + push_reg r33, r52 + push_reg r32, r52 + push_reg r31, r52 + .ifc \function,handle_syscall + push_reg r30, r52, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(30) + push_reg TREG_SYSCALL_NR_NAME, r52, \ + PTREGS_OFFSET_REG(5) - PTREGS_OFFSET_SYSCALL + .else + + push_reg r30, r52, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(30) + wh64 r52 /* cache line 3 */ + push_reg r29, r52 + push_reg r28, r52 + push_reg r27, r52 + push_reg r26, r52 + push_reg r25, r52 + push_reg r24, r52 + push_reg r23, r52 + push_reg r22, r52 + wh64 r52 /* cache line 2 */ + push_reg r21, r52 + push_reg r20, r52 + push_reg r19, r52 + push_reg r18, r52 + push_reg r17, r52 + push_reg r16, r52 + push_reg r15, r52 + push_reg r14, r52 + wh64 r52 /* cache line 1 */ + push_reg r13, r52 + push_reg r12, r52 + push_reg r11, r52 + push_reg r10, r52 + push_reg r9, r52 + push_reg r8, r52 + push_reg r7, r52 + push_reg r6, r52 + + .endif + + push_reg r5, r52 + st r52, r4 + + /* Load tp with our per-cpu offset. */ +#ifdef CONFIG_SMP + { + mfspr r20, SPR_SYSTEM_SAVE_K_0 + moveli r21, hw2_last(__per_cpu_offset) + } + { + shl16insli r21, r21, hw1(__per_cpu_offset) + bfextu r20, r20, 0, LOG2_THREAD_SIZE-1 + } + shl16insli r21, r21, hw0(__per_cpu_offset) + shl3add r20, r20, r21 + ld tp, r20 +#else + move tp, zero +#endif + + /* + * If we will be returning to the kernel, we will need to + * reset the interrupt masks to the state they had before. + * Set DISABLE_IRQ in flags iff we came from PL1 with irqs disabled. + */ + mfspr r32, SPR_EX_CONTEXT_K_1 + { + andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS) + } + beqzt r32, 1f /* zero if from user space */ + IRQS_DISABLED(r32) /* zero if irqs enabled */ +#if PT_FLAGS_DISABLE_IRQ != 1 +# error Value of IRQS_DISABLED used to set PT_FLAGS_DISABLE_IRQ; fix +#endif +1: + .ifnc \function,handle_syscall + /* Record the fact that we saved the caller-save registers above. */ + ori r32, r32, PT_FLAGS_CALLER_SAVES + .endif + st r21, r32 + +#ifdef __COLLECT_LINKER_FEEDBACK__ + /* + * Notify the feedback routines that we were in the + * appropriate fixed interrupt vector area. Note that we + * still have ICS set at this point, so we can't invoke any + * atomic operations or we will panic. The feedback + * routines internally preserve r0..r10 and r30 up. + */ + .ifnc \function,handle_syscall + shli r20, r1, 5 + .else + moveli r20, INT_SWINT_1 << 5 + .endif + moveli r21, hw2_last(intvec_feedback) + shl16insli r21, r21, hw1(intvec_feedback) + shl16insli r21, r21, hw0(intvec_feedback) + add r20, r20, r21 + jalr r20 + + /* And now notify the feedback routines that we are here. */ + FEEDBACK_ENTER(\function) +#endif + + /* + * we've captured enough state to the stack (including in + * particular our EX_CONTEXT state) that we can now release + * the interrupt critical section and replace it with our + * standard "interrupts disabled" mask value. This allows + * synchronous interrupts (and profile interrupts) to punch + * through from this point onwards. + */ + .ifc \function,handle_nmi + IRQ_DISABLE_ALL(r20) + .else + IRQ_DISABLE(r20, r21) + .endif + mtspr INTERRUPT_CRITICAL_SECTION, zero + + /* + * Prepare the first 256 stack bytes to be rapidly accessible + * without having to fetch the background data. + */ + addi r52, sp, -64 + { + wh64 r52 + addi r52, r52, -64 + } + { + wh64 r52 + addi r52, r52, -64 + } + { + wh64 r52 + addi r52, r52, -64 + } + wh64 r52 + +#ifdef CONFIG_TRACE_IRQFLAGS + .ifnc \function,handle_nmi + /* + * We finally have enough state set up to notify the irq + * tracing code that irqs were disabled on entry to the handler. + * The TRACE_IRQS_OFF call clobbers registers r0-r29. + * For syscalls, we already have the register state saved away + * on the stack, so we don't bother to do any register saves here, + * and later we pop the registers back off the kernel stack. + * For interrupt handlers, save r0-r3 in callee-saved registers. + */ + .ifnc \function,handle_syscall + { move r30, r0; move r31, r1 } + { move r32, r2; move r33, r3 } + .endif + TRACE_IRQS_OFF + .ifnc \function,handle_syscall + { move r0, r30; move r1, r31 } + { move r2, r32; move r3, r33 } + .endif + .endif +#endif + + .endm + + /* + * Redispatch a downcall. + */ + .macro dc_dispatch vecnum, vecname + .org (\vecnum << 8) +intvec_\vecname: + j hv_downcall_dispatch + ENDPROC(intvec_\vecname) + .endm + + /* + * Common code for most interrupts. The C function we're eventually + * going to is in r0, and the faultnum is in r1; the original + * values for those registers are on the stack. + */ + .pushsection .text.handle_interrupt,"ax" +handle_interrupt: + finish_interrupt_save handle_interrupt + + /* Jump to the C routine; it should enable irqs as soon as possible. */ + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_interrupt) + { + movei r30, 0 /* not an NMI */ + j interrupt_return + } + STD_ENDPROC(handle_interrupt) + +/* + * This routine takes a boolean in r30 indicating if this is an NMI. + * If so, we also expect a boolean in r31 indicating whether to + * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. + */ +STD_ENTRY(interrupt_return) + /* If we're resuming to kernel space, don't check thread flags. */ + { + bnez r30, .Lrestore_all /* NMIs don't special-case user-space */ + PTREGS_PTR(r29, PTREGS_OFFSET_EX1) + } + ld r29, r29 + andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + { + beqzt r29, .Lresume_userspace + PTREGS_PTR(r29, PTREGS_OFFSET_PC) + } + + /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */ + moveli r27, hw2_last(_cpu_idle_nap) + { + ld r28, r29 + shl16insli r27, r27, hw1(_cpu_idle_nap) + } + { + shl16insli r27, r27, hw0(_cpu_idle_nap) + } + { + cmpeq r27, r27, r28 + } + { + blbc r27, .Lrestore_all + addi r28, r28, 8 + } + st r29, r28 + j .Lrestore_all + +.Lresume_userspace: + FEEDBACK_REENTER(interrupt_return) + + /* + * Use r33 to hold whether we have already loaded the callee-saves + * into ptregs. We don't want to do it twice in this loop, since + * then we'd clobber whatever changes are made by ptrace, etc. + */ + { + movei r33, 0 + move r32, sp + } + + /* Get base of stack in r32. */ + EXTRACT_THREAD_INFO(r32) + +.Lretry_work_pending: + /* + * Disable interrupts so as to make sure we don't + * miss an interrupt that sets any of the thread flags (like + * need_resched or sigpending) between sampling and the iret. + * Routines like schedule() or do_signal() may re-enable + * interrupts before returning. + */ + IRQ_DISABLE(r20, r21) + TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */ + + + /* Check to see if there is any work to do before returning to user. */ + { + addi r29, r32, THREAD_INFO_FLAGS_OFFSET + moveli r1, hw1_last(_TIF_ALLWORK_MASK) + } + { + ld r29, r29 + shl16insli r1, r1, hw0(_TIF_ALLWORK_MASK) + } + and r1, r29, r1 + beqzt r1, .Lrestore_all + + /* + * Make sure we have all the registers saved for signal + * handling or notify-resume. Call out to C code to figure out + * exactly what we need to do for each flag bit, then if + * necessary, reload the flags and recheck. + */ + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + bnez r33, 1f + } + push_extra_callee_saves r0 + movei r33, 1 +1: jal do_work_pending + bnez r0, .Lretry_work_pending + + /* + * In the NMI case we + * omit the call to single_process_check_nohz, which normally checks + * to see if we should start or stop the scheduler tick, because + * we can't call arbitrary Linux code from an NMI context. + * We always call the homecache TLB deferral code to re-trigger + * the deferral mechanism. + * + * The other chunk of responsibility this code has is to reset the + * interrupt masks appropriately to reset irqs and NMIs. We have + * to call TRACE_IRQS_OFF and TRACE_IRQS_ON to support all the + * lockdep-type stuff, but we can't set ICS until afterwards, since + * ICS can only be used in very tight chunks of code to avoid + * tripping over various assertions that it is off. + */ +.Lrestore_all: + PTREGS_PTR(r0, PTREGS_OFFSET_EX1) + { + ld r0, r0 + PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS) + } + { + andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK + ld r32, r32 + } + bnez r0, 1f + j 2f +#if PT_FLAGS_DISABLE_IRQ != 1 +# error Assuming PT_FLAGS_DISABLE_IRQ == 1 so we can use blbct below +#endif +1: blbct r32, 2f + IRQ_DISABLE(r20,r21) + TRACE_IRQS_OFF + movei r0, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r0 + beqzt r30, .Lrestore_regs + j 3f +2: TRACE_IRQS_ON + movei r0, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r0 + IRQ_ENABLE(r20, r21) + beqzt r30, .Lrestore_regs +3: + + + /* + * We now commit to returning from this interrupt, since we will be + * doing things like setting EX_CONTEXT SPRs and unwinding the stack + * frame. No calls should be made to any other code after this point. + * This code should only be entered with ICS set. + * r32 must still be set to ptregs.flags. + * We launch loads to each cache line separately first, so we can + * get some parallelism out of the memory subsystem. + * We start zeroing caller-saved registers throughout, since + * that will save some cycles if this turns out to be a syscall. + */ +.Lrestore_regs: + FEEDBACK_REENTER(interrupt_return) /* called from elsewhere */ + + /* + * Rotate so we have one high bit and one low bit to test. + * - low bit says whether to restore all the callee-saved registers, + * or just r30-r33, and r52 up. + * - high bit (i.e. sign bit) says whether to restore all the + * caller-saved registers, or just r0. + */ +#if PT_FLAGS_CALLER_SAVES != 2 || PT_FLAGS_RESTORE_REGS != 4 +# error Rotate trick does not work :-) +#endif + { + rotli r20, r32, 62 + PTREGS_PTR(sp, PTREGS_OFFSET_REG(0)) + } + + /* + * Load cache lines 0, 4, 6 and 7, in that order, then use + * the last loaded value, which makes it likely that the other + * cache lines have also loaded, at which point we should be + * able to safely read all the remaining words on those cache + * lines without waiting for the memory subsystem. + */ + pop_reg r0, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0) + pop_reg r30, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_REG(30) + pop_reg_zero r52, r3, sp, PTREGS_OFFSET_CMPEXCH - PTREGS_OFFSET_REG(52) + pop_reg_zero r21, r27, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_CMPEXCH + pop_reg_zero lr, r2, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_EX1 + { + mtspr CMPEXCH_VALUE, r21 + move r4, zero + } + pop_reg r21, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_PC + { + mtspr SPR_EX_CONTEXT_K_1, lr + andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + } + { + mtspr SPR_EX_CONTEXT_K_0, r21 + move r5, zero + } + + /* Restore callee-saveds that we actually use. */ + pop_reg_zero r31, r6 + pop_reg_zero r32, r7 + pop_reg_zero r33, r8, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(33) + + /* + * If we modified other callee-saveds, restore them now. + * This is rare, but could be via ptrace or signal handler. + */ + { + move r9, zero + blbs r20, .Lrestore_callees + } +.Lcontinue_restore_regs: + + /* Check if we're returning from a syscall. */ + { + move r10, zero + bltzt r20, 1f /* no, so go restore callee-save registers */ + } + + /* + * Check if we're returning to userspace. + * Note that if we're not, we don't worry about zeroing everything. + */ + { + addli sp, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(29) + bnez lr, .Lkernel_return + } + + /* + * On return from syscall, we've restored r0 from pt_regs, but we + * clear the remainder of the caller-saved registers. We could + * restore the syscall arguments, but there's not much point, + * and it ensures user programs aren't trying to use the + * caller-saves if we clear them, as well as avoiding leaking + * kernel pointers into userspace. + */ + pop_reg_zero lr, r11, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR + pop_reg_zero tp, r12, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP + { + ld sp, sp + move r13, zero + move r14, zero + } + { move r15, zero; move r16, zero } + { move r17, zero; move r18, zero } + { move r19, zero; move r20, zero } + { move r21, zero; move r22, zero } + { move r23, zero; move r24, zero } + { move r25, zero; move r26, zero } + + /* Set r1 to errno if we are returning an error, otherwise zero. */ + { + moveli r29, 4096 + sub r1, zero, r0 + } + { + move r28, zero + cmpltu r29, r1, r29 + } + { + mnz r1, r29, r1 + move r29, zero + } + iret + + /* + * Not a syscall, so restore caller-saved registers. + * First kick off loads for cache lines 1-3, which we're touching + * for the first time here. + */ + .align 64 +1: pop_reg r29, sp, PTREGS_OFFSET_REG(21) - PTREGS_OFFSET_REG(29) + pop_reg r21, sp, PTREGS_OFFSET_REG(13) - PTREGS_OFFSET_REG(21) + pop_reg r13, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(13) + pop_reg r1 + pop_reg r2 + pop_reg r3 + pop_reg r4 + pop_reg r5 + pop_reg r6 + pop_reg r7 + pop_reg r8 + pop_reg r9 + pop_reg r10 + pop_reg r11 + pop_reg r12, sp, 16 + /* r13 already restored above */ + pop_reg r14 + pop_reg r15 + pop_reg r16 + pop_reg r17 + pop_reg r18 + pop_reg r19 + pop_reg r20, sp, 16 + /* r21 already restored above */ + pop_reg r22 + pop_reg r23 + pop_reg r24 + pop_reg r25 + pop_reg r26 + pop_reg r27 + pop_reg r28, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(28) + /* r29 already restored above */ + bnez lr, .Lkernel_return + pop_reg lr, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR + pop_reg tp, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP + ld sp, sp + iret + + /* + * We can't restore tp when in kernel mode, since a thread might + * have migrated from another cpu and brought a stale tp value. + */ +.Lkernel_return: + pop_reg lr, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR + ld sp, sp + iret + + /* Restore callee-saved registers from r34 to r51. */ +.Lrestore_callees: + addli sp, sp, PTREGS_OFFSET_REG(34) - PTREGS_OFFSET_REG(29) + pop_reg r34 + pop_reg r35 + pop_reg r36 + pop_reg r37 + pop_reg r38 + pop_reg r39 + pop_reg r40 + pop_reg r41 + pop_reg r42 + pop_reg r43 + pop_reg r44 + pop_reg r45 + pop_reg r46 + pop_reg r47 + pop_reg r48 + pop_reg r49 + pop_reg r50 + pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51) + j .Lcontinue_restore_regs + STD_ENDPROC(interrupt_return) + + /* + * "NMI" interrupts mask ALL interrupts before calling the + * handler, and don't check thread flags, etc., on the way + * back out. In general, the only things we do here for NMIs + * are register save/restore and dataplane kernel-TLB management. + * We don't (for example) deal with start/stop of the sched tick. + */ + .pushsection .text.handle_nmi,"ax" +handle_nmi: + finish_interrupt_save handle_nmi + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_nmi) + { + movei r30, 1 + move r31, r0 + } + j interrupt_return + STD_ENDPROC(handle_nmi) + + /* + * Parallel code for syscalls to handle_interrupt. + */ + .pushsection .text.handle_syscall,"ax" +handle_syscall: + finish_interrupt_save handle_syscall + + /* Enable irqs. */ + TRACE_IRQS_ON + IRQ_ENABLE(r20, r21) + + /* Bump the counter for syscalls made on this tile. */ + moveli r20, hw2_last(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + shl16insli r20, r20, hw1(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + shl16insli r20, r20, hw0(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + add r20, r20, tp + ld4s r21, r20 + { + addi r21, r21, 1 + move r31, sp + } + { + st4 r20, r21 + EXTRACT_THREAD_INFO(r31) + } + + /* Trace syscalls, if requested. */ + addi r31, r31, THREAD_INFO_FLAGS_OFFSET + ld r30, r31 + andi r30, r30, _TIF_SYSCALL_TRACE + { + addi r30, r31, THREAD_INFO_STATUS_OFFSET - THREAD_INFO_FLAGS_OFFSET + beqzt r30, .Lrestore_syscall_regs + } + jal do_syscall_trace + FEEDBACK_REENTER(handle_syscall) + + /* + * We always reload our registers from the stack at this + * point. They might be valid, if we didn't build with + * TRACE_IRQFLAGS, and this isn't a dataplane tile, and we're not + * doing syscall tracing, but there are enough cases now that it + * seems simplest just to do the reload unconditionally. + */ +.Lrestore_syscall_regs: + { + ld r30, r30 + PTREGS_PTR(r11, PTREGS_OFFSET_REG(0)) + } + pop_reg r0, r11 + pop_reg r1, r11 + pop_reg r2, r11 + pop_reg r3, r11 + pop_reg r4, r11 + pop_reg r5, r11, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(5) + { + ld TREG_SYSCALL_NR_NAME, r11 + moveli r21, __NR_syscalls + } + + /* Ensure that the syscall number is within the legal range. */ + { + moveli r20, hw2(sys_call_table) + blbs r30, .Lcompat_syscall + } + { + cmpltu r21, TREG_SYSCALL_NR_NAME, r21 + shl16insli r20, r20, hw1(sys_call_table) + } + { + blbc r21, .Linvalid_syscall + shl16insli r20, r20, hw0(sys_call_table) + } +.Lload_syscall_pointer: + shl3add r20, TREG_SYSCALL_NR_NAME, r20 + ld r20, r20 + + /* Jump to syscall handler. */ + jalr r20 +.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */ + + /* + * Write our r0 onto the stack so it gets restored instead + * of whatever the user had there before. + * In compat mode, sign-extend r0 before storing it. + */ + { + PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) + blbct r30, 1f + } + addxi r0, r0, 0 +1: st r29, r0 + +.Lsyscall_sigreturn_skip: + FEEDBACK_REENTER(handle_syscall) + + /* Do syscall trace again, if requested. */ + ld r30, r31 + andi r0, r30, _TIF_SYSCALL_TRACE + { + andi r0, r30, _TIF_SINGLESTEP + beqzt r0, 1f + } + jal do_syscall_trace + FEEDBACK_REENTER(handle_syscall) + andi r0, r30, _TIF_SINGLESTEP + +1: beqzt r0, 2f + + /* Single stepping -- notify ptrace. */ + { + movei r0, SIGTRAP + jal ptrace_notify + } + FEEDBACK_REENTER(handle_syscall) + +2: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + +.Lcompat_syscall: + /* + * Load the base of the compat syscall table in r20, and + * range-check the syscall number (duplicated from 64-bit path). + * Sign-extend all the user's passed arguments to make them consistent. + * Also save the original "r(n)" values away in "r(11+n)" in + * case the syscall table entry wants to validate them. + */ + moveli r20, hw2(compat_sys_call_table) + { + cmpltu r21, TREG_SYSCALL_NR_NAME, r21 + shl16insli r20, r20, hw1(compat_sys_call_table) + } + { + blbc r21, .Linvalid_syscall + shl16insli r20, r20, hw0(compat_sys_call_table) + } + { move r11, r0; addxi r0, r0, 0 } + { move r12, r1; addxi r1, r1, 0 } + { move r13, r2; addxi r2, r2, 0 } + { move r14, r3; addxi r3, r3, 0 } + { move r15, r4; addxi r4, r4, 0 } + { move r16, r5; addxi r5, r5, 0 } + j .Lload_syscall_pointer + +.Linvalid_syscall: + /* Report an invalid syscall back to the user program */ + { + PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) + movei r28, -ENOSYS + } + st r29, r28 + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(handle_syscall) + + /* Return the address for oprofile to suppress in backtraces. */ +STD_ENTRY_SECTION(handle_syscall_link_address, .text.handle_syscall) + lnk r0 + { + addli r0, r0, .Lhandle_syscall_link - . + jrp lr + } + STD_ENDPROC(handle_syscall_link_address) + +STD_ENTRY(ret_from_fork) + jal sim_notify_fork + jal schedule_tail + FEEDBACK_REENTER(ret_from_fork) + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(ret_from_fork) + +/* Various stub interrupt handlers and syscall handlers */ + +STD_ENTRY_LOCAL(_kernel_double_fault) + mfspr r1, SPR_EX_CONTEXT_K_0 + move r2, lr + move r3, sp + move r4, r52 + addi sp, sp, -C_ABI_SAVE_AREA_SIZE + j kernel_double_fault + STD_ENDPROC(_kernel_double_fault) + +STD_ENTRY_LOCAL(bad_intr) + mfspr r2, SPR_EX_CONTEXT_K_0 + panic "Unhandled interrupt %#x: PC %#lx" + STD_ENDPROC(bad_intr) + +/* Put address of pt_regs in reg and jump. */ +#define PTREGS_SYSCALL(x, reg) \ + STD_ENTRY(_##x); \ + { \ + PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ + j x \ + }; \ + STD_ENDPROC(_##x) + +/* + * Special-case sigreturn to not write r0 to the stack on return. + * This is technically more efficient, but it also avoids difficulties + * in the 64-bit OS when handling 32-bit compat code, since we must not + * sign-extend r0 for the sigreturn return-value case. + */ +#define PTREGS_SYSCALL_SIGRETURN(x, reg) \ + STD_ENTRY(_##x); \ + addli lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \ + { \ + PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ + j x \ + }; \ + STD_ENDPROC(_##x) + +PTREGS_SYSCALL(sys_execve, r3) +PTREGS_SYSCALL(sys_sigaltstack, r2) +PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0) +#ifdef CONFIG_COMPAT +PTREGS_SYSCALL(compat_sys_execve, r3) +PTREGS_SYSCALL(compat_sys_sigaltstack, r2) +PTREGS_SYSCALL_SIGRETURN(compat_sys_rt_sigreturn, r0) +#endif + +/* Save additional callee-saves to pt_regs, put address in r4 and jump. */ +STD_ENTRY(_sys_clone) + push_extra_callee_saves r4 + j sys_clone + STD_ENDPROC(_sys_clone) + +/* The single-step support may need to read all the registers. */ +int_unalign: + push_extra_callee_saves r0 + j do_trap + +/* Fill the return address stack with nonzero entries. */ +STD_ENTRY(fill_ra_stack) + { + move r0, lr + jal 1f + } +1: jal 2f +2: jal 3f +3: jal 4f +4: jrp r0 + STD_ENDPROC(fill_ra_stack) + +/* Include .intrpt1 array of interrupt vectors */ + .section ".intrpt1", "ax" + +#define op_handle_perf_interrupt bad_intr +#define op_handle_aux_perf_interrupt bad_intr + +#ifndef CONFIG_HARDWALL +#define do_hardwall_trap bad_intr +#endif + + int_hand INT_MEM_ERROR, MEM_ERROR, do_trap + int_hand INT_SINGLE_STEP_3, SINGLE_STEP_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, gx_singlestep_handle + int_hand INT_SINGLE_STEP_1, SINGLE_STEP_1, bad_intr +#else + int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, bad_intr + int_hand INT_SINGLE_STEP_1, SINGLE_STEP_1, gx_singlestep_handle +#endif + int_hand INT_SINGLE_STEP_0, SINGLE_STEP_0, bad_intr + int_hand INT_IDN_COMPLETE, IDN_COMPLETE, bad_intr + int_hand INT_UDN_COMPLETE, UDN_COMPLETE, bad_intr + int_hand INT_ITLB_MISS, ITLB_MISS, do_page_fault + int_hand INT_ILL, ILL, do_trap + int_hand INT_GPV, GPV, do_trap + int_hand INT_IDN_ACCESS, IDN_ACCESS, do_trap + int_hand INT_UDN_ACCESS, UDN_ACCESS, do_trap + int_hand INT_SWINT_3, SWINT_3, do_trap + int_hand INT_SWINT_2, SWINT_2, do_trap + int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall + int_hand INT_SWINT_0, SWINT_0, do_trap + int_hand INT_ILL_TRANS, ILL_TRANS, do_trap + int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign + int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault + int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault + int_hand INT_IDN_FIREWALL, IDN_FIREWALL, bad_intr + int_hand INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap + int_hand INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt + int_hand INT_IDN_TIMER, IDN_TIMER, bad_intr + int_hand INT_UDN_TIMER, UDN_TIMER, bad_intr + int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr + int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr + int_hand INT_IPI_3, IPI_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + int_hand INT_IPI_2, IPI_2, tile_dev_intr + int_hand INT_IPI_1, IPI_1, bad_intr +#else + int_hand INT_IPI_2, IPI_2, bad_intr + int_hand INT_IPI_1, IPI_1, tile_dev_intr +#endif + int_hand INT_IPI_0, IPI_0, bad_intr + int_hand INT_PERF_COUNT, PERF_COUNT, \ + op_handle_perf_interrupt, handle_nmi + int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ + op_handle_perf_interrupt, handle_nmi + int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + dc_dispatch INT_INTCTRL_2, INTCTRL_2 + int_hand INT_INTCTRL_1, INTCTRL_1, bad_intr +#else + int_hand INT_INTCTRL_2, INTCTRL_2, bad_intr + dc_dispatch INT_INTCTRL_1, INTCTRL_1 +#endif + int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr + int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ + hv_message_intr + int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, bad_intr + int_hand INT_I_ASID, I_ASID, bad_intr + int_hand INT_D_ASID, D_ASID, bad_intr + int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap + + /* Synthetic interrupt delivered only by the simulator */ + int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c new file mode 100644 index 00000000..02e62806 --- /dev/null +++ b/arch/tile/kernel/irq.c @@ -0,0 +1,295 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel_stat.h> +#include <linux/uaccess.h> +#include <hv/drv_pcie_rc_intf.h> +#include <arch/spr_def.h> +#include <asm/traps.h> + +/* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */ +#define IS_HW_CLEARED 1 + +/* + * The set of interrupts we enable for arch_local_irq_enable(). + * This is initialized to have just a single interrupt that the kernel + * doesn't actually use as a sentinel. During kernel init, + * interrupts are added as the kernel gets prepared to support them. + * NOTE: we could probably initialize them all statically up front. + */ +DEFINE_PER_CPU(unsigned long long, interrupts_enabled_mask) = + INITIAL_INTERRUPTS_ENABLED; +EXPORT_PER_CPU_SYMBOL(interrupts_enabled_mask); + +/* Define per-tile device interrupt statistics state. */ +DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; +EXPORT_PER_CPU_SYMBOL(irq_stat); + +/* + * Define per-tile irq disable mask; the hardware/HV only has a single + * mask that we use to implement both masking and disabling. + */ +static DEFINE_PER_CPU(unsigned long, irq_disable_mask) + ____cacheline_internodealigned_in_smp; + +/* + * Per-tile IRQ nesting depth. Used to make sure we enable newly + * enabled IRQs before exiting the outermost interrupt. + */ +static DEFINE_PER_CPU(int, irq_depth); + +/* State for allocating IRQs on Gx. */ +#if CHIP_HAS_IPI() +static unsigned long available_irqs = ~(1UL << IRQ_RESCHEDULE); +static DEFINE_SPINLOCK(available_irqs_lock); +#endif + +#if CHIP_HAS_IPI() +/* Use SPRs to manipulate device interrupts. */ +#define mask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_SET_K, irq_mask) +#define unmask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_RESET_K, irq_mask) +#define clear_irqs(irq_mask) __insn_mtspr(SPR_IPI_EVENT_RESET_K, irq_mask) +#else +/* Use HV to manipulate device interrupts. */ +#define mask_irqs(irq_mask) hv_disable_intr(irq_mask) +#define unmask_irqs(irq_mask) hv_enable_intr(irq_mask) +#define clear_irqs(irq_mask) hv_clear_intr(irq_mask) +#endif + +/* + * The interrupt handling path, implemented in terms of HV interrupt + * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx. + */ +void tile_dev_intr(struct pt_regs *regs, int intnum) +{ + int depth = __get_cpu_var(irq_depth)++; + unsigned long original_irqs; + unsigned long remaining_irqs; + struct pt_regs *old_regs; + +#if CHIP_HAS_IPI() + /* + * Pending interrupts are listed in an SPR. We might be + * nested, so be sure to only handle irqs that weren't already + * masked by a previous interrupt. Then, mask out the ones + * we're going to handle. + */ + unsigned long masked = __insn_mfspr(SPR_IPI_MASK_K); + original_irqs = __insn_mfspr(SPR_IPI_EVENT_K) & ~masked; + __insn_mtspr(SPR_IPI_MASK_SET_K, original_irqs); +#else + /* + * Hypervisor performs the equivalent of the Gx code above and + * then puts the pending interrupt mask into a system save reg + * for us to find. + */ + original_irqs = __insn_mfspr(SPR_SYSTEM_SAVE_K_3); +#endif + remaining_irqs = original_irqs; + + /* Track time spent here in an interrupt context. */ + old_regs = set_irq_regs(regs); + irq_enter(); + +#ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: less than 1/8th stack free? */ + { + long sp = stack_pointer - (long) current_thread_info(); + if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { + pr_emerg("tile_dev_intr: " + "stack overflow: %ld\n", + sp - sizeof(struct thread_info)); + dump_stack(); + } + } +#endif + while (remaining_irqs) { + unsigned long irq = __ffs(remaining_irqs); + remaining_irqs &= ~(1UL << irq); + + /* Count device irqs; Linux IPIs are counted elsewhere. */ + if (irq != IRQ_RESCHEDULE) + __get_cpu_var(irq_stat).irq_dev_intr_count++; + + generic_handle_irq(irq); + } + + /* + * If we weren't nested, turn on all enabled interrupts, + * including any that were reenabled during interrupt + * handling. + */ + if (depth == 0) + unmask_irqs(~__get_cpu_var(irq_disable_mask)); + + __get_cpu_var(irq_depth)--; + + /* + * Track time spent against the current process again and + * process any softirqs if they are waiting. + */ + irq_exit(); + set_irq_regs(old_regs); +} + + +/* + * Remove an irq from the disabled mask. If we're in an interrupt + * context, defer enabling the HW interrupt until we leave. + */ +static void tile_irq_chip_enable(struct irq_data *d) +{ + get_cpu_var(irq_disable_mask) &= ~(1UL << d->irq); + if (__get_cpu_var(irq_depth) == 0) + unmask_irqs(1UL << d->irq); + put_cpu_var(irq_disable_mask); +} + +/* + * Add an irq to the disabled mask. We disable the HW interrupt + * immediately so that there's no possibility of it firing. If we're + * in an interrupt context, the return path is careful to avoid + * unmasking a newly disabled interrupt. + */ +static void tile_irq_chip_disable(struct irq_data *d) +{ + get_cpu_var(irq_disable_mask) |= (1UL << d->irq); + mask_irqs(1UL << d->irq); + put_cpu_var(irq_disable_mask); +} + +/* Mask an interrupt. */ +static void tile_irq_chip_mask(struct irq_data *d) +{ + mask_irqs(1UL << d->irq); +} + +/* Unmask an interrupt. */ +static void tile_irq_chip_unmask(struct irq_data *d) +{ + unmask_irqs(1UL << d->irq); +} + +/* + * Clear an interrupt before processing it so that any new assertions + * will trigger another irq. + */ +static void tile_irq_chip_ack(struct irq_data *d) +{ + if ((unsigned long)irq_data_get_irq_chip_data(d) != IS_HW_CLEARED) + clear_irqs(1UL << d->irq); +} + +/* + * For per-cpu interrupts, we need to avoid unmasking any interrupts + * that we disabled via disable_percpu_irq(). + */ +static void tile_irq_chip_eoi(struct irq_data *d) +{ + if (!(__get_cpu_var(irq_disable_mask) & (1UL << d->irq))) + unmask_irqs(1UL << d->irq); +} + +static struct irq_chip tile_irq_chip = { + .name = "tile_irq_chip", + .irq_enable = tile_irq_chip_enable, + .irq_disable = tile_irq_chip_disable, + .irq_ack = tile_irq_chip_ack, + .irq_eoi = tile_irq_chip_eoi, + .irq_mask = tile_irq_chip_mask, + .irq_unmask = tile_irq_chip_unmask, +}; + +void __init init_IRQ(void) +{ + ipi_init(); +} + +void __cpuinit setup_irq_regs(void) +{ + /* Enable interrupt delivery. */ + unmask_irqs(~0UL); +#if CHIP_HAS_IPI() + arch_local_irq_unmask(INT_IPI_K); +#endif +} + +void tile_irq_activate(unsigned int irq, int tile_irq_type) +{ + /* + * We use handle_level_irq() by default because the pending + * interrupt vector (whether modeled by the HV on TILE64 and + * TILEPro or implemented in hardware on TILE-Gx) has + * level-style semantics for each bit. An interrupt fires + * whenever a bit is high, not just at edges. + */ + irq_flow_handler_t handle = handle_level_irq; + if (tile_irq_type == TILE_IRQ_PERCPU) + handle = handle_percpu_irq; + irq_set_chip_and_handler(irq, &tile_irq_chip, handle); + + /* + * Flag interrupts that are hardware-cleared so that ack() + * won't clear them. + */ + if (tile_irq_type == TILE_IRQ_HW_CLEAR) + irq_set_chip_data(irq, (void *)IS_HW_CLEARED); +} +EXPORT_SYMBOL(tile_irq_activate); + + +void ack_bad_irq(unsigned int irq) +{ + pr_err("unexpected IRQ trap at vector %02x\n", irq); +} + +/* + * Generic, controller-independent functions: + */ + +#if CHIP_HAS_IPI() +int create_irq(void) +{ + unsigned long flags; + int result; + + spin_lock_irqsave(&available_irqs_lock, flags); + if (available_irqs == 0) + result = -ENOMEM; + else { + result = __ffs(available_irqs); + available_irqs &= ~(1UL << result); + dynamic_irq_init(result); + } + spin_unlock_irqrestore(&available_irqs_lock, flags); + + return result; +} +EXPORT_SYMBOL(create_irq); + +void destroy_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&available_irqs_lock, flags); + available_irqs |= (1UL << irq); + dynamic_irq_cleanup(irq); + spin_unlock_irqrestore(&available_irqs_lock, flags); +} +EXPORT_SYMBOL(destroy_irq); +#endif diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c new file mode 100644 index 00000000..6255f2ea --- /dev/null +++ b/arch/tile/kernel/machine_kexec.c @@ -0,0 +1,282 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * based on machine_kexec.c from other architectures in linux-2.6.18 + */ + +#include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/errno.h> +#include <linux/vmalloc.h> +#include <linux/cpumask.h> +#include <linux/kernel.h> +#include <linux/elf.h> +#include <linux/highmem.h> +#include <linux/mmu_context.h> +#include <linux/io.h> +#include <linux/timex.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/cacheflush.h> +#include <asm/checksum.h> +#include <hv/hypervisor.h> + + +/* + * This stuff is not in elf.h and is not in any other kernel include. + * This stuff is needed below in the little boot notes parser to + * extract the command line so we can pass it to the hypervisor. + */ +struct Elf32_Bhdr { + Elf32_Word b_signature; + Elf32_Word b_size; + Elf32_Half b_checksum; + Elf32_Half b_records; +}; +#define ELF_BOOT_MAGIC 0x0E1FB007 +#define EBN_COMMAND_LINE 0x00000004 +#define roundupsz(X) (((X) + 3) & ~3) + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + + +void machine_shutdown(void) +{ + /* + * Normally we would stop all the other processors here, but + * the check in machine_kexec_prepare below ensures we'll only + * get this far if we've been booted with "nosmp" on the + * command line or without CONFIG_SMP so there's nothing to do + * here (for now). + */ +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + /* + * Cannot happen. This type of kexec is disabled on this + * architecture (and enforced in machine_kexec_prepare below). + */ +} + + +int machine_kexec_prepare(struct kimage *image) +{ + if (num_online_cpus() > 1) { + pr_warning("%s: detected attempt to kexec " + "with num_online_cpus() > 1\n", + __func__); + return -ENOSYS; + } + if (image->type != KEXEC_TYPE_DEFAULT) { + pr_warning("%s: detected attempt to kexec " + "with unsupported type: %d\n", + __func__, + image->type); + return -ENOSYS; + } + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ + /* + * We did nothing in machine_kexec_prepare, + * so we have nothing to do here. + */ +} + +/* + * If we can find elf boot notes on this page, return the command + * line. Otherwise, silently return null. Somewhat kludgy, but no + * good way to do this without significantly rearchitecting the + * architecture-independent kexec code. + */ + +static unsigned char *kexec_bn2cl(void *pg) +{ + struct Elf32_Bhdr *bhdrp; + Elf32_Nhdr *nhdrp; + unsigned char *desc; + unsigned char *command_line; + __sum16 csum; + + bhdrp = (struct Elf32_Bhdr *) pg; + + /* + * This routine is invoked for every source page, so make + * sure to quietly ignore every impossible page. + */ + if (bhdrp->b_signature != ELF_BOOT_MAGIC || + bhdrp->b_size > PAGE_SIZE) + return 0; + + /* + * If we get a checksum mismatch, warn with the checksum + * so we can diagnose better. + */ + csum = ip_compute_csum(pg, bhdrp->b_size); + if (csum != 0) { + pr_warning("%s: bad checksum %#x (size %d)\n", + __func__, csum, bhdrp->b_size); + return 0; + } + + nhdrp = (Elf32_Nhdr *) (bhdrp + 1); + + while (nhdrp->n_type != EBN_COMMAND_LINE) { + + desc = (unsigned char *) (nhdrp + 1); + desc += roundupsz(nhdrp->n_descsz); + + nhdrp = (Elf32_Nhdr *) desc; + + /* still in bounds? */ + if ((unsigned char *) (nhdrp + 1) > + ((unsigned char *) pg) + bhdrp->b_size) { + + pr_info("%s: out of bounds\n", __func__); + return 0; + } + } + + command_line = (unsigned char *) (nhdrp + 1); + desc = command_line; + + while (*desc != '\0') { + desc++; + if (((unsigned long)desc & PAGE_MASK) != (unsigned long)pg) { + pr_info("%s: ran off end of page\n", + __func__); + return 0; + } + } + + return command_line; +} + +static void kexec_find_and_set_command_line(struct kimage *image) +{ + kimage_entry_t *ptr, entry; + + unsigned char *command_line = 0; + unsigned char *r; + HV_Errno hverr; + + for (ptr = &image->head; + (entry = *ptr) && !(entry & IND_DONE); + ptr = (entry & IND_INDIRECTION) ? + phys_to_virt((entry & PAGE_MASK)) : ptr + 1) { + + if ((entry & IND_SOURCE)) { + void *va = + kmap_atomic_pfn(entry >> PAGE_SHIFT); + r = kexec_bn2cl(va); + if (r) { + command_line = r; + break; + } + kunmap_atomic(va); + } + } + + if (command_line != 0) { + pr_info("setting new command line to \"%s\"\n", + command_line); + + hverr = hv_set_command_line( + (HV_VirtAddr) command_line, strlen(command_line)); + kunmap_atomic(command_line); + } else { + pr_info("%s: no command line found; making empty\n", + __func__); + hverr = hv_set_command_line((HV_VirtAddr) command_line, 0); + } + if (hverr) + pr_warning("%s: hv_set_command_line returned error: %d\n", + __func__, hverr); +} + +/* + * The kexec code range-checks all its PAs, so to avoid having it run + * amok and allocate memory and then sequester it from every other + * controller, we force it to come from controller zero. We also + * disable the oom-killer since if we do end up running out of memory, + * that almost certainly won't help. + */ +struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order) +{ + gfp_mask |= __GFP_THISNODE | __GFP_NORETRY; + return alloc_pages_node(0, gfp_mask, order); +} + +static void setup_quasi_va_is_pa(void) +{ + HV_PTE *pgtable; + HV_PTE pte; + int i; + + /* + * Flush our TLB to prevent conflicts between the previous contents + * and the new stuff we're about to add. + */ + local_flush_tlb_all(); + + /* setup VA is PA, at least up to PAGE_OFFSET */ + + pgtable = (HV_PTE *)current->mm->pgd; + pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE); + pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); + + for (i = 0; i < pgd_index(PAGE_OFFSET); i++) { + unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT); + if (pfn_valid(pfn)) + __set_pte(&pgtable[i], pfn_pte(pfn, pte)); + } +} + + +void machine_kexec(struct kimage *image) +{ + void *reboot_code_buffer; + void (*rnk)(unsigned long, void *, unsigned long) + __noreturn; + + /* Mask all interrupts before starting to reboot. */ + interrupt_mask_set_mask(~0ULL); + + kexec_find_and_set_command_line(image); + + /* + * Adjust the home caching of the control page to be cached on + * this cpu, and copy the assembly helper into the control + * code page, which we map in the vmalloc area. + */ + homecache_change_page_home(image->control_code_page, 0, + smp_processor_id()); + reboot_code_buffer = vmap(&image->control_code_page, 1, 0, + __pgprot(_PAGE_KERNEL | _PAGE_EXECUTABLE)); + memcpy(reboot_code_buffer, relocate_new_kernel, + relocate_new_kernel_size); + __flush_icache_range( + (unsigned long) reboot_code_buffer, + (unsigned long) reboot_code_buffer + relocate_new_kernel_size); + + setup_quasi_va_is_pa(); + + /* now call it */ + rnk = reboot_code_buffer; + (*rnk)(image->head, reboot_code_buffer, image->start); +} diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c new file mode 100644 index 00000000..0858ee6b --- /dev/null +++ b/arch/tile/kernel/messaging.c @@ -0,0 +1,116 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/percpu.h> +#include <linux/smp.h> +#include <linux/hardirq.h> +#include <linux/ptrace.h> +#include <asm/hv_driver.h> +#include <asm/irq_regs.h> +#include <asm/traps.h> +#include <hv/hypervisor.h> +#include <arch/interrupts.h> + +/* All messages are stored here */ +static DEFINE_PER_CPU(HV_MsgState, msg_state); + +void __cpuinit init_messaging(void) +{ + /* Allocate storage for messages in kernel space */ + HV_MsgState *state = &__get_cpu_var(msg_state); + int rc = hv_register_message_state(state); + if (rc != HV_OK) + panic("hv_register_message_state: error %d", rc); + + /* Make sure downcall interrupts will be enabled. */ + arch_local_irq_unmask(INT_INTCTRL_K); +} + +void hv_message_intr(struct pt_regs *regs, int intnum) +{ + /* + * We enter with interrupts disabled and leave them disabled, + * to match expectations of called functions (e.g. + * do_ccupdate_local() in mm/slab.c). This is also consistent + * with normal call entry for device interrupts. + */ + + int message[HV_MAX_MESSAGE_SIZE/sizeof(int)]; + HV_RcvMsgInfo rmi; + int nmsgs = 0; + + /* Track time spent here in an interrupt context */ + struct pt_regs *old_regs = set_irq_regs(regs); + irq_enter(); + +#ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: less than 1/8th stack free? */ + { + long sp = stack_pointer - (long) current_thread_info(); + if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { + pr_emerg("hv_message_intr: " + "stack overflow: %ld\n", + sp - sizeof(struct thread_info)); + dump_stack(); + } + } +#endif + + while (1) { + rmi = hv_receive_message(__get_cpu_var(msg_state), + (HV_VirtAddr) message, + sizeof(message)); + if (rmi.msglen == 0) + break; + + if (rmi.msglen < 0) + panic("hv_receive_message failed: %d", rmi.msglen); + + ++nmsgs; + + if (rmi.source == HV_MSG_TILE) { + int tag; + + /* we just send tags for now */ + BUG_ON(rmi.msglen != sizeof(int)); + + tag = message[0]; +#ifdef CONFIG_SMP + evaluate_message(message[0]); +#else + panic("Received IPI message %d in UP mode", tag); +#endif + } else if (rmi.source == HV_MSG_INTR) { + HV_IntrMsg *him = (HV_IntrMsg *)message; + struct hv_driver_cb *cb = + (struct hv_driver_cb *)him->intarg; + cb->callback(cb, him->intdata); + __get_cpu_var(irq_stat).irq_hv_msg_count++; + } + } + + /* + * We shouldn't have gotten a message downcall with no + * messages available. + */ + if (nmsgs == 0) + panic("Message downcall invoked with no messages!"); + + /* + * Track time spent against the current process again and + * process any softirqs if they are waiting. + */ + irq_exit(); + set_irq_regs(old_regs); +} diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c new file mode 100644 index 00000000..98d47692 --- /dev/null +++ b/arch/tile/kernel/module.c @@ -0,0 +1,234 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Based on i386 version, copyright (C) 2001 Rusty Russell. + */ + +#include <linux/moduleloader.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <asm/pgtable.h> +#include <asm/homecache.h> +#include <arch/opcode.h> + +#ifdef __tilegx__ +# define Elf_Rela Elf64_Rela +# define ELF_R_SYM ELF64_R_SYM +# define ELF_R_TYPE ELF64_R_TYPE +#else +# define Elf_Rela Elf32_Rela +# define ELF_R_SYM ELF32_R_SYM +# define ELF_R_TYPE ELF32_R_TYPE +#endif + +#ifdef MODULE_DEBUG +#define DEBUGP printk +#else +#define DEBUGP(fmt...) +#endif + +/* + * Allocate some address space in the range MEM_MODULE_START to + * MEM_MODULE_END and populate it with memory. + */ +void *module_alloc(unsigned long size) +{ + struct page **pages; + pgprot_t prot_rwx = __pgprot(_PAGE_KERNEL | _PAGE_KERNEL_EXEC); + struct vm_struct *area; + int i = 0; + int npages; + + if (size == 0) + return NULL; + npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + pages = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); + if (pages == NULL) + return NULL; + for (; i < npages; ++i) { + pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!pages[i]) + goto error; + } + + area = __get_vm_area(size, VM_ALLOC, MEM_MODULE_START, MEM_MODULE_END); + if (!area) + goto error; + area->nr_pages = npages; + area->pages = pages; + + if (map_vm_area(area, prot_rwx, &pages)) { + vunmap(area->addr); + goto error; + } + + return area->addr; + +error: + while (--i >= 0) + __free_page(pages[i]); + kfree(pages); + return NULL; +} + + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); + + /* Globally flush the L1 icache. */ + flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask, + 0, 0, 0, NULL, NULL, 0); + + /* + * FIXME: If module_region == mod->module_init, trim exception + * table entries. + */ +} + +#ifdef __tilegx__ +/* + * Validate that the high 16 bits of "value" is just the sign-extension of + * the low 48 bits. + */ +static int validate_hw2_last(long value, struct module *me) +{ + if (((value << 16) >> 16) != value) { + pr_warning("module %s: Out of range HW2_LAST value %#lx\n", + me->name, value); + return 0; + } + return 1; +} + +/* + * Validate that "value" isn't too big to hold in a JumpOff relocation. + */ +static int validate_jumpoff(long value) +{ + /* Determine size of jump offset. */ + int shift = __builtin_clzl(get_JumpOff_X1(create_JumpOff_X1(-1))); + + /* Check to see if it fits into the relocation slot. */ + long f = get_JumpOff_X1(create_JumpOff_X1(value)); + f = (f << shift) >> shift; + + return f == value; +} +#endif + +int apply_relocate_add(Elf_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + unsigned int i; + Elf_Rela *rel = (void *)sechdrs[relsec].sh_addr; + Elf_Sym *sym; + u64 *location; + unsigned long value; + + DEBUGP("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rel[i].r_offset; + /* + * This is the symbol it is referring to. + * Note that all undefined symbols have been resolved. + */ + sym = (Elf_Sym *)sechdrs[symindex].sh_addr + + ELF_R_SYM(rel[i].r_info); + value = sym->st_value + rel[i].r_addend; + + switch (ELF_R_TYPE(rel[i].r_info)) { + +#define MUNGE(func) (*location = ((*location & ~func(-1)) | func(value))) + +#ifndef __tilegx__ + case R_TILE_32: + *(uint32_t *)location = value; + break; + case R_TILE_IMM16_X0_HA: + value = (value + 0x8000) >> 16; + /*FALLTHROUGH*/ + case R_TILE_IMM16_X0_LO: + MUNGE(create_Imm16_X0); + break; + case R_TILE_IMM16_X1_HA: + value = (value + 0x8000) >> 16; + /*FALLTHROUGH*/ + case R_TILE_IMM16_X1_LO: + MUNGE(create_Imm16_X1); + break; + case R_TILE_JOFFLONG_X1: + value -= (unsigned long) location; /* pc-relative */ + value = (long) value >> 3; /* count by instrs */ + MUNGE(create_JOffLong_X1); + break; +#else + case R_TILEGX_64: + *location = value; + break; + case R_TILEGX_IMM16_X0_HW2_LAST: + if (!validate_hw2_last(value, me)) + return -ENOEXEC; + value >>= 16; + /*FALLTHROUGH*/ + case R_TILEGX_IMM16_X0_HW1: + value >>= 16; + /*FALLTHROUGH*/ + case R_TILEGX_IMM16_X0_HW0: + MUNGE(create_Imm16_X0); + break; + case R_TILEGX_IMM16_X1_HW2_LAST: + if (!validate_hw2_last(value, me)) + return -ENOEXEC; + value >>= 16; + /*FALLTHROUGH*/ + case R_TILEGX_IMM16_X1_HW1: + value >>= 16; + /*FALLTHROUGH*/ + case R_TILEGX_IMM16_X1_HW0: + MUNGE(create_Imm16_X1); + break; + case R_TILEGX_JUMPOFF_X1: + value -= (unsigned long) location; /* pc-relative */ + value = (long) value >> 3; /* count by instrs */ + if (!validate_jumpoff(value)) { + pr_warning("module %s: Out of range jump to" + " %#llx at %#llx (%p)\n", me->name, + sym->st_value + rel[i].r_addend, + rel[i].r_offset, location); + return -ENOEXEC; + } + MUNGE(create_JumpOff_X1); + break; +#endif + +#undef MUNGE + + default: + pr_err("module %s: Unknown relocation: %d\n", + me->name, (int) ELF_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } + } + return 0; +} diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c new file mode 100644 index 00000000..b3ed19f8 --- /dev/null +++ b/arch/tile/kernel/pci-dma.c @@ -0,0 +1,252 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/mm.h> +#include <linux/dma-mapping.h> +#include <linux/vmalloc.h> +#include <linux/export.h> +#include <asm/tlbflush.h> +#include <asm/homecache.h> + +/* Generic DMA mapping functions: */ + +/* + * Allocate what Linux calls "coherent" memory, which for us just + * means uncached. + */ +void *dma_alloc_coherent(struct device *dev, + size_t size, + dma_addr_t *dma_handle, + gfp_t gfp) +{ + u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32); + int node = dev_to_node(dev); + int order = get_order(size); + struct page *pg; + dma_addr_t addr; + + gfp |= __GFP_ZERO; + + /* + * By forcing NUMA node 0 for 32-bit masks we ensure that the + * high 32 bits of the resulting PA will be zero. If the mask + * size is, e.g., 24, we may still not be able to guarantee a + * suitable memory address, in which case we will return NULL. + * But such devices are uncommon. + */ + if (dma_mask <= DMA_BIT_MASK(32)) + node = 0; + + pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_UNCACHED); + if (pg == NULL) + return NULL; + + addr = page_to_phys(pg); + if (addr + size > dma_mask) { + homecache_free_pages(addr, order); + return NULL; + } + + *dma_handle = addr; + return page_address(pg); +} +EXPORT_SYMBOL(dma_alloc_coherent); + +/* + * Free memory that was allocated with dma_alloc_coherent. + */ +void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + homecache_free_pages((unsigned long)vaddr, get_order(size)); +} +EXPORT_SYMBOL(dma_free_coherent); + +/* + * The map routines "map" the specified address range for DMA + * accesses. The memory belongs to the device after this call is + * issued, until it is unmapped with dma_unmap_single. + * + * We don't need to do any mapping, we just flush the address range + * out of the cache and return a DMA address. + * + * The unmap routines do whatever is necessary before the processor + * accesses the memory again, and must be called before the driver + * touches the memory. We can get away with a cache invalidate if we + * can count on nothing having been touched. + */ + +/* Flush a PA range from cache page by page. */ +static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size) +{ + struct page *page = pfn_to_page(PFN_DOWN(dma_addr)); + size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1)); + + while ((ssize_t)size > 0) { + /* Flush the page. */ + homecache_flush_cache(page++, 0); + + /* Figure out if we need to continue on the next page. */ + size -= bytesleft; + bytesleft = PAGE_SIZE; + } +} + +/* + * dma_map_single can be passed any memory address, and there appear + * to be no alignment constraints. + * + * There is a chance that the start of the buffer will share a cache + * line with some other data that has been touched in the meantime. + */ +dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction direction) +{ + dma_addr_t dma_addr = __pa(ptr); + + BUG_ON(!valid_dma_direction(direction)); + WARN_ON(size == 0); + + __dma_map_pa_range(dma_addr, size); + + return dma_addr; +} +EXPORT_SYMBOL(dma_map_single); + +void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} +EXPORT_SYMBOL(dma_unmap_single); + +int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, + enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + BUG_ON(!valid_dma_direction(direction)); + + WARN_ON(nents == 0 || sglist->length == 0); + + for_each_sg(sglist, sg, nents, i) { + sg->dma_address = sg_phys(sg); + __dma_map_pa_range(sg->dma_address, sg->length); + } + + return nents; +} +EXPORT_SYMBOL(dma_map_sg); + +void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} +EXPORT_SYMBOL(dma_unmap_sg); + +dma_addr_t dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); + + BUG_ON(offset + size > PAGE_SIZE); + homecache_flush_cache(page, 0); + + return page_to_pa(page) + offset; +} +EXPORT_SYMBOL(dma_map_page); + +void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} +EXPORT_SYMBOL(dma_unmap_page); + +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} +EXPORT_SYMBOL(dma_sync_single_for_cpu); + +void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + unsigned long start = PFN_DOWN(dma_handle); + unsigned long end = PFN_DOWN(dma_handle + size - 1); + unsigned long i; + + BUG_ON(!valid_dma_direction(direction)); + for (i = start; i <= end; ++i) + homecache_flush_cache(pfn_to_page(i), 0); +} +EXPORT_SYMBOL(dma_sync_single_for_device); + +void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); + WARN_ON(nelems == 0 || sg[0].length == 0); +} +EXPORT_SYMBOL(dma_sync_sg_for_cpu); + +/* + * Flush and invalidate cache for scatterlist. + */ +void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + BUG_ON(!valid_dma_direction(direction)); + WARN_ON(nelems == 0 || sglist->length == 0); + + for_each_sg(sglist, sg, nelems, i) { + dma_sync_single_for_device(dev, sg->dma_address, + sg_dma_len(sg), direction); + } +} +EXPORT_SYMBOL(dma_sync_sg_for_device); + +void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + dma_sync_single_for_cpu(dev, dma_handle + offset, size, direction); +} +EXPORT_SYMBOL(dma_sync_single_range_for_cpu); + +void dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + dma_sync_single_for_device(dev, dma_handle + offset, size, direction); +} +EXPORT_SYMBOL(dma_sync_single_range_for_device); + +/* + * dma_alloc_noncoherent() returns non-cacheable memory, so there's no + * need to do any flushing here. + */ +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) +{ +} +EXPORT_SYMBOL(dma_cache_sync); diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c new file mode 100644 index 00000000..b56d12bf --- /dev/null +++ b/arch/tile/kernel/pci.c @@ -0,0 +1,632 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/capability.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/bootmem.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <linux/uaccess.h> +#include <linux/export.h> + +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/byteorder.h> +#include <asm/hv_driver.h> +#include <hv/drv_pcie_rc_intf.h> + + +/* + * Initialization flow and process + * ------------------------------- + * + * This files contains the routines to search for PCI buses, + * enumerate the buses, and configure any attached devices. + * + * There are two entry points here: + * 1) tile_pci_init + * This sets up the pci_controller structs, and opens the + * FDs to the hypervisor. This is called from setup_arch() early + * in the boot process. + * 2) pcibios_init + * This probes the PCI bus(es) for any attached hardware. It's + * called by subsys_initcall. All of the real work is done by the + * generic Linux PCI layer. + * + */ + +/* + * This flag tells if the platform is TILEmpower that needs + * special configuration for the PLX switch chip. + */ +int __write_once tile_plx_gen1; + +static struct pci_controller controllers[TILE_NUM_PCIE]; +static int num_controllers; +static int pci_scan_flags[TILE_NUM_PCIE]; + +static struct pci_ops tile_cfg_ops; + + +/* + * We don't need to worry about the alignment of resources. + */ +resource_size_t pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, resource_size_t align) +{ + return res->start; +} +EXPORT_SYMBOL(pcibios_align_resource); + +/* + * Open a FD to the hypervisor PCI device. + * + * controller_id is the controller number, config type is 0 or 1 for + * config0 or config1 operations. + */ +static int __devinit tile_pcie_open(int controller_id, int config_type) +{ + char filename[32]; + int fd; + + sprintf(filename, "pcie/%d/config%d", controller_id, config_type); + + fd = hv_dev_open((HV_VirtAddr)filename, 0); + + return fd; +} + + +/* + * Get the IRQ numbers from the HV and set up the handlers for them. + */ +static int __devinit tile_init_irqs(int controller_id, + struct pci_controller *controller) +{ + char filename[32]; + int fd; + int ret; + int x; + struct pcie_rc_config rc_config; + + sprintf(filename, "pcie/%d/ctl", controller_id); + fd = hv_dev_open((HV_VirtAddr)filename, 0); + if (fd < 0) { + pr_err("PCI: hv_dev_open(%s) failed\n", filename); + return -1; + } + ret = hv_dev_pread(fd, 0, (HV_VirtAddr)(&rc_config), + sizeof(rc_config), PCIE_RC_CONFIG_MASK_OFF); + hv_dev_close(fd); + if (ret != sizeof(rc_config)) { + pr_err("PCI: wanted %zd bytes, got %d\n", + sizeof(rc_config), ret); + return -1; + } + /* Record irq_base so that we can map INTx to IRQ # later. */ + controller->irq_base = rc_config.intr; + + for (x = 0; x < 4; x++) + tile_irq_activate(rc_config.intr + x, + TILE_IRQ_HW_CLEAR); + + if (rc_config.plx_gen1) + controller->plx_gen1 = 1; + + return 0; +} + +/* + * First initialization entry point, called from setup_arch(). + * + * Find valid controllers and fill in pci_controller structs for each + * of them. + * + * Returns the number of controllers discovered. + */ +int __init tile_pci_init(void) +{ + int i; + + pr_info("PCI: Searching for controllers...\n"); + + /* Re-init number of PCIe controllers to support hot-plug feature. */ + num_controllers = 0; + + /* Do any configuration we need before using the PCIe */ + + for (i = 0; i < TILE_NUM_PCIE; i++) { + /* + * To see whether we need a real config op based on + * the results of pcibios_init(), to support PCIe hot-plug. + */ + if (pci_scan_flags[i] == 0) { + int hv_cfg_fd0 = -1; + int hv_cfg_fd1 = -1; + int hv_mem_fd = -1; + char name[32]; + struct pci_controller *controller; + + /* + * Open the fd to the HV. If it fails then this + * device doesn't exist. + */ + hv_cfg_fd0 = tile_pcie_open(i, 0); + if (hv_cfg_fd0 < 0) + continue; + hv_cfg_fd1 = tile_pcie_open(i, 1); + if (hv_cfg_fd1 < 0) { + pr_err("PCI: Couldn't open config fd to HV " + "for controller %d\n", i); + goto err_cont; + } + + sprintf(name, "pcie/%d/mem", i); + hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0); + if (hv_mem_fd < 0) { + pr_err("PCI: Could not open mem fd to HV!\n"); + goto err_cont; + } + + pr_info("PCI: Found PCI controller #%d\n", i); + + controller = &controllers[i]; + + controller->index = i; + controller->hv_cfg_fd[0] = hv_cfg_fd0; + controller->hv_cfg_fd[1] = hv_cfg_fd1; + controller->hv_mem_fd = hv_mem_fd; + controller->first_busno = 0; + controller->last_busno = 0xff; + controller->ops = &tile_cfg_ops; + + num_controllers++; + continue; + +err_cont: + if (hv_cfg_fd0 >= 0) + hv_dev_close(hv_cfg_fd0); + if (hv_cfg_fd1 >= 0) + hv_dev_close(hv_cfg_fd1); + if (hv_mem_fd >= 0) + hv_dev_close(hv_mem_fd); + continue; + } + } + + /* + * Before using the PCIe, see if we need to do any platform-specific + * configuration, such as the PLX switch Gen 1 issue on TILEmpower. + */ + for (i = 0; i < num_controllers; i++) { + struct pci_controller *controller = &controllers[i]; + + if (controller->plx_gen1) + tile_plx_gen1 = 1; + } + + return num_controllers; +} + +/* + * (pin - 1) converts from the PCI standard's [1:4] convention to + * a normal [0:3] range. + */ +static int tile_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + struct pci_controller *controller = + (struct pci_controller *)dev->sysdata; + return (pin - 1) + controller->irq_base; +} + + +static void __devinit fixup_read_and_payload_sizes(void) +{ + struct pci_dev *dev = NULL; + int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */ + int max_read_size = 0x2; /* Limit to 512 byte reads. */ + u16 new_values; + + /* Scan for the smallest maximum payload size. */ + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + int pcie_caps_offset; + u32 devcap; + int max_payload; + + pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (pcie_caps_offset == 0) + continue; + + pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP, + &devcap); + max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD; + if (max_payload < smallest_max_payload) + smallest_max_payload = max_payload; + } + + /* Now, set the max_payload_size for all devices to that value. */ + new_values = (max_read_size << 12) | (smallest_max_payload << 5); + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + int pcie_caps_offset; + u16 devctl; + + pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (pcie_caps_offset == 0) + continue; + + pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, + &devctl); + devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ); + devctl |= new_values; + pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, + devctl); + } +} + + +/* + * Second PCI initialization entry point, called by subsys_initcall. + * + * The controllers have been set up by the time we get here, by a call to + * tile_pci_init. + */ +int __init pcibios_init(void) +{ + int i; + + pr_info("PCI: Probing PCI hardware\n"); + + /* + * Delay a bit in case devices aren't ready. Some devices are + * known to require at least 20ms here, but we use a more + * conservative value. + */ + mdelay(250); + + /* Scan all of the recorded PCI controllers. */ + for (i = 0; i < TILE_NUM_PCIE; i++) { + /* + * Do real pcibios init ops if the controller is initialized + * by tile_pci_init() successfully and not initialized by + * pcibios_init() yet to support PCIe hot-plug. + */ + if (pci_scan_flags[i] == 0 && controllers[i].ops != NULL) { + struct pci_controller *controller = &controllers[i]; + struct pci_bus *bus; + + if (tile_init_irqs(i, controller)) { + pr_err("PCI: Could not initialize IRQs\n"); + continue; + } + + pr_info("PCI: initializing controller #%d\n", i); + + /* + * This comes from the generic Linux PCI driver. + * + * It reads the PCI tree for this bus into the Linux + * data structures. + * + * This is inlined in linux/pci.h and calls into + * pci_scan_bus_parented() in probe.c. + */ + bus = pci_scan_bus(0, controller->ops, controller); + controller->root_bus = bus; + controller->last_busno = bus->subordinate; + } + } + + /* Do machine dependent PCI interrupt routing */ + pci_fixup_irqs(pci_common_swizzle, tile_map_irq); + + /* + * This comes from the generic Linux PCI driver. + * + * It allocates all of the resources (I/O memory, etc) + * associated with the devices read in above. + */ + pci_assign_unassigned_resources(); + + /* Configure the max_read_size and max_payload_size values. */ + fixup_read_and_payload_sizes(); + + /* Record the I/O resources in the PCI controller structure. */ + for (i = 0; i < TILE_NUM_PCIE; i++) { + /* + * Do real pcibios init ops if the controller is initialized + * by tile_pci_init() successfully and not initialized by + * pcibios_init() yet to support PCIe hot-plug. + */ + if (pci_scan_flags[i] == 0 && controllers[i].ops != NULL) { + struct pci_bus *root_bus = controllers[i].root_bus; + struct pci_bus *next_bus; + struct pci_dev *dev; + + list_for_each_entry(dev, &root_bus->devices, bus_list) { + /* + * Find the PCI host controller, ie. the 1st + * bridge. + */ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && + (PCI_SLOT(dev->devfn) == 0)) { + next_bus = dev->subordinate; + controllers[i].mem_resources[0] = + *next_bus->resource[0]; + controllers[i].mem_resources[1] = + *next_bus->resource[1]; + controllers[i].mem_resources[2] = + *next_bus->resource[2]; + + /* Setup flags. */ + pci_scan_flags[i] = 1; + + break; + } + } + } + } + + return 0; +} +subsys_initcall(pcibios_init); + +/* + * No bus fixups needed. + */ +void __devinit pcibios_fixup_bus(struct pci_bus *bus) +{ + /* Nothing needs to be done. */ +} + +void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling. */ +} + +/* + * This can be called from the generic PCI layer, but doesn't need to + * do anything. + */ +char __devinit *pcibios_setup(char *str) +{ + /* Nothing needs to be done. */ + return str; +} + +/* + * This is called from the generic Linux layer. + */ +void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} + +/* + * Enable memory and/or address decoding, as appropriate, for the + * device described by the 'dev' struct. + * + * This is called from the generic PCI layer, and can be called + * for bridges or endpoints. + */ +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + u8 header_type; + int i; + struct resource *r; + + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { + /* + * For bridges, we enable both memory and I/O decoding + * in call cases. + */ + cmd |= PCI_COMMAND_IO; + cmd |= PCI_COMMAND_MEMORY; + } else { + /* + * For endpoints, we enable memory and/or I/O decoding + * only if they have a memory resource of that type. + */ + for (i = 0; i < 6; i++) { + r = &dev->resource[i]; + if (r->flags & IORESOURCE_UNSET) { + pr_err("PCI: Device %s not available " + "because of resource collisions\n", + pci_name(dev)); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + } + + /* + * We only write the command if it changed. + */ + if (cmd != old_cmd) + pci_write_config_word(dev, PCI_COMMAND, cmd); + return 0; +} + +/**************************************************************** + * + * Tile PCI config space read/write routines + * + ****************************************************************/ + +/* + * These are the normal read and write ops + * These are expanded with macros from pci_bus_read_config_byte() etc. + * + * devfn is the combined PCI slot & function. + * + * offset is in bytes, from the start of config space for the + * specified bus & slot. + */ + +static int __devinit tile_cfg_read(struct pci_bus *bus, + unsigned int devfn, + int offset, + int size, + u32 *val) +{ + struct pci_controller *controller = bus->sysdata; + int busnum = bus->number & 0xff; + int slot = (devfn >> 3) & 0x1f; + int function = devfn & 0x7; + u32 addr; + int config_mode = 1; + + /* + * There is no bridge between the Tile and bus 0, so we + * use config0 to talk to bus 0. + * + * If we're talking to a bus other than zero then we + * must have found a bridge. + */ + if (busnum == 0) { + /* + * We fake an empty slot for (busnum == 0) && (slot > 0), + * since there is only one slot on bus 0. + */ + if (slot) { + *val = 0xFFFFFFFF; + return 0; + } + config_mode = 0; + } + + addr = busnum << 20; /* Bus in 27:20 */ + addr |= slot << 15; /* Slot (device) in 19:15 */ + addr |= function << 12; /* Function is in 14:12 */ + addr |= (offset & 0xFFF); /* byte address in 0:11 */ + + return hv_dev_pread(controller->hv_cfg_fd[config_mode], 0, + (HV_VirtAddr)(val), size, addr); +} + + +/* + * See tile_cfg_read() for relevant comments. + * Note that "val" is the value to write, not a pointer to that value. + */ +static int __devinit tile_cfg_write(struct pci_bus *bus, + unsigned int devfn, + int offset, + int size, + u32 val) +{ + struct pci_controller *controller = bus->sysdata; + int busnum = bus->number & 0xff; + int slot = (devfn >> 3) & 0x1f; + int function = devfn & 0x7; + u32 addr; + int config_mode = 1; + HV_VirtAddr valp = (HV_VirtAddr)&val; + + /* + * For bus 0 slot 0 we use config 0 accesses. + */ + if (busnum == 0) { + /* + * We fake an empty slot for (busnum == 0) && (slot > 0), + * since there is only one slot on bus 0. + */ + if (slot) + return 0; + config_mode = 0; + } + + addr = busnum << 20; /* Bus in 27:20 */ + addr |= slot << 15; /* Slot (device) in 19:15 */ + addr |= function << 12; /* Function is in 14:12 */ + addr |= (offset & 0xFFF); /* byte address in 0:11 */ + +#ifdef __BIG_ENDIAN + /* Point to the correct part of the 32-bit "val". */ + valp += 4 - size; +#endif + + return hv_dev_pwrite(controller->hv_cfg_fd[config_mode], 0, + valp, size, addr); +} + + +static struct pci_ops tile_cfg_ops = { + .read = tile_cfg_read, + .write = tile_cfg_write, +}; + + +/* + * In the following, each PCI controller's mem_resources[1] + * represents its (non-prefetchable) PCI memory resource. + * mem_resources[0] and mem_resources[2] refer to its PCI I/O and + * prefetchable PCI memory resources, respectively. + * For more details, see pci_setup_bridge() in setup-bus.c. + * By comparing the target PCI memory address against the + * end address of controller 0, we can determine the controller + * that should accept the PCI memory access. + */ +#define TILE_READ(size, type) \ +type _tile_read##size(unsigned long addr) \ +{ \ + type val; \ + int idx = 0; \ + if (addr > controllers[0].mem_resources[1].end && \ + addr > controllers[0].mem_resources[2].end) \ + idx = 1; \ + if (hv_dev_pread(controllers[idx].hv_mem_fd, 0, \ + (HV_VirtAddr)(&val), sizeof(type), addr)) \ + pr_err("PCI: read %zd bytes at 0x%lX failed\n", \ + sizeof(type), addr); \ + return val; \ +} \ +EXPORT_SYMBOL(_tile_read##size) + +TILE_READ(b, u8); +TILE_READ(w, u16); +TILE_READ(l, u32); +TILE_READ(q, u64); + +#define TILE_WRITE(size, type) \ +void _tile_write##size(type val, unsigned long addr) \ +{ \ + int idx = 0; \ + if (addr > controllers[0].mem_resources[1].end && \ + addr > controllers[0].mem_resources[2].end) \ + idx = 1; \ + if (hv_dev_pwrite(controllers[idx].hv_mem_fd, 0, \ + (HV_VirtAddr)(&val), sizeof(type), addr)) \ + pr_err("PCI: write %zd bytes at 0x%lX failed\n", \ + sizeof(type), addr); \ +} \ +EXPORT_SYMBOL(_tile_write##size) + +TILE_WRITE(b, u8); +TILE_WRITE(w, u16); +TILE_WRITE(l, u32); +TILE_WRITE(q, u64); diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c new file mode 100644 index 00000000..446a7f52 --- /dev/null +++ b/arch/tile/kernel/proc.c @@ -0,0 +1,162 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/timex.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/proc_fs.h> +#include <linux/sysctl.h> +#include <linux/hardirq.h> +#include <linux/mman.h> +#include <asm/unaligned.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/homecache.h> +#include <asm/hardwall.h> +#include <arch/chip.h> + + +/* + * Support /proc/cpuinfo + */ + +#define cpu_to_ptr(n) ((void *)((long)(n)+1)) +#define ptr_to_cpu(p) ((long)(p) - 1) + +static int show_cpuinfo(struct seq_file *m, void *v) +{ + int n = ptr_to_cpu(v); + + if (n == 0) { + char buf[NR_CPUS*5]; + cpulist_scnprintf(buf, sizeof(buf), cpu_online_mask); + seq_printf(m, "cpu count\t: %d\n", num_online_cpus()); + seq_printf(m, "cpu list\t: %s\n", buf); + seq_printf(m, "model name\t: %s\n", chip_model); + seq_printf(m, "flags\t\t:\n"); /* nothing for now */ + seq_printf(m, "cpu MHz\t\t: %llu.%06llu\n", + get_clock_rate() / 1000000, + (get_clock_rate() % 1000000)); + seq_printf(m, "bogomips\t: %lu.%02lu\n\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); + } + +#ifdef CONFIG_SMP + if (!cpu_online(n)) + return 0; +#endif + + seq_printf(m, "processor\t: %d\n", n); + + /* Print only num_online_cpus() blank lines total. */ + if (cpumask_next(n, cpu_online_mask) < nr_cpu_ids) + seq_printf(m, "\n"); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < nr_cpu_ids ? cpu_to_ptr(*pos) : NULL; +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} +static void c_stop(struct seq_file *m, void *v) +{ +} +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +/* + * Support /proc/tile directory + */ + +static int __init proc_tile_init(void) +{ + struct proc_dir_entry *root = proc_mkdir("tile", NULL); + if (root == NULL) + return 0; + + proc_tile_hardwall_init(root); + + return 0; +} + +arch_initcall(proc_tile_init); + +/* + * Support /proc/sys/tile directory + */ + +#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */ +static ctl_table unaligned_subtable[] = { + { + .procname = "enabled", + .data = &unaligned_fixup, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "printk", + .data = &unaligned_printk, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "count", + .data = &unaligned_fixup_count, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + {} +}; + +static ctl_table unaligned_table[] = { + { + .procname = "unaligned_fixup", + .mode = 0555, + .child = unaligned_subtable + }, + {} +}; + +static struct ctl_path tile_path[] = { + { .procname = "tile" }, + { } +}; + +static int __init proc_sys_tile_init(void) +{ + register_sysctl_paths(tile_path, unaligned_table); + return 0; +} + +arch_initcall(proc_sys_tile_init); +#endif diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c new file mode 100644 index 00000000..54e6c64b --- /dev/null +++ b/arch/tile/kernel/process.c @@ -0,0 +1,749 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/preempt.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/kprobes.h> +#include <linux/elfcore.h> +#include <linux/tick.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/compat.h> +#include <linux/hardirq.h> +#include <linux/syscalls.h> +#include <linux/kernel.h> +#include <linux/tracehook.h> +#include <linux/signal.h> +#include <asm/stack.h> +#include <asm/switch_to.h> +#include <asm/homecache.h> +#include <asm/syscalls.h> +#include <asm/traps.h> +#include <asm/setup.h> +#ifdef CONFIG_HARDWALL +#include <asm/hardwall.h> +#endif +#include <arch/chip.h> +#include <arch/abi.h> +#include <arch/sim_def.h> + + +/* + * Use the (x86) "idle=poll" option to prefer low latency when leaving the + * idle loop over low power while in the idle loop, e.g. if we have + * one thread per core and we want to get threads out of futex waits fast. + */ +static int no_idle_nap; +static int __init idle_setup(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "poll")) { + pr_info("using polling idle threads.\n"); + no_idle_nap = 1; + } else if (!strcmp(str, "halt")) + no_idle_nap = 0; + else + return -1; + + return 0; +} +early_param("idle", idle_setup); + +/* + * The idle thread. There's no useful work to be + * done, so just try to conserve power and have a + * low exit latency (ie sit in a loop waiting for + * somebody to say that they'd like to reschedule) + */ +void cpu_idle(void) +{ + int cpu = smp_processor_id(); + + + current_thread_info()->status |= TS_POLLING; + + if (no_idle_nap) { + while (1) { + while (!need_resched()) + cpu_relax(); + schedule(); + } + } + + /* endless idle loop with no priority at all */ + while (1) { + tick_nohz_idle_enter(); + rcu_idle_enter(); + while (!need_resched()) { + if (cpu_is_offline(cpu)) + BUG(); /* no HOTPLUG_CPU */ + + local_irq_disable(); + __get_cpu_var(irq_stat).idle_timestamp = jiffies; + current_thread_info()->status &= ~TS_POLLING; + /* + * TS_POLLING-cleared state must be visible before we + * test NEED_RESCHED: + */ + smp_mb(); + + if (!need_resched()) + _cpu_idle(); + else + local_irq_enable(); + current_thread_info()->status |= TS_POLLING; + } + rcu_idle_exit(); + tick_nohz_idle_exit(); + schedule_preempt_disabled(); + } +} + +struct thread_info *alloc_thread_info_node(struct task_struct *task, int node) +{ + struct page *page; + gfp_t flags = GFP_KERNEL; + +#ifdef CONFIG_DEBUG_STACK_USAGE + flags |= __GFP_ZERO; +#endif + + page = alloc_pages_node(node, flags, THREAD_SIZE_ORDER); + if (!page) + return NULL; + + return (struct thread_info *)page_address(page); +} + +/* + * Free a thread_info node, and all of its derivative + * data structures. + */ +void free_thread_info(struct thread_info *info) +{ + struct single_step_state *step_state = info->step_state; + +#ifdef CONFIG_HARDWALL + /* + * We free a thread_info from the context of the task that has + * been scheduled next, so the original task is already dead. + * Calling deactivate here just frees up the data structures. + * If the task we're freeing held the last reference to a + * hardwall fd, it would have been released prior to this point + * anyway via exit_files(), and "hardwall" would be NULL by now. + */ + if (info->task->thread.hardwall) + hardwall_deactivate(info->task); +#endif + + if (step_state) { + + /* + * FIXME: we don't munmap step_state->buffer + * because the mm_struct for this process (info->task->mm) + * has already been zeroed in exit_mm(). Keeping a + * reference to it here seems like a bad move, so this + * means we can't munmap() the buffer, and therefore if we + * ptrace multiple threads in a process, we will slowly + * leak user memory. (Note that as soon as the last + * thread in a process dies, we will reclaim all user + * memory including single-step buffers in the usual way.) + * We should either assign a kernel VA to this buffer + * somehow, or we should associate the buffer(s) with the + * mm itself so we can clean them up that way. + */ + kfree(step_state); + } + + free_pages((unsigned long)info, THREAD_SIZE_ORDER); +} + +static void save_arch_state(struct thread_struct *t); + +int copy_thread(unsigned long clone_flags, unsigned long sp, + unsigned long stack_size, + struct task_struct *p, struct pt_regs *regs) +{ + struct pt_regs *childregs; + unsigned long ksp; + + /* + * When creating a new kernel thread we pass sp as zero. + * Assign it to a reasonable value now that we have the stack. + */ + if (sp == 0 && regs->ex1 == PL_ICS_EX1(KERNEL_PL, 0)) + sp = KSTK_TOP(p); + + /* + * Do not clone step state from the parent; each thread + * must make its own lazily. + */ + task_thread_info(p)->step_state = NULL; + + /* + * Start new thread in ret_from_fork so it schedules properly + * and then return from interrupt like the parent. + */ + p->thread.pc = (unsigned long) ret_from_fork; + + /* Save user stack top pointer so we can ID the stack vm area later. */ + p->thread.usp0 = sp; + + /* Record the pid of the process that created this one. */ + p->thread.creator_pid = current->pid; + + /* + * Copy the registers onto the kernel stack so the + * return-from-interrupt code will reload it into registers. + */ + childregs = task_pt_regs(p); + *childregs = *regs; + childregs->regs[0] = 0; /* return value is zero */ + childregs->sp = sp; /* override with new user stack pointer */ + + /* + * If CLONE_SETTLS is set, set "tp" in the new task to "r4", + * which is passed in as arg #5 to sys_clone(). + */ + if (clone_flags & CLONE_SETTLS) + childregs->tp = regs->regs[4]; + + /* + * Copy the callee-saved registers from the passed pt_regs struct + * into the context-switch callee-saved registers area. + * This way when we start the interrupt-return sequence, the + * callee-save registers will be correctly in registers, which + * is how we assume the compiler leaves them as we start doing + * the normal return-from-interrupt path after calling C code. + * Zero out the C ABI save area to mark the top of the stack. + */ + ksp = (unsigned long) childregs; + ksp -= C_ABI_SAVE_AREA_SIZE; /* interrupt-entry save area */ + ((long *)ksp)[0] = ((long *)ksp)[1] = 0; + ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long); + memcpy((void *)ksp, ®s->regs[CALLEE_SAVED_FIRST_REG], + CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long)); + ksp -= C_ABI_SAVE_AREA_SIZE; /* __switch_to() save area */ + ((long *)ksp)[0] = ((long *)ksp)[1] = 0; + p->thread.ksp = ksp; + +#if CHIP_HAS_TILE_DMA() + /* + * No DMA in the new thread. We model this on the fact that + * fork() clears the pending signals, alarms, and aio for the child. + */ + memset(&p->thread.tile_dma_state, 0, sizeof(struct tile_dma_state)); + memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb)); +#endif + +#if CHIP_HAS_SN_PROC() + /* Likewise, the new thread is not running static processor code. */ + p->thread.sn_proc_running = 0; + memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb)); +#endif + +#if CHIP_HAS_PROC_STATUS_SPR() + /* New thread has its miscellaneous processor state bits clear. */ + p->thread.proc_status = 0; +#endif + +#ifdef CONFIG_HARDWALL + /* New thread does not own any networks. */ + p->thread.hardwall = NULL; +#endif + + + /* + * Start the new thread with the current architecture state + * (user interrupt masks, etc.). + */ + save_arch_state(&p->thread); + + return 0; +} + +/* + * Return "current" if it looks plausible, or else a pointer to a dummy. + * This can be helpful if we are just trying to emit a clean panic. + */ +struct task_struct *validate_current(void) +{ + static struct task_struct corrupt = { .comm = "<corrupt>" }; + struct task_struct *tsk = current; + if (unlikely((unsigned long)tsk < PAGE_OFFSET || + (high_memory && (void *)tsk > high_memory) || + ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) { + pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer); + tsk = &corrupt; + } + return tsk; +} + +/* Take and return the pointer to the previous task, for schedule_tail(). */ +struct task_struct *sim_notify_fork(struct task_struct *prev) +{ + struct task_struct *tsk = current; + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_FORK_PARENT | + (tsk->thread.creator_pid << _SIM_CONTROL_OPERATOR_BITS)); + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_FORK | + (tsk->pid << _SIM_CONTROL_OPERATOR_BITS)); + return prev; +} + +int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) +{ + struct pt_regs *ptregs = task_pt_regs(tsk); + elf_core_copy_regs(regs, ptregs); + return 1; +} + +#if CHIP_HAS_TILE_DMA() + +/* Allow user processes to access the DMA SPRs */ +void grant_dma_mpls(void) +{ +#if CONFIG_KERNEL_PL == 2 + __insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1); + __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1); +#else + __insn_mtspr(SPR_MPL_DMA_CPL_SET_0, 1); + __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_0, 1); +#endif +} + +/* Forbid user processes from accessing the DMA SPRs */ +void restrict_dma_mpls(void) +{ +#if CONFIG_KERNEL_PL == 2 + __insn_mtspr(SPR_MPL_DMA_CPL_SET_2, 1); + __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_2, 1); +#else + __insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1); + __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1); +#endif +} + +/* Pause the DMA engine, then save off its state registers. */ +static void save_tile_dma_state(struct tile_dma_state *dma) +{ + unsigned long state = __insn_mfspr(SPR_DMA_USER_STATUS); + unsigned long post_suspend_state; + + /* If we're running, suspend the engine. */ + if ((state & DMA_STATUS_MASK) == SPR_DMA_STATUS__RUNNING_MASK) + __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK); + + /* + * Wait for the engine to idle, then save regs. Note that we + * want to record the "running" bit from before suspension, + * and the "done" bit from after, so that we can properly + * distinguish a case where the user suspended the engine from + * the case where the kernel suspended as part of the context + * swap. + */ + do { + post_suspend_state = __insn_mfspr(SPR_DMA_USER_STATUS); + } while (post_suspend_state & SPR_DMA_STATUS__BUSY_MASK); + + dma->src = __insn_mfspr(SPR_DMA_SRC_ADDR); + dma->src_chunk = __insn_mfspr(SPR_DMA_SRC_CHUNK_ADDR); + dma->dest = __insn_mfspr(SPR_DMA_DST_ADDR); + dma->dest_chunk = __insn_mfspr(SPR_DMA_DST_CHUNK_ADDR); + dma->strides = __insn_mfspr(SPR_DMA_STRIDE); + dma->chunk_size = __insn_mfspr(SPR_DMA_CHUNK_SIZE); + dma->byte = __insn_mfspr(SPR_DMA_BYTE); + dma->status = (state & SPR_DMA_STATUS__RUNNING_MASK) | + (post_suspend_state & SPR_DMA_STATUS__DONE_MASK); +} + +/* Restart a DMA that was running before we were context-switched out. */ +static void restore_tile_dma_state(struct thread_struct *t) +{ + const struct tile_dma_state *dma = &t->tile_dma_state; + + /* + * The only way to restore the done bit is to run a zero + * length transaction. + */ + if ((dma->status & SPR_DMA_STATUS__DONE_MASK) && + !(__insn_mfspr(SPR_DMA_USER_STATUS) & SPR_DMA_STATUS__DONE_MASK)) { + __insn_mtspr(SPR_DMA_BYTE, 0); + __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); + while (__insn_mfspr(SPR_DMA_USER_STATUS) & + SPR_DMA_STATUS__BUSY_MASK) + ; + } + + __insn_mtspr(SPR_DMA_SRC_ADDR, dma->src); + __insn_mtspr(SPR_DMA_SRC_CHUNK_ADDR, dma->src_chunk); + __insn_mtspr(SPR_DMA_DST_ADDR, dma->dest); + __insn_mtspr(SPR_DMA_DST_CHUNK_ADDR, dma->dest_chunk); + __insn_mtspr(SPR_DMA_STRIDE, dma->strides); + __insn_mtspr(SPR_DMA_CHUNK_SIZE, dma->chunk_size); + __insn_mtspr(SPR_DMA_BYTE, dma->byte); + + /* + * Restart the engine if we were running and not done. + * Clear a pending async DMA fault that we were waiting on return + * to user space to execute, since we expect the DMA engine + * to regenerate those faults for us now. Note that we don't + * try to clear the TIF_ASYNC_TLB flag, since it's relatively + * harmless if set, and it covers both DMA and the SN processor. + */ + if ((dma->status & DMA_STATUS_MASK) == SPR_DMA_STATUS__RUNNING_MASK) { + t->dma_async_tlb.fault_num = 0; + __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); + } +} + +#endif + +static void save_arch_state(struct thread_struct *t) +{ +#if CHIP_HAS_SPLIT_INTR_MASK() + t->interrupt_mask = __insn_mfspr(SPR_INTERRUPT_MASK_0_0) | + ((u64)__insn_mfspr(SPR_INTERRUPT_MASK_0_1) << 32); +#else + t->interrupt_mask = __insn_mfspr(SPR_INTERRUPT_MASK_0); +#endif + t->ex_context[0] = __insn_mfspr(SPR_EX_CONTEXT_0_0); + t->ex_context[1] = __insn_mfspr(SPR_EX_CONTEXT_0_1); + t->system_save[0] = __insn_mfspr(SPR_SYSTEM_SAVE_0_0); + t->system_save[1] = __insn_mfspr(SPR_SYSTEM_SAVE_0_1); + t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2); + t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3); + t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS); +#if CHIP_HAS_PROC_STATUS_SPR() + t->proc_status = __insn_mfspr(SPR_PROC_STATUS); +#endif +#if !CHIP_HAS_FIXED_INTVEC_BASE() + t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0); +#endif +#if CHIP_HAS_TILE_RTF_HWM() + t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM); +#endif +#if CHIP_HAS_DSTREAM_PF() + t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF); +#endif +} + +static void restore_arch_state(const struct thread_struct *t) +{ +#if CHIP_HAS_SPLIT_INTR_MASK() + __insn_mtspr(SPR_INTERRUPT_MASK_0_0, (u32) t->interrupt_mask); + __insn_mtspr(SPR_INTERRUPT_MASK_0_1, t->interrupt_mask >> 32); +#else + __insn_mtspr(SPR_INTERRUPT_MASK_0, t->interrupt_mask); +#endif + __insn_mtspr(SPR_EX_CONTEXT_0_0, t->ex_context[0]); + __insn_mtspr(SPR_EX_CONTEXT_0_1, t->ex_context[1]); + __insn_mtspr(SPR_SYSTEM_SAVE_0_0, t->system_save[0]); + __insn_mtspr(SPR_SYSTEM_SAVE_0_1, t->system_save[1]); + __insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]); + __insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]); + __insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0); +#if CHIP_HAS_PROC_STATUS_SPR() + __insn_mtspr(SPR_PROC_STATUS, t->proc_status); +#endif +#if !CHIP_HAS_FIXED_INTVEC_BASE() + __insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base); +#endif +#if CHIP_HAS_TILE_RTF_HWM() + __insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm); +#endif +#if CHIP_HAS_DSTREAM_PF() + __insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf); +#endif +} + + +void _prepare_arch_switch(struct task_struct *next) +{ +#if CHIP_HAS_SN_PROC() + int snctl; +#endif +#if CHIP_HAS_TILE_DMA() + struct tile_dma_state *dma = ¤t->thread.tile_dma_state; + if (dma->enabled) + save_tile_dma_state(dma); +#endif +#if CHIP_HAS_SN_PROC() + /* + * Suspend the static network processor if it was running. + * We do not suspend the fabric itself, just like we don't + * try to suspend the UDN. + */ + snctl = __insn_mfspr(SPR_SNCTL); + current->thread.sn_proc_running = + (snctl & SPR_SNCTL__FRZPROC_MASK) == 0; + if (current->thread.sn_proc_running) + __insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK); +#endif +} + + +struct task_struct *__sched _switch_to(struct task_struct *prev, + struct task_struct *next) +{ + /* DMA state is already saved; save off other arch state. */ + save_arch_state(&prev->thread); + +#if CHIP_HAS_TILE_DMA() + /* + * Restore DMA in new task if desired. + * Note that it is only safe to restart here since interrupts + * are disabled, so we can't take any DMATLB miss or access + * interrupts before we have finished switching stacks. + */ + if (next->thread.tile_dma_state.enabled) { + restore_tile_dma_state(&next->thread); + grant_dma_mpls(); + } else { + restrict_dma_mpls(); + } +#endif + + /* Restore other arch state. */ + restore_arch_state(&next->thread); + +#if CHIP_HAS_SN_PROC() + /* + * Restart static network processor in the new process + * if it was running before. + */ + if (next->thread.sn_proc_running) { + int snctl = __insn_mfspr(SPR_SNCTL); + __insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK); + } +#endif + +#ifdef CONFIG_HARDWALL + /* Enable or disable access to the network registers appropriately. */ + if (prev->thread.hardwall != NULL) { + if (next->thread.hardwall == NULL) + restrict_network_mpls(); + } else if (next->thread.hardwall != NULL) { + grant_network_mpls(); + } +#endif + + /* + * Switch kernel SP, PC, and callee-saved registers. + * In the context of the new task, return the old task pointer + * (i.e. the task that actually called __switch_to). + * Pass the value to use for SYSTEM_SAVE_K_0 when we reset our sp. + */ + return __switch_to(prev, next, next_current_ksp0(next)); +} + +/* + * This routine is called on return from interrupt if any of the + * TIF_WORK_MASK flags are set in thread_info->flags. It is + * entered with interrupts disabled so we don't miss an event + * that modified the thread_info flags. If any flag is set, we + * handle it and return, and the calling assembly code will + * re-disable interrupts, reload the thread flags, and call back + * if more flags need to be handled. + * + * We return whether we need to check the thread_info flags again + * or not. Note that we don't clear TIF_SINGLESTEP here, so it's + * important that it be tested last, and then claim that we don't + * need to recheck the flags. + */ +int do_work_pending(struct pt_regs *regs, u32 thread_info_flags) +{ + /* If we enter in kernel mode, do nothing and exit the caller loop. */ + if (!user_mode(regs)) + return 0; + + if (thread_info_flags & _TIF_NEED_RESCHED) { + schedule(); + return 1; + } +#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() + if (thread_info_flags & _TIF_ASYNC_TLB) { + do_async_page_fault(regs); + return 1; + } +#endif + if (thread_info_flags & _TIF_SIGPENDING) { + do_signal(regs); + return 1; + } + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); + return 1; + } + if (thread_info_flags & _TIF_SINGLESTEP) { + single_step_once(regs); + return 0; + } + panic("work_pending: bad flags %#x\n", thread_info_flags); +} + +/* Note there is an implicit fifth argument if (clone_flags & CLONE_SETTLS). */ +SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, + void __user *, parent_tidptr, void __user *, child_tidptr, + struct pt_regs *, regs) +{ + if (!newsp) + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, + parent_tidptr, child_tidptr); +} + +/* + * sys_execve() executes a new program. + */ +SYSCALL_DEFINE4(execve, const char __user *, path, + const char __user *const __user *, argv, + const char __user *const __user *, envp, + struct pt_regs *, regs) +{ + long error; + char *filename; + + filename = getname(path); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, argv, envp, regs); + putname(filename); + if (error == 0) + single_step_execve(); +out: + return error; +} + +#ifdef CONFIG_COMPAT +long compat_sys_execve(const char __user *path, + compat_uptr_t __user *argv, + compat_uptr_t __user *envp, + struct pt_regs *regs) +{ + long error; + char *filename; + + filename = getname(path); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = compat_do_execve(filename, argv, envp, regs); + putname(filename); + if (error == 0) + single_step_execve(); +out: + return error; +} +#endif + +unsigned long get_wchan(struct task_struct *p) +{ + struct KBacktraceIterator kbt; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + for (KBacktraceIterator_init(&kbt, p, NULL); + !KBacktraceIterator_end(&kbt); + KBacktraceIterator_next(&kbt)) { + if (!in_sched_functions(kbt.it.pc)) + return kbt.it.pc; + } + + return 0; +} + +/* + * We pass in lr as zero (cleared in kernel_thread) and the caller + * part of the backtrace ABI on the stack also zeroed (in copy_thread) + * so that backtraces will stop with this function. + * Note that we don't use r0, since copy_thread() clears it. + */ +static void start_kernel_thread(int dummy, int (*fn)(int), int arg) +{ + do_exit(fn(arg)); +} + +/* + * Create a kernel thread + */ +int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + regs.ex1 = PL_ICS_EX1(KERNEL_PL, 0); /* run at kernel PL, no ICS */ + regs.pc = (long) start_kernel_thread; + regs.flags = PT_FLAGS_CALLER_SAVES; /* need to restore r1 and r2 */ + regs.regs[1] = (long) fn; /* function pointer */ + regs.regs[2] = (long) arg; /* parameter register */ + + /* Ok, create the new process.. */ + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, + 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); + +/* Flush thread state. */ +void flush_thread(void) +{ + /* Nothing */ +} + +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ + /* Nothing */ +} + +void show_regs(struct pt_regs *regs) +{ + struct task_struct *tsk = validate_current(); + int i; + + pr_err("\n"); + pr_err(" Pid: %d, comm: %20s, CPU: %d\n", + tsk->pid, tsk->comm, smp_processor_id()); +#ifdef __tilegx__ + for (i = 0; i < 51; i += 3) + pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n", + i, regs->regs[i], i+1, regs->regs[i+1], + i+2, regs->regs[i+2]); + pr_err(" r51: "REGFMT" r52: "REGFMT" tp : "REGFMT"\n", + regs->regs[51], regs->regs[52], regs->tp); + pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr); +#else + for (i = 0; i < 52; i += 4) + pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT + " r%-2d: "REGFMT" r%-2d: "REGFMT"\n", + i, regs->regs[i], i+1, regs->regs[i+1], + i+2, regs->regs[i+2], i+3, regs->regs[i+3]); + pr_err(" r52: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n", + regs->regs[52], regs->tp, regs->sp, regs->lr); +#endif + pr_err(" pc : "REGFMT" ex1: %ld faultnum: %ld\n", + regs->pc, regs->ex1, regs->faultnum); + + dump_stack_regs(regs); +} diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c new file mode 100644 index 00000000..e92e4052 --- /dev/null +++ b/arch/tile/kernel/ptrace.c @@ -0,0 +1,205 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Copied from i386: Ross Biro 1/23/92 + */ + +#include <linux/kernel.h> +#include <linux/ptrace.h> +#include <linux/kprobes.h> +#include <linux/compat.h> +#include <linux/uaccess.h> +#include <asm/traps.h> + +void user_enable_single_step(struct task_struct *child) +{ + set_tsk_thread_flag(child, TIF_SINGLESTEP); +} + +void user_disable_single_step(struct task_struct *child) +{ + clear_tsk_thread_flag(child, TIF_SINGLESTEP); +} + +/* + * Called by kernel/ptrace.c when detaching.. + */ +void ptrace_disable(struct task_struct *child) +{ + clear_tsk_thread_flag(child, TIF_SINGLESTEP); + + /* + * These two are currently unused, but will be set by arch_ptrace() + * and used in the syscall assembly when we do support them. + */ + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + unsigned long __user *datap = (long __user __force *)data; + unsigned long tmp; + long ret = -EIO; + char *childreg; + struct pt_regs copyregs; + int ex1_offset; + + switch (request) { + + case PTRACE_PEEKUSR: /* Read register from pt_regs. */ + if (addr >= PTREGS_SIZE) + break; + childreg = (char *)task_pt_regs(child) + addr; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + if (addr & (sizeof(compat_long_t)-1)) + break; + ret = put_user(*(compat_long_t *)childreg, + (compat_long_t __user *)datap); + } else +#endif + { + if (addr & (sizeof(long)-1)) + break; + ret = put_user(*(long *)childreg, datap); + } + break; + + case PTRACE_POKEUSR: /* Write register in pt_regs. */ + if (addr >= PTREGS_SIZE) + break; + childreg = (char *)task_pt_regs(child) + addr; + + /* Guard against overwrites of the privilege level. */ + ex1_offset = PTREGS_OFFSET_EX1; +#if defined(CONFIG_COMPAT) && defined(__BIG_ENDIAN) + if (is_compat_task()) /* point at low word */ + ex1_offset += sizeof(compat_long_t); +#endif + if (addr == ex1_offset) + data = PL_ICS_EX1(USER_PL, EX1_ICS(data)); + +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + if (addr & (sizeof(compat_long_t)-1)) + break; + *(compat_long_t *)childreg = data; + } else +#endif + { + if (addr & (sizeof(long)-1)) + break; + *(long *)childreg = data; + } + ret = 0; + break; + + case PTRACE_GETREGS: /* Get all registers from the child. */ + if (copy_to_user(datap, task_pt_regs(child), + sizeof(struct pt_regs)) == 0) { + ret = 0; + } + break; + + case PTRACE_SETREGS: /* Set all registers in the child. */ + if (copy_from_user(©regs, datap, + sizeof(struct pt_regs)) == 0) { + copyregs.ex1 = + PL_ICS_EX1(USER_PL, EX1_ICS(copyregs.ex1)); + *task_pt_regs(child) = copyregs; + ret = 0; + } + break; + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + break; + + case PTRACE_SETOPTIONS: + /* Support TILE-specific ptrace options. */ + child->ptrace &= ~PT_TRACE_MASK_TILE; + tmp = data & PTRACE_O_MASK_TILE; + data &= ~PTRACE_O_MASK_TILE; + ret = ptrace_request(child, request, addr, data); + if (tmp & PTRACE_O_TRACEMIGRATE) + child->ptrace |= PT_TRACE_MIGRATE; + break; + + default: +#ifdef CONFIG_COMPAT + if (task_thread_info(current)->status & TS_COMPAT) { + ret = compat_ptrace_request(child, request, + addr, data); + break; + } +#endif + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + +#ifdef CONFIG_COMPAT +/* Not used; we handle compat issues in arch_ptrace() directly. */ +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t addr, compat_ulong_t data) +{ + BUG(); +} +#endif + +void do_syscall_trace(void) +{ + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + + if (!(current->ptrace & PT_PTRACED)) + return; + + /* + * The 0x80 provides a way for the tracing parent to distinguish + * between a syscall stop and SIGTRAP delivery + */ + ptrace_notify(SIGTRAP|((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); + + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} + +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) +{ + struct siginfo info; + + memset(&info, 0, sizeof(info)); + info.si_signo = SIGTRAP; + info.si_code = TRAP_BRKPT; + info.si_addr = (void __user *) regs->pc; + + /* Send us the fakey SIGTRAP */ + force_sig_info(SIGTRAP, &info, tsk); +} + +/* Handle synthetic interrupt delivered only by the simulator. */ +void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num) +{ + send_sigtrap(current, regs, fault_num); +} diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c new file mode 100644 index 00000000..baa3d905 --- /dev/null +++ b/arch/tile/kernel/reboot.c @@ -0,0 +1,51 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/stddef.h> +#include <linux/reboot.h> +#include <linux/smp.h> +#include <linux/pm.h> +#include <asm/page.h> +#include <asm/setup.h> +#include <hv/hypervisor.h> + +#ifndef CONFIG_SMP +#define smp_send_stop() +#endif + +void machine_halt(void) +{ + warn_early_printk(); + arch_local_irq_disable_all(); + smp_send_stop(); + hv_halt(); +} + +void machine_power_off(void) +{ + warn_early_printk(); + arch_local_irq_disable_all(); + smp_send_stop(); + hv_power_off(); +} + +void machine_restart(char *cmd) +{ + arch_local_irq_disable_all(); + smp_send_stop(); + hv_restart((HV_VirtAddr) "vmlinux", (HV_VirtAddr) cmd); +} + +/* No interesting distinction to be made here. */ +void (*pm_power_off)(void) = NULL; diff --git a/arch/tile/kernel/regs_32.S b/arch/tile/kernel/regs_32.S new file mode 100644 index 00000000..c12280c2 --- /dev/null +++ b/arch/tile/kernel/regs_32.S @@ -0,0 +1,145 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/linkage.h> +#include <asm/ptrace.h> +#include <asm/asm-offsets.h> +#include <arch/spr_def.h> +#include <asm/processor.h> +#include <asm/switch_to.h> + +/* + * See <asm/system.h>; called with prev and next task_struct pointers. + * "prev" is returned in r0 for _switch_to and also for ret_from_fork. + * + * We want to save pc/sp in "prev", and get the new pc/sp from "next". + * We also need to save all the callee-saved registers on the stack. + * + * Intel enables/disables access to the hardware cycle counter in + * seccomp (secure computing) environments if necessary, based on + * has_secure_computing(). We might want to do this at some point, + * though it would require virtualizing the other SPRs under WORLD_ACCESS. + * + * Since we're saving to the stack, we omit sp from this list. + * And for parallels with other architectures, we save lr separately, + * in the thread_struct itself (as the "pc" field). + * + * This code also needs to be aligned with process.c copy_thread() + */ + +#if CALLEE_SAVED_REGS_COUNT != 24 +# error Mismatch between <asm/system.h> and kernel/entry.S +#endif +#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 4) + +#define SAVE_REG(r) { sw r12, r; addi r12, r12, 4 } +#define LOAD_REG(r) { lw r, r12; addi r12, r12, 4 } +#define FOR_EACH_CALLEE_SAVED_REG(f) \ + f(r30); f(r31); \ + f(r32); f(r33); f(r34); f(r35); f(r36); f(r37); f(r38); f(r39); \ + f(r40); f(r41); f(r42); f(r43); f(r44); f(r45); f(r46); f(r47); \ + f(r48); f(r49); f(r50); f(r51); f(r52); + +STD_ENTRY_SECTION(__switch_to, .sched.text) + { + move r10, sp + sw sp, lr + addi sp, sp, -FRAME_SIZE + } + { + addi r11, sp, 4 + addi r12, sp, 8 + } + { + sw r11, r10 + addli r4, r1, TASK_STRUCT_THREAD_KSP_OFFSET + } + { + lw r13, r4 /* Load new sp to a temp register early. */ + addli r3, r0, TASK_STRUCT_THREAD_KSP_OFFSET + } + FOR_EACH_CALLEE_SAVED_REG(SAVE_REG) + { + sw r3, sp + addli r3, r0, TASK_STRUCT_THREAD_PC_OFFSET + } + { + sw r3, lr + addli r4, r1, TASK_STRUCT_THREAD_PC_OFFSET + } + { + lw lr, r4 + addi r12, r13, 8 + } + { + /* Update sp and ksp0 simultaneously to avoid backtracer warnings. */ + move sp, r13 + mtspr SPR_SYSTEM_SAVE_K_0, r2 + } + FOR_EACH_CALLEE_SAVED_REG(LOAD_REG) +.L__switch_to_pc: + { + addi sp, sp, FRAME_SIZE + jrp lr /* r0 is still valid here, so return it */ + } + STD_ENDPROC(__switch_to) + +/* Return a suitable address for the backtracer for suspended threads */ +STD_ENTRY_SECTION(get_switch_to_pc, .sched.text) + lnk r0 + { + addli r0, r0, .L__switch_to_pc - . + jrp lr + } + STD_ENDPROC(get_switch_to_pc) + +STD_ENTRY(get_pt_regs) + .irp reg, r0, r1, r2, r3, r4, r5, r6, r7, \ + r8, r9, r10, r11, r12, r13, r14, r15, \ + r16, r17, r18, r19, r20, r21, r22, r23, \ + r24, r25, r26, r27, r28, r29, r30, r31, \ + r32, r33, r34, r35, r36, r37, r38, r39, \ + r40, r41, r42, r43, r44, r45, r46, r47, \ + r48, r49, r50, r51, r52, tp, sp + { + sw r0, \reg + addi r0, r0, 4 + } + .endr + { + sw r0, lr + addi r0, r0, PTREGS_OFFSET_PC - PTREGS_OFFSET_LR + } + lnk r1 + { + sw r0, r1 + addi r0, r0, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC + } + mfspr r1, INTERRUPT_CRITICAL_SECTION + shli r1, r1, SPR_EX_CONTEXT_1_1__ICS_SHIFT + ori r1, r1, KERNEL_PL + { + sw r0, r1 + addi r0, r0, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1 + } + { + sw r0, zero /* clear faultnum */ + addi r0, r0, PTREGS_OFFSET_ORIG_R0 - PTREGS_OFFSET_FAULTNUM + } + { + sw r0, zero /* clear orig_r0 */ + addli r0, r0, -PTREGS_OFFSET_ORIG_R0 /* restore r0 to base */ + } + jrp lr + STD_ENDPROC(get_pt_regs) diff --git a/arch/tile/kernel/regs_64.S b/arch/tile/kernel/regs_64.S new file mode 100644 index 00000000..0829fd01 --- /dev/null +++ b/arch/tile/kernel/regs_64.S @@ -0,0 +1,145 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/linkage.h> +#include <asm/ptrace.h> +#include <asm/asm-offsets.h> +#include <arch/spr_def.h> +#include <asm/processor.h> +#include <asm/switch_to.h> + +/* + * See <asm/system.h>; called with prev and next task_struct pointers. + * "prev" is returned in r0 for _switch_to and also for ret_from_fork. + * + * We want to save pc/sp in "prev", and get the new pc/sp from "next". + * We also need to save all the callee-saved registers on the stack. + * + * Intel enables/disables access to the hardware cycle counter in + * seccomp (secure computing) environments if necessary, based on + * has_secure_computing(). We might want to do this at some point, + * though it would require virtualizing the other SPRs under WORLD_ACCESS. + * + * Since we're saving to the stack, we omit sp from this list. + * And for parallels with other architectures, we save lr separately, + * in the thread_struct itself (as the "pc" field). + * + * This code also needs to be aligned with process.c copy_thread() + */ + +#if CALLEE_SAVED_REGS_COUNT != 24 +# error Mismatch between <asm/system.h> and kernel/entry.S +#endif +#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 8) + +#define SAVE_REG(r) { st r12, r; addi r12, r12, 8 } +#define LOAD_REG(r) { ld r, r12; addi r12, r12, 8 } +#define FOR_EACH_CALLEE_SAVED_REG(f) \ + f(r30); f(r31); \ + f(r32); f(r33); f(r34); f(r35); f(r36); f(r37); f(r38); f(r39); \ + f(r40); f(r41); f(r42); f(r43); f(r44); f(r45); f(r46); f(r47); \ + f(r48); f(r49); f(r50); f(r51); f(r52); + +STD_ENTRY_SECTION(__switch_to, .sched.text) + { + move r10, sp + st sp, lr + } + { + addli r11, sp, -FRAME_SIZE + 8 + addli sp, sp, -FRAME_SIZE + } + { + st r11, r10 + addli r4, r1, TASK_STRUCT_THREAD_KSP_OFFSET + } + { + ld r13, r4 /* Load new sp to a temp register early. */ + addi r12, sp, 16 + } + FOR_EACH_CALLEE_SAVED_REG(SAVE_REG) + addli r3, r0, TASK_STRUCT_THREAD_KSP_OFFSET + { + st r3, sp + addli r3, r0, TASK_STRUCT_THREAD_PC_OFFSET + } + { + st r3, lr + addli r4, r1, TASK_STRUCT_THREAD_PC_OFFSET + } + { + ld lr, r4 + addi r12, r13, 16 + } + { + /* Update sp and ksp0 simultaneously to avoid backtracer warnings. */ + move sp, r13 + mtspr SPR_SYSTEM_SAVE_K_0, r2 + } + FOR_EACH_CALLEE_SAVED_REG(LOAD_REG) +.L__switch_to_pc: + { + addli sp, sp, FRAME_SIZE + jrp lr /* r0 is still valid here, so return it */ + } + STD_ENDPROC(__switch_to) + +/* Return a suitable address for the backtracer for suspended threads */ +STD_ENTRY_SECTION(get_switch_to_pc, .sched.text) + lnk r0 + { + addli r0, r0, .L__switch_to_pc - . + jrp lr + } + STD_ENDPROC(get_switch_to_pc) + +STD_ENTRY(get_pt_regs) + .irp reg, r0, r1, r2, r3, r4, r5, r6, r7, \ + r8, r9, r10, r11, r12, r13, r14, r15, \ + r16, r17, r18, r19, r20, r21, r22, r23, \ + r24, r25, r26, r27, r28, r29, r30, r31, \ + r32, r33, r34, r35, r36, r37, r38, r39, \ + r40, r41, r42, r43, r44, r45, r46, r47, \ + r48, r49, r50, r51, r52, tp, sp + { + st r0, \reg + addi r0, r0, 8 + } + .endr + { + st r0, lr + addi r0, r0, PTREGS_OFFSET_PC - PTREGS_OFFSET_LR + } + lnk r1 + { + st r0, r1 + addi r0, r0, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC + } + mfspr r1, INTERRUPT_CRITICAL_SECTION + shli r1, r1, SPR_EX_CONTEXT_1_1__ICS_SHIFT + ori r1, r1, KERNEL_PL + { + st r0, r1 + addi r0, r0, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1 + } + { + st r0, zero /* clear faultnum */ + addi r0, r0, PTREGS_OFFSET_ORIG_R0 - PTREGS_OFFSET_FAULTNUM + } + { + st r0, zero /* clear orig_r0 */ + addli r0, r0, -PTREGS_OFFSET_ORIG_R0 /* restore r0 to base */ + } + jrp lr + STD_ENDPROC(get_pt_regs) diff --git a/arch/tile/kernel/relocate_kernel.S b/arch/tile/kernel/relocate_kernel.S new file mode 100644 index 00000000..010b4185 --- /dev/null +++ b/arch/tile/kernel/relocate_kernel.S @@ -0,0 +1,280 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * copy new kernel into place and then call hv_reexec + * + */ + +#include <linux/linkage.h> +#include <arch/chip.h> +#include <asm/page.h> +#include <hv/hypervisor.h> + +#define ___hvb MEM_SV_INTRPT + HV_GLUE_START_CPA + +#define ___hv_dispatch(f) (___hvb + (HV_DISPATCH_ENTRY_SIZE * f)) + +#define ___hv_console_putc ___hv_dispatch(HV_DISPATCH_CONSOLE_PUTC) +#define ___hv_halt ___hv_dispatch(HV_DISPATCH_HALT) +#define ___hv_reexec ___hv_dispatch(HV_DISPATCH_REEXEC) +#define ___hv_flush_remote ___hv_dispatch(HV_DISPATCH_FLUSH_REMOTE) + +#undef RELOCATE_NEW_KERNEL_VERBOSE + +STD_ENTRY(relocate_new_kernel) + + move r30, r0 /* page list */ + move r31, r1 /* address of page we are on */ + move r32, r2 /* start address of new kernel */ + + shri r1, r1, PAGE_SHIFT + addi r1, r1, 1 + shli sp, r1, PAGE_SHIFT + addi sp, sp, -8 + /* we now have a stack (whether we need one or not) */ + + moveli r40, lo16(___hv_console_putc) + auli r40, r40, ha16(___hv_console_putc) + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'r' + jalr r40 + + moveli r0, '_' + jalr r40 + + moveli r0, 'n' + jalr r40 + + moveli r0, '_' + jalr r40 + + moveli r0, 'k' + jalr r40 + + moveli r0, '\n' + jalr r40 +#endif + + /* + * Throughout this code r30 is pointer to the element of page + * list we are working on. + * + * Normally we get to the next element of the page list by + * incrementing r30 by four. The exception is if the element + * on the page list is an IND_INDIRECTION in which case we use + * the element with the low bits masked off as the new value + * of r30. + * + * To get this started, we need the value passed to us (which + * will always be an IND_INDIRECTION) in memory somewhere with + * r30 pointing at it. To do that, we push the value passed + * to us on the stack and make r30 point to it. + */ + + sw sp, r30 + move r30, sp + addi sp, sp, -8 + +#if CHIP_HAS_CBOX_HOME_MAP() + /* + * On TILEPro, we need to flush all tiles' caches, since we may + * have been doing hash-for-home caching there. Note that we + * must do this _after_ we're completely done modifying any memory + * other than our output buffer (which we know is locally cached). + * We want the caches to be fully clean when we do the reexec, + * because the hypervisor is going to do this flush again at that + * point, and we don't want that second flush to overwrite any memory. + */ + { + move r0, zero /* cache_pa */ + move r1, zero + } + { + auli r2, zero, ha16(HV_FLUSH_EVICT_L2) /* cache_control */ + movei r3, -1 /* cache_cpumask; -1 means all client tiles */ + } + { + move r4, zero /* tlb_va */ + move r5, zero /* tlb_length */ + } + { + move r6, zero /* tlb_pgsize */ + move r7, zero /* tlb_cpumask */ + } + { + move r8, zero /* asids */ + moveli r20, lo16(___hv_flush_remote) + } + { + move r9, zero /* asidcount */ + auli r20, r20, ha16(___hv_flush_remote) + } + + jalr r20 +#endif + + /* r33 is destination pointer, default to zero */ + + moveli r33, 0 + +.Lloop: lw r10, r30 + + andi r9, r10, 0xf /* low 4 bits tell us what type it is */ + xor r10, r10, r9 /* r10 is now value with low 4 bits stripped */ + + seqi r0, r9, 0x1 /* IND_DESTINATION */ + bzt r0, .Ltry2 + + move r33, r10 + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'd' + jalr r40 +#endif + + addi r30, r30, 4 + j .Lloop + +.Ltry2: + seqi r0, r9, 0x2 /* IND_INDIRECTION */ + bzt r0, .Ltry4 + + move r30, r10 + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'i' + jalr r40 +#endif + + j .Lloop + +.Ltry4: + seqi r0, r9, 0x4 /* IND_DONE */ + bzt r0, .Ltry8 + + mf + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'D' + jalr r40 + moveli r0, '\n' + jalr r40 +#endif + + move r0, r32 + moveli r1, 0 /* arg to hv_reexec is 64 bits */ + + moveli r41, lo16(___hv_reexec) + auli r41, r41, ha16(___hv_reexec) + + jalr r41 + + /* we should not get here */ + + moveli r0, '?' + jalr r40 + moveli r0, '\n' + jalr r40 + + j .Lhalt + +.Ltry8: seqi r0, r9, 0x8 /* IND_SOURCE */ + bz r0, .Lerr /* unknown type */ + + /* copy page at r10 to page at r33 */ + + move r11, r33 + + moveli r0, lo16(PAGE_SIZE) + auli r0, r0, ha16(PAGE_SIZE) + add r33, r33, r0 + + /* copy word at r10 to word at r11 until r11 equals r33 */ + + /* We know page size must be multiple of 16, so we can unroll + * 16 times safely without any edge case checking. + * + * Issue a flush of the destination every 16 words to avoid + * incoherence when starting the new kernel. (Now this is + * just good paranoia because the hv_reexec call will also + * take care of this.) + */ + +1: + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0; addi r11, r11, 4 } + { lw r0, r10; addi r10, r10, 4 } + { sw r11, r0 } + { flush r11 ; addi r11, r11, 4 } + + seq r0, r33, r11 + bzt r0, 1b + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 's' + jalr r40 +#endif + + addi r30, r30, 4 + j .Lloop + + +.Lerr: moveli r0, 'e' + jalr r40 + moveli r0, 'r' + jalr r40 + moveli r0, 'r' + jalr r40 + moveli r0, '\n' + jalr r40 +.Lhalt: + moveli r41, lo16(___hv_halt) + auli r41, r41, ha16(___hv_halt) + + jalr r41 + STD_ENDPROC(relocate_new_kernel) + + .section .rodata,"a" + + .globl relocate_new_kernel_size +relocate_new_kernel_size: + .long .Lend_relocate_new_kernel - relocate_new_kernel diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c new file mode 100644 index 00000000..bff23f47 --- /dev/null +++ b/arch/tile/kernel/setup.c @@ -0,0 +1,1532 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mmzone.h> +#include <linux/bootmem.h> +#include <linux/module.h> +#include <linux/node.h> +#include <linux/cpu.h> +#include <linux/ioport.h> +#include <linux/irq.h> +#include <linux/kexec.h> +#include <linux/pci.h> +#include <linux/initrd.h> +#include <linux/io.h> +#include <linux/highmem.h> +#include <linux/smp.h> +#include <linux/timex.h> +#include <asm/setup.h> +#include <asm/sections.h> +#include <asm/cacheflush.h> +#include <asm/pgalloc.h> +#include <asm/mmu_context.h> +#include <hv/hypervisor.h> +#include <arch/interrupts.h> + +/* <linux/smp.h> doesn't provide this definition. */ +#ifndef CONFIG_SMP +#define setup_max_cpus 1 +#endif + +static inline int ABS(int x) { return x >= 0 ? x : -x; } + +/* Chip information */ +char chip_model[64] __write_once; + +struct pglist_data node_data[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_data); + +/* We only create bootmem data on node 0. */ +static bootmem_data_t __initdata node0_bdata; + +/* Information on the NUMA nodes that we compute early */ +unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES]; +unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES]; +unsigned long __initdata node_memmap_pfn[MAX_NUMNODES]; +unsigned long __initdata node_percpu_pfn[MAX_NUMNODES]; +unsigned long __initdata node_free_pfn[MAX_NUMNODES]; + +static unsigned long __initdata node_percpu[MAX_NUMNODES]; + +#ifdef CONFIG_HIGHMEM +/* Page frame index of end of lowmem on each controller. */ +unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES]; + +/* Number of pages that can be mapped into lowmem. */ +static unsigned long __initdata mappable_physpages; +#endif + +/* Data on which physical memory controller corresponds to which NUMA node */ +int node_controller[MAX_NUMNODES] = { [0 ... MAX_NUMNODES-1] = -1 }; + +#ifdef CONFIG_HIGHMEM +/* Map information from VAs to PAs */ +unsigned long pbase_map[1 << (32 - HPAGE_SHIFT)] + __write_once __attribute__((aligned(L2_CACHE_BYTES))); +EXPORT_SYMBOL(pbase_map); + +/* Map information from PAs to VAs */ +void *vbase_map[NR_PA_HIGHBIT_VALUES] + __write_once __attribute__((aligned(L2_CACHE_BYTES))); +EXPORT_SYMBOL(vbase_map); +#endif + +/* Node number as a function of the high PA bits */ +int highbits_to_node[NR_PA_HIGHBIT_VALUES] __write_once; +EXPORT_SYMBOL(highbits_to_node); + +static unsigned int __initdata maxmem_pfn = -1U; +static unsigned int __initdata maxnodemem_pfn[MAX_NUMNODES] = { + [0 ... MAX_NUMNODES-1] = -1U +}; +static nodemask_t __initdata isolnodes; + +#ifdef CONFIG_PCI +enum { DEFAULT_PCI_RESERVE_MB = 64 }; +static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB; +unsigned long __initdata pci_reserve_start_pfn = -1U; +unsigned long __initdata pci_reserve_end_pfn = -1U; +#endif + +static int __init setup_maxmem(char *str) +{ + unsigned long long maxmem; + if (str == NULL || (maxmem = memparse(str, NULL)) == 0) + return -EINVAL; + + maxmem_pfn = (maxmem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); + pr_info("Forcing RAM used to no more than %dMB\n", + maxmem_pfn >> (20 - PAGE_SHIFT)); + return 0; +} +early_param("maxmem", setup_maxmem); + +static int __init setup_maxnodemem(char *str) +{ + char *endp; + unsigned long long maxnodemem; + long node; + + node = str ? simple_strtoul(str, &endp, 0) : INT_MAX; + if (node >= MAX_NUMNODES || *endp != ':') + return -EINVAL; + + maxnodemem = memparse(endp+1, NULL); + maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) << + (HPAGE_SHIFT - PAGE_SHIFT); + pr_info("Forcing RAM used on node %ld to no more than %dMB\n", + node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT)); + return 0; +} +early_param("maxnodemem", setup_maxnodemem); + +static int __init setup_isolnodes(char *str) +{ + char buf[MAX_NUMNODES * 5]; + if (str == NULL || nodelist_parse(str, isolnodes) != 0) + return -EINVAL; + + nodelist_scnprintf(buf, sizeof(buf), isolnodes); + pr_info("Set isolnodes value to '%s'\n", buf); + return 0; +} +early_param("isolnodes", setup_isolnodes); + +#ifdef CONFIG_PCI +static int __init setup_pci_reserve(char* str) +{ + unsigned long mb; + + if (str == NULL || strict_strtoul(str, 0, &mb) != 0 || + mb > 3 * 1024) + return -EINVAL; + + pci_reserve_mb = mb; + pr_info("Reserving %dMB for PCIE root complex mappings\n", + pci_reserve_mb); + return 0; +} +early_param("pci_reserve", setup_pci_reserve); +#endif + +#ifndef __tilegx__ +/* + * vmalloc=size forces the vmalloc area to be exactly 'size' bytes. + * This can be used to increase (or decrease) the vmalloc area. + */ +static int __init parse_vmalloc(char *arg) +{ + if (!arg) + return -EINVAL; + + VMALLOC_RESERVE = (memparse(arg, &arg) + PGDIR_SIZE - 1) & PGDIR_MASK; + + /* See validate_va() for more on this test. */ + if ((long)_VMALLOC_START >= 0) + early_panic("\"vmalloc=%#lx\" value too large: maximum %#lx\n", + VMALLOC_RESERVE, _VMALLOC_END - 0x80000000UL); + + return 0; +} +early_param("vmalloc", parse_vmalloc); +#endif + +#ifdef CONFIG_HIGHMEM +/* + * Determine for each controller where its lowmem is mapped and how much of + * it is mapped there. On controller zero, the first few megabytes are + * already mapped in as code at MEM_SV_INTRPT, so in principle we could + * start our data mappings higher up, but for now we don't bother, to avoid + * additional confusion. + * + * One question is whether, on systems with more than 768 Mb and + * controllers of different sizes, to map in a proportionate amount of + * each one, or to try to map the same amount from each controller. + * (E.g. if we have three controllers with 256MB, 1GB, and 256MB + * respectively, do we map 256MB from each, or do we map 128 MB, 512 + * MB, and 128 MB respectively?) For now we use a proportionate + * solution like the latter. + * + * The VA/PA mapping demands that we align our decisions at 16 MB + * boundaries so that we can rapidly convert VA to PA. + */ +static void *__init setup_pa_va_mapping(void) +{ + unsigned long curr_pages = 0; + unsigned long vaddr = PAGE_OFFSET; + nodemask_t highonlynodes = isolnodes; + int i, j; + + memset(pbase_map, -1, sizeof(pbase_map)); + memset(vbase_map, -1, sizeof(vbase_map)); + + /* Node zero cannot be isolated for LOWMEM purposes. */ + node_clear(0, highonlynodes); + + /* Count up the number of pages on non-highonlynodes controllers. */ + mappable_physpages = 0; + for_each_online_node(i) { + if (!node_isset(i, highonlynodes)) + mappable_physpages += + node_end_pfn[i] - node_start_pfn[i]; + } + + for_each_online_node(i) { + unsigned long start = node_start_pfn[i]; + unsigned long end = node_end_pfn[i]; + unsigned long size = end - start; + unsigned long vaddr_end; + + if (node_isset(i, highonlynodes)) { + /* Mark this controller as having no lowmem. */ + node_lowmem_end_pfn[i] = start; + continue; + } + + curr_pages += size; + if (mappable_physpages > MAXMEM_PFN) { + vaddr_end = PAGE_OFFSET + + (((u64)curr_pages * MAXMEM_PFN / + mappable_physpages) + << PAGE_SHIFT); + } else { + vaddr_end = PAGE_OFFSET + (curr_pages << PAGE_SHIFT); + } + for (j = 0; vaddr < vaddr_end; vaddr += HPAGE_SIZE, ++j) { + unsigned long this_pfn = + start + (j << HUGETLB_PAGE_ORDER); + pbase_map[vaddr >> HPAGE_SHIFT] = this_pfn; + if (vbase_map[__pfn_to_highbits(this_pfn)] == + (void *)-1) + vbase_map[__pfn_to_highbits(this_pfn)] = + (void *)(vaddr & HPAGE_MASK); + } + node_lowmem_end_pfn[i] = start + (j << HUGETLB_PAGE_ORDER); + BUG_ON(node_lowmem_end_pfn[i] > end); + } + + /* Return highest address of any mapped memory. */ + return (void *)vaddr; +} +#endif /* CONFIG_HIGHMEM */ + +/* + * Register our most important memory mappings with the debug stub. + * + * This is up to 4 mappings for lowmem, one mapping per memory + * controller, plus one for our text segment. + */ +static void __cpuinit store_permanent_mappings(void) +{ + int i; + + for_each_online_node(i) { + HV_PhysAddr pa = ((HV_PhysAddr)node_start_pfn[i]) << PAGE_SHIFT; +#ifdef CONFIG_HIGHMEM + HV_PhysAddr high_mapped_pa = node_lowmem_end_pfn[i]; +#else + HV_PhysAddr high_mapped_pa = node_end_pfn[i]; +#endif + + unsigned long pages = high_mapped_pa - node_start_pfn[i]; + HV_VirtAddr addr = (HV_VirtAddr) __va(pa); + hv_store_mapping(addr, pages << PAGE_SHIFT, pa); + } + + hv_store_mapping((HV_VirtAddr)_stext, + (uint32_t)(_einittext - _stext), 0); +} + +/* + * Use hv_inquire_physical() to populate node_{start,end}_pfn[] + * and node_online_map, doing suitable sanity-checking. + * Also set min_low_pfn, max_low_pfn, and max_pfn. + */ +static void __init setup_memory(void) +{ + int i, j; + int highbits_seen[NR_PA_HIGHBIT_VALUES] = { 0 }; +#ifdef CONFIG_HIGHMEM + long highmem_pages; +#endif +#ifndef __tilegx__ + int cap; +#endif +#if defined(CONFIG_HIGHMEM) || defined(__tilegx__) + long lowmem_pages; +#endif + + /* We are using a char to hold the cpu_2_node[] mapping */ + BUILD_BUG_ON(MAX_NUMNODES > 127); + + /* Discover the ranges of memory available to us */ + for (i = 0; ; ++i) { + unsigned long start, size, end, highbits; + HV_PhysAddrRange range = hv_inquire_physical(i); + if (range.size == 0) + break; +#ifdef CONFIG_FLATMEM + if (i > 0) { + pr_err("Can't use discontiguous PAs: %#llx..%#llx\n", + range.size, range.start + range.size); + continue; + } +#endif +#ifndef __tilegx__ + if ((unsigned long)range.start) { + pr_err("Range not at 4GB multiple: %#llx..%#llx\n", + range.start, range.start + range.size); + continue; + } +#endif + if ((range.start & (HPAGE_SIZE-1)) != 0 || + (range.size & (HPAGE_SIZE-1)) != 0) { + unsigned long long start_pa = range.start; + unsigned long long orig_size = range.size; + range.start = (start_pa + HPAGE_SIZE - 1) & HPAGE_MASK; + range.size -= (range.start - start_pa); + range.size &= HPAGE_MASK; + pr_err("Range not hugepage-aligned: %#llx..%#llx:" + " now %#llx-%#llx\n", + start_pa, start_pa + orig_size, + range.start, range.start + range.size); + } + highbits = __pa_to_highbits(range.start); + if (highbits >= NR_PA_HIGHBIT_VALUES) { + pr_err("PA high bits too high: %#llx..%#llx\n", + range.start, range.start + range.size); + continue; + } + if (highbits_seen[highbits]) { + pr_err("Range overlaps in high bits: %#llx..%#llx\n", + range.start, range.start + range.size); + continue; + } + highbits_seen[highbits] = 1; + if (PFN_DOWN(range.size) > maxnodemem_pfn[i]) { + int max_size = maxnodemem_pfn[i]; + if (max_size > 0) { + pr_err("Maxnodemem reduced node %d to" + " %d pages\n", i, max_size); + range.size = PFN_PHYS(max_size); + } else { + pr_err("Maxnodemem disabled node %d\n", i); + continue; + } + } + if (num_physpages + PFN_DOWN(range.size) > maxmem_pfn) { + int max_size = maxmem_pfn - num_physpages; + if (max_size > 0) { + pr_err("Maxmem reduced node %d to %d pages\n", + i, max_size); + range.size = PFN_PHYS(max_size); + } else { + pr_err("Maxmem disabled node %d\n", i); + continue; + } + } + if (i >= MAX_NUMNODES) { + pr_err("Too many PA nodes (#%d): %#llx...%#llx\n", + i, range.size, range.size + range.start); + continue; + } + + start = range.start >> PAGE_SHIFT; + size = range.size >> PAGE_SHIFT; + end = start + size; + +#ifndef __tilegx__ + if (((HV_PhysAddr)end << PAGE_SHIFT) != + (range.start + range.size)) { + pr_err("PAs too high to represent: %#llx..%#llx\n", + range.start, range.start + range.size); + continue; + } +#endif +#ifdef CONFIG_PCI + /* + * Blocks that overlap the pci reserved region must + * have enough space to hold the maximum percpu data + * region at the top of the range. If there isn't + * enough space above the reserved region, just + * truncate the node. + */ + if (start <= pci_reserve_start_pfn && + end > pci_reserve_start_pfn) { + unsigned int per_cpu_size = + __per_cpu_end - __per_cpu_start; + unsigned int percpu_pages = + NR_CPUS * (PFN_UP(per_cpu_size) >> PAGE_SHIFT); + if (end < pci_reserve_end_pfn + percpu_pages) { + end = pci_reserve_start_pfn; + pr_err("PCI mapping region reduced node %d to" + " %ld pages\n", i, end - start); + } + } +#endif + + for (j = __pfn_to_highbits(start); + j <= __pfn_to_highbits(end - 1); j++) + highbits_to_node[j] = i; + + node_start_pfn[i] = start; + node_end_pfn[i] = end; + node_controller[i] = range.controller; + num_physpages += size; + max_pfn = end; + + /* Mark node as online */ + node_set(i, node_online_map); + node_set(i, node_possible_map); + } + +#ifndef __tilegx__ + /* + * For 4KB pages, mem_map "struct page" data is 1% of the size + * of the physical memory, so can be quite big (640 MB for + * four 16G zones). These structures must be mapped in + * lowmem, and since we currently cap out at about 768 MB, + * it's impractical to try to use this much address space. + * For now, arbitrarily cap the amount of physical memory + * we're willing to use at 8 million pages (32GB of 4KB pages). + */ + cap = 8 * 1024 * 1024; /* 8 million pages */ + if (num_physpages > cap) { + int num_nodes = num_online_nodes(); + int cap_each = cap / num_nodes; + unsigned long dropped_pages = 0; + for (i = 0; i < num_nodes; ++i) { + int size = node_end_pfn[i] - node_start_pfn[i]; + if (size > cap_each) { + dropped_pages += (size - cap_each); + node_end_pfn[i] = node_start_pfn[i] + cap_each; + } + } + num_physpages -= dropped_pages; + pr_warning("Only using %ldMB memory;" + " ignoring %ldMB.\n", + num_physpages >> (20 - PAGE_SHIFT), + dropped_pages >> (20 - PAGE_SHIFT)); + pr_warning("Consider using a larger page size.\n"); + } +#endif + + /* Heap starts just above the last loaded address. */ + min_low_pfn = PFN_UP((unsigned long)_end - PAGE_OFFSET); + +#ifdef CONFIG_HIGHMEM + /* Find where we map lowmem from each controller. */ + high_memory = setup_pa_va_mapping(); + + /* Set max_low_pfn based on what node 0 can directly address. */ + max_low_pfn = node_lowmem_end_pfn[0]; + + lowmem_pages = (mappable_physpages > MAXMEM_PFN) ? + MAXMEM_PFN : mappable_physpages; + highmem_pages = (long) (num_physpages - lowmem_pages); + + pr_notice("%ldMB HIGHMEM available.\n", + pages_to_mb(highmem_pages > 0 ? highmem_pages : 0)); + pr_notice("%ldMB LOWMEM available.\n", + pages_to_mb(lowmem_pages)); +#else + /* Set max_low_pfn based on what node 0 can directly address. */ + max_low_pfn = node_end_pfn[0]; + +#ifndef __tilegx__ + if (node_end_pfn[0] > MAXMEM_PFN) { + pr_warning("Only using %ldMB LOWMEM.\n", + MAXMEM>>20); + pr_warning("Use a HIGHMEM enabled kernel.\n"); + max_low_pfn = MAXMEM_PFN; + max_pfn = MAXMEM_PFN; + num_physpages = MAXMEM_PFN; + node_end_pfn[0] = MAXMEM_PFN; + } else { + pr_notice("%ldMB memory available.\n", + pages_to_mb(node_end_pfn[0])); + } + for (i = 1; i < MAX_NUMNODES; ++i) { + node_start_pfn[i] = 0; + node_end_pfn[i] = 0; + } + high_memory = __va(node_end_pfn[0]); +#else + lowmem_pages = 0; + for (i = 0; i < MAX_NUMNODES; ++i) { + int pages = node_end_pfn[i] - node_start_pfn[i]; + lowmem_pages += pages; + if (pages) + high_memory = pfn_to_kaddr(node_end_pfn[i]); + } + pr_notice("%ldMB memory available.\n", + pages_to_mb(lowmem_pages)); +#endif +#endif +} + +static void __init setup_bootmem_allocator(void) +{ + unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn; + + /* Provide a node 0 bdata. */ + NODE_DATA(0)->bdata = &node0_bdata; + +#ifdef CONFIG_PCI + /* Don't let boot memory alias the PCI region. */ + last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn); +#else + last_alloc_pfn = max_low_pfn; +#endif + + /* + * Initialize the boot-time allocator (with low memory only): + * The first argument says where to put the bitmap, and the + * second says where the end of allocatable memory is. + */ + bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn); + + /* + * Let the bootmem allocator use all the space we've given it + * except for its own bitmap. + */ + first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size); + if (first_alloc_pfn >= last_alloc_pfn) + early_panic("Not enough memory on controller 0 for bootmem\n"); + + free_bootmem(PFN_PHYS(first_alloc_pfn), + PFN_PHYS(last_alloc_pfn - first_alloc_pfn)); + +#ifdef CONFIG_KEXEC + if (crashk_res.start != crashk_res.end) + reserve_bootmem(crashk_res.start, resource_size(&crashk_res), 0); +#endif +} + +void *__init alloc_remap(int nid, unsigned long size) +{ + int pages = node_end_pfn[nid] - node_start_pfn[nid]; + void *map = pfn_to_kaddr(node_memmap_pfn[nid]); + BUG_ON(size != pages * sizeof(struct page)); + memset(map, 0, size); + return map; +} + +static int __init percpu_size(void) +{ + int size = __per_cpu_end - __per_cpu_start; + size += PERCPU_MODULE_RESERVE; + size += PERCPU_DYNAMIC_EARLY_SIZE; + if (size < PCPU_MIN_UNIT_SIZE) + size = PCPU_MIN_UNIT_SIZE; + size = roundup(size, PAGE_SIZE); + + /* In several places we assume the per-cpu data fits on a huge page. */ + BUG_ON(kdata_huge && size > HPAGE_SIZE); + return size; +} + +static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal) +{ + void *kva = __alloc_bootmem(size, PAGE_SIZE, goal); + unsigned long pfn = kaddr_to_pfn(kva); + BUG_ON(goal && PFN_PHYS(pfn) != goal); + return pfn; +} + +static void __init zone_sizes_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES] = { 0 }; + int size = percpu_size(); + int num_cpus = smp_height * smp_width; + int i; + + for (i = 0; i < num_cpus; ++i) + node_percpu[cpu_to_node(i)] += size; + + for_each_online_node(i) { + unsigned long start = node_start_pfn[i]; + unsigned long end = node_end_pfn[i]; +#ifdef CONFIG_HIGHMEM + unsigned long lowmem_end = node_lowmem_end_pfn[i]; +#else + unsigned long lowmem_end = end; +#endif + int memmap_size = (end - start) * sizeof(struct page); + node_free_pfn[i] = start; + + /* + * Set aside pages for per-cpu data and the mem_map array. + * + * Since the per-cpu data requires special homecaching, + * if we are in kdata_huge mode, we put it at the end of + * the lowmem region. If we're not in kdata_huge mode, + * we take the per-cpu pages from the bottom of the + * controller, since that avoids fragmenting a huge page + * that users might want. We always take the memmap + * from the bottom of the controller, since with + * kdata_huge that lets it be under a huge TLB entry. + * + * If the user has requested isolnodes for a controller, + * though, there'll be no lowmem, so we just alloc_bootmem + * the memmap. There will be no percpu memory either. + */ + if (__pfn_to_highbits(start) == 0) { + /* In low PAs, allocate via bootmem. */ + unsigned long goal = 0; + node_memmap_pfn[i] = + alloc_bootmem_pfn(memmap_size, goal); + if (kdata_huge) + goal = PFN_PHYS(lowmem_end) - node_percpu[i]; + if (node_percpu[i]) + node_percpu_pfn[i] = + alloc_bootmem_pfn(node_percpu[i], goal); + } else if (cpu_isset(i, isolnodes)) { + node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0); + BUG_ON(node_percpu[i] != 0); + } else { + /* In high PAs, just reserve some pages. */ + node_memmap_pfn[i] = node_free_pfn[i]; + node_free_pfn[i] += PFN_UP(memmap_size); + if (!kdata_huge) { + node_percpu_pfn[i] = node_free_pfn[i]; + node_free_pfn[i] += PFN_UP(node_percpu[i]); + } else { + node_percpu_pfn[i] = + lowmem_end - PFN_UP(node_percpu[i]); + } + } + +#ifdef CONFIG_HIGHMEM + if (start > lowmem_end) { + zones_size[ZONE_NORMAL] = 0; + zones_size[ZONE_HIGHMEM] = end - start; + } else { + zones_size[ZONE_NORMAL] = lowmem_end - start; + zones_size[ZONE_HIGHMEM] = end - lowmem_end; + } +#else + zones_size[ZONE_NORMAL] = end - start; +#endif + + /* + * Everyone shares node 0's bootmem allocator, but + * we use alloc_remap(), above, to put the actual + * struct page array on the individual controllers, + * which is most of the data that we actually care about. + * We can't place bootmem allocators on the other + * controllers since the bootmem allocator can only + * operate on 32-bit physical addresses. + */ + NODE_DATA(i)->bdata = NODE_DATA(0)->bdata; + + free_area_init_node(i, zones_size, start, NULL); + printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n", + PFN_UP(node_percpu[i])); + + /* Track the type of memory on each node */ + if (zones_size[ZONE_NORMAL]) + node_set_state(i, N_NORMAL_MEMORY); +#ifdef CONFIG_HIGHMEM + if (end != start) + node_set_state(i, N_HIGH_MEMORY); +#endif + + node_set_online(i); + } +} + +#ifdef CONFIG_NUMA + +/* which logical CPUs are on which nodes */ +struct cpumask node_2_cpu_mask[MAX_NUMNODES] __write_once; +EXPORT_SYMBOL(node_2_cpu_mask); + +/* which node each logical CPU is on */ +char cpu_2_node[NR_CPUS] __write_once __attribute__((aligned(L2_CACHE_BYTES))); +EXPORT_SYMBOL(cpu_2_node); + +/* Return cpu_to_node() except for cpus not yet assigned, which return -1 */ +static int __init cpu_to_bound_node(int cpu, struct cpumask* unbound_cpus) +{ + if (!cpu_possible(cpu) || cpumask_test_cpu(cpu, unbound_cpus)) + return -1; + else + return cpu_to_node(cpu); +} + +/* Return number of immediately-adjacent tiles sharing the same NUMA node. */ +static int __init node_neighbors(int node, int cpu, + struct cpumask *unbound_cpus) +{ + int neighbors = 0; + int w = smp_width; + int h = smp_height; + int x = cpu % w; + int y = cpu / w; + if (x > 0 && cpu_to_bound_node(cpu-1, unbound_cpus) == node) + ++neighbors; + if (x < w-1 && cpu_to_bound_node(cpu+1, unbound_cpus) == node) + ++neighbors; + if (y > 0 && cpu_to_bound_node(cpu-w, unbound_cpus) == node) + ++neighbors; + if (y < h-1 && cpu_to_bound_node(cpu+w, unbound_cpus) == node) + ++neighbors; + return neighbors; +} + +static void __init setup_numa_mapping(void) +{ + int distance[MAX_NUMNODES][NR_CPUS]; + HV_Coord coord; + int cpu, node, cpus, i, x, y; + int num_nodes = num_online_nodes(); + struct cpumask unbound_cpus; + nodemask_t default_nodes; + + cpumask_clear(&unbound_cpus); + + /* Get set of nodes we will use for defaults */ + nodes_andnot(default_nodes, node_online_map, isolnodes); + if (nodes_empty(default_nodes)) { + BUG_ON(!node_isset(0, node_online_map)); + pr_err("Forcing NUMA node zero available as a default node\n"); + node_set(0, default_nodes); + } + + /* Populate the distance[] array */ + memset(distance, -1, sizeof(distance)); + cpu = 0; + for (coord.y = 0; coord.y < smp_height; ++coord.y) { + for (coord.x = 0; coord.x < smp_width; + ++coord.x, ++cpu) { + BUG_ON(cpu >= nr_cpu_ids); + if (!cpu_possible(cpu)) { + cpu_2_node[cpu] = -1; + continue; + } + for_each_node_mask(node, default_nodes) { + HV_MemoryControllerInfo info = + hv_inquire_memory_controller( + coord, node_controller[node]); + distance[node][cpu] = + ABS(info.coord.x) + ABS(info.coord.y); + } + cpumask_set_cpu(cpu, &unbound_cpus); + } + } + cpus = cpu; + + /* + * Round-robin through the NUMA nodes until all the cpus are + * assigned. We could be more clever here (e.g. create four + * sorted linked lists on the same set of cpu nodes, and pull + * off them in round-robin sequence, removing from all four + * lists each time) but given the relatively small numbers + * involved, O(n^2) seem OK for a one-time cost. + */ + node = first_node(default_nodes); + while (!cpumask_empty(&unbound_cpus)) { + int best_cpu = -1; + int best_distance = INT_MAX; + for (cpu = 0; cpu < cpus; ++cpu) { + if (cpumask_test_cpu(cpu, &unbound_cpus)) { + /* + * Compute metric, which is how much + * closer the cpu is to this memory + * controller than the others, shifted + * up, and then the number of + * neighbors already in the node as an + * epsilon adjustment to try to keep + * the nodes compact. + */ + int d = distance[node][cpu] * num_nodes; + for_each_node_mask(i, default_nodes) { + if (i != node) + d -= distance[i][cpu]; + } + d *= 8; /* allow space for epsilon */ + d -= node_neighbors(node, cpu, &unbound_cpus); + if (d < best_distance) { + best_cpu = cpu; + best_distance = d; + } + } + } + BUG_ON(best_cpu < 0); + cpumask_set_cpu(best_cpu, &node_2_cpu_mask[node]); + cpu_2_node[best_cpu] = node; + cpumask_clear_cpu(best_cpu, &unbound_cpus); + node = next_node(node, default_nodes); + if (node == MAX_NUMNODES) + node = first_node(default_nodes); + } + + /* Print out node assignments and set defaults for disabled cpus */ + cpu = 0; + for (y = 0; y < smp_height; ++y) { + printk(KERN_DEBUG "NUMA cpu-to-node row %d:", y); + for (x = 0; x < smp_width; ++x, ++cpu) { + if (cpu_to_node(cpu) < 0) { + pr_cont(" -"); + cpu_2_node[cpu] = first_node(default_nodes); + } else { + pr_cont(" %d", cpu_to_node(cpu)); + } + } + pr_cont("\n"); + } +} + +static struct cpu cpu_devices[NR_CPUS]; + +static int __init topology_init(void) +{ + int i; + + for_each_online_node(i) + register_one_node(i); + + for (i = 0; i < smp_height * smp_width; ++i) + register_cpu(&cpu_devices[i], i); + + return 0; +} + +subsys_initcall(topology_init); + +#else /* !CONFIG_NUMA */ + +#define setup_numa_mapping() do { } while (0) + +#endif /* CONFIG_NUMA */ + +/** + * setup_cpu() - Do all necessary per-cpu, tile-specific initialization. + * @boot: Is this the boot cpu? + * + * Called from setup_arch() on the boot cpu, or online_secondary(). + */ +void __cpuinit setup_cpu(int boot) +{ + /* The boot cpu sets up its permanent mappings much earlier. */ + if (!boot) + store_permanent_mappings(); + + /* Allow asynchronous TLB interrupts. */ +#if CHIP_HAS_TILE_DMA() + arch_local_irq_unmask(INT_DMATLB_MISS); + arch_local_irq_unmask(INT_DMATLB_ACCESS); +#endif +#if CHIP_HAS_SN_PROC() + arch_local_irq_unmask(INT_SNITLB_MISS); +#endif +#ifdef __tilegx__ + arch_local_irq_unmask(INT_SINGLE_STEP_K); +#endif + + /* + * Allow user access to many generic SPRs, like the cycle + * counter, PASS/FAIL/DONE, INTERRUPT_CRITICAL_SECTION, etc. + */ + __insn_mtspr(SPR_MPL_WORLD_ACCESS_SET_0, 1); + +#if CHIP_HAS_SN() + /* Static network is not restricted. */ + __insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1); +#endif +#if CHIP_HAS_SN_PROC() + __insn_mtspr(SPR_MPL_SN_NOTIFY_SET_0, 1); + __insn_mtspr(SPR_MPL_SN_CPL_SET_0, 1); +#endif + + /* + * Set the MPL for interrupt control 0 & 1 to the corresponding + * values. This includes access to the SYSTEM_SAVE and EX_CONTEXT + * SPRs, as well as the interrupt mask. + */ + __insn_mtspr(SPR_MPL_INTCTRL_0_SET_0, 1); + __insn_mtspr(SPR_MPL_INTCTRL_1_SET_1, 1); + + /* Initialize IRQ support for this cpu. */ + setup_irq_regs(); + +#ifdef CONFIG_HARDWALL + /* Reset the network state on this cpu. */ + reset_network_state(); +#endif +} + +#ifdef CONFIG_BLK_DEV_INITRD + +/* + * Note that the kernel can potentially support other compression + * techniques than gz, though we don't do so by default. If we ever + * decide to do so we can either look for other filename extensions, + * or just allow a file with this name to be compressed with an + * arbitrary compressor (somewhat counterintuitively). + */ +static int __initdata set_initramfs_file; +static char __initdata initramfs_file[128] = "initramfs.cpio.gz"; + +static int __init setup_initramfs_file(char *str) +{ + if (str == NULL) + return -EINVAL; + strncpy(initramfs_file, str, sizeof(initramfs_file) - 1); + set_initramfs_file = 1; + + return 0; +} +early_param("initramfs_file", setup_initramfs_file); + +/* + * We look for an "initramfs.cpio.gz" file in the hvfs. + * If there is one, we allocate some memory for it and it will be + * unpacked to the initramfs. + */ +static void __init load_hv_initrd(void) +{ + HV_FS_StatInfo stat; + int fd, rc; + void *initrd; + + fd = hv_fs_findfile((HV_VirtAddr) initramfs_file); + if (fd == HV_ENOENT) { + if (set_initramfs_file) + pr_warning("No such hvfs initramfs file '%s'\n", + initramfs_file); + return; + } + BUG_ON(fd < 0); + stat = hv_fs_fstat(fd); + BUG_ON(stat.size < 0); + if (stat.flags & HV_FS_ISDIR) { + pr_warning("Ignoring hvfs file '%s': it's a directory.\n", + initramfs_file); + return; + } + initrd = alloc_bootmem_pages(stat.size); + rc = hv_fs_pread(fd, (HV_VirtAddr) initrd, stat.size, 0); + if (rc != stat.size) { + pr_err("Error reading %d bytes from hvfs file '%s': %d\n", + stat.size, initramfs_file, rc); + free_initrd_mem((unsigned long) initrd, stat.size); + return; + } + initrd_start = (unsigned long) initrd; + initrd_end = initrd_start + stat.size; +} + +void __init free_initrd_mem(unsigned long begin, unsigned long end) +{ + free_bootmem(__pa(begin), end - begin); +} + +#else +static inline void load_hv_initrd(void) {} +#endif /* CONFIG_BLK_DEV_INITRD */ + +static void __init validate_hv(void) +{ + /* + * It may already be too late, but let's check our built-in + * configuration against what the hypervisor is providing. + */ + unsigned long glue_size = hv_sysconf(HV_SYSCONF_GLUE_SIZE); + int hv_page_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL); + int hv_hpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE); + HV_ASIDRange asid_range; + +#ifndef CONFIG_SMP + HV_Topology topology = hv_inquire_topology(); + BUG_ON(topology.coord.x != 0 || topology.coord.y != 0); + if (topology.width != 1 || topology.height != 1) { + pr_warning("Warning: booting UP kernel on %dx%d grid;" + " will ignore all but first tile.\n", + topology.width, topology.height); + } +#endif + + if (PAGE_OFFSET + HV_GLUE_START_CPA + glue_size > (unsigned long)_text) + early_panic("Hypervisor glue size %ld is too big!\n", + glue_size); + if (hv_page_size != PAGE_SIZE) + early_panic("Hypervisor page size %#x != our %#lx\n", + hv_page_size, PAGE_SIZE); + if (hv_hpage_size != HPAGE_SIZE) + early_panic("Hypervisor huge page size %#x != our %#lx\n", + hv_hpage_size, HPAGE_SIZE); + +#ifdef CONFIG_SMP + /* + * Some hypervisor APIs take a pointer to a bitmap array + * whose size is at least the number of cpus on the chip. + * We use a struct cpumask for this, so it must be big enough. + */ + if ((smp_height * smp_width) > nr_cpu_ids) + early_panic("Hypervisor %d x %d grid too big for Linux" + " NR_CPUS %d\n", smp_height, smp_width, + nr_cpu_ids); +#endif + + /* + * Check that we're using allowed ASIDs, and initialize the + * various asid variables to their appropriate initial states. + */ + asid_range = hv_inquire_asid(0); + __get_cpu_var(current_asid) = min_asid = asid_range.start; + max_asid = asid_range.start + asid_range.size - 1; + + if (hv_confstr(HV_CONFSTR_CHIP_MODEL, (HV_VirtAddr)chip_model, + sizeof(chip_model)) < 0) { + pr_err("Warning: HV_CONFSTR_CHIP_MODEL not available\n"); + strlcpy(chip_model, "unknown", sizeof(chip_model)); + } +} + +static void __init validate_va(void) +{ +#ifndef __tilegx__ /* FIXME: GX: probably some validation relevant here */ + /* + * Similarly, make sure we're only using allowed VAs. + * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_INTRPT, + * and 0 .. KERNEL_HIGH_VADDR. + * In addition, make sure we CAN'T use the end of memory, since + * we use the last chunk of each pgd for the pgd_list. + */ + int i, user_kernel_ok = 0; + unsigned long max_va = 0; + unsigned long list_va = + ((PGD_LIST_OFFSET / sizeof(pgd_t)) << PGDIR_SHIFT); + + for (i = 0; ; ++i) { + HV_VirtAddrRange range = hv_inquire_virtual(i); + if (range.size == 0) + break; + if (range.start <= MEM_USER_INTRPT && + range.start + range.size >= MEM_HV_INTRPT) + user_kernel_ok = 1; + if (range.start == 0) + max_va = range.size; + BUG_ON(range.start + range.size > list_va); + } + if (!user_kernel_ok) + early_panic("Hypervisor not configured for user/kernel VAs\n"); + if (max_va == 0) + early_panic("Hypervisor not configured for low VAs\n"); + if (max_va < KERNEL_HIGH_VADDR) + early_panic("Hypervisor max VA %#lx smaller than %#lx\n", + max_va, KERNEL_HIGH_VADDR); + + /* Kernel PCs must have their high bit set; see intvec.S. */ + if ((long)VMALLOC_START >= 0) + early_panic( + "Linux VMALLOC region below the 2GB line (%#lx)!\n" + "Reconfigure the kernel with fewer NR_HUGE_VMAPS\n" + "or smaller VMALLOC_RESERVE.\n", + VMALLOC_START); +#endif +} + +/* + * cpu_lotar_map lists all the cpus that are valid for the supervisor + * to cache data on at a page level, i.e. what cpus can be placed in + * the LOTAR field of a PTE. It is equivalent to the set of possible + * cpus plus any other cpus that are willing to share their cache. + * It is set by hv_inquire_tiles(HV_INQ_TILES_LOTAR). + */ +struct cpumask __write_once cpu_lotar_map; +EXPORT_SYMBOL(cpu_lotar_map); + +#if CHIP_HAS_CBOX_HOME_MAP() +/* + * hash_for_home_map lists all the tiles that hash-for-home data + * will be cached on. Note that this may includes tiles that are not + * valid for this supervisor to use otherwise (e.g. if a hypervisor + * device is being shared between multiple supervisors). + * It is set by hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE). + */ +struct cpumask hash_for_home_map; +EXPORT_SYMBOL(hash_for_home_map); +#endif + +/* + * cpu_cacheable_map lists all the cpus whose caches the hypervisor can + * flush on our behalf. It is set to cpu_possible_mask OR'ed with + * hash_for_home_map, and it is what should be passed to + * hv_flush_remote() to flush all caches. Note that if there are + * dedicated hypervisor driver tiles that have authorized use of their + * cache, those tiles will only appear in cpu_lotar_map, NOT in + * cpu_cacheable_map, as they are a special case. + */ +struct cpumask __write_once cpu_cacheable_map; +EXPORT_SYMBOL(cpu_cacheable_map); + +static __initdata struct cpumask disabled_map; + +static int __init disabled_cpus(char *str) +{ + int boot_cpu = smp_processor_id(); + + if (str == NULL || cpulist_parse_crop(str, &disabled_map) != 0) + return -EINVAL; + if (cpumask_test_cpu(boot_cpu, &disabled_map)) { + pr_err("disabled_cpus: can't disable boot cpu %d\n", boot_cpu); + cpumask_clear_cpu(boot_cpu, &disabled_map); + } + return 0; +} + +early_param("disabled_cpus", disabled_cpus); + +void __init print_disabled_cpus(void) +{ + if (!cpumask_empty(&disabled_map)) { + char buf[100]; + cpulist_scnprintf(buf, sizeof(buf), &disabled_map); + pr_info("CPUs not available for Linux: %s\n", buf); + } +} + +static void __init setup_cpu_maps(void) +{ + struct cpumask hv_disabled_map, cpu_possible_init; + int boot_cpu = smp_processor_id(); + int cpus, i, rc; + + /* Learn which cpus are allowed by the hypervisor. */ + rc = hv_inquire_tiles(HV_INQ_TILES_AVAIL, + (HV_VirtAddr) cpumask_bits(&cpu_possible_init), + sizeof(cpu_cacheable_map)); + if (rc < 0) + early_panic("hv_inquire_tiles(AVAIL) failed: rc %d\n", rc); + if (!cpumask_test_cpu(boot_cpu, &cpu_possible_init)) + early_panic("Boot CPU %d disabled by hypervisor!\n", boot_cpu); + + /* Compute the cpus disabled by the hvconfig file. */ + cpumask_complement(&hv_disabled_map, &cpu_possible_init); + + /* Include them with the cpus disabled by "disabled_cpus". */ + cpumask_or(&disabled_map, &disabled_map, &hv_disabled_map); + + /* + * Disable every cpu after "setup_max_cpus". But don't mark + * as disabled the cpus that are outside of our initial rectangle, + * since that turns out to be confusing. + */ + cpus = 1; /* this cpu */ + cpumask_set_cpu(boot_cpu, &disabled_map); /* ignore this cpu */ + for (i = 0; cpus < setup_max_cpus; ++i) + if (!cpumask_test_cpu(i, &disabled_map)) + ++cpus; + for (; i < smp_height * smp_width; ++i) + cpumask_set_cpu(i, &disabled_map); + cpumask_clear_cpu(boot_cpu, &disabled_map); /* reset this cpu */ + for (i = smp_height * smp_width; i < NR_CPUS; ++i) + cpumask_clear_cpu(i, &disabled_map); + + /* + * Setup cpu_possible map as every cpu allocated to us, minus + * the results of any "disabled_cpus" settings. + */ + cpumask_andnot(&cpu_possible_init, &cpu_possible_init, &disabled_map); + init_cpu_possible(&cpu_possible_init); + + /* Learn which cpus are valid for LOTAR caching. */ + rc = hv_inquire_tiles(HV_INQ_TILES_LOTAR, + (HV_VirtAddr) cpumask_bits(&cpu_lotar_map), + sizeof(cpu_lotar_map)); + if (rc < 0) { + pr_err("warning: no HV_INQ_TILES_LOTAR; using AVAIL\n"); + cpu_lotar_map = *cpu_possible_mask; + } + +#if CHIP_HAS_CBOX_HOME_MAP() + /* Retrieve set of CPUs used for hash-for-home caching */ + rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE, + (HV_VirtAddr) hash_for_home_map.bits, + sizeof(hash_for_home_map)); + if (rc < 0) + early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc); + cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map); +#else + cpu_cacheable_map = *cpu_possible_mask; +#endif +} + + +static int __init dataplane(char *str) +{ + pr_warning("WARNING: dataplane support disabled in this kernel\n"); + return 0; +} + +early_param("dataplane", dataplane); + +#ifdef CONFIG_CMDLINE_BOOL +static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +#endif + +void __init setup_arch(char **cmdline_p) +{ + int len; + +#if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE) + len = hv_get_command_line((HV_VirtAddr) boot_command_line, + COMMAND_LINE_SIZE); + if (boot_command_line[0]) + pr_warning("WARNING: ignoring dynamic command line \"%s\"\n", + boot_command_line); + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + char *hv_cmdline; +#if defined(CONFIG_CMDLINE_BOOL) + if (builtin_cmdline[0]) { + int builtin_len = strlcpy(boot_command_line, builtin_cmdline, + COMMAND_LINE_SIZE); + if (builtin_len < COMMAND_LINE_SIZE-1) + boot_command_line[builtin_len++] = ' '; + hv_cmdline = &boot_command_line[builtin_len]; + len = COMMAND_LINE_SIZE - builtin_len; + } else +#endif + { + hv_cmdline = boot_command_line; + len = COMMAND_LINE_SIZE; + } + len = hv_get_command_line((HV_VirtAddr) hv_cmdline, len); + if (len < 0 || len > COMMAND_LINE_SIZE) + early_panic("hv_get_command_line failed: %d\n", len); +#endif + + *cmdline_p = boot_command_line; + + /* Set disabled_map and setup_max_cpus very early */ + parse_early_param(); + + /* Make sure the kernel is compatible with the hypervisor. */ + validate_hv(); + validate_va(); + + setup_cpu_maps(); + + +#ifdef CONFIG_PCI + /* + * Initialize the PCI structures. This is done before memory + * setup so that we know whether or not a pci_reserve region + * is necessary. + */ + if (tile_pci_init() == 0) + pci_reserve_mb = 0; + + /* PCI systems reserve a region just below 4GB for mapping iomem. */ + pci_reserve_end_pfn = (1 << (32 - PAGE_SHIFT)); + pci_reserve_start_pfn = pci_reserve_end_pfn - + (pci_reserve_mb << (20 - PAGE_SHIFT)); +#endif + + init_mm.start_code = (unsigned long) _text; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = (unsigned long) _end; + + setup_memory(); + store_permanent_mappings(); + setup_bootmem_allocator(); + + /* + * NOTE: before this point _nobody_ is allowed to allocate + * any memory using the bootmem allocator. + */ + + paging_init(); + setup_numa_mapping(); + zone_sizes_init(); + set_page_homes(); + setup_cpu(1); + setup_clock(); + load_hv_initrd(); +} + + +/* + * Set up per-cpu memory. + */ + +unsigned long __per_cpu_offset[NR_CPUS] __write_once; +EXPORT_SYMBOL(__per_cpu_offset); + +static size_t __initdata pfn_offset[MAX_NUMNODES] = { 0 }; +static unsigned long __initdata percpu_pfn[NR_CPUS] = { 0 }; + +/* + * As the percpu code allocates pages, we return the pages from the + * end of the node for the specified cpu. + */ +static void *__init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) +{ + int nid = cpu_to_node(cpu); + unsigned long pfn = node_percpu_pfn[nid] + pfn_offset[nid]; + + BUG_ON(size % PAGE_SIZE != 0); + pfn_offset[nid] += size / PAGE_SIZE; + BUG_ON(node_percpu[nid] < size); + node_percpu[nid] -= size; + if (percpu_pfn[cpu] == 0) + percpu_pfn[cpu] = pfn; + return pfn_to_kaddr(pfn); +} + +/* + * Pages reserved for percpu memory are not freeable, and in any case we are + * on a short path to panic() in setup_per_cpu_area() at this point anyway. + */ +static void __init pcpu_fc_free(void *ptr, size_t size) +{ +} + +/* + * Set up vmalloc page tables using bootmem for the percpu code. + */ +static void __init pcpu_fc_populate_pte(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + BUG_ON(pgd_addr_invalid(addr)); + if (addr < VMALLOC_START || addr >= VMALLOC_END) + panic("PCPU addr %#lx outside vmalloc range %#lx..%#lx;" + " try increasing CONFIG_VMALLOC_RESERVE\n", + addr, VMALLOC_START, VMALLOC_END); + + pgd = swapper_pg_dir + pgd_index(addr); + pud = pud_offset(pgd, addr); + BUG_ON(!pud_present(*pud)); + pmd = pmd_offset(pud, addr); + if (pmd_present(*pmd)) { + BUG_ON(pmd_huge_page(*pmd)); + } else { + pte = __alloc_bootmem(L2_KERNEL_PGTABLE_SIZE, + HV_PAGE_TABLE_ALIGN, 0); + pmd_populate_kernel(&init_mm, pmd, pte); + } +} + +void __init setup_per_cpu_areas(void) +{ + struct page *pg; + unsigned long delta, pfn, lowmem_va; + unsigned long size = percpu_size(); + char *ptr; + int rc, cpu, i; + + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_fc_alloc, + pcpu_fc_free, pcpu_fc_populate_pte); + if (rc < 0) + panic("Cannot initialize percpu area (err=%d)", rc); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) { + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; + + /* finv the copy out of cache so we can change homecache */ + ptr = pcpu_base_addr + pcpu_unit_offsets[cpu]; + __finv_buffer(ptr, size); + pfn = percpu_pfn[cpu]; + + /* Rewrite the page tables to cache on that cpu */ + pg = pfn_to_page(pfn); + for (i = 0; i < size; i += PAGE_SIZE, ++pfn, ++pg) { + + /* Update the vmalloc mapping and page home. */ + pte_t *ptep = + virt_to_pte(NULL, (unsigned long)ptr + i); + pte_t pte = *ptep; + BUG_ON(pfn != pte_pfn(pte)); + pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3); + pte = set_remote_cache_cpu(pte, cpu); + set_pte(ptep, pte); + + /* Update the lowmem mapping for consistency. */ + lowmem_va = (unsigned long)pfn_to_kaddr(pfn); + ptep = virt_to_pte(NULL, lowmem_va); + if (pte_huge(*ptep)) { + printk(KERN_DEBUG "early shatter of huge page" + " at %#lx\n", lowmem_va); + shatter_pmd((pmd_t *)ptep); + ptep = virt_to_pte(NULL, lowmem_va); + BUG_ON(pte_huge(*ptep)); + } + BUG_ON(pfn != pte_pfn(*ptep)); + set_pte(ptep, pte); + } + } + + /* Set our thread pointer appropriately. */ + set_my_cpu_offset(__per_cpu_offset[smp_processor_id()]); + + /* Make sure the finv's have completed. */ + mb_incoherent(); + + /* Flush the TLB so we reference it properly from here on out. */ + local_flush_tlb_all(); +} + +static struct resource data_resource = { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +/* + * We reserve all resources above 4GB so that PCI won't try to put + * mappings above 4GB; the standard allows that for some devices but + * the probing code trunates values to 32 bits. + */ +#ifdef CONFIG_PCI +static struct resource* __init +insert_non_bus_resource(void) +{ + struct resource *res = + kzalloc(sizeof(struct resource), GFP_ATOMIC); + res->name = "Non-Bus Physical Address Space"; + res->start = (1ULL << 32); + res->end = -1LL; + res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + if (insert_resource(&iomem_resource, res)) { + kfree(res); + return NULL; + } + return res; +} +#endif + +static struct resource* __init +insert_ram_resource(u64 start_pfn, u64 end_pfn) +{ + struct resource *res = + kzalloc(sizeof(struct resource), GFP_ATOMIC); + res->name = "System RAM"; + res->start = start_pfn << PAGE_SHIFT; + res->end = (end_pfn << PAGE_SHIFT) - 1; + res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + if (insert_resource(&iomem_resource, res)) { + kfree(res); + return NULL; + } + return res; +} + +/* + * Request address space for all standard resources + * + * If the system includes PCI root complex drivers, we need to create + * a window just below 4GB where PCI BARs can be mapped. + */ +static int __init request_standard_resources(void) +{ + int i; + enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET }; + + iomem_resource.end = -1LL; +#ifdef CONFIG_PCI + insert_non_bus_resource(); +#endif + + for_each_online_node(i) { + u64 start_pfn = node_start_pfn[i]; + u64 end_pfn = node_end_pfn[i]; + +#ifdef CONFIG_PCI + if (start_pfn <= pci_reserve_start_pfn && + end_pfn > pci_reserve_start_pfn) { + if (end_pfn > pci_reserve_end_pfn) + insert_ram_resource(pci_reserve_end_pfn, + end_pfn); + end_pfn = pci_reserve_start_pfn; + } +#endif + insert_ram_resource(start_pfn, end_pfn); + } + + code_resource.start = __pa(_text - CODE_DELTA); + code_resource.end = __pa(_etext - CODE_DELTA)-1; + data_resource.start = __pa(_sdata); + data_resource.end = __pa(_end)-1; + + insert_resource(&iomem_resource, &code_resource); + insert_resource(&iomem_resource, &data_resource); + +#ifdef CONFIG_KEXEC + insert_resource(&iomem_resource, &crashk_res); +#endif + + return 0; +} + +subsys_initcall(request_standard_resources); diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c new file mode 100644 index 00000000..f79d4b88 --- /dev/null +++ b/arch/tile/kernel/signal.c @@ -0,0 +1,476 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/personality.h> +#include <linux/suspend.h> +#include <linux/ptrace.h> +#include <linux/elf.h> +#include <linux/compat.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <asm/processor.h> +#include <asm/ucontext.h> +#include <asm/sigframe.h> +#include <asm/syscalls.h> +#include <arch/interrupts.h> + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss, + stack_t __user *, uoss, struct pt_regs *, regs) +{ + return do_sigaltstack(uss, uoss, regs->sp); +} + + +/* + * Do a signal return; undo the signal stack. + */ + +int restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc) +{ + int err = 0; + int i; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + /* + * Enforce that sigcontext is like pt_regs, and doesn't mess + * up our stack alignment rules. + */ + BUILD_BUG_ON(sizeof(struct sigcontext) != sizeof(struct pt_regs)); + BUILD_BUG_ON(sizeof(struct sigcontext) % 8 != 0); + + for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) + err |= __get_user(regs->regs[i], &sc->gregs[i]); + + /* Ensure that the PL is always set to USER_PL. */ + regs->ex1 = PL_ICS_EX1(USER_PL, EX1_ICS(regs->ex1)); + + regs->faultnum = INT_SWINT_1_SIGRETURN; + + return err; +} + +void signal_fault(const char *type, struct pt_regs *regs, + void __user *frame, int sig) +{ + trace_unhandled_signal(type, regs, (unsigned long)frame, SIGSEGV); + force_sigsegv(sig, current); +} + +/* The assembly shim for this function arranges to ignore the return value. */ +SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) +{ + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *)(regs->sp); + sigset_t set; + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + goto badframe; + + if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) + goto badframe; + + return 0; + +badframe: + signal_fault("bad sigreturn frame", regs, frame, 0); + return 0; +} + +/* + * Set up a signal frame. + */ + +int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) +{ + int i, err = 0; + + for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) + err |= __put_user(regs->regs[i], &sc->gregs[i]); + + return err; +} + +/* + * Determine which stack to use.. + */ +static inline void __user *get_sigframe(struct k_sigaction *ka, + struct pt_regs *regs, + size_t frame_size) +{ + unsigned long sp; + + /* Default to using normal stack */ + sp = regs->sp; + + /* + * If we are on the alternate signal stack and would overflow + * it, don't. Return an always-bogus address instead so we + * will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) + return (void __user __force *)-1UL; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(sp) == 0) + sp = current->sas_ss_sp + current->sas_ss_size; + } + + sp -= frame_size; + /* + * Align the stack pointer according to the TILE ABI, + * i.e. so that on function entry (sp & 15) == 0. + */ + sp &= -16UL; + return (void __user *) sp; +} + +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + unsigned long restorer; + struct rt_sigframe __user *frame; + int err = 0; + int usig; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + usig = current_thread_info()->exec_domain + && current_thread_info()->exec_domain->signal_invmap + && sig < 32 + ? current_thread_info()->exec_domain->signal_invmap[sig] + : sig; + + /* Always write at least the signal number for the stack backtracer. */ + if (ka->sa.sa_flags & SA_SIGINFO) { + /* At sigreturn time, restore the callee-save registers too. */ + err |= copy_siginfo_to_user(&frame->info, info); + regs->flags |= PT_FLAGS_RESTORE_REGS; + } else { + err |= __put_user(info->si_signo, &frame->info.si_signo); + } + + /* Create the ucontext. */ + err |= __clear_user(&frame->save_area, sizeof(frame->save_area)); + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(NULL, &frame->uc.uc_link); + err |= __put_user((void __user *)(current->sas_ss_sp), + &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + restorer = VDSO_BASE; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = (unsigned long) ka->sa.sa_restorer; + + /* + * Set up registers for signal handler. + * Registers that we don't modify keep the value they had from + * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). + */ + regs->pc = (unsigned long) ka->sa.sa_handler; + regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */ + regs->sp = (unsigned long) frame; + regs->lr = restorer; + regs->regs[0] = (unsigned long) usig; + regs->regs[1] = (unsigned long) &frame->info; + regs->regs[2] = (unsigned long) &frame->uc; + regs->flags |= PT_FLAGS_CALLER_SAVES; + + /* + * Notify any tracer that was single-stepping it. + * The tracer may want to single-step inside the + * handler too. + */ + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); + + return 0; + +give_sigsegv: + signal_fault("bad setup frame", regs, frame, sig); + return -EFAULT; +} + +/* + * OK, we're invoking a handler + */ + +static int handle_signal(unsigned long sig, siginfo_t *info, + struct k_sigaction *ka, sigset_t *oldset, + struct pt_regs *regs) +{ + int ret; + + /* Are we from a system call? */ + if (regs->faultnum == INT_SWINT_1) { + /* If so, check system call restarting.. */ + switch (regs->regs[0]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->regs[0] = -EINTR; + break; + + case -ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + regs->regs[0] = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + /* Reload caller-saves to restore r0..r5 and r10. */ + regs->flags |= PT_FLAGS_CALLER_SAVES; + regs->regs[0] = regs->orig_r0; + regs->pc -= 8; + } + } + + /* Set up the stack frame */ +#ifdef CONFIG_COMPAT + if (is_compat_task()) + ret = compat_setup_rt_frame(sig, ka, info, oldset, regs); + else +#endif + ret = setup_rt_frame(sig, ka, info, oldset, regs); + if (ret == 0) { + /* This code is only called from system calls or from + * the work_pending path in the return-to-user code, and + * either way we can re-enable interrupts unconditionally. + */ + block_sigmask(ka, sig); + } + + return ret; +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ +void do_signal(struct pt_regs *regs) +{ + siginfo_t info; + int signr; + struct k_sigaction ka; + sigset_t *oldset; + + /* + * i386 will check if we're coming from kernel mode and bail out + * here. In my experience this just turns weird crashes into + * weird spin-hangs. But if we find a case where this seems + * helpful, we can reinstate the check on "!user_mode(regs)". + */ + + if (current_thread_info()->status & TS_RESTORE_SIGMASK) + oldset = ¤t->saved_sigmask; + else + oldset = ¤t->blocked; + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + if (signr > 0) { + /* Whee! Actually deliver the signal. */ + if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { + /* + * A signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TS_RESTORE_SIGMASK flag. + */ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; + } + + goto done; + } + + /* Did we come from a system call? */ + if (regs->faultnum == INT_SWINT_1) { + /* Restart the system call - no handlers present */ + switch (regs->regs[0]) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + regs->flags |= PT_FLAGS_CALLER_SAVES; + regs->regs[0] = regs->orig_r0; + regs->pc -= 8; + break; + + case -ERESTART_RESTARTBLOCK: + regs->flags |= PT_FLAGS_CALLER_SAVES; + regs->regs[TREG_SYSCALL_NR] = __NR_restart_syscall; + regs->pc -= 8; + break; + } + } + + /* If there's no signal to deliver, just put the saved sigmask back. */ + if (current_thread_info()->status & TS_RESTORE_SIGMASK) { + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } + +done: + /* Avoid double syscall restart if there are nested signals. */ + regs->faultnum = INT_SWINT_1_SIGRETURN; +} + +int show_unhandled_signals = 1; + +static int __init crashinfo(char *str) +{ + unsigned long val; + const char *word; + + if (*str == '\0') + val = 2; + else if (*str != '=' || strict_strtoul(++str, 0, &val) != 0) + return 0; + show_unhandled_signals = val; + switch (show_unhandled_signals) { + case 0: + word = "No"; + break; + case 1: + word = "One-line"; + break; + default: + word = "Detailed"; + break; + } + pr_info("%s crash reports will be generated on the console\n", word); + return 1; +} +__setup("crashinfo", crashinfo); + +static void dump_mem(void __user *address) +{ + void __user *addr; + enum { region_size = 256, bytes_per_line = 16 }; + int i, j, k; + int found_readable_mem = 0; + + pr_err("\n"); + if (!access_ok(VERIFY_READ, address, 1)) { + pr_err("Not dumping at address 0x%lx (kernel address)\n", + (unsigned long)address); + return; + } + + addr = (void __user *) + (((unsigned long)address & -bytes_per_line) - region_size/2); + if (addr > address) + addr = NULL; + for (i = 0; i < region_size; + addr += bytes_per_line, i += bytes_per_line) { + unsigned char buf[bytes_per_line]; + char line[100]; + if (copy_from_user(buf, addr, bytes_per_line)) + continue; + if (!found_readable_mem) { + pr_err("Dumping memory around address 0x%lx:\n", + (unsigned long)address); + found_readable_mem = 1; + } + j = sprintf(line, REGFMT":", (unsigned long)addr); + for (k = 0; k < bytes_per_line; ++k) + j += sprintf(&line[j], " %02x", buf[k]); + pr_err("%s\n", line); + } + if (!found_readable_mem) + pr_err("No readable memory around address 0x%lx\n", + (unsigned long)address); +} + +void trace_unhandled_signal(const char *type, struct pt_regs *regs, + unsigned long address, int sig) +{ + struct task_struct *tsk = current; + + if (show_unhandled_signals == 0) + return; + + /* If the signal is handled, don't show it here. */ + if (!is_global_init(tsk)) { + void __user *handler = + tsk->sighand->action[sig-1].sa.sa_handler; + if (handler != SIG_IGN && handler != SIG_DFL) + return; + } + + /* Rate-limit the one-line output, not the detailed output. */ + if (show_unhandled_signals <= 1 && !printk_ratelimit()) + return; + + printk("%s%s[%d]: %s at %lx pc "REGFMT" signal %d", + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), type, address, regs->pc, sig); + + print_vma_addr(KERN_CONT " in ", regs->pc); + + printk(KERN_CONT "\n"); + + if (show_unhandled_signals > 1) { + switch (sig) { + case SIGILL: + case SIGFPE: + case SIGSEGV: + case SIGBUS: + pr_err("User crash: signal %d," + " trap %ld, address 0x%lx\n", + sig, regs->faultnum, address); + show_regs(regs); + dump_mem((void __user *)address); + break; + default: + pr_err("User crash: signal %d, trap %ld\n", + sig, regs->faultnum); + break; + } + } +} diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c new file mode 100644 index 00000000..89529c9f --- /dev/null +++ b/arch/tile/kernel/single_step.c @@ -0,0 +1,768 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * A code-rewriter that enables instruction single-stepping. + * Derived from iLib's single-stepping code. + */ + +#ifndef __tilegx__ /* Hardware support for single step unavailable. */ + +/* These functions are only used on the TILE platform */ +#include <linux/slab.h> +#include <linux/thread_info.h> +#include <linux/uaccess.h> +#include <linux/mman.h> +#include <linux/types.h> +#include <linux/err.h> +#include <asm/cacheflush.h> +#include <asm/unaligned.h> +#include <arch/abi.h> +#include <arch/opcode.h> + +#define signExtend17(val) sign_extend((val), 17) +#define TILE_X1_MASK (0xffffffffULL << 31) + +int unaligned_printk; + +static int __init setup_unaligned_printk(char *str) +{ + long val; + if (strict_strtol(str, 0, &val) != 0) + return 0; + unaligned_printk = val; + pr_info("Printk for each unaligned data accesses is %s\n", + unaligned_printk ? "enabled" : "disabled"); + return 1; +} +__setup("unaligned_printk=", setup_unaligned_printk); + +unsigned int unaligned_fixup_count; + +enum mem_op { + MEMOP_NONE, + MEMOP_LOAD, + MEMOP_STORE, + MEMOP_LOAD_POSTINCR, + MEMOP_STORE_POSTINCR +}; + +static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset) +{ + tile_bundle_bits result; + + /* mask out the old offset */ + tile_bundle_bits mask = create_BrOff_X1(-1); + result = n & (~mask); + + /* or in the new offset */ + result |= create_BrOff_X1(offset); + + return result; +} + +static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src) +{ + tile_bundle_bits result; + tile_bundle_bits op; + + result = n & (~TILE_X1_MASK); + + op = create_Opcode_X1(SPECIAL_0_OPCODE_X1) | + create_RRROpcodeExtension_X1(OR_SPECIAL_0_OPCODE_X1) | + create_Dest_X1(dest) | + create_SrcB_X1(TREG_ZERO) | + create_SrcA_X1(src) ; + + result |= op; + return result; +} + +static inline tile_bundle_bits nop_X1(tile_bundle_bits n) +{ + return move_X1(n, TREG_ZERO, TREG_ZERO); +} + +static inline tile_bundle_bits addi_X1( + tile_bundle_bits n, int dest, int src, int imm) +{ + n &= ~TILE_X1_MASK; + + n |= (create_SrcA_X1(src) | + create_Dest_X1(dest) | + create_Imm8_X1(imm) | + create_S_X1(0) | + create_Opcode_X1(IMM_0_OPCODE_X1) | + create_ImmOpcodeExtension_X1(ADDI_IMM_0_OPCODE_X1)); + + return n; +} + +static tile_bundle_bits rewrite_load_store_unaligned( + struct single_step_state *state, + tile_bundle_bits bundle, + struct pt_regs *regs, + enum mem_op mem_op, + int size, int sign_ext) +{ + unsigned char __user *addr; + int val_reg, addr_reg, err, val; + + /* Get address and value registers */ + if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) { + addr_reg = get_SrcA_Y2(bundle); + val_reg = get_SrcBDest_Y2(bundle); + } else if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) { + addr_reg = get_SrcA_X1(bundle); + val_reg = get_Dest_X1(bundle); + } else { + addr_reg = get_SrcA_X1(bundle); + val_reg = get_SrcB_X1(bundle); + } + + /* + * If registers are not GPRs, don't try to handle it. + * + * FIXME: we could handle non-GPR loads by getting the real value + * from memory, writing it to the single step buffer, using a + * temp_reg to hold a pointer to that memory, then executing that + * instruction and resetting temp_reg. For non-GPR stores, it's a + * little trickier; we could use the single step buffer for that + * too, but we'd have to add some more state bits so that we could + * call back in here to copy that value to the real target. For + * now, we just handle the simple case. + */ + if ((val_reg >= PTREGS_NR_GPRS && + (val_reg != TREG_ZERO || + mem_op == MEMOP_LOAD || + mem_op == MEMOP_LOAD_POSTINCR)) || + addr_reg >= PTREGS_NR_GPRS) + return bundle; + + /* If it's aligned, don't handle it specially */ + addr = (void __user *)regs->regs[addr_reg]; + if (((unsigned long)addr % size) == 0) + return bundle; + + /* + * Return SIGBUS with the unaligned address, if requested. + * Note that we return SIGBUS even for completely invalid addresses + * as long as they are in fact unaligned; this matches what the + * tilepro hardware would be doing, if it could provide us with the + * actual bad address in an SPR, which it doesn't. + */ + if (unaligned_fixup == 0) { + siginfo_t info = { + .si_signo = SIGBUS, + .si_code = BUS_ADRALN, + .si_addr = addr + }; + trace_unhandled_signal("unaligned trap", regs, + (unsigned long)addr, SIGBUS); + force_sig_info(info.si_signo, &info, current); + return (tilepro_bundle_bits) 0; + } + +#ifndef __LITTLE_ENDIAN +# error We assume little-endian representation with copy_xx_user size 2 here +#endif + /* Handle unaligned load/store */ + if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) { + unsigned short val_16; + switch (size) { + case 2: + err = copy_from_user(&val_16, addr, sizeof(val_16)); + val = sign_ext ? ((short)val_16) : val_16; + break; + case 4: + err = copy_from_user(&val, addr, sizeof(val)); + break; + default: + BUG(); + } + if (err == 0) { + state->update_reg = val_reg; + state->update_value = val; + state->update = 1; + } + } else { + val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg]; + err = copy_to_user(addr, &val, size); + } + + if (err) { + siginfo_t info = { + .si_signo = SIGSEGV, + .si_code = SEGV_MAPERR, + .si_addr = addr + }; + trace_unhandled_signal("segfault", regs, + (unsigned long)addr, SIGSEGV); + force_sig_info(info.si_signo, &info, current); + return (tile_bundle_bits) 0; + } + + if (unaligned_printk || unaligned_fixup_count == 0) { + pr_info("Process %d/%s: PC %#lx: Fixup of" + " unaligned %s at %#lx.\n", + current->pid, current->comm, regs->pc, + (mem_op == MEMOP_LOAD || + mem_op == MEMOP_LOAD_POSTINCR) ? + "load" : "store", + (unsigned long)addr); + if (!unaligned_printk) { +#define P pr_info +P("\n"); +P("Unaligned fixups in the kernel will slow your application considerably.\n"); +P("To find them, write a \"1\" to /proc/sys/tile/unaligned_fixup/printk,\n"); +P("which requests the kernel show all unaligned fixups, or write a \"0\"\n"); +P("to /proc/sys/tile/unaligned_fixup/enabled, in which case each unaligned\n"); +P("access will become a SIGBUS you can debug. No further warnings will be\n"); +P("shown so as to avoid additional slowdown, but you can track the number\n"); +P("of fixups performed via /proc/sys/tile/unaligned_fixup/count.\n"); +P("Use the tile-addr2line command (see \"info addr2line\") to decode PCs.\n"); +P("\n"); +#undef P + } + } + ++unaligned_fixup_count; + + if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) { + /* Convert the Y2 instruction to a prefetch. */ + bundle &= ~(create_SrcBDest_Y2(-1) | + create_Opcode_Y2(-1)); + bundle |= (create_SrcBDest_Y2(TREG_ZERO) | + create_Opcode_Y2(LW_OPCODE_Y2)); + /* Replace the load postincr with an addi */ + } else if (mem_op == MEMOP_LOAD_POSTINCR) { + bundle = addi_X1(bundle, addr_reg, addr_reg, + get_Imm8_X1(bundle)); + /* Replace the store postincr with an addi */ + } else if (mem_op == MEMOP_STORE_POSTINCR) { + bundle = addi_X1(bundle, addr_reg, addr_reg, + get_Dest_Imm8_X1(bundle)); + } else { + /* Convert the X1 instruction to a nop. */ + bundle &= ~(create_Opcode_X1(-1) | + create_UnShOpcodeExtension_X1(-1) | + create_UnOpcodeExtension_X1(-1)); + bundle |= (create_Opcode_X1(SHUN_0_OPCODE_X1) | + create_UnShOpcodeExtension_X1( + UN_0_SHUN_0_OPCODE_X1) | + create_UnOpcodeExtension_X1( + NOP_UN_0_SHUN_0_OPCODE_X1)); + } + + return bundle; +} + +/* + * Called after execve() has started the new image. This allows us + * to reset the info state. Note that the the mmap'ed memory, if there + * was any, has already been unmapped by the exec. + */ +void single_step_execve(void) +{ + struct thread_info *ti = current_thread_info(); + kfree(ti->step_state); + ti->step_state = NULL; +} + +/** + * single_step_once() - entry point when single stepping has been triggered. + * @regs: The machine register state + * + * When we arrive at this routine via a trampoline, the single step + * engine copies the executing bundle to the single step buffer. + * If the instruction is a condition branch, then the target is + * reset to one past the next instruction. If the instruction + * sets the lr, then that is noted. If the instruction is a jump + * or call, then the new target pc is preserved and the current + * bundle instruction set to null. + * + * The necessary post-single-step rewriting information is stored in + * single_step_state-> We use data segment values because the + * stack will be rewound when we run the rewritten single-stepped + * instruction. + */ +void single_step_once(struct pt_regs *regs) +{ + extern tile_bundle_bits __single_step_ill_insn; + extern tile_bundle_bits __single_step_j_insn; + extern tile_bundle_bits __single_step_addli_insn; + extern tile_bundle_bits __single_step_auli_insn; + struct thread_info *info = (void *)current_thread_info(); + struct single_step_state *state = info->step_state; + int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP); + tile_bundle_bits __user *buffer, *pc; + tile_bundle_bits bundle; + int temp_reg; + int target_reg = TREG_LR; + int err; + enum mem_op mem_op = MEMOP_NONE; + int size = 0, sign_ext = 0; /* happy compiler */ + + asm( +" .pushsection .rodata.single_step\n" +" .align 8\n" +" .globl __single_step_ill_insn\n" +"__single_step_ill_insn:\n" +" ill\n" +" .globl __single_step_addli_insn\n" +"__single_step_addli_insn:\n" +" { nop; addli r0, zero, 0 }\n" +" .globl __single_step_auli_insn\n" +"__single_step_auli_insn:\n" +" { nop; auli r0, r0, 0 }\n" +" .globl __single_step_j_insn\n" +"__single_step_j_insn:\n" +" j .\n" +" .popsection\n" + ); + + /* + * Enable interrupts here to allow touching userspace and the like. + * The callers expect this: do_trap() already has interrupts + * enabled, and do_work_pending() handles functions that enable + * interrupts internally. + */ + local_irq_enable(); + + if (state == NULL) { + /* allocate a page of writable, executable memory */ + state = kmalloc(sizeof(struct single_step_state), GFP_KERNEL); + if (state == NULL) { + pr_err("Out of kernel memory trying to single-step\n"); + return; + } + + /* allocate a cache line of writable, executable memory */ + buffer = (void __user *) vm_mmap(NULL, 0, 64, + PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + 0); + + if (IS_ERR((void __force *)buffer)) { + kfree(state); + pr_err("Out of kernel pages trying to single-step\n"); + return; + } + + state->buffer = buffer; + state->is_enabled = 0; + + info->step_state = state; + + /* Validate our stored instruction patterns */ + BUG_ON(get_Opcode_X1(__single_step_addli_insn) != + ADDLI_OPCODE_X1); + BUG_ON(get_Opcode_X1(__single_step_auli_insn) != + AULI_OPCODE_X1); + BUG_ON(get_SrcA_X1(__single_step_addli_insn) != TREG_ZERO); + BUG_ON(get_Dest_X1(__single_step_addli_insn) != 0); + BUG_ON(get_JOffLong_X1(__single_step_j_insn) != 0); + } + + /* + * If we are returning from a syscall, we still haven't hit the + * "ill" for the swint1 instruction. So back the PC up to be + * pointing at the swint1, but we'll actually return directly + * back to the "ill" so we come back in via SIGILL as if we + * had "executed" the swint1 without ever being in kernel space. + */ + if (regs->faultnum == INT_SWINT_1) + regs->pc -= 8; + + pc = (tile_bundle_bits __user *)(regs->pc); + if (get_user(bundle, pc) != 0) { + pr_err("Couldn't read instruction at %p trying to step\n", pc); + return; + } + + /* We'll follow the instruction with 2 ill op bundles */ + state->orig_pc = (unsigned long)pc; + state->next_pc = (unsigned long)(pc + 1); + state->branch_next_pc = 0; + state->update = 0; + + if (!(bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK)) { + /* two wide, check for control flow */ + int opcode = get_Opcode_X1(bundle); + + switch (opcode) { + /* branches */ + case BRANCH_OPCODE_X1: + { + s32 offset = signExtend17(get_BrOff_X1(bundle)); + + /* + * For branches, we use a rewriting trick to let the + * hardware evaluate whether the branch is taken or + * untaken. We record the target offset and then + * rewrite the branch instruction to target 1 insn + * ahead if the branch is taken. We then follow the + * rewritten branch with two bundles, each containing + * an "ill" instruction. The supervisor examines the + * pc after the single step code is executed, and if + * the pc is the first ill instruction, then the + * branch (if any) was not taken. If the pc is the + * second ill instruction, then the branch was + * taken. The new pc is computed for these cases, and + * inserted into the registers for the thread. If + * the pc is the start of the single step code, then + * an exception or interrupt was taken before the + * code started processing, and the same "original" + * pc is restored. This change, different from the + * original implementation, has the advantage of + * executing a single user instruction. + */ + state->branch_next_pc = (unsigned long)(pc + offset); + + /* rewrite branch offset to go forward one bundle */ + bundle = set_BrOff_X1(bundle, 2); + } + break; + + /* jumps */ + case JALB_OPCODE_X1: + case JALF_OPCODE_X1: + state->update = 1; + state->next_pc = + (unsigned long) (pc + get_JOffLong_X1(bundle)); + break; + + case JB_OPCODE_X1: + case JF_OPCODE_X1: + state->next_pc = + (unsigned long) (pc + get_JOffLong_X1(bundle)); + bundle = nop_X1(bundle); + break; + + case SPECIAL_0_OPCODE_X1: + switch (get_RRROpcodeExtension_X1(bundle)) { + /* jump-register */ + case JALRP_SPECIAL_0_OPCODE_X1: + case JALR_SPECIAL_0_OPCODE_X1: + state->update = 1; + state->next_pc = + regs->regs[get_SrcA_X1(bundle)]; + break; + + case JRP_SPECIAL_0_OPCODE_X1: + case JR_SPECIAL_0_OPCODE_X1: + state->next_pc = + regs->regs[get_SrcA_X1(bundle)]; + bundle = nop_X1(bundle); + break; + + case LNK_SPECIAL_0_OPCODE_X1: + state->update = 1; + target_reg = get_Dest_X1(bundle); + break; + + /* stores */ + case SH_SPECIAL_0_OPCODE_X1: + mem_op = MEMOP_STORE; + size = 2; + break; + + case SW_SPECIAL_0_OPCODE_X1: + mem_op = MEMOP_STORE; + size = 4; + break; + } + break; + + /* loads and iret */ + case SHUN_0_OPCODE_X1: + if (get_UnShOpcodeExtension_X1(bundle) == + UN_0_SHUN_0_OPCODE_X1) { + switch (get_UnOpcodeExtension_X1(bundle)) { + case LH_UN_0_SHUN_0_OPCODE_X1: + mem_op = MEMOP_LOAD; + size = 2; + sign_ext = 1; + break; + + case LH_U_UN_0_SHUN_0_OPCODE_X1: + mem_op = MEMOP_LOAD; + size = 2; + sign_ext = 0; + break; + + case LW_UN_0_SHUN_0_OPCODE_X1: + mem_op = MEMOP_LOAD; + size = 4; + break; + + case IRET_UN_0_SHUN_0_OPCODE_X1: + { + unsigned long ex0_0 = __insn_mfspr( + SPR_EX_CONTEXT_0_0); + unsigned long ex0_1 = __insn_mfspr( + SPR_EX_CONTEXT_0_1); + /* + * Special-case it if we're iret'ing + * to PL0 again. Otherwise just let + * it run and it will generate SIGILL. + */ + if (EX1_PL(ex0_1) == USER_PL) { + state->next_pc = ex0_0; + regs->ex1 = ex0_1; + bundle = nop_X1(bundle); + } + } + } + } + break; + +#if CHIP_HAS_WH64() + /* postincrement operations */ + case IMM_0_OPCODE_X1: + switch (get_ImmOpcodeExtension_X1(bundle)) { + case LWADD_IMM_0_OPCODE_X1: + mem_op = MEMOP_LOAD_POSTINCR; + size = 4; + break; + + case LHADD_IMM_0_OPCODE_X1: + mem_op = MEMOP_LOAD_POSTINCR; + size = 2; + sign_ext = 1; + break; + + case LHADD_U_IMM_0_OPCODE_X1: + mem_op = MEMOP_LOAD_POSTINCR; + size = 2; + sign_ext = 0; + break; + + case SWADD_IMM_0_OPCODE_X1: + mem_op = MEMOP_STORE_POSTINCR; + size = 4; + break; + + case SHADD_IMM_0_OPCODE_X1: + mem_op = MEMOP_STORE_POSTINCR; + size = 2; + break; + + default: + break; + } + break; +#endif /* CHIP_HAS_WH64() */ + } + + if (state->update) { + /* + * Get an available register. We start with a + * bitmask with 1's for available registers. + * We truncate to the low 32 registers since + * we are guaranteed to have set bits in the + * low 32 bits, then use ctz to pick the first. + */ + u32 mask = (u32) ~((1ULL << get_Dest_X0(bundle)) | + (1ULL << get_SrcA_X0(bundle)) | + (1ULL << get_SrcB_X0(bundle)) | + (1ULL << target_reg)); + temp_reg = __builtin_ctz(mask); + state->update_reg = temp_reg; + state->update_value = regs->regs[temp_reg]; + regs->regs[temp_reg] = (unsigned long) (pc+1); + regs->flags |= PT_FLAGS_RESTORE_REGS; + bundle = move_X1(bundle, target_reg, temp_reg); + } + } else { + int opcode = get_Opcode_Y2(bundle); + + switch (opcode) { + /* loads */ + case LH_OPCODE_Y2: + mem_op = MEMOP_LOAD; + size = 2; + sign_ext = 1; + break; + + case LH_U_OPCODE_Y2: + mem_op = MEMOP_LOAD; + size = 2; + sign_ext = 0; + break; + + case LW_OPCODE_Y2: + mem_op = MEMOP_LOAD; + size = 4; + break; + + /* stores */ + case SH_OPCODE_Y2: + mem_op = MEMOP_STORE; + size = 2; + break; + + case SW_OPCODE_Y2: + mem_op = MEMOP_STORE; + size = 4; + break; + } + } + + /* + * Check if we need to rewrite an unaligned load/store. + * Returning zero is a special value meaning we need to SIGSEGV. + */ + if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) { + bundle = rewrite_load_store_unaligned(state, bundle, regs, + mem_op, size, sign_ext); + if (bundle == 0) + return; + } + + /* write the bundle to our execution area */ + buffer = state->buffer; + err = __put_user(bundle, buffer++); + + /* + * If we're really single-stepping, we take an INT_ILL after. + * If we're just handling an unaligned access, we can just + * jump directly back to where we were in user code. + */ + if (is_single_step) { + err |= __put_user(__single_step_ill_insn, buffer++); + err |= __put_user(__single_step_ill_insn, buffer++); + } else { + long delta; + + if (state->update) { + /* We have some state to update; do it inline */ + int ha16; + bundle = __single_step_addli_insn; + bundle |= create_Dest_X1(state->update_reg); + bundle |= create_Imm16_X1(state->update_value); + err |= __put_user(bundle, buffer++); + bundle = __single_step_auli_insn; + bundle |= create_Dest_X1(state->update_reg); + bundle |= create_SrcA_X1(state->update_reg); + ha16 = (state->update_value + 0x8000) >> 16; + bundle |= create_Imm16_X1(ha16); + err |= __put_user(bundle, buffer++); + state->update = 0; + } + + /* End with a jump back to the next instruction */ + delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) - + (unsigned long)buffer) >> + TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES; + bundle = __single_step_j_insn; + bundle |= create_JOffLong_X1(delta); + err |= __put_user(bundle, buffer++); + } + + if (err) { + pr_err("Fault when writing to single-step buffer\n"); + return; + } + + /* + * Flush the buffer. + * We do a local flush only, since this is a thread-specific buffer. + */ + __flush_icache_range((unsigned long)state->buffer, + (unsigned long)buffer); + + /* Indicate enabled */ + state->is_enabled = is_single_step; + regs->pc = (unsigned long)state->buffer; + + /* Fault immediately if we are coming back from a syscall. */ + if (regs->faultnum == INT_SWINT_1) + regs->pc += 8; +} + +#else +#include <linux/smp.h> +#include <linux/ptrace.h> +#include <arch/spr_def.h> + +static DEFINE_PER_CPU(unsigned long, ss_saved_pc); + + +/* + * Called directly on the occasion of an interrupt. + * + * If the process doesn't have single step set, then we use this as an + * opportunity to turn single step off. + * + * It has been mentioned that we could conditionally turn off single stepping + * on each entry into the kernel and rely on single_step_once to turn it + * on for the processes that matter (as we already do), but this + * implementation is somewhat more efficient in that we muck with registers + * once on a bum interrupt rather than on every entry into the kernel. + * + * If SINGLE_STEP_CONTROL_K has CANCELED set, then an interrupt occurred, + * so we have to run through this process again before we can say that an + * instruction has executed. + * + * swint will set CANCELED, but it's a legitimate instruction. Fortunately + * it changes the PC. If it hasn't changed, then we know that the interrupt + * wasn't generated by swint and we'll need to run this process again before + * we can say an instruction has executed. + * + * If either CANCELED == 0 or the PC's changed, we send out SIGTRAPs and get + * on with our lives. + */ + +void gx_singlestep_handle(struct pt_regs *regs, int fault_num) +{ + unsigned long *ss_pc = &__get_cpu_var(ss_saved_pc); + struct thread_info *info = (void *)current_thread_info(); + int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP); + unsigned long control = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K); + + if (is_single_step == 0) { + __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 0); + + } else if ((*ss_pc != regs->pc) || + (!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) { + + ptrace_notify(SIGTRAP); + control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK; + control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK; + __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control); + } +} + + +/* + * Called from need_singlestep. Set up the control registers and the enable + * register, then return back. + */ + +void single_step_once(struct pt_regs *regs) +{ + unsigned long *ss_pc = &__get_cpu_var(ss_saved_pc); + unsigned long control = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K); + + *ss_pc = regs->pc; + control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK; + control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK; + __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control); + __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 1 << USER_PL); +} + +void single_step_execve(void) +{ + /* Nothing */ +} + +#endif /* !__tilegx__ */ diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c new file mode 100644 index 00000000..91da0f72 --- /dev/null +++ b/arch/tile/kernel/smp.c @@ -0,0 +1,244 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE SMP support routines. + */ + +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/module.h> +#include <asm/cacheflush.h> + +HV_Topology smp_topology __write_once; +EXPORT_SYMBOL(smp_topology); + +#if CHIP_HAS_IPI() +static unsigned long __iomem *ipi_mappings[NR_CPUS]; +#endif + + +/* + * Top-level send_IPI*() functions to send messages to other cpus. + */ + +/* Set by smp_send_stop() to avoid recursive panics. */ +static int stopping_cpus; + +static void __send_IPI_many(HV_Recipient *recip, int nrecip, int tag) +{ + int sent = 0; + while (sent < nrecip) { + int rc = hv_send_message(recip, nrecip, + (HV_VirtAddr)&tag, sizeof(tag)); + if (rc < 0) { + if (!stopping_cpus) /* avoid recursive panic */ + panic("hv_send_message returned %d", rc); + break; + } + WARN_ONCE(rc == 0, "hv_send_message() returned zero\n"); + sent += rc; + } +} + +void send_IPI_single(int cpu, int tag) +{ + HV_Recipient recip = { + .y = cpu / smp_width, + .x = cpu % smp_width, + .state = HV_TO_BE_SENT + }; + __send_IPI_many(&recip, 1, tag); +} + +void send_IPI_many(const struct cpumask *mask, int tag) +{ + HV_Recipient recip[NR_CPUS]; + int cpu; + int nrecip = 0; + int my_cpu = smp_processor_id(); + for_each_cpu(cpu, mask) { + HV_Recipient *r; + BUG_ON(cpu == my_cpu); + r = &recip[nrecip++]; + r->y = cpu / smp_width; + r->x = cpu % smp_width; + r->state = HV_TO_BE_SENT; + } + __send_IPI_many(recip, nrecip, tag); +} + +void send_IPI_allbutself(int tag) +{ + struct cpumask mask; + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + send_IPI_many(&mask, tag); +} + +/* + * Functions related to starting/stopping cpus. + */ + +/* Handler to start the current cpu. */ +static void smp_start_cpu_interrupt(void) +{ + get_irq_regs()->pc = start_cpu_function_addr; +} + +/* Handler to stop the current cpu. */ +static void smp_stop_cpu_interrupt(void) +{ + set_cpu_online(smp_processor_id(), 0); + arch_local_irq_disable_all(); + for (;;) + asm("nap; nop"); +} + +/* This function calls the 'stop' function on all other CPUs in the system. */ +void smp_send_stop(void) +{ + stopping_cpus = 1; + send_IPI_allbutself(MSG_TAG_STOP_CPU); +} + +/* On panic, just wait; we may get an smp_send_stop() later on. */ +void panic_smp_self_stop(void) +{ + while (1) + asm("nap; nop"); +} + +/* + * Dispatch code called from hv_message_intr() for HV_MSG_TILE hv messages. + */ +void evaluate_message(int tag) +{ + switch (tag) { + case MSG_TAG_START_CPU: /* Start up a cpu */ + smp_start_cpu_interrupt(); + break; + + case MSG_TAG_STOP_CPU: /* Sent to shut down slave CPU's */ + smp_stop_cpu_interrupt(); + break; + + case MSG_TAG_CALL_FUNCTION_MANY: /* Call function on cpumask */ + generic_smp_call_function_interrupt(); + break; + + case MSG_TAG_CALL_FUNCTION_SINGLE: /* Call function on one other CPU */ + generic_smp_call_function_single_interrupt(); + break; + + default: + panic("Unknown IPI message tag %d", tag); + break; + } +} + + +/* + * flush_icache_range() code uses smp_call_function(). + */ + +struct ipi_flush { + unsigned long start; + unsigned long end; +}; + +static void ipi_flush_icache_range(void *info) +{ + struct ipi_flush *flush = (struct ipi_flush *) info; + __flush_icache_range(flush->start, flush->end); +} + +void flush_icache_range(unsigned long start, unsigned long end) +{ + struct ipi_flush flush = { start, end }; + preempt_disable(); + on_each_cpu(ipi_flush_icache_range, &flush, 1); + preempt_enable(); +} + + +/* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */ +static irqreturn_t handle_reschedule_ipi(int irq, void *token) +{ + __get_cpu_var(irq_stat).irq_resched_count++; + scheduler_ipi(); + + return IRQ_HANDLED; +} + +static struct irqaction resched_action = { + .handler = handle_reschedule_ipi, + .name = "resched", + .dev_id = handle_reschedule_ipi /* unique token */, +}; + +void __init ipi_init(void) +{ +#if CHIP_HAS_IPI() + int cpu; + /* Map IPI trigger MMIO addresses. */ + for_each_possible_cpu(cpu) { + HV_Coord tile; + HV_PTE pte; + unsigned long offset; + + tile.x = cpu_x(cpu); + tile.y = cpu_y(cpu); + if (hv_get_ipi_pte(tile, KERNEL_PL, &pte) != 0) + panic("Failed to initialize IPI for cpu %d\n", cpu); + + offset = hv_pte_get_pfn(pte) << PAGE_SHIFT; + ipi_mappings[cpu] = ioremap_prot(offset, PAGE_SIZE, pte); + } +#endif + + /* Bind handle_reschedule_ipi() to IRQ_RESCHEDULE. */ + tile_irq_activate(IRQ_RESCHEDULE, TILE_IRQ_PERCPU); + BUG_ON(setup_irq(IRQ_RESCHEDULE, &resched_action)); +} + +#if CHIP_HAS_IPI() + +void smp_send_reschedule(int cpu) +{ + WARN_ON(cpu_is_offline(cpu)); + + /* + * We just want to do an MMIO store. The traditional writeq() + * functions aren't really correct here, since they're always + * directed at the PCI shim. For now, just do a raw store, + * casting away the __iomem attribute. + */ + ((unsigned long __force *)ipi_mappings[cpu])[IRQ_RESCHEDULE] = 0; +} + +#else + +void smp_send_reschedule(int cpu) +{ + HV_Coord coord; + + WARN_ON(cpu_is_offline(cpu)); + + coord.y = cpu_y(cpu); + coord.x = cpu_x(cpu); + hv_trigger_ipi(coord, IRQ_RESCHEDULE); +} + +#endif /* CHIP_HAS_IPI() */ diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c new file mode 100644 index 00000000..172aef7d --- /dev/null +++ b/arch/tile/kernel/smpboot.c @@ -0,0 +1,279 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/kernel_stat.h> +#include <linux/bootmem.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/percpu.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/irq.h> +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> +#include <asm/sections.h> + +/* State of each CPU. */ +static DEFINE_PER_CPU(int, cpu_state) = { 0 }; + +/* The messaging code jumps to this pointer during boot-up */ +unsigned long start_cpu_function_addr; + +/* Called very early during startup to mark boot cpu as online */ +void __init smp_prepare_boot_cpu(void) +{ + int cpu = smp_processor_id(); + set_cpu_online(cpu, 1); + set_cpu_present(cpu, 1); + __get_cpu_var(cpu_state) = CPU_ONLINE; + + init_messaging(); +} + +static void start_secondary(void); + +/* + * Called at the top of init() to launch all the other CPUs. + * They run free to complete their initialization and then wait + * until they get an IPI from the boot cpu to come online. + */ +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + long rc; + int cpu, cpu_count; + int boot_cpu = smp_processor_id(); + + current_thread_info()->cpu = boot_cpu; + + /* + * Pin this task to the boot CPU while we bring up the others, + * just to make sure we don't uselessly migrate as they come up. + */ + rc = sched_setaffinity(current->pid, cpumask_of(boot_cpu)); + if (rc != 0) + pr_err("Couldn't set init affinity to boot cpu (%ld)\n", rc); + + /* Print information about disabled and dataplane cpus. */ + print_disabled_cpus(); + + /* + * Tell the messaging subsystem how to respond to the + * startup message. We use a level of indirection to avoid + * confusing the linker with the fact that the messaging + * subsystem is calling __init code. + */ + start_cpu_function_addr = (unsigned long) &online_secondary; + + /* Set up thread context for all new processors. */ + cpu_count = 1; + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + struct task_struct *idle; + + if (cpu == boot_cpu) + continue; + + if (!cpu_possible(cpu)) { + /* + * Make this processor do nothing on boot. + * Note that we don't give the boot_pc function + * a stack, so it has to be assembly code. + */ + per_cpu(boot_sp, cpu) = 0; + per_cpu(boot_pc, cpu) = (unsigned long) smp_nap; + continue; + } + + /* Create a new idle thread to run start_secondary() */ + idle = fork_idle(cpu); + if (IS_ERR(idle)) + panic("failed fork for CPU %d", cpu); + idle->thread.pc = (unsigned long) start_secondary; + + /* Make this thread the boot thread for this processor */ + per_cpu(boot_sp, cpu) = task_ksp0(idle); + per_cpu(boot_pc, cpu) = idle->thread.pc; + + ++cpu_count; + } + BUG_ON(cpu_count > (max_cpus ? max_cpus : 1)); + + /* Fire up the other tiles, if any */ + init_cpu_present(cpu_possible_mask); + if (cpumask_weight(cpu_present_mask) > 1) { + mb(); /* make sure all data is visible to new processors */ + hv_start_all_tiles(); + } +} + +static __initdata struct cpumask init_affinity; + +static __init int reset_init_affinity(void) +{ + long rc = sched_setaffinity(current->pid, &init_affinity); + if (rc != 0) + pr_warning("couldn't reset init affinity (%ld)\n", + rc); + return 0; +} +late_initcall(reset_init_affinity); + +static struct cpumask cpu_started __cpuinitdata; + +/* + * Activate a secondary processor. Very minimal; don't add anything + * to this path without knowing what you're doing, since SMP booting + * is pretty fragile. + */ +static void __cpuinit start_secondary(void) +{ + int cpuid = smp_processor_id(); + + /* Set our thread pointer appropriately. */ + set_my_cpu_offset(__per_cpu_offset[cpuid]); + + preempt_disable(); + + /* + * In large machines even this will slow us down, since we + * will be contending for for the printk spinlock. + */ + /* printk(KERN_DEBUG "Initializing CPU#%d\n", cpuid); */ + + /* Initialize the current asid for our first page table. */ + __get_cpu_var(current_asid) = min_asid; + + /* Set up this thread as another owner of the init_mm */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + if (current->mm) + BUG(); + enter_lazy_tlb(&init_mm, current); + + /* Allow hypervisor messages to be received */ + init_messaging(); + local_irq_enable(); + + /* Indicate that we're ready to come up. */ + /* Must not do this before we're ready to receive messages */ + if (cpumask_test_and_set_cpu(cpuid, &cpu_started)) { + pr_warning("CPU#%d already started!\n", cpuid); + for (;;) + local_irq_enable(); + } + + smp_nap(); +} + +/* + * Bring a secondary processor online. + */ +void __cpuinit online_secondary(void) +{ + /* + * low-memory mappings have been cleared, flush them from + * the local TLBs too. + */ + local_flush_tlb(); + + BUG_ON(in_interrupt()); + + /* This must be done before setting cpu_online_mask */ + wmb(); + + notify_cpu_starting(smp_processor_id()); + + /* + * We need to hold call_lock, so there is no inconsistency + * between the time smp_call_function() determines number of + * IPI recipients, and the time when the determination is made + * for which cpus receive the IPI. Holding this + * lock helps us to not include this cpu in a currently in progress + * smp_call_function(). + */ + ipi_call_lock(); + set_cpu_online(smp_processor_id(), 1); + ipi_call_unlock(); + __get_cpu_var(cpu_state) = CPU_ONLINE; + + /* Set up tile-specific state for this cpu. */ + setup_cpu(0); + + /* Set up tile-timer clock-event device on this cpu */ + setup_tile_timer(); + + preempt_enable(); + + cpu_idle(); +} + +int __cpuinit __cpu_up(unsigned int cpu) +{ + /* Wait 5s total for all CPUs for them to come online */ + static int timeout; + for (; !cpumask_test_cpu(cpu, &cpu_started); timeout++) { + if (timeout >= 50000) { + pr_info("skipping unresponsive cpu%d\n", cpu); + local_irq_enable(); + return -EIO; + } + udelay(100); + } + + local_irq_enable(); + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + + /* Unleash the CPU! */ + send_IPI_single(cpu, MSG_TAG_START_CPU); + while (!cpumask_test_cpu(cpu, cpu_online_mask)) + cpu_relax(); + return 0; +} + +static void panic_start_cpu(void) +{ + panic("Received a MSG_START_CPU IPI after boot finished."); +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ + int cpu, next, rc; + + /* Reset the response to a (now illegal) MSG_START_CPU IPI. */ + start_cpu_function_addr = (unsigned long) &panic_start_cpu; + + cpumask_copy(&init_affinity, cpu_online_mask); + + /* + * Pin ourselves to a single cpu in the initial affinity set + * so that kernel mappings for the rootfs are not in the dataplane, + * if set, and to avoid unnecessary migrating during bringup. + * Use the last cpu just in case the whole chip has been + * isolated from the scheduler, to keep init away from likely + * more useful user code. This also ensures that work scheduled + * via schedule_delayed_work() in the init routines will land + * on this cpu. + */ + for (cpu = cpumask_first(&init_affinity); + (next = cpumask_next(cpu, &init_affinity)) < nr_cpu_ids; + cpu = next) + ; + rc = sched_setaffinity(current->pid, cpumask_of(cpu)); + if (rc != 0) + pr_err("Couldn't set init affinity to cpu %d (%d)\n", cpu, rc); +} diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c new file mode 100644 index 00000000..b2f44c28 --- /dev/null +++ b/arch/tile/kernel/stack.c @@ -0,0 +1,491 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/pfn.h> +#include <linux/kallsyms.h> +#include <linux/stacktrace.h> +#include <linux/uaccess.h> +#include <linux/mmzone.h> +#include <linux/dcache.h> +#include <linux/fs.h> +#include <asm/backtrace.h> +#include <asm/page.h> +#include <asm/ucontext.h> +#include <asm/switch_to.h> +#include <asm/sigframe.h> +#include <asm/stack.h> +#include <arch/abi.h> +#include <arch/interrupts.h> + +#define KBT_ONGOING 0 /* Backtrace still ongoing */ +#define KBT_DONE 1 /* Backtrace cleanly completed */ +#define KBT_RUNNING 2 /* Can't run backtrace on a running task */ +#define KBT_LOOP 3 /* Backtrace entered a loop */ + +/* Is address on the specified kernel stack? */ +static int in_kernel_stack(struct KBacktraceIterator *kbt, unsigned long sp) +{ + ulong kstack_base = (ulong) kbt->task->stack; + if (kstack_base == 0) /* corrupt task pointer; just follow stack... */ + return sp >= PAGE_OFFSET && sp < (unsigned long)high_memory; + return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; +} + +/* Callback for backtracer; basically a glorified memcpy */ +static bool read_memory_func(void *result, unsigned long address, + unsigned int size, void *vkbt) +{ + int retval; + struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; + + if (address == 0) + return 0; + if (__kernel_text_address(address)) { + /* OK to read kernel code. */ + } else if (address >= PAGE_OFFSET) { + /* We only tolerate kernel-space reads of this task's stack */ + if (!in_kernel_stack(kbt, address)) + return 0; + } else if (!kbt->is_current) { + return 0; /* can't read from other user address spaces */ + } + pagefault_disable(); + retval = __copy_from_user_inatomic(result, + (void __user __force *)address, + size); + pagefault_enable(); + return (retval == 0); +} + +/* Return a pt_regs pointer for a valid fault handler frame */ +static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) +{ + const char *fault = NULL; /* happy compiler */ + char fault_buf[64]; + unsigned long sp = kbt->it.sp; + struct pt_regs *p; + + if (sp % sizeof(long) != 0) + return NULL; + if (!in_kernel_stack(kbt, sp)) + return NULL; + if (!in_kernel_stack(kbt, sp + C_ABI_SAVE_AREA_SIZE + PTREGS_SIZE-1)) + return NULL; + p = (struct pt_regs *)(sp + C_ABI_SAVE_AREA_SIZE); + if (p->faultnum == INT_SWINT_1 || p->faultnum == INT_SWINT_1_SIGRETURN) + fault = "syscall"; + else { + if (kbt->verbose) { /* else we aren't going to use it */ + snprintf(fault_buf, sizeof(fault_buf), + "interrupt %ld", p->faultnum); + fault = fault_buf; + } + } + if (EX1_PL(p->ex1) == KERNEL_PL && + __kernel_text_address(p->pc) && + in_kernel_stack(kbt, p->sp) && + p->sp >= sp) { + if (kbt->verbose) + pr_err(" <%s while in kernel mode>\n", fault); + } else if (EX1_PL(p->ex1) == USER_PL && + p->pc < PAGE_OFFSET && + p->sp < PAGE_OFFSET) { + if (kbt->verbose) + pr_err(" <%s while in user mode>\n", fault); + } else if (kbt->verbose) { + pr_err(" (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n", + p->pc, p->sp, p->ex1); + p = NULL; + } + if (!kbt->profile || (INT_MASK(p->faultnum) & QUEUED_INTERRUPTS) == 0) + return p; + return NULL; +} + +/* Is the pc pointing to a sigreturn trampoline? */ +static int is_sigreturn(unsigned long pc) +{ + return (pc == VDSO_BASE); +} + +/* Return a pt_regs pointer for a valid signal handler frame */ +static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt, + struct rt_sigframe* kframe) +{ + BacktraceIterator *b = &kbt->it; + + if (b->pc == VDSO_BASE && b->sp < PAGE_OFFSET && + b->sp % sizeof(long) == 0) { + int retval; + pagefault_disable(); + retval = __copy_from_user_inatomic( + kframe, (void __user __force *)b->sp, + sizeof(*kframe)); + pagefault_enable(); + if (retval != 0 || + (unsigned int)(kframe->info.si_signo) >= _NSIG) + return NULL; + if (kbt->verbose) { + pr_err(" <received signal %d>\n", + kframe->info.si_signo); + } + return (struct pt_regs *)&kframe->uc.uc_mcontext; + } + return NULL; +} + +static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt) +{ + return is_sigreturn(kbt->it.pc); +} + +static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt) +{ + struct pt_regs *p; + struct rt_sigframe kframe; + + p = valid_fault_handler(kbt); + if (p == NULL) + p = valid_sigframe(kbt, &kframe); + if (p == NULL) + return 0; + backtrace_init(&kbt->it, read_memory_func, kbt, + p->pc, p->lr, p->sp, p->regs[52]); + kbt->new_context = 1; + return 1; +} + +/* Find a frame that isn't a sigreturn, if there is one. */ +static int KBacktraceIterator_next_item_inclusive( + struct KBacktraceIterator *kbt) +{ + for (;;) { + do { + if (!KBacktraceIterator_is_sigreturn(kbt)) + return KBT_ONGOING; + } while (backtrace_next(&kbt->it)); + + if (!KBacktraceIterator_restart(kbt)) + return KBT_DONE; + } +} + +/* + * If the current sp is on a page different than what we recorded + * as the top-of-kernel-stack last time we context switched, we have + * probably blown the stack, and nothing is going to work out well. + * If we can at least get out a warning, that may help the debug, + * though we probably won't be able to backtrace into the code that + * actually did the recursive damage. + */ +static void validate_stack(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + unsigned long ksp0 = get_current_ksp0(); + unsigned long ksp0_base = ksp0 - THREAD_SIZE; + unsigned long sp = stack_pointer; + + if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) { + pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n" + " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", + cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); + } + + else if (sp < ksp0_base + sizeof(struct thread_info)) { + pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n" + " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", + cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); + } +} + +void KBacktraceIterator_init(struct KBacktraceIterator *kbt, + struct task_struct *t, struct pt_regs *regs) +{ + unsigned long pc, lr, sp, r52; + int is_current; + + /* + * Set up callback information. We grab the kernel stack base + * so we will allow reads of that address range. + */ + is_current = (t == NULL || t == current); + kbt->is_current = is_current; + if (is_current) + t = validate_current(); + kbt->task = t; + kbt->verbose = 0; /* override in caller if desired */ + kbt->profile = 0; /* override in caller if desired */ + kbt->end = KBT_ONGOING; + kbt->new_context = 1; + if (is_current) + validate_stack(regs); + + if (regs == NULL) { + if (is_current || t->state == TASK_RUNNING) { + /* Can't do this; we need registers */ + kbt->end = KBT_RUNNING; + return; + } + pc = get_switch_to_pc(); + lr = t->thread.pc; + sp = t->thread.ksp; + r52 = 0; + } else { + pc = regs->pc; + lr = regs->lr; + sp = regs->sp; + r52 = regs->regs[52]; + } + + backtrace_init(&kbt->it, read_memory_func, kbt, pc, lr, sp, r52); + kbt->end = KBacktraceIterator_next_item_inclusive(kbt); +} +EXPORT_SYMBOL(KBacktraceIterator_init); + +int KBacktraceIterator_end(struct KBacktraceIterator *kbt) +{ + return kbt->end != KBT_ONGOING; +} +EXPORT_SYMBOL(KBacktraceIterator_end); + +void KBacktraceIterator_next(struct KBacktraceIterator *kbt) +{ + unsigned long old_pc = kbt->it.pc, old_sp = kbt->it.sp; + kbt->new_context = 0; + if (!backtrace_next(&kbt->it) && !KBacktraceIterator_restart(kbt)) { + kbt->end = KBT_DONE; + return; + } + kbt->end = KBacktraceIterator_next_item_inclusive(kbt); + if (old_pc == kbt->it.pc && old_sp == kbt->it.sp) { + /* Trapped in a loop; give up. */ + kbt->end = KBT_LOOP; + } +} +EXPORT_SYMBOL(KBacktraceIterator_next); + +static void describe_addr(struct KBacktraceIterator *kbt, + unsigned long address, + int have_mmap_sem, char *buf, size_t bufsize) +{ + struct vm_area_struct *vma; + size_t namelen, remaining; + unsigned long size, offset, adjust; + char *p, *modname; + const char *name; + int rc; + + /* + * Look one byte back for every caller frame (i.e. those that + * aren't a new context) so we look up symbol data for the + * call itself, not the following instruction, which may be on + * a different line (or in a different function). + */ + adjust = !kbt->new_context; + address -= adjust; + + if (address >= PAGE_OFFSET) { + /* Handle kernel symbols. */ + BUG_ON(bufsize < KSYM_NAME_LEN); + name = kallsyms_lookup(address, &size, &offset, + &modname, buf); + if (name == NULL) { + buf[0] = '\0'; + return; + } + namelen = strlen(buf); + remaining = (bufsize - 1) - namelen; + p = buf + namelen; + rc = snprintf(p, remaining, "+%#lx/%#lx ", + offset + adjust, size); + if (modname && rc < remaining) + snprintf(p + rc, remaining - rc, "[%s] ", modname); + buf[bufsize-1] = '\0'; + return; + } + + /* If we don't have the mmap_sem, we can't show any more info. */ + buf[0] = '\0'; + if (!have_mmap_sem) + return; + + /* Find vma info. */ + vma = find_vma(kbt->task->mm, address); + if (vma == NULL || address < vma->vm_start) { + snprintf(buf, bufsize, "[unmapped address] "); + return; + } + + if (vma->vm_file) { + char *s; + p = d_path(&vma->vm_file->f_path, buf, bufsize); + if (IS_ERR(p)) + p = "?"; + s = strrchr(p, '/'); + if (s) + p = s+1; + } else { + p = "anon"; + } + + /* Generate a string description of the vma info. */ + namelen = strlen(p); + remaining = (bufsize - 1) - namelen; + memmove(buf, p, namelen); + snprintf(buf + namelen, remaining, "[%lx+%lx] ", + vma->vm_start, vma->vm_end - vma->vm_start); +} + +/* + * This method wraps the backtracer's more generic support. + * It is only invoked from the architecture-specific code; show_stack() + * and dump_stack() (in entry.S) are architecture-independent entry points. + */ +void tile_show_stack(struct KBacktraceIterator *kbt, int headers) +{ + int i; + int have_mmap_sem = 0; + + if (headers) { + /* + * Add a blank line since if we are called from panic(), + * then bust_spinlocks() spit out a space in front of us + * and it will mess up our KERN_ERR. + */ + pr_err("\n"); + pr_err("Starting stack dump of tid %d, pid %d (%s)" + " on cpu %d at cycle %lld\n", + kbt->task->pid, kbt->task->tgid, kbt->task->comm, + smp_processor_id(), get_cycles()); + } + kbt->verbose = 1; + i = 0; + for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) { + char namebuf[KSYM_NAME_LEN+100]; + unsigned long address = kbt->it.pc; + + /* Try to acquire the mmap_sem as we pass into userspace. */ + if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm) + have_mmap_sem = + down_read_trylock(&kbt->task->mm->mmap_sem); + + describe_addr(kbt, address, have_mmap_sem, + namebuf, sizeof(namebuf)); + + pr_err(" frame %d: 0x%lx %s(sp 0x%lx)\n", + i++, address, namebuf, (unsigned long)(kbt->it.sp)); + + if (i >= 100) { + pr_err("Stack dump truncated" + " (%d frames)\n", i); + break; + } + } + if (kbt->end == KBT_LOOP) + pr_err("Stack dump stopped; next frame identical to this one\n"); + if (headers) + pr_err("Stack dump complete\n"); + if (have_mmap_sem) + up_read(&kbt->task->mm->mmap_sem); +} +EXPORT_SYMBOL(tile_show_stack); + + +/* This is called from show_regs() and _dump_stack() */ +void dump_stack_regs(struct pt_regs *regs) +{ + struct KBacktraceIterator kbt; + KBacktraceIterator_init(&kbt, NULL, regs); + tile_show_stack(&kbt, 1); +} +EXPORT_SYMBOL(dump_stack_regs); + +static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs, + ulong pc, ulong lr, ulong sp, ulong r52) +{ + memset(regs, 0, sizeof(struct pt_regs)); + regs->pc = pc; + regs->lr = lr; + regs->sp = sp; + regs->regs[52] = r52; + return regs; +} + +/* This is called from dump_stack() and just converts to pt_regs */ +void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52) +{ + struct pt_regs regs; + dump_stack_regs(regs_to_pt_regs(®s, pc, lr, sp, r52)); +} + +/* This is called from KBacktraceIterator_init_current() */ +void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc, + ulong lr, ulong sp, ulong r52) +{ + struct pt_regs regs; + KBacktraceIterator_init(kbt, NULL, + regs_to_pt_regs(®s, pc, lr, sp, r52)); +} + +/* This is called only from kernel/sched.c, with esp == NULL */ +void show_stack(struct task_struct *task, unsigned long *esp) +{ + struct KBacktraceIterator kbt; + if (task == NULL || task == current) + KBacktraceIterator_init_current(&kbt); + else + KBacktraceIterator_init(&kbt, task, NULL); + tile_show_stack(&kbt, 0); +} + +#ifdef CONFIG_STACKTRACE + +/* Support generic Linux stack API too */ + +void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace) +{ + struct KBacktraceIterator kbt; + int skip = trace->skip; + int i = 0; + + if (task == NULL || task == current) + KBacktraceIterator_init_current(&kbt); + else + KBacktraceIterator_init(&kbt, task, NULL); + for (; !KBacktraceIterator_end(&kbt); KBacktraceIterator_next(&kbt)) { + if (skip) { + --skip; + continue; + } + if (i >= trace->max_entries || kbt.it.pc < PAGE_OFFSET) + break; + trace->entries[i++] = kbt.it.pc; + } + trace->nr_entries = i; +} +EXPORT_SYMBOL(save_stack_trace_tsk); + +void save_stack_trace(struct stack_trace *trace) +{ + save_stack_trace_tsk(NULL, trace); +} + +#endif + +/* In entry.S */ +EXPORT_SYMBOL(KBacktraceIterator_init_current); diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c new file mode 100644 index 00000000..cb44ba7c --- /dev/null +++ b/arch/tile/kernel/sys.c @@ -0,0 +1,119 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/TILE + * platform. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/syscalls.h> +#include <linux/mman.h> +#include <linux/file.h> +#include <linux/mempolicy.h> +#include <linux/binfmts.h> +#include <linux/fs.h> +#include <linux/compat.h> +#include <linux/uaccess.h> +#include <linux/signal.h> +#include <asm/syscalls.h> +#include <asm/pgtable.h> +#include <asm/homecache.h> +#include <arch/chip.h> + +SYSCALL_DEFINE0(flush_cache) +{ + homecache_evict(cpumask_of(smp_processor_id())); + return 0; +} + +/* + * Syscalls that pass 64-bit values on 32-bit systems normally + * pass them as (low,high) word packed into the immediately adjacent + * registers. If the low word naturally falls on an even register, + * our ABI makes it work correctly; if not, we adjust it here. + * Handling it here means we don't have to fix uclibc AND glibc AND + * any other standard libcs we want to support. + */ + +#if !defined(__tilegx__) || defined(CONFIG_COMPAT) + +ssize_t sys32_readahead(int fd, u32 offset_lo, u32 offset_hi, u32 count) +{ + return sys_readahead(fd, ((loff_t)offset_hi << 32) | offset_lo, count); +} + +int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi, + u32 len_lo, u32 len_hi, int advice) +{ + return sys_fadvise64_64(fd, ((loff_t)offset_hi << 32) | offset_lo, + ((loff_t)len_hi << 32) | len_lo, advice); +} + +#endif /* 32-bit syscall wrappers */ + +/* Note: used by the compat code even in 64-bit Linux. */ +SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, off_4k) +{ +#define PAGE_ADJUST (PAGE_SHIFT - 12) + if (off_4k & ((1 << PAGE_ADJUST) - 1)) + return -EINVAL; + return sys_mmap_pgoff(addr, len, prot, flags, fd, + off_4k >> PAGE_ADJUST); +} + +#ifdef __tilegx__ +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, off_t, offset) +{ + if (offset & ((1 << PAGE_SHIFT) - 1)) + return -EINVAL; + return sys_mmap_pgoff(addr, len, prot, flags, fd, + offset >> PAGE_SHIFT); +} +#endif + + +/* Provide the actual syscall number to call mapping. */ +#undef __SYSCALL +#define __SYSCALL(nr, call) [nr] = (call), + +#ifndef __tilegx__ +/* See comments at the top of the file. */ +#define sys_fadvise64_64 sys32_fadvise64_64 +#define sys_readahead sys32_readahead +#endif + +/* Call the trampolines to manage pt_regs where necessary. */ +#define sys_execve _sys_execve +#define sys_sigaltstack _sys_sigaltstack +#define sys_rt_sigreturn _sys_rt_sigreturn +#define sys_clone _sys_clone +#ifndef __tilegx__ +#define sys_cmpxchg_badaddr _sys_cmpxchg_badaddr +#endif + +/* + * Note that we can't include <linux/unistd.h> here since the header + * guard will defeat us; <asm/unistd.h> checks for __SYSCALL as well. + */ +void *sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls-1] = sys_ni_syscall, +#include <asm/unistd.h> +}; diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c new file mode 100644 index 00000000..71ae728e --- /dev/null +++ b/arch/tile/kernel/sysfs.c @@ -0,0 +1,185 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * /sys entry support. + */ + +#include <linux/device.h> +#include <linux/cpu.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/stat.h> +#include <hv/hypervisor.h> + +/* Return a string queried from the hypervisor, truncated to page size. */ +static ssize_t get_hv_confstr(char *page, int query) +{ + ssize_t n = hv_confstr(query, (unsigned long)page, PAGE_SIZE - 1); + n = n < 0 ? 0 : min(n, (ssize_t)PAGE_SIZE - 1) - 1; + if (n) + page[n++] = '\n'; + page[n] = '\0'; + return n; +} + +static ssize_t chip_width_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return sprintf(page, "%u\n", smp_width); +} +static DEVICE_ATTR(chip_width, 0444, chip_width_show, NULL); + +static ssize_t chip_height_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return sprintf(page, "%u\n", smp_height); +} +static DEVICE_ATTR(chip_height, 0444, chip_height_show, NULL); + +static ssize_t chip_serial_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return get_hv_confstr(page, HV_CONFSTR_CHIP_SERIAL_NUM); +} +static DEVICE_ATTR(chip_serial, 0444, chip_serial_show, NULL); + +static ssize_t chip_revision_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return get_hv_confstr(page, HV_CONFSTR_CHIP_REV); +} +static DEVICE_ATTR(chip_revision, 0444, chip_revision_show, NULL); + + +static ssize_t type_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return sprintf(page, "tilera\n"); +} +static DEVICE_ATTR(type, 0444, type_show, NULL); + +#define HV_CONF_ATTR(name, conf) \ + static ssize_t name ## _show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ + { \ + return get_hv_confstr(page, conf); \ + } \ + static DEVICE_ATTR(name, 0444, name ## _show, NULL); + +HV_CONF_ATTR(version, HV_CONFSTR_HV_SW_VER) +HV_CONF_ATTR(config_version, HV_CONFSTR_HV_CONFIG_VER) + +HV_CONF_ATTR(board_part, HV_CONFSTR_BOARD_PART_NUM) +HV_CONF_ATTR(board_serial, HV_CONFSTR_BOARD_SERIAL_NUM) +HV_CONF_ATTR(board_revision, HV_CONFSTR_BOARD_REV) +HV_CONF_ATTR(board_description, HV_CONFSTR_BOARD_DESC) +HV_CONF_ATTR(mezz_part, HV_CONFSTR_MEZZ_PART_NUM) +HV_CONF_ATTR(mezz_serial, HV_CONFSTR_MEZZ_SERIAL_NUM) +HV_CONF_ATTR(mezz_revision, HV_CONFSTR_MEZZ_REV) +HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC) +HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL) + +static struct attribute *board_attrs[] = { + &dev_attr_board_part.attr, + &dev_attr_board_serial.attr, + &dev_attr_board_revision.attr, + &dev_attr_board_description.attr, + &dev_attr_mezz_part.attr, + &dev_attr_mezz_serial.attr, + &dev_attr_mezz_revision.attr, + &dev_attr_mezz_description.attr, + &dev_attr_switch_control.attr, + NULL +}; + +static struct attribute_group board_attr_group = { + .name = "board", + .attrs = board_attrs, +}; + + +static struct bin_attribute hvconfig_bin; + +static ssize_t +hvconfig_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + static size_t size; + + /* Lazily learn the true size (minus the trailing NUL). */ + if (size == 0) + size = hv_confstr(HV_CONFSTR_HV_CONFIG, 0, 0) - 1; + + /* Check and adjust input parameters. */ + if (off > size) + return -EINVAL; + if (count > size - off) + count = size - off; + + if (count) { + /* Get a copy of the hvc and copy out the relevant portion. */ + char *hvc; + + size = off + count; + hvc = kmalloc(size, GFP_KERNEL); + if (hvc == NULL) + return -ENOMEM; + hv_confstr(HV_CONFSTR_HV_CONFIG, (unsigned long)hvc, size); + memcpy(buf, hvc + off, count); + kfree(hvc); + } + + return count; +} + +static int __init create_sysfs_entries(void) +{ + int err = 0; + +#define create_cpu_attr(name) \ + if (!err) \ + err = device_create_file(cpu_subsys.dev_root, &dev_attr_##name); + create_cpu_attr(chip_width); + create_cpu_attr(chip_height); + create_cpu_attr(chip_serial); + create_cpu_attr(chip_revision); + +#define create_hv_attr(name) \ + if (!err) \ + err = sysfs_create_file(hypervisor_kobj, &dev_attr_##name.attr); + create_hv_attr(type); + create_hv_attr(version); + create_hv_attr(config_version); + + if (!err) + err = sysfs_create_group(hypervisor_kobj, &board_attr_group); + + if (!err) { + sysfs_bin_attr_init(&hvconfig_bin); + hvconfig_bin.attr.name = "hvconfig"; + hvconfig_bin.attr.mode = S_IRUGO; + hvconfig_bin.read = hvconfig_bin_read; + hvconfig_bin.size = PAGE_SIZE; + err = sysfs_create_bin_file(hypervisor_kobj, &hvconfig_bin); + } + + return err; +} +subsys_initcall(create_sysfs_entries); diff --git a/arch/tile/kernel/tile-desc_32.c b/arch/tile/kernel/tile-desc_32.c new file mode 100644 index 00000000..dd7bd1d8 --- /dev/null +++ b/arch/tile/kernel/tile-desc_32.c @@ -0,0 +1,2605 @@ +/* TILEPro opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * + * + * + */ + +/* This define is BFD_RELOC_##x for real bfd, or -1 for everyone else. */ +#define BFD_RELOC(x) -1 + +/* Special registers. */ +#define TREG_LR 55 +#define TREG_SN 56 +#define TREG_ZERO 63 + +#include <linux/stddef.h> +#include <asm/tile-desc.h> + +const struct tilepro_opcode tilepro_opcodes[395] = +{ + { "bpt", TILEPRO_OPC_BPT, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "info", TILEPRO_OPC_INFO, 0xf, 1, TREG_ZERO, 1, + { { 0 }, { 1 }, { 2 }, { 3 }, { 0, } }, + }, + { "infol", TILEPRO_OPC_INFOL, 0x3, 1, TREG_ZERO, 1, + { { 4 }, { 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "j", TILEPRO_OPC_J, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 6 }, { 0, }, { 0, }, { 0, } }, + }, + { "jal", TILEPRO_OPC_JAL, 0x2, 1, TREG_LR, 1, + { { 0, }, { 6 }, { 0, }, { 0, }, { 0, } }, + }, + { "move", TILEPRO_OPC_MOVE, 0xf, 2, TREG_ZERO, 1, + { { 7, 8 }, { 9, 10 }, { 11, 12 }, { 13, 14 }, { 0, } }, + }, + { "move.sn", TILEPRO_OPC_MOVE_SN, 0x3, 2, TREG_SN, 1, + { { 7, 8 }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "movei", TILEPRO_OPC_MOVEI, 0xf, 2, TREG_ZERO, 1, + { { 7, 0 }, { 9, 1 }, { 11, 2 }, { 13, 3 }, { 0, } }, + }, + { "movei.sn", TILEPRO_OPC_MOVEI_SN, 0x3, 2, TREG_SN, 1, + { { 7, 0 }, { 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "moveli", TILEPRO_OPC_MOVELI, 0x3, 2, TREG_ZERO, 1, + { { 7, 4 }, { 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "moveli.sn", TILEPRO_OPC_MOVELI_SN, 0x3, 2, TREG_SN, 1, + { { 7, 4 }, { 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "movelis", TILEPRO_OPC_MOVELIS, 0x3, 2, TREG_SN, 1, + { { 7, 4 }, { 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch", TILEPRO_OPC_PREFETCH, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 15 } }, + }, + { "raise", TILEPRO_OPC_RAISE, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "add", TILEPRO_OPC_ADD, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "add.sn", TILEPRO_OPC_ADD_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addb", TILEPRO_OPC_ADDB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addb.sn", TILEPRO_OPC_ADDB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addbs_u", TILEPRO_OPC_ADDBS_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addbs_u.sn", TILEPRO_OPC_ADDBS_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addh", TILEPRO_OPC_ADDH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addh.sn", TILEPRO_OPC_ADDH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addhs", TILEPRO_OPC_ADDHS, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addhs.sn", TILEPRO_OPC_ADDHS_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "addi", TILEPRO_OPC_ADDI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, + }, + { "addi.sn", TILEPRO_OPC_ADDI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "addib", TILEPRO_OPC_ADDIB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "addib.sn", TILEPRO_OPC_ADDIB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "addih", TILEPRO_OPC_ADDIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "addih.sn", TILEPRO_OPC_ADDIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "addli", TILEPRO_OPC_ADDLI, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "addli.sn", TILEPRO_OPC_ADDLI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "addlis", TILEPRO_OPC_ADDLIS, 0x3, 3, TREG_SN, 1, + { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "adds", TILEPRO_OPC_ADDS, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "adds.sn", TILEPRO_OPC_ADDS_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "adiffb_u", TILEPRO_OPC_ADIFFB_U, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "adiffb_u.sn", TILEPRO_OPC_ADIFFB_U_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "adiffh", TILEPRO_OPC_ADIFFH, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "adiffh.sn", TILEPRO_OPC_ADIFFH_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "and", TILEPRO_OPC_AND, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "and.sn", TILEPRO_OPC_AND_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "andi", TILEPRO_OPC_ANDI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, + }, + { "andi.sn", TILEPRO_OPC_ANDI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "auli", TILEPRO_OPC_AULI, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "avgb_u", TILEPRO_OPC_AVGB_U, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "avgb_u.sn", TILEPRO_OPC_AVGB_U_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "avgh", TILEPRO_OPC_AVGH, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "avgh.sn", TILEPRO_OPC_AVGH_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "bbns", TILEPRO_OPC_BBNS, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbns.sn", TILEPRO_OPC_BBNS_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbnst", TILEPRO_OPC_BBNST, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbnst.sn", TILEPRO_OPC_BBNST_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbs", TILEPRO_OPC_BBS, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbs.sn", TILEPRO_OPC_BBS_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbst", TILEPRO_OPC_BBST, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bbst.sn", TILEPRO_OPC_BBST_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgez", TILEPRO_OPC_BGEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgez.sn", TILEPRO_OPC_BGEZ_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgezt", TILEPRO_OPC_BGEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgezt.sn", TILEPRO_OPC_BGEZT_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgz", TILEPRO_OPC_BGZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgz.sn", TILEPRO_OPC_BGZ_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgzt", TILEPRO_OPC_BGZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgzt.sn", TILEPRO_OPC_BGZT_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bitx", TILEPRO_OPC_BITX, 0x5, 2, TREG_ZERO, 1, + { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, + }, + { "bitx.sn", TILEPRO_OPC_BITX_SN, 0x1, 2, TREG_SN, 1, + { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "blez", TILEPRO_OPC_BLEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blez.sn", TILEPRO_OPC_BLEZ_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blezt", TILEPRO_OPC_BLEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blezt.sn", TILEPRO_OPC_BLEZT_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blz", TILEPRO_OPC_BLZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blz.sn", TILEPRO_OPC_BLZ_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blzt", TILEPRO_OPC_BLZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blzt.sn", TILEPRO_OPC_BLZT_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnz", TILEPRO_OPC_BNZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnz.sn", TILEPRO_OPC_BNZ_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnzt", TILEPRO_OPC_BNZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnzt.sn", TILEPRO_OPC_BNZT_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bytex", TILEPRO_OPC_BYTEX, 0x5, 2, TREG_ZERO, 1, + { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, + }, + { "bytex.sn", TILEPRO_OPC_BYTEX_SN, 0x1, 2, TREG_SN, 1, + { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "bz", TILEPRO_OPC_BZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bz.sn", TILEPRO_OPC_BZ_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bzt", TILEPRO_OPC_BZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bzt.sn", TILEPRO_OPC_BZT_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "clz", TILEPRO_OPC_CLZ, 0x5, 2, TREG_ZERO, 1, + { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, + }, + { "clz.sn", TILEPRO_OPC_CLZ_SN, 0x1, 2, TREG_SN, 1, + { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_32", TILEPRO_OPC_CRC32_32, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_32.sn", TILEPRO_OPC_CRC32_32_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_8", TILEPRO_OPC_CRC32_8, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_8.sn", TILEPRO_OPC_CRC32_8_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "ctz", TILEPRO_OPC_CTZ, 0x5, 2, TREG_ZERO, 1, + { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, + }, + { "ctz.sn", TILEPRO_OPC_CTZ_SN, 0x1, 2, TREG_SN, 1, + { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "drain", TILEPRO_OPC_DRAIN, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "dtlbpr", TILEPRO_OPC_DTLBPR, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "dword_align", TILEPRO_OPC_DWORD_ALIGN, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "dword_align.sn", TILEPRO_OPC_DWORD_ALIGN_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "finv", TILEPRO_OPC_FINV, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "flush", TILEPRO_OPC_FLUSH, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "fnop", TILEPRO_OPC_FNOP, 0xf, 0, TREG_ZERO, 1, + { { }, { }, { }, { }, { 0, } }, + }, + { "icoh", TILEPRO_OPC_ICOH, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "ill", TILEPRO_OPC_ILL, 0xa, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { }, { 0, } }, + }, + { "inthb", TILEPRO_OPC_INTHB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "inthb.sn", TILEPRO_OPC_INTHB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "inthh", TILEPRO_OPC_INTHH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "inthh.sn", TILEPRO_OPC_INTHH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "intlb", TILEPRO_OPC_INTLB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "intlb.sn", TILEPRO_OPC_INTLB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "intlh", TILEPRO_OPC_INTLH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "intlh.sn", TILEPRO_OPC_INTLH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "inv", TILEPRO_OPC_INV, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "iret", TILEPRO_OPC_IRET, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "jalb", TILEPRO_OPC_JALB, 0x2, 1, TREG_LR, 1, + { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } }, + }, + { "jalf", TILEPRO_OPC_JALF, 0x2, 1, TREG_LR, 1, + { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } }, + }, + { "jalr", TILEPRO_OPC_JALR, 0x2, 1, TREG_LR, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "jalrp", TILEPRO_OPC_JALRP, 0x2, 1, TREG_LR, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "jb", TILEPRO_OPC_JB, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } }, + }, + { "jf", TILEPRO_OPC_JF, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } }, + }, + { "jr", TILEPRO_OPC_JR, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "jrp", TILEPRO_OPC_JRP, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lb", TILEPRO_OPC_LB, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, + }, + { "lb.sn", TILEPRO_OPC_LB_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lb_u", TILEPRO_OPC_LB_U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, + }, + { "lb_u.sn", TILEPRO_OPC_LB_U_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lbadd", TILEPRO_OPC_LBADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lbadd.sn", TILEPRO_OPC_LBADD_SN, 0x2, 3, TREG_SN, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lbadd_u", TILEPRO_OPC_LBADD_U, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lbadd_u.sn", TILEPRO_OPC_LBADD_U_SN, 0x2, 3, TREG_SN, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lh", TILEPRO_OPC_LH, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, + }, + { "lh.sn", TILEPRO_OPC_LH_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lh_u", TILEPRO_OPC_LH_U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, + }, + { "lh_u.sn", TILEPRO_OPC_LH_U_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lhadd", TILEPRO_OPC_LHADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lhadd.sn", TILEPRO_OPC_LHADD_SN, 0x2, 3, TREG_SN, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lhadd_u", TILEPRO_OPC_LHADD_U, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lhadd_u.sn", TILEPRO_OPC_LHADD_U_SN, 0x2, 3, TREG_SN, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lnk", TILEPRO_OPC_LNK, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "lnk.sn", TILEPRO_OPC_LNK_SN, 0x2, 1, TREG_SN, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "lw", TILEPRO_OPC_LW, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, + }, + { "lw.sn", TILEPRO_OPC_LW_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lw_na", TILEPRO_OPC_LW_NA, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lw_na.sn", TILEPRO_OPC_LW_NA_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "lwadd", TILEPRO_OPC_LWADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lwadd.sn", TILEPRO_OPC_LWADD_SN, 0x2, 3, TREG_SN, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lwadd_na", TILEPRO_OPC_LWADD_NA, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lwadd_na.sn", TILEPRO_OPC_LWADD_NA_SN, 0x2, 3, TREG_SN, 1, + { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxb_u", TILEPRO_OPC_MAXB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxb_u.sn", TILEPRO_OPC_MAXB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxh", TILEPRO_OPC_MAXH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxh.sn", TILEPRO_OPC_MAXH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxib_u", TILEPRO_OPC_MAXIB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxib_u.sn", TILEPRO_OPC_MAXIB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxih", TILEPRO_OPC_MAXIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "maxih.sn", TILEPRO_OPC_MAXIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "mf", TILEPRO_OPC_MF, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "mfspr", TILEPRO_OPC_MFSPR, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 25 }, { 0, }, { 0, }, { 0, } }, + }, + { "minb_u", TILEPRO_OPC_MINB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "minb_u.sn", TILEPRO_OPC_MINB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "minh", TILEPRO_OPC_MINH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "minh.sn", TILEPRO_OPC_MINH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "minib_u", TILEPRO_OPC_MINIB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "minib_u.sn", TILEPRO_OPC_MINIB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "minih", TILEPRO_OPC_MINIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "minih.sn", TILEPRO_OPC_MINIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "mm", TILEPRO_OPC_MM, 0x3, 5, TREG_ZERO, 1, + { { 7, 8, 16, 26, 27 }, { 9, 10, 17, 28, 29 }, { 0, }, { 0, }, { 0, } }, + }, + { "mnz", TILEPRO_OPC_MNZ, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "mnz.sn", TILEPRO_OPC_MNZ_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mnzb", TILEPRO_OPC_MNZB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mnzb.sn", TILEPRO_OPC_MNZB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mnzh", TILEPRO_OPC_MNZH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mnzh.sn", TILEPRO_OPC_MNZH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mtspr", TILEPRO_OPC_MTSPR, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 30, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhh_ss", TILEPRO_OPC_MULHH_SS, 0x5, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulhh_ss.sn", TILEPRO_OPC_MULHH_SS_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhh_su", TILEPRO_OPC_MULHH_SU, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhh_su.sn", TILEPRO_OPC_MULHH_SU_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhh_uu", TILEPRO_OPC_MULHH_UU, 0x5, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulhh_uu.sn", TILEPRO_OPC_MULHH_UU_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhha_ss", TILEPRO_OPC_MULHHA_SS, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulhha_ss.sn", TILEPRO_OPC_MULHHA_SS_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhha_su", TILEPRO_OPC_MULHHA_SU, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhha_su.sn", TILEPRO_OPC_MULHHA_SU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhha_uu", TILEPRO_OPC_MULHHA_UU, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulhha_uu.sn", TILEPRO_OPC_MULHHA_UU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhhsa_uu", TILEPRO_OPC_MULHHSA_UU, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhhsa_uu.sn", TILEPRO_OPC_MULHHSA_UU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_ss", TILEPRO_OPC_MULHL_SS, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_ss.sn", TILEPRO_OPC_MULHL_SS_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_su", TILEPRO_OPC_MULHL_SU, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_su.sn", TILEPRO_OPC_MULHL_SU_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_us", TILEPRO_OPC_MULHL_US, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_us.sn", TILEPRO_OPC_MULHL_US_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_uu", TILEPRO_OPC_MULHL_UU, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhl_uu.sn", TILEPRO_OPC_MULHL_UU_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_ss", TILEPRO_OPC_MULHLA_SS, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_ss.sn", TILEPRO_OPC_MULHLA_SS_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_su", TILEPRO_OPC_MULHLA_SU, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_su.sn", TILEPRO_OPC_MULHLA_SU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_us", TILEPRO_OPC_MULHLA_US, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_us.sn", TILEPRO_OPC_MULHLA_US_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_uu", TILEPRO_OPC_MULHLA_UU, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhla_uu.sn", TILEPRO_OPC_MULHLA_UU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulhlsa_uu", TILEPRO_OPC_MULHLSA_UU, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulhlsa_uu.sn", TILEPRO_OPC_MULHLSA_UU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulll_ss", TILEPRO_OPC_MULLL_SS, 0x5, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulll_ss.sn", TILEPRO_OPC_MULLL_SS_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulll_su", TILEPRO_OPC_MULLL_SU, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulll_su.sn", TILEPRO_OPC_MULLL_SU_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulll_uu", TILEPRO_OPC_MULLL_UU, 0x5, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } }, + }, + { "mulll_uu.sn", TILEPRO_OPC_MULLL_UU_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mullla_ss", TILEPRO_OPC_MULLLA_SS, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mullla_ss.sn", TILEPRO_OPC_MULLLA_SS_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mullla_su", TILEPRO_OPC_MULLLA_SU, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mullla_su.sn", TILEPRO_OPC_MULLLA_SU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mullla_uu", TILEPRO_OPC_MULLLA_UU, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mullla_uu.sn", TILEPRO_OPC_MULLLA_UU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulllsa_uu", TILEPRO_OPC_MULLLSA_UU, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mulllsa_uu.sn", TILEPRO_OPC_MULLLSA_UU_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mvnz", TILEPRO_OPC_MVNZ, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mvnz.sn", TILEPRO_OPC_MVNZ_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mvz", TILEPRO_OPC_MVZ, 0x5, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, + }, + { "mvz.sn", TILEPRO_OPC_MVZ_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mz", TILEPRO_OPC_MZ, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "mz.sn", TILEPRO_OPC_MZ_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mzb", TILEPRO_OPC_MZB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mzb.sn", TILEPRO_OPC_MZB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mzh", TILEPRO_OPC_MZH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "mzh.sn", TILEPRO_OPC_MZH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "nap", TILEPRO_OPC_NAP, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "nop", TILEPRO_OPC_NOP, 0xf, 0, TREG_ZERO, 1, + { { }, { }, { }, { }, { 0, } }, + }, + { "nor", TILEPRO_OPC_NOR, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "nor.sn", TILEPRO_OPC_NOR_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "or", TILEPRO_OPC_OR, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "or.sn", TILEPRO_OPC_OR_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "ori", TILEPRO_OPC_ORI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, + }, + { "ori.sn", TILEPRO_OPC_ORI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "packbs_u", TILEPRO_OPC_PACKBS_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packbs_u.sn", TILEPRO_OPC_PACKBS_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packhb", TILEPRO_OPC_PACKHB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packhb.sn", TILEPRO_OPC_PACKHB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packhs", TILEPRO_OPC_PACKHS, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packhs.sn", TILEPRO_OPC_PACKHS_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packlb", TILEPRO_OPC_PACKLB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "packlb.sn", TILEPRO_OPC_PACKLB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "pcnt", TILEPRO_OPC_PCNT, 0x5, 2, TREG_ZERO, 1, + { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, + }, + { "pcnt.sn", TILEPRO_OPC_PCNT_SN, 0x1, 2, TREG_SN, 1, + { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "rl", TILEPRO_OPC_RL, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "rl.sn", TILEPRO_OPC_RL_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "rli", TILEPRO_OPC_RLI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } }, + }, + { "rli.sn", TILEPRO_OPC_RLI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "s1a", TILEPRO_OPC_S1A, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "s1a.sn", TILEPRO_OPC_S1A_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "s2a", TILEPRO_OPC_S2A, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "s2a.sn", TILEPRO_OPC_S2A_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "s3a", TILEPRO_OPC_S3A, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "s3a.sn", TILEPRO_OPC_S3A_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sadab_u", TILEPRO_OPC_SADAB_U, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadab_u.sn", TILEPRO_OPC_SADAB_U_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadah", TILEPRO_OPC_SADAH, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadah.sn", TILEPRO_OPC_SADAH_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadah_u", TILEPRO_OPC_SADAH_U, 0x1, 3, TREG_ZERO, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadah_u.sn", TILEPRO_OPC_SADAH_U_SN, 0x1, 3, TREG_SN, 1, + { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadb_u", TILEPRO_OPC_SADB_U, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadb_u.sn", TILEPRO_OPC_SADB_U_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadh", TILEPRO_OPC_SADH, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadh.sn", TILEPRO_OPC_SADH_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadh_u", TILEPRO_OPC_SADH_U, 0x1, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sadh_u.sn", TILEPRO_OPC_SADH_U_SN, 0x1, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "sb", TILEPRO_OPC_SB, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 10, 17 }, { 0, }, { 0, }, { 15, 36 } }, + }, + { "sbadd", TILEPRO_OPC_SBADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 24, 17, 37 }, { 0, }, { 0, }, { 0, } }, + }, + { "seq", TILEPRO_OPC_SEQ, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "seq.sn", TILEPRO_OPC_SEQ_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqb", TILEPRO_OPC_SEQB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqb.sn", TILEPRO_OPC_SEQB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqh", TILEPRO_OPC_SEQH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqh.sn", TILEPRO_OPC_SEQH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqi", TILEPRO_OPC_SEQI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, + }, + { "seqi.sn", TILEPRO_OPC_SEQI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqib", TILEPRO_OPC_SEQIB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqib.sn", TILEPRO_OPC_SEQIB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqih", TILEPRO_OPC_SEQIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "seqih.sn", TILEPRO_OPC_SEQIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sh", TILEPRO_OPC_SH, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 10, 17 }, { 0, }, { 0, }, { 15, 36 } }, + }, + { "shadd", TILEPRO_OPC_SHADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 24, 17, 37 }, { 0, }, { 0, }, { 0, } }, + }, + { "shl", TILEPRO_OPC_SHL, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "shl.sn", TILEPRO_OPC_SHL_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlb", TILEPRO_OPC_SHLB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlb.sn", TILEPRO_OPC_SHLB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlh", TILEPRO_OPC_SHLH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlh.sn", TILEPRO_OPC_SHLH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shli", TILEPRO_OPC_SHLI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } }, + }, + { "shli.sn", TILEPRO_OPC_SHLI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlib", TILEPRO_OPC_SHLIB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlib.sn", TILEPRO_OPC_SHLIB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlih", TILEPRO_OPC_SHLIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlih.sn", TILEPRO_OPC_SHLIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shr", TILEPRO_OPC_SHR, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "shr.sn", TILEPRO_OPC_SHR_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrb", TILEPRO_OPC_SHRB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrb.sn", TILEPRO_OPC_SHRB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrh", TILEPRO_OPC_SHRH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrh.sn", TILEPRO_OPC_SHRH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shri", TILEPRO_OPC_SHRI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } }, + }, + { "shri.sn", TILEPRO_OPC_SHRI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrib", TILEPRO_OPC_SHRIB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrib.sn", TILEPRO_OPC_SHRIB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrih", TILEPRO_OPC_SHRIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrih.sn", TILEPRO_OPC_SHRIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "slt", TILEPRO_OPC_SLT, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "slt.sn", TILEPRO_OPC_SLT_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slt_u", TILEPRO_OPC_SLT_U, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "slt_u.sn", TILEPRO_OPC_SLT_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltb", TILEPRO_OPC_SLTB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltb.sn", TILEPRO_OPC_SLTB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltb_u", TILEPRO_OPC_SLTB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltb_u.sn", TILEPRO_OPC_SLTB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slte", TILEPRO_OPC_SLTE, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "slte.sn", TILEPRO_OPC_SLTE_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slte_u", TILEPRO_OPC_SLTE_U, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "slte_u.sn", TILEPRO_OPC_SLTE_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteb", TILEPRO_OPC_SLTEB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteb.sn", TILEPRO_OPC_SLTEB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteb_u", TILEPRO_OPC_SLTEB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteb_u.sn", TILEPRO_OPC_SLTEB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteh", TILEPRO_OPC_SLTEH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteh.sn", TILEPRO_OPC_SLTEH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteh_u", TILEPRO_OPC_SLTEH_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slteh_u.sn", TILEPRO_OPC_SLTEH_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slth", TILEPRO_OPC_SLTH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slth.sn", TILEPRO_OPC_SLTH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slth_u", TILEPRO_OPC_SLTH_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slth_u.sn", TILEPRO_OPC_SLTH_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "slti", TILEPRO_OPC_SLTI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, + }, + { "slti.sn", TILEPRO_OPC_SLTI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "slti_u", TILEPRO_OPC_SLTI_U, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, + }, + { "slti_u.sn", TILEPRO_OPC_SLTI_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltib", TILEPRO_OPC_SLTIB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltib.sn", TILEPRO_OPC_SLTIB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltib_u", TILEPRO_OPC_SLTIB_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltib_u.sn", TILEPRO_OPC_SLTIB_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltih", TILEPRO_OPC_SLTIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltih.sn", TILEPRO_OPC_SLTIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltih_u", TILEPRO_OPC_SLTIH_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sltih_u.sn", TILEPRO_OPC_SLTIH_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "sne", TILEPRO_OPC_SNE, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "sne.sn", TILEPRO_OPC_SNE_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sneb", TILEPRO_OPC_SNEB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sneb.sn", TILEPRO_OPC_SNEB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sneh", TILEPRO_OPC_SNEH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sneh.sn", TILEPRO_OPC_SNEH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sra", TILEPRO_OPC_SRA, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "sra.sn", TILEPRO_OPC_SRA_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "srab", TILEPRO_OPC_SRAB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "srab.sn", TILEPRO_OPC_SRAB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "srah", TILEPRO_OPC_SRAH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "srah.sn", TILEPRO_OPC_SRAH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "srai", TILEPRO_OPC_SRAI, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } }, + }, + { "srai.sn", TILEPRO_OPC_SRAI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "sraib", TILEPRO_OPC_SRAIB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "sraib.sn", TILEPRO_OPC_SRAIB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "sraih", TILEPRO_OPC_SRAIH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "sraih.sn", TILEPRO_OPC_SRAIH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, + }, + { "sub", TILEPRO_OPC_SUB, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "sub.sn", TILEPRO_OPC_SUB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subb", TILEPRO_OPC_SUBB, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subb.sn", TILEPRO_OPC_SUBB_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subbs_u", TILEPRO_OPC_SUBBS_U, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subbs_u.sn", TILEPRO_OPC_SUBBS_U_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subh", TILEPRO_OPC_SUBH, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subh.sn", TILEPRO_OPC_SUBH_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subhs", TILEPRO_OPC_SUBHS, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subhs.sn", TILEPRO_OPC_SUBHS_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subs", TILEPRO_OPC_SUBS, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "subs.sn", TILEPRO_OPC_SUBS_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "sw", TILEPRO_OPC_SW, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 10, 17 }, { 0, }, { 0, }, { 15, 36 } }, + }, + { "swadd", TILEPRO_OPC_SWADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 24, 17, 37 }, { 0, }, { 0, }, { 0, } }, + }, + { "swint0", TILEPRO_OPC_SWINT0, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint1", TILEPRO_OPC_SWINT1, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint2", TILEPRO_OPC_SWINT2, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint3", TILEPRO_OPC_SWINT3, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "tblidxb0", TILEPRO_OPC_TBLIDXB0, 0x5, 2, TREG_ZERO, 1, + { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } }, + }, + { "tblidxb0.sn", TILEPRO_OPC_TBLIDXB0_SN, 0x1, 2, TREG_SN, 1, + { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "tblidxb1", TILEPRO_OPC_TBLIDXB1, 0x5, 2, TREG_ZERO, 1, + { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } }, + }, + { "tblidxb1.sn", TILEPRO_OPC_TBLIDXB1_SN, 0x1, 2, TREG_SN, 1, + { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "tblidxb2", TILEPRO_OPC_TBLIDXB2, 0x5, 2, TREG_ZERO, 1, + { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } }, + }, + { "tblidxb2.sn", TILEPRO_OPC_TBLIDXB2_SN, 0x1, 2, TREG_SN, 1, + { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "tblidxb3", TILEPRO_OPC_TBLIDXB3, 0x5, 2, TREG_ZERO, 1, + { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } }, + }, + { "tblidxb3.sn", TILEPRO_OPC_TBLIDXB3_SN, 0x1, 2, TREG_SN, 1, + { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "tns", TILEPRO_OPC_TNS, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "tns.sn", TILEPRO_OPC_TNS_SN, 0x2, 2, TREG_SN, 1, + { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "wh64", TILEPRO_OPC_WH64, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, + }, + { "xor", TILEPRO_OPC_XOR, 0xf, 3, TREG_ZERO, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, + }, + { "xor.sn", TILEPRO_OPC_XOR_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "xori", TILEPRO_OPC_XORI, 0x3, 3, TREG_ZERO, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "xori.sn", TILEPRO_OPC_XORI_SN, 0x3, 3, TREG_SN, 1, + { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { NULL, TILEPRO_OPC_NONE, 0, 0, TREG_ZERO, 0, { { 0, } }, + } +}; +#define BITFIELD(start, size) ((start) | (((1 << (size)) - 1) << 6)) +#define CHILD(array_index) (TILEPRO_OPC_NONE + (array_index)) + +static const unsigned short decode_X0_fsm[1153] = +{ + BITFIELD(22, 9) /* index 0 */, + CHILD(513), CHILD(530), CHILD(547), CHILD(564), CHILD(596), CHILD(613), + CHILD(630), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(663), CHILD(680), CHILD(697), + CHILD(714), CHILD(746), CHILD(763), CHILD(780), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(873), CHILD(878), CHILD(883), CHILD(903), CHILD(908), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(913), + CHILD(918), CHILD(923), CHILD(943), CHILD(948), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(953), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(988), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, CHILD(993), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(1076), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(18, 4) /* index 513 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB, TILEPRO_OPC_ADDH, TILEPRO_OPC_ADD, + TILEPRO_OPC_ADIFFB_U, TILEPRO_OPC_ADIFFH, TILEPRO_OPC_AND, + TILEPRO_OPC_AVGB_U, TILEPRO_OPC_AVGH, TILEPRO_OPC_CRC32_32, + TILEPRO_OPC_CRC32_8, TILEPRO_OPC_INTHB, TILEPRO_OPC_INTHH, + TILEPRO_OPC_INTLB, TILEPRO_OPC_INTLH, TILEPRO_OPC_MAXB_U, + BITFIELD(18, 4) /* index 530 */, + TILEPRO_OPC_MAXH, TILEPRO_OPC_MINB_U, TILEPRO_OPC_MINH, TILEPRO_OPC_MNZB, + TILEPRO_OPC_MNZH, TILEPRO_OPC_MNZ, TILEPRO_OPC_MULHHA_SS, + TILEPRO_OPC_MULHHA_SU, TILEPRO_OPC_MULHHA_UU, TILEPRO_OPC_MULHHSA_UU, + TILEPRO_OPC_MULHH_SS, TILEPRO_OPC_MULHH_SU, TILEPRO_OPC_MULHH_UU, + TILEPRO_OPC_MULHLA_SS, TILEPRO_OPC_MULHLA_SU, TILEPRO_OPC_MULHLA_US, + BITFIELD(18, 4) /* index 547 */, + TILEPRO_OPC_MULHLA_UU, TILEPRO_OPC_MULHLSA_UU, TILEPRO_OPC_MULHL_SS, + TILEPRO_OPC_MULHL_SU, TILEPRO_OPC_MULHL_US, TILEPRO_OPC_MULHL_UU, + TILEPRO_OPC_MULLLA_SS, TILEPRO_OPC_MULLLA_SU, TILEPRO_OPC_MULLLA_UU, + TILEPRO_OPC_MULLLSA_UU, TILEPRO_OPC_MULLL_SS, TILEPRO_OPC_MULLL_SU, + TILEPRO_OPC_MULLL_UU, TILEPRO_OPC_MVNZ, TILEPRO_OPC_MVZ, TILEPRO_OPC_MZB, + BITFIELD(18, 4) /* index 564 */, + TILEPRO_OPC_MZH, TILEPRO_OPC_MZ, TILEPRO_OPC_NOR, CHILD(581), + TILEPRO_OPC_PACKHB, TILEPRO_OPC_PACKLB, TILEPRO_OPC_RL, TILEPRO_OPC_S1A, + TILEPRO_OPC_S2A, TILEPRO_OPC_S3A, TILEPRO_OPC_SADAB_U, TILEPRO_OPC_SADAH, + TILEPRO_OPC_SADAH_U, TILEPRO_OPC_SADB_U, TILEPRO_OPC_SADH, + TILEPRO_OPC_SADH_U, + BITFIELD(12, 2) /* index 581 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(586), + BITFIELD(14, 2) /* index 586 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(591), + BITFIELD(16, 2) /* index 591 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE, + BITFIELD(18, 4) /* index 596 */, + TILEPRO_OPC_SEQB, TILEPRO_OPC_SEQH, TILEPRO_OPC_SEQ, TILEPRO_OPC_SHLB, + TILEPRO_OPC_SHLH, TILEPRO_OPC_SHL, TILEPRO_OPC_SHRB, TILEPRO_OPC_SHRH, + TILEPRO_OPC_SHR, TILEPRO_OPC_SLTB, TILEPRO_OPC_SLTB_U, TILEPRO_OPC_SLTEB, + TILEPRO_OPC_SLTEB_U, TILEPRO_OPC_SLTEH, TILEPRO_OPC_SLTEH_U, + TILEPRO_OPC_SLTE, + BITFIELD(18, 4) /* index 613 */, + TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLTH, TILEPRO_OPC_SLTH_U, TILEPRO_OPC_SLT, + TILEPRO_OPC_SLT_U, TILEPRO_OPC_SNEB, TILEPRO_OPC_SNEH, TILEPRO_OPC_SNE, + TILEPRO_OPC_SRAB, TILEPRO_OPC_SRAH, TILEPRO_OPC_SRA, TILEPRO_OPC_SUBB, + TILEPRO_OPC_SUBH, TILEPRO_OPC_SUB, TILEPRO_OPC_XOR, TILEPRO_OPC_DWORD_ALIGN, + BITFIELD(18, 3) /* index 630 */, + CHILD(639), CHILD(642), CHILD(645), CHILD(648), CHILD(651), CHILD(654), + CHILD(657), CHILD(660), + BITFIELD(21, 1) /* index 639 */, + TILEPRO_OPC_ADDS, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 642 */, + TILEPRO_OPC_SUBS, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 645 */, + TILEPRO_OPC_ADDBS_U, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 648 */, + TILEPRO_OPC_ADDHS, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 651 */, + TILEPRO_OPC_SUBBS_U, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 654 */, + TILEPRO_OPC_SUBHS, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 657 */, + TILEPRO_OPC_PACKHS, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 660 */, + TILEPRO_OPC_PACKBS_U, TILEPRO_OPC_NONE, + BITFIELD(18, 4) /* index 663 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB_SN, TILEPRO_OPC_ADDH_SN, + TILEPRO_OPC_ADD_SN, TILEPRO_OPC_ADIFFB_U_SN, TILEPRO_OPC_ADIFFH_SN, + TILEPRO_OPC_AND_SN, TILEPRO_OPC_AVGB_U_SN, TILEPRO_OPC_AVGH_SN, + TILEPRO_OPC_CRC32_32_SN, TILEPRO_OPC_CRC32_8_SN, TILEPRO_OPC_INTHB_SN, + TILEPRO_OPC_INTHH_SN, TILEPRO_OPC_INTLB_SN, TILEPRO_OPC_INTLH_SN, + TILEPRO_OPC_MAXB_U_SN, + BITFIELD(18, 4) /* index 680 */, + TILEPRO_OPC_MAXH_SN, TILEPRO_OPC_MINB_U_SN, TILEPRO_OPC_MINH_SN, + TILEPRO_OPC_MNZB_SN, TILEPRO_OPC_MNZH_SN, TILEPRO_OPC_MNZ_SN, + TILEPRO_OPC_MULHHA_SS_SN, TILEPRO_OPC_MULHHA_SU_SN, + TILEPRO_OPC_MULHHA_UU_SN, TILEPRO_OPC_MULHHSA_UU_SN, + TILEPRO_OPC_MULHH_SS_SN, TILEPRO_OPC_MULHH_SU_SN, TILEPRO_OPC_MULHH_UU_SN, + TILEPRO_OPC_MULHLA_SS_SN, TILEPRO_OPC_MULHLA_SU_SN, + TILEPRO_OPC_MULHLA_US_SN, + BITFIELD(18, 4) /* index 697 */, + TILEPRO_OPC_MULHLA_UU_SN, TILEPRO_OPC_MULHLSA_UU_SN, + TILEPRO_OPC_MULHL_SS_SN, TILEPRO_OPC_MULHL_SU_SN, TILEPRO_OPC_MULHL_US_SN, + TILEPRO_OPC_MULHL_UU_SN, TILEPRO_OPC_MULLLA_SS_SN, TILEPRO_OPC_MULLLA_SU_SN, + TILEPRO_OPC_MULLLA_UU_SN, TILEPRO_OPC_MULLLSA_UU_SN, + TILEPRO_OPC_MULLL_SS_SN, TILEPRO_OPC_MULLL_SU_SN, TILEPRO_OPC_MULLL_UU_SN, + TILEPRO_OPC_MVNZ_SN, TILEPRO_OPC_MVZ_SN, TILEPRO_OPC_MZB_SN, + BITFIELD(18, 4) /* index 714 */, + TILEPRO_OPC_MZH_SN, TILEPRO_OPC_MZ_SN, TILEPRO_OPC_NOR_SN, CHILD(731), + TILEPRO_OPC_PACKHB_SN, TILEPRO_OPC_PACKLB_SN, TILEPRO_OPC_RL_SN, + TILEPRO_OPC_S1A_SN, TILEPRO_OPC_S2A_SN, TILEPRO_OPC_S3A_SN, + TILEPRO_OPC_SADAB_U_SN, TILEPRO_OPC_SADAH_SN, TILEPRO_OPC_SADAH_U_SN, + TILEPRO_OPC_SADB_U_SN, TILEPRO_OPC_SADH_SN, TILEPRO_OPC_SADH_U_SN, + BITFIELD(12, 2) /* index 731 */, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(736), + BITFIELD(14, 2) /* index 736 */, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(741), + BITFIELD(16, 2) /* index 741 */, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, + TILEPRO_OPC_MOVE_SN, + BITFIELD(18, 4) /* index 746 */, + TILEPRO_OPC_SEQB_SN, TILEPRO_OPC_SEQH_SN, TILEPRO_OPC_SEQ_SN, + TILEPRO_OPC_SHLB_SN, TILEPRO_OPC_SHLH_SN, TILEPRO_OPC_SHL_SN, + TILEPRO_OPC_SHRB_SN, TILEPRO_OPC_SHRH_SN, TILEPRO_OPC_SHR_SN, + TILEPRO_OPC_SLTB_SN, TILEPRO_OPC_SLTB_U_SN, TILEPRO_OPC_SLTEB_SN, + TILEPRO_OPC_SLTEB_U_SN, TILEPRO_OPC_SLTEH_SN, TILEPRO_OPC_SLTEH_U_SN, + TILEPRO_OPC_SLTE_SN, + BITFIELD(18, 4) /* index 763 */, + TILEPRO_OPC_SLTE_U_SN, TILEPRO_OPC_SLTH_SN, TILEPRO_OPC_SLTH_U_SN, + TILEPRO_OPC_SLT_SN, TILEPRO_OPC_SLT_U_SN, TILEPRO_OPC_SNEB_SN, + TILEPRO_OPC_SNEH_SN, TILEPRO_OPC_SNE_SN, TILEPRO_OPC_SRAB_SN, + TILEPRO_OPC_SRAH_SN, TILEPRO_OPC_SRA_SN, TILEPRO_OPC_SUBB_SN, + TILEPRO_OPC_SUBH_SN, TILEPRO_OPC_SUB_SN, TILEPRO_OPC_XOR_SN, + TILEPRO_OPC_DWORD_ALIGN_SN, + BITFIELD(18, 3) /* index 780 */, + CHILD(789), CHILD(792), CHILD(795), CHILD(798), CHILD(801), CHILD(804), + CHILD(807), CHILD(810), + BITFIELD(21, 1) /* index 789 */, + TILEPRO_OPC_ADDS_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 792 */, + TILEPRO_OPC_SUBS_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 795 */, + TILEPRO_OPC_ADDBS_U_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 798 */, + TILEPRO_OPC_ADDHS_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 801 */, + TILEPRO_OPC_SUBBS_U_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 804 */, + TILEPRO_OPC_SUBHS_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 807 */, + TILEPRO_OPC_PACKHS_SN, TILEPRO_OPC_NONE, + BITFIELD(21, 1) /* index 810 */, + TILEPRO_OPC_PACKBS_U_SN, TILEPRO_OPC_NONE, + BITFIELD(6, 2) /* index 813 */, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + CHILD(818), + BITFIELD(8, 2) /* index 818 */, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + CHILD(823), + BITFIELD(10, 2) /* index 823 */, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + TILEPRO_OPC_MOVELI_SN, + BITFIELD(6, 2) /* index 828 */, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(833), + BITFIELD(8, 2) /* index 833 */, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(838), + BITFIELD(10, 2) /* index 838 */, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_MOVELI, + BITFIELD(0, 2) /* index 843 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(848), + BITFIELD(2, 2) /* index 848 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(853), + BITFIELD(4, 2) /* index 853 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(858), + BITFIELD(6, 2) /* index 858 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(863), + BITFIELD(8, 2) /* index 863 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(868), + BITFIELD(10, 2) /* index 868 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_INFOL, + BITFIELD(20, 2) /* index 873 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB, TILEPRO_OPC_ADDIH, TILEPRO_OPC_ADDI, + BITFIELD(20, 2) /* index 878 */, + TILEPRO_OPC_MAXIB_U, TILEPRO_OPC_MAXIH, TILEPRO_OPC_MINIB_U, + TILEPRO_OPC_MINIH, + BITFIELD(20, 2) /* index 883 */, + CHILD(888), TILEPRO_OPC_SEQIB, TILEPRO_OPC_SEQIH, TILEPRO_OPC_SEQI, + BITFIELD(6, 2) /* index 888 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(893), + BITFIELD(8, 2) /* index 893 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(898), + BITFIELD(10, 2) /* index 898 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI, + BITFIELD(20, 2) /* index 903 */, + TILEPRO_OPC_SLTIB, TILEPRO_OPC_SLTIB_U, TILEPRO_OPC_SLTIH, + TILEPRO_OPC_SLTIH_U, + BITFIELD(20, 2) /* index 908 */, + TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(20, 2) /* index 913 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB_SN, TILEPRO_OPC_ADDIH_SN, + TILEPRO_OPC_ADDI_SN, + BITFIELD(20, 2) /* index 918 */, + TILEPRO_OPC_MAXIB_U_SN, TILEPRO_OPC_MAXIH_SN, TILEPRO_OPC_MINIB_U_SN, + TILEPRO_OPC_MINIH_SN, + BITFIELD(20, 2) /* index 923 */, + CHILD(928), TILEPRO_OPC_SEQIB_SN, TILEPRO_OPC_SEQIH_SN, TILEPRO_OPC_SEQI_SN, + BITFIELD(6, 2) /* index 928 */, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(933), + BITFIELD(8, 2) /* index 933 */, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(938), + BITFIELD(10, 2) /* index 938 */, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, + TILEPRO_OPC_MOVEI_SN, + BITFIELD(20, 2) /* index 943 */, + TILEPRO_OPC_SLTIB_SN, TILEPRO_OPC_SLTIB_U_SN, TILEPRO_OPC_SLTIH_SN, + TILEPRO_OPC_SLTIH_U_SN, + BITFIELD(20, 2) /* index 948 */, + TILEPRO_OPC_SLTI_SN, TILEPRO_OPC_SLTI_U_SN, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, + BITFIELD(20, 2) /* index 953 */, + TILEPRO_OPC_NONE, CHILD(958), TILEPRO_OPC_XORI, TILEPRO_OPC_NONE, + BITFIELD(0, 2) /* index 958 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(963), + BITFIELD(2, 2) /* index 963 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(968), + BITFIELD(4, 2) /* index 968 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(973), + BITFIELD(6, 2) /* index 973 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(978), + BITFIELD(8, 2) /* index 978 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(983), + BITFIELD(10, 2) /* index 983 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO, + BITFIELD(20, 2) /* index 988 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ANDI_SN, TILEPRO_OPC_XORI_SN, + TILEPRO_OPC_NONE, + BITFIELD(17, 5) /* index 993 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_RLI, TILEPRO_OPC_SHLIB, TILEPRO_OPC_SHLIH, + TILEPRO_OPC_SHLI, TILEPRO_OPC_SHRIB, TILEPRO_OPC_SHRIH, TILEPRO_OPC_SHRI, + TILEPRO_OPC_SRAIB, TILEPRO_OPC_SRAIH, TILEPRO_OPC_SRAI, CHILD(1026), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(12, 4) /* index 1026 */, + TILEPRO_OPC_NONE, CHILD(1043), CHILD(1046), CHILD(1049), CHILD(1052), + CHILD(1055), CHILD(1058), CHILD(1061), CHILD(1064), CHILD(1067), + CHILD(1070), CHILD(1073), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1043 */, + TILEPRO_OPC_BITX, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1046 */, + TILEPRO_OPC_BYTEX, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1049 */, + TILEPRO_OPC_CLZ, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1052 */, + TILEPRO_OPC_CTZ, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1055 */, + TILEPRO_OPC_FNOP, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1058 */, + TILEPRO_OPC_NOP, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1061 */, + TILEPRO_OPC_PCNT, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1064 */, + TILEPRO_OPC_TBLIDXB0, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1067 */, + TILEPRO_OPC_TBLIDXB1, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1070 */, + TILEPRO_OPC_TBLIDXB2, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1073 */, + TILEPRO_OPC_TBLIDXB3, TILEPRO_OPC_NONE, + BITFIELD(17, 5) /* index 1076 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_RLI_SN, TILEPRO_OPC_SHLIB_SN, + TILEPRO_OPC_SHLIH_SN, TILEPRO_OPC_SHLI_SN, TILEPRO_OPC_SHRIB_SN, + TILEPRO_OPC_SHRIH_SN, TILEPRO_OPC_SHRI_SN, TILEPRO_OPC_SRAIB_SN, + TILEPRO_OPC_SRAIH_SN, TILEPRO_OPC_SRAI_SN, CHILD(1109), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(12, 4) /* index 1109 */, + TILEPRO_OPC_NONE, CHILD(1126), CHILD(1129), CHILD(1132), CHILD(1135), + CHILD(1055), CHILD(1058), CHILD(1138), CHILD(1141), CHILD(1144), + CHILD(1147), CHILD(1150), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1126 */, + TILEPRO_OPC_BITX_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1129 */, + TILEPRO_OPC_BYTEX_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1132 */, + TILEPRO_OPC_CLZ_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1135 */, + TILEPRO_OPC_CTZ_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1138 */, + TILEPRO_OPC_PCNT_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1141 */, + TILEPRO_OPC_TBLIDXB0_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1144 */, + TILEPRO_OPC_TBLIDXB1_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1147 */, + TILEPRO_OPC_TBLIDXB2_SN, TILEPRO_OPC_NONE, + BITFIELD(16, 1) /* index 1150 */, + TILEPRO_OPC_TBLIDXB3_SN, TILEPRO_OPC_NONE, +}; + +static const unsigned short decode_X1_fsm[1540] = +{ + BITFIELD(54, 9) /* index 0 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(513), CHILD(561), CHILD(594), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(641), + CHILD(689), CHILD(722), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(766), + CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), + CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), + CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), + CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), + CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), + CHILD(766), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), + CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), + CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), + CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), + CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), CHILD(781), + CHILD(781), CHILD(781), CHILD(781), CHILD(796), CHILD(796), CHILD(796), + CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), + CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), + CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), + CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), + CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(796), CHILD(826), + CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826), + CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826), CHILD(826), + CHILD(826), CHILD(826), CHILD(826), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), + CHILD(843), CHILD(860), CHILD(899), CHILD(923), CHILD(932), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(941), CHILD(950), CHILD(974), CHILD(983), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, CHILD(992), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(1334), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(49, 5) /* index 513 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB, TILEPRO_OPC_ADDH, TILEPRO_OPC_ADD, + TILEPRO_OPC_AND, TILEPRO_OPC_INTHB, TILEPRO_OPC_INTHH, TILEPRO_OPC_INTLB, + TILEPRO_OPC_INTLH, TILEPRO_OPC_JALRP, TILEPRO_OPC_JALR, TILEPRO_OPC_JRP, + TILEPRO_OPC_JR, TILEPRO_OPC_LNK, TILEPRO_OPC_MAXB_U, TILEPRO_OPC_MAXH, + TILEPRO_OPC_MINB_U, TILEPRO_OPC_MINH, TILEPRO_OPC_MNZB, TILEPRO_OPC_MNZH, + TILEPRO_OPC_MNZ, TILEPRO_OPC_MZB, TILEPRO_OPC_MZH, TILEPRO_OPC_MZ, + TILEPRO_OPC_NOR, CHILD(546), TILEPRO_OPC_PACKHB, TILEPRO_OPC_PACKLB, + TILEPRO_OPC_RL, TILEPRO_OPC_S1A, TILEPRO_OPC_S2A, TILEPRO_OPC_S3A, + BITFIELD(43, 2) /* index 546 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(551), + BITFIELD(45, 2) /* index 551 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(556), + BITFIELD(47, 2) /* index 556 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE, + BITFIELD(49, 5) /* index 561 */, + TILEPRO_OPC_SB, TILEPRO_OPC_SEQB, TILEPRO_OPC_SEQH, TILEPRO_OPC_SEQ, + TILEPRO_OPC_SHLB, TILEPRO_OPC_SHLH, TILEPRO_OPC_SHL, TILEPRO_OPC_SHRB, + TILEPRO_OPC_SHRH, TILEPRO_OPC_SHR, TILEPRO_OPC_SH, TILEPRO_OPC_SLTB, + TILEPRO_OPC_SLTB_U, TILEPRO_OPC_SLTEB, TILEPRO_OPC_SLTEB_U, + TILEPRO_OPC_SLTEH, TILEPRO_OPC_SLTEH_U, TILEPRO_OPC_SLTE, + TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLTH, TILEPRO_OPC_SLTH_U, TILEPRO_OPC_SLT, + TILEPRO_OPC_SLT_U, TILEPRO_OPC_SNEB, TILEPRO_OPC_SNEH, TILEPRO_OPC_SNE, + TILEPRO_OPC_SRAB, TILEPRO_OPC_SRAH, TILEPRO_OPC_SRA, TILEPRO_OPC_SUBB, + TILEPRO_OPC_SUBH, TILEPRO_OPC_SUB, + BITFIELD(49, 4) /* index 594 */, + CHILD(611), CHILD(614), CHILD(617), CHILD(620), CHILD(623), CHILD(626), + CHILD(629), CHILD(632), CHILD(635), CHILD(638), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 611 */, + TILEPRO_OPC_SW, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 614 */, + TILEPRO_OPC_XOR, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 617 */, + TILEPRO_OPC_ADDS, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 620 */, + TILEPRO_OPC_SUBS, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 623 */, + TILEPRO_OPC_ADDBS_U, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 626 */, + TILEPRO_OPC_ADDHS, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 629 */, + TILEPRO_OPC_SUBBS_U, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 632 */, + TILEPRO_OPC_SUBHS, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 635 */, + TILEPRO_OPC_PACKHS, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 638 */, + TILEPRO_OPC_PACKBS_U, TILEPRO_OPC_NONE, + BITFIELD(49, 5) /* index 641 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB_SN, TILEPRO_OPC_ADDH_SN, + TILEPRO_OPC_ADD_SN, TILEPRO_OPC_AND_SN, TILEPRO_OPC_INTHB_SN, + TILEPRO_OPC_INTHH_SN, TILEPRO_OPC_INTLB_SN, TILEPRO_OPC_INTLH_SN, + TILEPRO_OPC_JALRP, TILEPRO_OPC_JALR, TILEPRO_OPC_JRP, TILEPRO_OPC_JR, + TILEPRO_OPC_LNK_SN, TILEPRO_OPC_MAXB_U_SN, TILEPRO_OPC_MAXH_SN, + TILEPRO_OPC_MINB_U_SN, TILEPRO_OPC_MINH_SN, TILEPRO_OPC_MNZB_SN, + TILEPRO_OPC_MNZH_SN, TILEPRO_OPC_MNZ_SN, TILEPRO_OPC_MZB_SN, + TILEPRO_OPC_MZH_SN, TILEPRO_OPC_MZ_SN, TILEPRO_OPC_NOR_SN, CHILD(674), + TILEPRO_OPC_PACKHB_SN, TILEPRO_OPC_PACKLB_SN, TILEPRO_OPC_RL_SN, + TILEPRO_OPC_S1A_SN, TILEPRO_OPC_S2A_SN, TILEPRO_OPC_S3A_SN, + BITFIELD(43, 2) /* index 674 */, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(679), + BITFIELD(45, 2) /* index 679 */, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(684), + BITFIELD(47, 2) /* index 684 */, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, + TILEPRO_OPC_MOVE_SN, + BITFIELD(49, 5) /* index 689 */, + TILEPRO_OPC_SB, TILEPRO_OPC_SEQB_SN, TILEPRO_OPC_SEQH_SN, + TILEPRO_OPC_SEQ_SN, TILEPRO_OPC_SHLB_SN, TILEPRO_OPC_SHLH_SN, + TILEPRO_OPC_SHL_SN, TILEPRO_OPC_SHRB_SN, TILEPRO_OPC_SHRH_SN, + TILEPRO_OPC_SHR_SN, TILEPRO_OPC_SH, TILEPRO_OPC_SLTB_SN, + TILEPRO_OPC_SLTB_U_SN, TILEPRO_OPC_SLTEB_SN, TILEPRO_OPC_SLTEB_U_SN, + TILEPRO_OPC_SLTEH_SN, TILEPRO_OPC_SLTEH_U_SN, TILEPRO_OPC_SLTE_SN, + TILEPRO_OPC_SLTE_U_SN, TILEPRO_OPC_SLTH_SN, TILEPRO_OPC_SLTH_U_SN, + TILEPRO_OPC_SLT_SN, TILEPRO_OPC_SLT_U_SN, TILEPRO_OPC_SNEB_SN, + TILEPRO_OPC_SNEH_SN, TILEPRO_OPC_SNE_SN, TILEPRO_OPC_SRAB_SN, + TILEPRO_OPC_SRAH_SN, TILEPRO_OPC_SRA_SN, TILEPRO_OPC_SUBB_SN, + TILEPRO_OPC_SUBH_SN, TILEPRO_OPC_SUB_SN, + BITFIELD(49, 4) /* index 722 */, + CHILD(611), CHILD(739), CHILD(742), CHILD(745), CHILD(748), CHILD(751), + CHILD(754), CHILD(757), CHILD(760), CHILD(763), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 739 */, + TILEPRO_OPC_XOR_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 742 */, + TILEPRO_OPC_ADDS_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 745 */, + TILEPRO_OPC_SUBS_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 748 */, + TILEPRO_OPC_ADDBS_U_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 751 */, + TILEPRO_OPC_ADDHS_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 754 */, + TILEPRO_OPC_SUBBS_U_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 757 */, + TILEPRO_OPC_SUBHS_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 760 */, + TILEPRO_OPC_PACKHS_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 763 */, + TILEPRO_OPC_PACKBS_U_SN, TILEPRO_OPC_NONE, + BITFIELD(37, 2) /* index 766 */, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + CHILD(771), + BITFIELD(39, 2) /* index 771 */, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + CHILD(776), + BITFIELD(41, 2) /* index 776 */, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + TILEPRO_OPC_MOVELI_SN, + BITFIELD(37, 2) /* index 781 */, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(786), + BITFIELD(39, 2) /* index 786 */, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(791), + BITFIELD(41, 2) /* index 791 */, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_MOVELI, + BITFIELD(31, 2) /* index 796 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(801), + BITFIELD(33, 2) /* index 801 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(806), + BITFIELD(35, 2) /* index 806 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(811), + BITFIELD(37, 2) /* index 811 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(816), + BITFIELD(39, 2) /* index 816 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(821), + BITFIELD(41, 2) /* index 821 */, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_INFOL, + BITFIELD(31, 4) /* index 826 */, + TILEPRO_OPC_BZ, TILEPRO_OPC_BZT, TILEPRO_OPC_BNZ, TILEPRO_OPC_BNZT, + TILEPRO_OPC_BGZ, TILEPRO_OPC_BGZT, TILEPRO_OPC_BGEZ, TILEPRO_OPC_BGEZT, + TILEPRO_OPC_BLZ, TILEPRO_OPC_BLZT, TILEPRO_OPC_BLEZ, TILEPRO_OPC_BLEZT, + TILEPRO_OPC_BBS, TILEPRO_OPC_BBST, TILEPRO_OPC_BBNS, TILEPRO_OPC_BBNST, + BITFIELD(31, 4) /* index 843 */, + TILEPRO_OPC_BZ_SN, TILEPRO_OPC_BZT_SN, TILEPRO_OPC_BNZ_SN, + TILEPRO_OPC_BNZT_SN, TILEPRO_OPC_BGZ_SN, TILEPRO_OPC_BGZT_SN, + TILEPRO_OPC_BGEZ_SN, TILEPRO_OPC_BGEZT_SN, TILEPRO_OPC_BLZ_SN, + TILEPRO_OPC_BLZT_SN, TILEPRO_OPC_BLEZ_SN, TILEPRO_OPC_BLEZT_SN, + TILEPRO_OPC_BBS_SN, TILEPRO_OPC_BBST_SN, TILEPRO_OPC_BBNS_SN, + TILEPRO_OPC_BBNST_SN, + BITFIELD(51, 3) /* index 860 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB, TILEPRO_OPC_ADDIH, TILEPRO_OPC_ADDI, + CHILD(869), TILEPRO_OPC_MAXIB_U, TILEPRO_OPC_MAXIH, TILEPRO_OPC_MFSPR, + BITFIELD(31, 2) /* index 869 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(874), + BITFIELD(33, 2) /* index 874 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(879), + BITFIELD(35, 2) /* index 879 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(884), + BITFIELD(37, 2) /* index 884 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(889), + BITFIELD(39, 2) /* index 889 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(894), + BITFIELD(41, 2) /* index 894 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO, + BITFIELD(51, 3) /* index 899 */, + TILEPRO_OPC_MINIB_U, TILEPRO_OPC_MINIH, TILEPRO_OPC_MTSPR, CHILD(908), + TILEPRO_OPC_SEQIB, TILEPRO_OPC_SEQIH, TILEPRO_OPC_SEQI, TILEPRO_OPC_SLTIB, + BITFIELD(37, 2) /* index 908 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(913), + BITFIELD(39, 2) /* index 913 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(918), + BITFIELD(41, 2) /* index 918 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI, + BITFIELD(51, 3) /* index 923 */, + TILEPRO_OPC_SLTIB_U, TILEPRO_OPC_SLTIH, TILEPRO_OPC_SLTIH_U, + TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, TILEPRO_OPC_XORI, TILEPRO_OPC_LBADD, + TILEPRO_OPC_LBADD_U, + BITFIELD(51, 3) /* index 932 */, + TILEPRO_OPC_LHADD, TILEPRO_OPC_LHADD_U, TILEPRO_OPC_LWADD, + TILEPRO_OPC_LWADD_NA, TILEPRO_OPC_SBADD, TILEPRO_OPC_SHADD, + TILEPRO_OPC_SWADD, TILEPRO_OPC_NONE, + BITFIELD(51, 3) /* index 941 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB_SN, TILEPRO_OPC_ADDIH_SN, + TILEPRO_OPC_ADDI_SN, TILEPRO_OPC_ANDI_SN, TILEPRO_OPC_MAXIB_U_SN, + TILEPRO_OPC_MAXIH_SN, TILEPRO_OPC_MFSPR, + BITFIELD(51, 3) /* index 950 */, + TILEPRO_OPC_MINIB_U_SN, TILEPRO_OPC_MINIH_SN, TILEPRO_OPC_MTSPR, CHILD(959), + TILEPRO_OPC_SEQIB_SN, TILEPRO_OPC_SEQIH_SN, TILEPRO_OPC_SEQI_SN, + TILEPRO_OPC_SLTIB_SN, + BITFIELD(37, 2) /* index 959 */, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(964), + BITFIELD(39, 2) /* index 964 */, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(969), + BITFIELD(41, 2) /* index 969 */, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, + TILEPRO_OPC_MOVEI_SN, + BITFIELD(51, 3) /* index 974 */, + TILEPRO_OPC_SLTIB_U_SN, TILEPRO_OPC_SLTIH_SN, TILEPRO_OPC_SLTIH_U_SN, + TILEPRO_OPC_SLTI_SN, TILEPRO_OPC_SLTI_U_SN, TILEPRO_OPC_XORI_SN, + TILEPRO_OPC_LBADD_SN, TILEPRO_OPC_LBADD_U_SN, + BITFIELD(51, 3) /* index 983 */, + TILEPRO_OPC_LHADD_SN, TILEPRO_OPC_LHADD_U_SN, TILEPRO_OPC_LWADD_SN, + TILEPRO_OPC_LWADD_NA_SN, TILEPRO_OPC_SBADD, TILEPRO_OPC_SHADD, + TILEPRO_OPC_SWADD, TILEPRO_OPC_NONE, + BITFIELD(46, 7) /* index 992 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(1121), CHILD(1121), CHILD(1121), CHILD(1121), CHILD(1124), + CHILD(1124), CHILD(1124), CHILD(1124), CHILD(1127), CHILD(1127), + CHILD(1127), CHILD(1127), CHILD(1130), CHILD(1130), CHILD(1130), + CHILD(1130), CHILD(1133), CHILD(1133), CHILD(1133), CHILD(1133), + CHILD(1136), CHILD(1136), CHILD(1136), CHILD(1136), CHILD(1139), + CHILD(1139), CHILD(1139), CHILD(1139), CHILD(1142), CHILD(1142), + CHILD(1142), CHILD(1142), CHILD(1145), CHILD(1145), CHILD(1145), + CHILD(1145), CHILD(1148), CHILD(1148), CHILD(1148), CHILD(1148), + CHILD(1151), CHILD(1242), CHILD(1290), CHILD(1323), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1121 */, + TILEPRO_OPC_RLI, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1124 */, + TILEPRO_OPC_SHLIB, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1127 */, + TILEPRO_OPC_SHLIH, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1130 */, + TILEPRO_OPC_SHLI, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1133 */, + TILEPRO_OPC_SHRIB, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1136 */, + TILEPRO_OPC_SHRIH, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1139 */, + TILEPRO_OPC_SHRI, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1142 */, + TILEPRO_OPC_SRAIB, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1145 */, + TILEPRO_OPC_SRAIH, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1148 */, + TILEPRO_OPC_SRAI, TILEPRO_OPC_NONE, + BITFIELD(43, 3) /* index 1151 */, + TILEPRO_OPC_NONE, CHILD(1160), CHILD(1163), CHILD(1166), CHILD(1169), + CHILD(1172), CHILD(1175), CHILD(1178), + BITFIELD(53, 1) /* index 1160 */, + TILEPRO_OPC_DRAIN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1163 */, + TILEPRO_OPC_DTLBPR, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1166 */, + TILEPRO_OPC_FINV, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1169 */, + TILEPRO_OPC_FLUSH, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1172 */, + TILEPRO_OPC_FNOP, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1175 */, + TILEPRO_OPC_ICOH, TILEPRO_OPC_NONE, + BITFIELD(31, 2) /* index 1178 */, + CHILD(1183), CHILD(1211), CHILD(1239), CHILD(1239), + BITFIELD(53, 1) /* index 1183 */, + CHILD(1186), TILEPRO_OPC_NONE, + BITFIELD(33, 2) /* index 1186 */, + TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, CHILD(1191), + BITFIELD(35, 2) /* index 1191 */, + TILEPRO_OPC_ILL, CHILD(1196), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, + BITFIELD(37, 2) /* index 1196 */, + TILEPRO_OPC_ILL, CHILD(1201), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, + BITFIELD(39, 2) /* index 1201 */, + TILEPRO_OPC_ILL, CHILD(1206), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, + BITFIELD(41, 2) /* index 1206 */, + TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_BPT, TILEPRO_OPC_ILL, + BITFIELD(53, 1) /* index 1211 */, + CHILD(1214), TILEPRO_OPC_NONE, + BITFIELD(33, 2) /* index 1214 */, + TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, CHILD(1219), + BITFIELD(35, 2) /* index 1219 */, + TILEPRO_OPC_ILL, CHILD(1224), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, + BITFIELD(37, 2) /* index 1224 */, + TILEPRO_OPC_ILL, CHILD(1229), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, + BITFIELD(39, 2) /* index 1229 */, + TILEPRO_OPC_ILL, CHILD(1234), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, + BITFIELD(41, 2) /* index 1234 */, + TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_RAISE, TILEPRO_OPC_ILL, + BITFIELD(53, 1) /* index 1239 */, + TILEPRO_OPC_ILL, TILEPRO_OPC_NONE, + BITFIELD(43, 3) /* index 1242 */, + CHILD(1251), CHILD(1254), CHILD(1257), CHILD(1275), CHILD(1278), + CHILD(1281), CHILD(1284), CHILD(1287), + BITFIELD(53, 1) /* index 1251 */, + TILEPRO_OPC_INV, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1254 */, + TILEPRO_OPC_IRET, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1257 */, + CHILD(1260), TILEPRO_OPC_NONE, + BITFIELD(31, 2) /* index 1260 */, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(1265), + BITFIELD(33, 2) /* index 1265 */, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(1270), + BITFIELD(35, 2) /* index 1270 */, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_PREFETCH, + BITFIELD(53, 1) /* index 1275 */, + TILEPRO_OPC_LB_U, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1278 */, + TILEPRO_OPC_LH, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1281 */, + TILEPRO_OPC_LH_U, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1284 */, + TILEPRO_OPC_LW, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1287 */, + TILEPRO_OPC_MF, TILEPRO_OPC_NONE, + BITFIELD(43, 3) /* index 1290 */, + CHILD(1299), CHILD(1302), CHILD(1305), CHILD(1308), CHILD(1311), + CHILD(1314), CHILD(1317), CHILD(1320), + BITFIELD(53, 1) /* index 1299 */, + TILEPRO_OPC_NAP, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1302 */, + TILEPRO_OPC_NOP, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1305 */, + TILEPRO_OPC_SWINT0, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1308 */, + TILEPRO_OPC_SWINT1, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1311 */, + TILEPRO_OPC_SWINT2, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1314 */, + TILEPRO_OPC_SWINT3, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1317 */, + TILEPRO_OPC_TNS, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1320 */, + TILEPRO_OPC_WH64, TILEPRO_OPC_NONE, + BITFIELD(43, 2) /* index 1323 */, + CHILD(1328), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(45, 1) /* index 1328 */, + CHILD(1331), TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1331 */, + TILEPRO_OPC_LW_NA, TILEPRO_OPC_NONE, + BITFIELD(46, 7) /* index 1334 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(1463), CHILD(1463), CHILD(1463), CHILD(1463), CHILD(1466), + CHILD(1466), CHILD(1466), CHILD(1466), CHILD(1469), CHILD(1469), + CHILD(1469), CHILD(1469), CHILD(1472), CHILD(1472), CHILD(1472), + CHILD(1472), CHILD(1475), CHILD(1475), CHILD(1475), CHILD(1475), + CHILD(1478), CHILD(1478), CHILD(1478), CHILD(1478), CHILD(1481), + CHILD(1481), CHILD(1481), CHILD(1481), CHILD(1484), CHILD(1484), + CHILD(1484), CHILD(1484), CHILD(1487), CHILD(1487), CHILD(1487), + CHILD(1487), CHILD(1490), CHILD(1490), CHILD(1490), CHILD(1490), + CHILD(1151), CHILD(1493), CHILD(1517), CHILD(1529), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1463 */, + TILEPRO_OPC_RLI_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1466 */, + TILEPRO_OPC_SHLIB_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1469 */, + TILEPRO_OPC_SHLIH_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1472 */, + TILEPRO_OPC_SHLI_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1475 */, + TILEPRO_OPC_SHRIB_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1478 */, + TILEPRO_OPC_SHRIH_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1481 */, + TILEPRO_OPC_SHRI_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1484 */, + TILEPRO_OPC_SRAIB_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1487 */, + TILEPRO_OPC_SRAIH_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1490 */, + TILEPRO_OPC_SRAI_SN, TILEPRO_OPC_NONE, + BITFIELD(43, 3) /* index 1493 */, + CHILD(1251), CHILD(1254), CHILD(1502), CHILD(1505), CHILD(1508), + CHILD(1511), CHILD(1514), CHILD(1287), + BITFIELD(53, 1) /* index 1502 */, + TILEPRO_OPC_LB_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1505 */, + TILEPRO_OPC_LB_U_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1508 */, + TILEPRO_OPC_LH_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1511 */, + TILEPRO_OPC_LH_U_SN, TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1514 */, + TILEPRO_OPC_LW_SN, TILEPRO_OPC_NONE, + BITFIELD(43, 3) /* index 1517 */, + CHILD(1299), CHILD(1302), CHILD(1305), CHILD(1308), CHILD(1311), + CHILD(1314), CHILD(1526), CHILD(1320), + BITFIELD(53, 1) /* index 1526 */, + TILEPRO_OPC_TNS_SN, TILEPRO_OPC_NONE, + BITFIELD(43, 2) /* index 1529 */, + CHILD(1534), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(45, 1) /* index 1534 */, + CHILD(1537), TILEPRO_OPC_NONE, + BITFIELD(53, 1) /* index 1537 */, + TILEPRO_OPC_LW_NA_SN, TILEPRO_OPC_NONE, +}; + +static const unsigned short decode_Y0_fsm[168] = +{ + BITFIELD(27, 4) /* index 0 */, + TILEPRO_OPC_NONE, CHILD(17), CHILD(22), CHILD(27), CHILD(47), CHILD(52), + CHILD(57), CHILD(62), CHILD(67), TILEPRO_OPC_ADDI, CHILD(72), CHILD(102), + TILEPRO_OPC_SEQI, CHILD(117), TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, + BITFIELD(18, 2) /* index 17 */, + TILEPRO_OPC_ADD, TILEPRO_OPC_S1A, TILEPRO_OPC_S2A, TILEPRO_OPC_SUB, + BITFIELD(18, 2) /* index 22 */, + TILEPRO_OPC_MNZ, TILEPRO_OPC_MVNZ, TILEPRO_OPC_MVZ, TILEPRO_OPC_MZ, + BITFIELD(18, 2) /* index 27 */, + TILEPRO_OPC_AND, TILEPRO_OPC_NOR, CHILD(32), TILEPRO_OPC_XOR, + BITFIELD(12, 2) /* index 32 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(37), + BITFIELD(14, 2) /* index 37 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(42), + BITFIELD(16, 2) /* index 42 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE, + BITFIELD(18, 2) /* index 47 */, + TILEPRO_OPC_RL, TILEPRO_OPC_SHL, TILEPRO_OPC_SHR, TILEPRO_OPC_SRA, + BITFIELD(18, 2) /* index 52 */, + TILEPRO_OPC_SLTE, TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLT, TILEPRO_OPC_SLT_U, + BITFIELD(18, 2) /* index 57 */, + TILEPRO_OPC_MULHLSA_UU, TILEPRO_OPC_S3A, TILEPRO_OPC_SEQ, TILEPRO_OPC_SNE, + BITFIELD(18, 2) /* index 62 */, + TILEPRO_OPC_MULHH_SS, TILEPRO_OPC_MULHH_UU, TILEPRO_OPC_MULLL_SS, + TILEPRO_OPC_MULLL_UU, + BITFIELD(18, 2) /* index 67 */, + TILEPRO_OPC_MULHHA_SS, TILEPRO_OPC_MULHHA_UU, TILEPRO_OPC_MULLLA_SS, + TILEPRO_OPC_MULLLA_UU, + BITFIELD(0, 2) /* index 72 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(77), + BITFIELD(2, 2) /* index 77 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(82), + BITFIELD(4, 2) /* index 82 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(87), + BITFIELD(6, 2) /* index 87 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(92), + BITFIELD(8, 2) /* index 92 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(97), + BITFIELD(10, 2) /* index 97 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO, + BITFIELD(6, 2) /* index 102 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(107), + BITFIELD(8, 2) /* index 107 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(112), + BITFIELD(10, 2) /* index 112 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI, + BITFIELD(15, 5) /* index 117 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_RLI, TILEPRO_OPC_RLI, TILEPRO_OPC_RLI, TILEPRO_OPC_RLI, + TILEPRO_OPC_SHLI, TILEPRO_OPC_SHLI, TILEPRO_OPC_SHLI, TILEPRO_OPC_SHLI, + TILEPRO_OPC_SHRI, TILEPRO_OPC_SHRI, TILEPRO_OPC_SHRI, TILEPRO_OPC_SHRI, + TILEPRO_OPC_SRAI, TILEPRO_OPC_SRAI, TILEPRO_OPC_SRAI, TILEPRO_OPC_SRAI, + CHILD(150), CHILD(159), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(12, 3) /* index 150 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_BITX, TILEPRO_OPC_BYTEX, TILEPRO_OPC_CLZ, + TILEPRO_OPC_CTZ, TILEPRO_OPC_FNOP, TILEPRO_OPC_NOP, TILEPRO_OPC_PCNT, + BITFIELD(12, 3) /* index 159 */, + TILEPRO_OPC_TBLIDXB0, TILEPRO_OPC_TBLIDXB1, TILEPRO_OPC_TBLIDXB2, + TILEPRO_OPC_TBLIDXB3, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, +}; + +static const unsigned short decode_Y1_fsm[140] = +{ + BITFIELD(59, 4) /* index 0 */, + TILEPRO_OPC_NONE, CHILD(17), CHILD(22), CHILD(27), CHILD(47), CHILD(52), + CHILD(57), TILEPRO_OPC_ADDI, CHILD(62), CHILD(92), TILEPRO_OPC_SEQI, + CHILD(107), TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, + BITFIELD(49, 2) /* index 17 */, + TILEPRO_OPC_ADD, TILEPRO_OPC_S1A, TILEPRO_OPC_S2A, TILEPRO_OPC_SUB, + BITFIELD(49, 2) /* index 22 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_MNZ, TILEPRO_OPC_MZ, TILEPRO_OPC_NONE, + BITFIELD(49, 2) /* index 27 */, + TILEPRO_OPC_AND, TILEPRO_OPC_NOR, CHILD(32), TILEPRO_OPC_XOR, + BITFIELD(43, 2) /* index 32 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(37), + BITFIELD(45, 2) /* index 37 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(42), + BITFIELD(47, 2) /* index 42 */, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE, + BITFIELD(49, 2) /* index 47 */, + TILEPRO_OPC_RL, TILEPRO_OPC_SHL, TILEPRO_OPC_SHR, TILEPRO_OPC_SRA, + BITFIELD(49, 2) /* index 52 */, + TILEPRO_OPC_SLTE, TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLT, TILEPRO_OPC_SLT_U, + BITFIELD(49, 2) /* index 57 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_S3A, TILEPRO_OPC_SEQ, TILEPRO_OPC_SNE, + BITFIELD(31, 2) /* index 62 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(67), + BITFIELD(33, 2) /* index 67 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(72), + BITFIELD(35, 2) /* index 72 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(77), + BITFIELD(37, 2) /* index 77 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(82), + BITFIELD(39, 2) /* index 82 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(87), + BITFIELD(41, 2) /* index 87 */, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO, + BITFIELD(37, 2) /* index 92 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(97), + BITFIELD(39, 2) /* index 97 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(102), + BITFIELD(41, 2) /* index 102 */, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI, + BITFIELD(48, 3) /* index 107 */, + TILEPRO_OPC_NONE, TILEPRO_OPC_RLI, TILEPRO_OPC_SHLI, TILEPRO_OPC_SHRI, + TILEPRO_OPC_SRAI, CHILD(116), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(43, 3) /* index 116 */, + TILEPRO_OPC_NONE, CHILD(125), CHILD(130), CHILD(135), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(46, 2) /* index 125 */, + TILEPRO_OPC_FNOP, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(46, 2) /* index 130 */, + TILEPRO_OPC_ILL, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + BITFIELD(46, 2) /* index 135 */, + TILEPRO_OPC_NOP, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, +}; + +static const unsigned short decode_Y2_fsm[24] = +{ + BITFIELD(56, 3) /* index 0 */, + CHILD(9), TILEPRO_OPC_LB_U, TILEPRO_OPC_LH, TILEPRO_OPC_LH_U, + TILEPRO_OPC_LW, TILEPRO_OPC_SB, TILEPRO_OPC_SH, TILEPRO_OPC_SW, + BITFIELD(20, 2) /* index 9 */, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(14), + BITFIELD(22, 2) /* index 14 */, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(19), + BITFIELD(24, 2) /* index 19 */, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_PREFETCH, +}; + +#undef BITFIELD +#undef CHILD +const unsigned short * const +tilepro_bundle_decoder_fsms[TILEPRO_NUM_PIPELINE_ENCODINGS] = +{ + decode_X0_fsm, + decode_X1_fsm, + decode_Y0_fsm, + decode_Y1_fsm, + decode_Y2_fsm +}; +const struct tilepro_operand tilepro_operands[43] = +{ + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_X0), + 8, 1, 0, 0, 0, 0, + create_Imm8_X0, get_Imm8_X0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_X1), + 8, 1, 0, 0, 0, 0, + create_Imm8_X1, get_Imm8_X1 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_Y0), + 8, 1, 0, 0, 0, 0, + create_Imm8_Y0, get_Imm8_Y0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_Y1), + 8, 1, 0, 0, 0, 0, + create_Imm8_Y1, get_Imm8_Y1 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM16_X0), + 16, 1, 0, 0, 0, 0, + create_Imm16_X0, get_Imm16_X0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM16_X1), + 16, 1, 0, 0, 0, 0, + create_Imm16_X1, get_Imm16_X1 + }, + { + TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(TILEPRO_JOFFLONG_X1), + 29, 1, 0, 0, 1, TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_JOffLong_X1, get_JOffLong_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_X0, get_Dest_X0 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_X0, get_SrcA_X0 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_X1, get_Dest_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_X1, get_SrcA_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_Y0, get_Dest_Y0 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y0, get_SrcA_Y0 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_Y1, get_Dest_Y1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y1, get_SrcA_Y1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y2, get_SrcA_Y2 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_X0, get_SrcB_X0 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_X1, get_SrcB_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_Y0, get_SrcB_Y0 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_Y1, get_SrcB_Y1 + }, + { + TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(TILEPRO_BROFF_X1), + 17, 1, 0, 0, 1, TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_BrOff_X1, get_BrOff_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_Dest_X0, get_Dest_X0 + }, + { + TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(NONE), + 28, 1, 0, 0, 1, TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_JOff_X1, get_JOff_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_SrcBDest_Y2, get_SrcBDest_Y2 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_SrcA_X1, get_SrcA_X1 + }, + { + TILEPRO_OP_TYPE_SPR, BFD_RELOC(TILEPRO_MF_IMM15_X1), + 15, 0, 0, 0, 0, 0, + create_MF_Imm15_X1, get_MF_Imm15_X1 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMSTART_X0), + 5, 0, 0, 0, 0, 0, + create_MMStart_X0, get_MMStart_X0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMEND_X0), + 5, 0, 0, 0, 0, 0, + create_MMEnd_X0, get_MMEnd_X0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMSTART_X1), + 5, 0, 0, 0, 0, 0, + create_MMStart_X1, get_MMStart_X1 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMEND_X1), + 5, 0, 0, 0, 0, 0, + create_MMEnd_X1, get_MMEnd_X1 + }, + { + TILEPRO_OP_TYPE_SPR, BFD_RELOC(TILEPRO_MT_IMM15_X1), + 15, 0, 0, 0, 0, 0, + create_MT_Imm15_X1, get_MT_Imm15_X1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_Dest_Y0, get_Dest_Y0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_X0), + 5, 0, 0, 0, 0, 0, + create_ShAmt_X0, get_ShAmt_X0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_X1), + 5, 0, 0, 0, 0, 0, + create_ShAmt_X1, get_ShAmt_X1 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_Y0), + 5, 0, 0, 0, 0, 0, + create_ShAmt_Y0, get_ShAmt_Y0 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_Y1), + 5, 0, 0, 0, 0, 0, + create_ShAmt_Y1, get_ShAmt_Y1 + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcBDest_Y2, get_SrcBDest_Y2 + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_DEST_IMM8_X1), + 8, 1, 0, 0, 0, 0, + create_Dest_Imm8_X1, get_Dest_Imm8_X1 + }, + { + TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(NONE), + 10, 1, 0, 0, 1, TILEPRO_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES, + create_BrOff_SN, get_BrOff_SN + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(NONE), + 8, 0, 0, 0, 0, 0, + create_Imm8_SN, get_Imm8_SN + }, + { + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(NONE), + 8, 1, 0, 0, 0, 0, + create_Imm8_SN, get_Imm8_SN + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 2, 0, 0, 1, 0, 0, + create_Dest_SN, get_Dest_SN + }, + { + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 2, 0, 1, 0, 0, 0, + create_Src_SN, get_Src_SN + } +}; + + + + +/* Given a set of bundle bits and a specific pipe, returns which + * instruction the bundle contains in that pipe. + */ +const struct tilepro_opcode * +find_opcode(tilepro_bundle_bits bits, tilepro_pipeline pipe) +{ + const unsigned short *table = tilepro_bundle_decoder_fsms[pipe]; + int index = 0; + + while (1) + { + unsigned short bitspec = table[index]; + unsigned int bitfield = + ((unsigned int)(bits >> (bitspec & 63))) & (bitspec >> 6); + + unsigned short next = table[index + 1 + bitfield]; + if (next <= TILEPRO_OPC_NONE) + return &tilepro_opcodes[next]; + + index = next - TILEPRO_OPC_NONE; + } +} + + +int +parse_insn_tilepro(tilepro_bundle_bits bits, + unsigned int pc, + struct tilepro_decoded_instruction + decoded[TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE]) +{ + int num_instructions = 0; + int pipe; + + int min_pipe, max_pipe; + if ((bits & TILEPRO_BUNDLE_Y_ENCODING_MASK) == 0) + { + min_pipe = TILEPRO_PIPELINE_X0; + max_pipe = TILEPRO_PIPELINE_X1; + } + else + { + min_pipe = TILEPRO_PIPELINE_Y0; + max_pipe = TILEPRO_PIPELINE_Y2; + } + + /* For each pipe, find an instruction that fits. */ + for (pipe = min_pipe; pipe <= max_pipe; pipe++) + { + const struct tilepro_opcode *opc; + struct tilepro_decoded_instruction *d; + int i; + + d = &decoded[num_instructions++]; + opc = find_opcode (bits, (tilepro_pipeline)pipe); + d->opcode = opc; + + /* Decode each operand, sign extending, etc. as appropriate. */ + for (i = 0; i < opc->num_operands; i++) + { + const struct tilepro_operand *op = + &tilepro_operands[opc->operands[pipe][i]]; + int opval = op->extract (bits); + if (op->is_signed) + { + /* Sign-extend the operand. */ + int shift = (int)((sizeof(int) * 8) - op->num_bits); + opval = (opval << shift) >> shift; + } + + /* Adjust PC-relative scaled branch offsets. */ + if (op->type == TILEPRO_OP_TYPE_ADDRESS) + { + opval *= TILEPRO_BUNDLE_SIZE_IN_BYTES; + opval += (int)pc; + } + + /* Record the final value. */ + d->operands[i] = op; + d->operand_values[i] = opval; + } + } + + return num_instructions; +} diff --git a/arch/tile/kernel/tile-desc_64.c b/arch/tile/kernel/tile-desc_64.c new file mode 100644 index 00000000..65b5f8ac --- /dev/null +++ b/arch/tile/kernel/tile-desc_64.c @@ -0,0 +1,2218 @@ +/* TILE-Gx opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * + * + * + */ + +/* This define is BFD_RELOC_##x for real bfd, or -1 for everyone else. */ +#define BFD_RELOC(x) -1 + +/* Special registers. */ +#define TREG_LR 55 +#define TREG_SN 56 +#define TREG_ZERO 63 + +#include <linux/stddef.h> +#include <asm/tile-desc.h> + +const struct tilegx_opcode tilegx_opcodes[334] = +{ + { "bpt", TILEGX_OPC_BPT, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "info", TILEGX_OPC_INFO, 0xf, 1, TREG_ZERO, 1, + { { 0 }, { 1 }, { 2 }, { 3 }, { 0, } }, + }, + { "infol", TILEGX_OPC_INFOL, 0x3, 1, TREG_ZERO, 1, + { { 4 }, { 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "move", TILEGX_OPC_MOVE, 0xf, 2, TREG_ZERO, 1, + { { 6, 7 }, { 8, 9 }, { 10, 11 }, { 12, 13 }, { 0, } }, + }, + { "movei", TILEGX_OPC_MOVEI, 0xf, 2, TREG_ZERO, 1, + { { 6, 0 }, { 8, 1 }, { 10, 2 }, { 12, 3 }, { 0, } }, + }, + { "moveli", TILEGX_OPC_MOVELI, 0x3, 2, TREG_ZERO, 1, + { { 6, 4 }, { 8, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch", TILEGX_OPC_PREFETCH, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_add_l1", TILEGX_OPC_PREFETCH_ADD_L1, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l1_fault", TILEGX_OPC_PREFETCH_ADD_L1_FAULT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l2", TILEGX_OPC_PREFETCH_ADD_L2, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l2_fault", TILEGX_OPC_PREFETCH_ADD_L2_FAULT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l3", TILEGX_OPC_PREFETCH_ADD_L3, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l3_fault", TILEGX_OPC_PREFETCH_ADD_L3_FAULT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_l1", TILEGX_OPC_PREFETCH_L1, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l1_fault", TILEGX_OPC_PREFETCH_L1_FAULT, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l2", TILEGX_OPC_PREFETCH_L2, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l2_fault", TILEGX_OPC_PREFETCH_L2_FAULT, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l3", TILEGX_OPC_PREFETCH_L3, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l3_fault", TILEGX_OPC_PREFETCH_L3_FAULT, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "raise", TILEGX_OPC_RAISE, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "add", TILEGX_OPC_ADD, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "addi", TILEGX_OPC_ADDI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "addli", TILEGX_OPC_ADDLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 4 }, { 8, 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "addx", TILEGX_OPC_ADDX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "addxi", TILEGX_OPC_ADDXI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "addxli", TILEGX_OPC_ADDXLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 4 }, { 8, 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "addxsc", TILEGX_OPC_ADDXSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "and", TILEGX_OPC_AND, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "andi", TILEGX_OPC_ANDI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "beqz", TILEGX_OPC_BEQZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "beqzt", TILEGX_OPC_BEQZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bfexts", TILEGX_OPC_BFEXTS, 0x1, 4, TREG_ZERO, 1, + { { 6, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "bfextu", TILEGX_OPC_BFEXTU, 0x1, 4, TREG_ZERO, 1, + { { 6, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "bfins", TILEGX_OPC_BFINS, 0x1, 4, TREG_ZERO, 1, + { { 23, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "bgez", TILEGX_OPC_BGEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgezt", TILEGX_OPC_BGEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgtz", TILEGX_OPC_BGTZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgtzt", TILEGX_OPC_BGTZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blbc", TILEGX_OPC_BLBC, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blbct", TILEGX_OPC_BLBCT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blbs", TILEGX_OPC_BLBS, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blbst", TILEGX_OPC_BLBST, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blez", TILEGX_OPC_BLEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blezt", TILEGX_OPC_BLEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bltz", TILEGX_OPC_BLTZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bltzt", TILEGX_OPC_BLTZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnez", TILEGX_OPC_BNEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnezt", TILEGX_OPC_BNEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "clz", TILEGX_OPC_CLZ, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "cmoveqz", TILEGX_OPC_CMOVEQZ, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "cmovnez", TILEGX_OPC_CMOVNEZ, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "cmpeq", TILEGX_OPC_CMPEQ, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmpeqi", TILEGX_OPC_CMPEQI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "cmpexch", TILEGX_OPC_CMPEXCH, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "cmpexch4", TILEGX_OPC_CMPEXCH4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "cmples", TILEGX_OPC_CMPLES, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmpleu", TILEGX_OPC_CMPLEU, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmplts", TILEGX_OPC_CMPLTS, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmpltsi", TILEGX_OPC_CMPLTSI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "cmpltu", TILEGX_OPC_CMPLTU, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmpltui", TILEGX_OPC_CMPLTUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "cmpne", TILEGX_OPC_CMPNE, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmul", TILEGX_OPC_CMUL, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmula", TILEGX_OPC_CMULA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulaf", TILEGX_OPC_CMULAF, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulf", TILEGX_OPC_CMULF, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulfr", TILEGX_OPC_CMULFR, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulh", TILEGX_OPC_CMULH, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulhr", TILEGX_OPC_CMULHR, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_32", TILEGX_OPC_CRC32_32, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_8", TILEGX_OPC_CRC32_8, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "ctz", TILEGX_OPC_CTZ, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "dblalign", TILEGX_OPC_DBLALIGN, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "dblalign2", TILEGX_OPC_DBLALIGN2, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "dblalign4", TILEGX_OPC_DBLALIGN4, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "dblalign6", TILEGX_OPC_DBLALIGN6, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "drain", TILEGX_OPC_DRAIN, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "dtlbpr", TILEGX_OPC_DTLBPR, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "exch", TILEGX_OPC_EXCH, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "exch4", TILEGX_OPC_EXCH4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_add_flags", TILEGX_OPC_FDOUBLE_ADD_FLAGS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_addsub", TILEGX_OPC_FDOUBLE_ADDSUB, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_mul_flags", TILEGX_OPC_FDOUBLE_MUL_FLAGS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_pack1", TILEGX_OPC_FDOUBLE_PACK1, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_pack2", TILEGX_OPC_FDOUBLE_PACK2, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_sub_flags", TILEGX_OPC_FDOUBLE_SUB_FLAGS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_unpack_max", TILEGX_OPC_FDOUBLE_UNPACK_MAX, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_unpack_min", TILEGX_OPC_FDOUBLE_UNPACK_MIN, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchadd", TILEGX_OPC_FETCHADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchadd4", TILEGX_OPC_FETCHADD4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchaddgez", TILEGX_OPC_FETCHADDGEZ, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchaddgez4", TILEGX_OPC_FETCHADDGEZ4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchand", TILEGX_OPC_FETCHAND, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchand4", TILEGX_OPC_FETCHAND4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchor", TILEGX_OPC_FETCHOR, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchor4", TILEGX_OPC_FETCHOR4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "finv", TILEGX_OPC_FINV, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "flush", TILEGX_OPC_FLUSH, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "flushwb", TILEGX_OPC_FLUSHWB, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "fnop", TILEGX_OPC_FNOP, 0xf, 0, TREG_ZERO, 1, + { { }, { }, { }, { }, { 0, } }, + }, + { "fsingle_add1", TILEGX_OPC_FSINGLE_ADD1, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_addsub2", TILEGX_OPC_FSINGLE_ADDSUB2, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_mul1", TILEGX_OPC_FSINGLE_MUL1, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_mul2", TILEGX_OPC_FSINGLE_MUL2, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_pack1", TILEGX_OPC_FSINGLE_PACK1, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "fsingle_pack2", TILEGX_OPC_FSINGLE_PACK2, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_sub1", TILEGX_OPC_FSINGLE_SUB1, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "icoh", TILEGX_OPC_ICOH, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ill", TILEGX_OPC_ILL, 0xa, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { }, { 0, } }, + }, + { "inv", TILEGX_OPC_INV, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "iret", TILEGX_OPC_IRET, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "j", TILEGX_OPC_J, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } }, + }, + { "jal", TILEGX_OPC_JAL, 0x2, 1, TREG_LR, 1, + { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } }, + }, + { "jalr", TILEGX_OPC_JALR, 0xa, 1, TREG_LR, 1, + { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } }, + }, + { "jalrp", TILEGX_OPC_JALRP, 0xa, 1, TREG_LR, 1, + { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } }, + }, + { "jr", TILEGX_OPC_JR, 0xa, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } }, + }, + { "jrp", TILEGX_OPC_JRP, 0xa, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } }, + }, + { "ld", TILEGX_OPC_LD, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld1s", TILEGX_OPC_LD1S, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld1s_add", TILEGX_OPC_LD1S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld1u", TILEGX_OPC_LD1U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld1u_add", TILEGX_OPC_LD1U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld2s", TILEGX_OPC_LD2S, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld2s_add", TILEGX_OPC_LD2S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld2u", TILEGX_OPC_LD2U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld2u_add", TILEGX_OPC_LD2U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld4s", TILEGX_OPC_LD4S, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld4s_add", TILEGX_OPC_LD4S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld4u", TILEGX_OPC_LD4U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld4u_add", TILEGX_OPC_LD4U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld_add", TILEGX_OPC_LD_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldna", TILEGX_OPC_LDNA, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldna_add", TILEGX_OPC_LDNA_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt", TILEGX_OPC_LDNT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt1s", TILEGX_OPC_LDNT1S, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt1s_add", TILEGX_OPC_LDNT1S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt1u", TILEGX_OPC_LDNT1U, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt1u_add", TILEGX_OPC_LDNT1U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt2s", TILEGX_OPC_LDNT2S, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt2s_add", TILEGX_OPC_LDNT2S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt2u", TILEGX_OPC_LDNT2U, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt2u_add", TILEGX_OPC_LDNT2U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt4s", TILEGX_OPC_LDNT4S, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt4s_add", TILEGX_OPC_LDNT4S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt4u", TILEGX_OPC_LDNT4U, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt4u_add", TILEGX_OPC_LDNT4U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt_add", TILEGX_OPC_LDNT_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lnk", TILEGX_OPC_LNK, 0xa, 1, TREG_ZERO, 1, + { { 0, }, { 8 }, { 0, }, { 12 }, { 0, } }, + }, + { "mf", TILEGX_OPC_MF, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "mfspr", TILEGX_OPC_MFSPR, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 27 }, { 0, }, { 0, }, { 0, } }, + }, + { "mm", TILEGX_OPC_MM, 0x1, 4, TREG_ZERO, 1, + { { 23, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mnz", TILEGX_OPC_MNZ, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "mtspr", TILEGX_OPC_MTSPR, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 28, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hs_hs", TILEGX_OPC_MUL_HS_HS, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mul_hs_hu", TILEGX_OPC_MUL_HS_HU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hs_ls", TILEGX_OPC_MUL_HS_LS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hs_lu", TILEGX_OPC_MUL_HS_LU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hu_hu", TILEGX_OPC_MUL_HU_HU, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mul_hu_ls", TILEGX_OPC_MUL_HU_LS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hu_lu", TILEGX_OPC_MUL_HU_LU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_ls_ls", TILEGX_OPC_MUL_LS_LS, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mul_ls_lu", TILEGX_OPC_MUL_LS_LU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_lu_lu", TILEGX_OPC_MUL_LU_LU, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mula_hs_hs", TILEGX_OPC_MULA_HS_HS, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mula_hs_hu", TILEGX_OPC_MULA_HS_HU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_hs_ls", TILEGX_OPC_MULA_HS_LS, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_hs_lu", TILEGX_OPC_MULA_HS_LU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_hu_hu", TILEGX_OPC_MULA_HU_HU, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mula_hu_ls", TILEGX_OPC_MULA_HU_LS, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_hu_lu", TILEGX_OPC_MULA_HU_LU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_ls_ls", TILEGX_OPC_MULA_LS_LS, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mula_ls_lu", TILEGX_OPC_MULA_LS_LU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_lu_lu", TILEGX_OPC_MULA_LU_LU, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mulax", TILEGX_OPC_MULAX, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mulx", TILEGX_OPC_MULX, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mz", TILEGX_OPC_MZ, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "nap", TILEGX_OPC_NAP, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "nop", TILEGX_OPC_NOP, 0xf, 0, TREG_ZERO, 1, + { { }, { }, { }, { }, { 0, } }, + }, + { "nor", TILEGX_OPC_NOR, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "or", TILEGX_OPC_OR, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "ori", TILEGX_OPC_ORI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "pcnt", TILEGX_OPC_PCNT, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "revbits", TILEGX_OPC_REVBITS, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "revbytes", TILEGX_OPC_REVBYTES, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "rotl", TILEGX_OPC_ROTL, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "rotli", TILEGX_OPC_ROTLI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, + }, + { "shl", TILEGX_OPC_SHL, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl16insli", TILEGX_OPC_SHL16INSLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 4 }, { 8, 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "shl1add", TILEGX_OPC_SHL1ADD, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl1addx", TILEGX_OPC_SHL1ADDX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl2add", TILEGX_OPC_SHL2ADD, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl2addx", TILEGX_OPC_SHL2ADDX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl3add", TILEGX_OPC_SHL3ADD, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl3addx", TILEGX_OPC_SHL3ADDX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shli", TILEGX_OPC_SHLI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, + }, + { "shlx", TILEGX_OPC_SHLX, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlxi", TILEGX_OPC_SHLXI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrs", TILEGX_OPC_SHRS, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shrsi", TILEGX_OPC_SHRSI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, + }, + { "shru", TILEGX_OPC_SHRU, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shrui", TILEGX_OPC_SHRUI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, + }, + { "shrux", TILEGX_OPC_SHRUX, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shruxi", TILEGX_OPC_SHRUXI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "shufflebytes", TILEGX_OPC_SHUFFLEBYTES, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "st", TILEGX_OPC_ST, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } }, + }, + { "st1", TILEGX_OPC_ST1, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } }, + }, + { "st1_add", TILEGX_OPC_ST1_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "st2", TILEGX_OPC_ST2, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } }, + }, + { "st2_add", TILEGX_OPC_ST2_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "st4", TILEGX_OPC_ST4, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } }, + }, + { "st4_add", TILEGX_OPC_ST4_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "st_add", TILEGX_OPC_ST_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt", TILEGX_OPC_STNT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt1", TILEGX_OPC_STNT1, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt1_add", TILEGX_OPC_STNT1_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt2", TILEGX_OPC_STNT2, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt2_add", TILEGX_OPC_STNT2_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt4", TILEGX_OPC_STNT4, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt4_add", TILEGX_OPC_STNT4_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt_add", TILEGX_OPC_STNT_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "sub", TILEGX_OPC_SUB, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "subx", TILEGX_OPC_SUBX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "subxsc", TILEGX_OPC_SUBXSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "swint0", TILEGX_OPC_SWINT0, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint1", TILEGX_OPC_SWINT1, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint2", TILEGX_OPC_SWINT2, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint3", TILEGX_OPC_SWINT3, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "tblidxb0", TILEGX_OPC_TBLIDXB0, 0x5, 2, TREG_ZERO, 1, + { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, + }, + { "tblidxb1", TILEGX_OPC_TBLIDXB1, 0x5, 2, TREG_ZERO, 1, + { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, + }, + { "tblidxb2", TILEGX_OPC_TBLIDXB2, 0x5, 2, TREG_ZERO, 1, + { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, + }, + { "tblidxb3", TILEGX_OPC_TBLIDXB3, 0x5, 2, TREG_ZERO, 1, + { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, + }, + { "v1add", TILEGX_OPC_V1ADD, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1addi", TILEGX_OPC_V1ADDI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1adduc", TILEGX_OPC_V1ADDUC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1adiffu", TILEGX_OPC_V1ADIFFU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1avgu", TILEGX_OPC_V1AVGU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpeq", TILEGX_OPC_V1CMPEQ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpeqi", TILEGX_OPC_V1CMPEQI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmples", TILEGX_OPC_V1CMPLES, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpleu", TILEGX_OPC_V1CMPLEU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmplts", TILEGX_OPC_V1CMPLTS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpltsi", TILEGX_OPC_V1CMPLTSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpltu", TILEGX_OPC_V1CMPLTU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpltui", TILEGX_OPC_V1CMPLTUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpne", TILEGX_OPC_V1CMPNE, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1ddotpu", TILEGX_OPC_V1DDOTPU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1ddotpua", TILEGX_OPC_V1DDOTPUA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1ddotpus", TILEGX_OPC_V1DDOTPUS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1ddotpusa", TILEGX_OPC_V1DDOTPUSA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotp", TILEGX_OPC_V1DOTP, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpa", TILEGX_OPC_V1DOTPA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpu", TILEGX_OPC_V1DOTPU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpua", TILEGX_OPC_V1DOTPUA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpus", TILEGX_OPC_V1DOTPUS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpusa", TILEGX_OPC_V1DOTPUSA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1int_h", TILEGX_OPC_V1INT_H, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1int_l", TILEGX_OPC_V1INT_L, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1maxu", TILEGX_OPC_V1MAXU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1maxui", TILEGX_OPC_V1MAXUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1minu", TILEGX_OPC_V1MINU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1minui", TILEGX_OPC_V1MINUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1mnz", TILEGX_OPC_V1MNZ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1multu", TILEGX_OPC_V1MULTU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1mulu", TILEGX_OPC_V1MULU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1mulus", TILEGX_OPC_V1MULUS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1mz", TILEGX_OPC_V1MZ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1sadau", TILEGX_OPC_V1SADAU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1sadu", TILEGX_OPC_V1SADU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shl", TILEGX_OPC_V1SHL, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shli", TILEGX_OPC_V1SHLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shrs", TILEGX_OPC_V1SHRS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shrsi", TILEGX_OPC_V1SHRSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shru", TILEGX_OPC_V1SHRU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shrui", TILEGX_OPC_V1SHRUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1sub", TILEGX_OPC_V1SUB, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1subuc", TILEGX_OPC_V1SUBUC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2add", TILEGX_OPC_V2ADD, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2addi", TILEGX_OPC_V2ADDI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2addsc", TILEGX_OPC_V2ADDSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2adiffs", TILEGX_OPC_V2ADIFFS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2avgs", TILEGX_OPC_V2AVGS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpeq", TILEGX_OPC_V2CMPEQ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpeqi", TILEGX_OPC_V2CMPEQI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmples", TILEGX_OPC_V2CMPLES, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpleu", TILEGX_OPC_V2CMPLEU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmplts", TILEGX_OPC_V2CMPLTS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpltsi", TILEGX_OPC_V2CMPLTSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpltu", TILEGX_OPC_V2CMPLTU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpltui", TILEGX_OPC_V2CMPLTUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpne", TILEGX_OPC_V2CMPNE, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2dotp", TILEGX_OPC_V2DOTP, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2dotpa", TILEGX_OPC_V2DOTPA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2int_h", TILEGX_OPC_V2INT_H, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2int_l", TILEGX_OPC_V2INT_L, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2maxs", TILEGX_OPC_V2MAXS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2maxsi", TILEGX_OPC_V2MAXSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mins", TILEGX_OPC_V2MINS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2minsi", TILEGX_OPC_V2MINSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mnz", TILEGX_OPC_V2MNZ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mulfsc", TILEGX_OPC_V2MULFSC, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2muls", TILEGX_OPC_V2MULS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mults", TILEGX_OPC_V2MULTS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mz", TILEGX_OPC_V2MZ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2packh", TILEGX_OPC_V2PACKH, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2packl", TILEGX_OPC_V2PACKL, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2packuc", TILEGX_OPC_V2PACKUC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sadas", TILEGX_OPC_V2SADAS, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sadau", TILEGX_OPC_V2SADAU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sads", TILEGX_OPC_V2SADS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sadu", TILEGX_OPC_V2SADU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shl", TILEGX_OPC_V2SHL, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shli", TILEGX_OPC_V2SHLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shlsc", TILEGX_OPC_V2SHLSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shrs", TILEGX_OPC_V2SHRS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shrsi", TILEGX_OPC_V2SHRSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shru", TILEGX_OPC_V2SHRU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shrui", TILEGX_OPC_V2SHRUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sub", TILEGX_OPC_V2SUB, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2subsc", TILEGX_OPC_V2SUBSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4add", TILEGX_OPC_V4ADD, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4addsc", TILEGX_OPC_V4ADDSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4int_h", TILEGX_OPC_V4INT_H, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4int_l", TILEGX_OPC_V4INT_L, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4packsc", TILEGX_OPC_V4PACKSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4shl", TILEGX_OPC_V4SHL, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4shlsc", TILEGX_OPC_V4SHLSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4shrs", TILEGX_OPC_V4SHRS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4shru", TILEGX_OPC_V4SHRU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4sub", TILEGX_OPC_V4SUB, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4subsc", TILEGX_OPC_V4SUBSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "wh64", TILEGX_OPC_WH64, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "xor", TILEGX_OPC_XOR, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "xori", TILEGX_OPC_XORI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { NULL, TILEGX_OPC_NONE, 0, 0, TREG_ZERO, 0, { { 0, } }, + } +}; +#define BITFIELD(start, size) ((start) | (((1 << (size)) - 1) << 6)) +#define CHILD(array_index) (TILEGX_OPC_NONE + (array_index)) + +static const unsigned short decode_X0_fsm[936] = +{ + BITFIELD(22, 9) /* index 0 */, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BFEXTS, + TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTU, + TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFINS, + TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_MM, + TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(528), CHILD(578), + CHILD(583), CHILD(588), CHILD(593), CHILD(598), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, CHILD(603), CHILD(620), CHILD(637), CHILD(654), CHILD(671), + CHILD(703), CHILD(797), CHILD(814), CHILD(831), CHILD(848), CHILD(865), + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, CHILD(889), TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + BITFIELD(6, 2) /* index 513 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518), + BITFIELD(8, 2) /* index 518 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523), + BITFIELD(10, 2) /* index 523 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI, + BITFIELD(20, 2) /* index 528 */, + TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548), + BITFIELD(6, 2) /* index 533 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538), + BITFIELD(8, 2) /* index 538 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543), + BITFIELD(10, 2) /* index 543 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(0, 2) /* index 548 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553), + BITFIELD(2, 2) /* index 553 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558), + BITFIELD(4, 2) /* index 558 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563), + BITFIELD(6, 2) /* index 563 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568), + BITFIELD(8, 2) /* index 568 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573), + BITFIELD(10, 2) /* index 573 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(20, 2) /* index 578 */, + TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, TILEGX_OPC_ORI, + BITFIELD(20, 2) /* index 583 */, + TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI, TILEGX_OPC_V1CMPLTSI, + TILEGX_OPC_V1CMPLTUI, + BITFIELD(20, 2) /* index 588 */, + TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI, TILEGX_OPC_V2ADDI, + TILEGX_OPC_V2CMPEQI, + BITFIELD(20, 2) /* index 593 */, + TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI, TILEGX_OPC_V2MAXSI, + TILEGX_OPC_V2MINSI, + BITFIELD(20, 2) /* index 598 */, + TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(18, 4) /* index 603 */, + TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD, + TILEGX_OPC_AND, TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_CMPEQ, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + TILEGX_OPC_CMPNE, TILEGX_OPC_CMULAF, TILEGX_OPC_CMULA, TILEGX_OPC_CMULFR, + BITFIELD(18, 4) /* index 620 */, + TILEGX_OPC_CMULF, TILEGX_OPC_CMULHR, TILEGX_OPC_CMULH, TILEGX_OPC_CMUL, + TILEGX_OPC_CRC32_32, TILEGX_OPC_CRC32_8, TILEGX_OPC_DBLALIGN2, + TILEGX_OPC_DBLALIGN4, TILEGX_OPC_DBLALIGN6, TILEGX_OPC_DBLALIGN, + TILEGX_OPC_FDOUBLE_ADDSUB, TILEGX_OPC_FDOUBLE_ADD_FLAGS, + TILEGX_OPC_FDOUBLE_MUL_FLAGS, TILEGX_OPC_FDOUBLE_PACK1, + TILEGX_OPC_FDOUBLE_PACK2, TILEGX_OPC_FDOUBLE_SUB_FLAGS, + BITFIELD(18, 4) /* index 637 */, + TILEGX_OPC_FDOUBLE_UNPACK_MAX, TILEGX_OPC_FDOUBLE_UNPACK_MIN, + TILEGX_OPC_FSINGLE_ADD1, TILEGX_OPC_FSINGLE_ADDSUB2, + TILEGX_OPC_FSINGLE_MUL1, TILEGX_OPC_FSINGLE_MUL2, TILEGX_OPC_FSINGLE_PACK2, + TILEGX_OPC_FSINGLE_SUB1, TILEGX_OPC_MNZ, TILEGX_OPC_MULAX, + TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HS_HU, TILEGX_OPC_MULA_HS_LS, + TILEGX_OPC_MULA_HS_LU, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_HU_LS, + BITFIELD(18, 4) /* index 654 */, + TILEGX_OPC_MULA_HU_LU, TILEGX_OPC_MULA_LS_LS, TILEGX_OPC_MULA_LS_LU, + TILEGX_OPC_MULA_LU_LU, TILEGX_OPC_MULX, TILEGX_OPC_MUL_HS_HS, + TILEGX_OPC_MUL_HS_HU, TILEGX_OPC_MUL_HS_LS, TILEGX_OPC_MUL_HS_LU, + TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_HU_LS, TILEGX_OPC_MUL_HU_LU, + TILEGX_OPC_MUL_LS_LS, TILEGX_OPC_MUL_LS_LU, TILEGX_OPC_MUL_LU_LU, + TILEGX_OPC_MZ, + BITFIELD(18, 4) /* index 671 */, + TILEGX_OPC_NOR, CHILD(688), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL, + TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_SHUFFLEBYTES, + TILEGX_OPC_SUBXSC, + BITFIELD(12, 2) /* index 688 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(693), + BITFIELD(14, 2) /* index 693 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(698), + BITFIELD(16, 2) /* index 698 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(18, 4) /* index 703 */, + TILEGX_OPC_SUBX, TILEGX_OPC_SUB, CHILD(720), TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADIFFU, TILEGX_OPC_V1AVGU, + TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1DDOTPUSA, TILEGX_OPC_V1DDOTPUS, TILEGX_OPC_V1DOTPA, + BITFIELD(12, 4) /* index 720 */, + TILEGX_OPC_NONE, CHILD(737), CHILD(742), CHILD(747), CHILD(752), CHILD(757), + CHILD(762), CHILD(767), CHILD(772), CHILD(777), CHILD(782), CHILD(787), + CHILD(792), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 737 */, + TILEGX_OPC_CLZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 742 */, + TILEGX_OPC_CTZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 747 */, + TILEGX_OPC_FNOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 752 */, + TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 757 */, + TILEGX_OPC_NOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 762 */, + TILEGX_OPC_PCNT, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 767 */, + TILEGX_OPC_REVBITS, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 772 */, + TILEGX_OPC_REVBYTES, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 777 */, + TILEGX_OPC_TBLIDXB0, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 782 */, + TILEGX_OPC_TBLIDXB1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 787 */, + TILEGX_OPC_TBLIDXB2, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 792 */, + TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(18, 4) /* index 797 */, + TILEGX_OPC_V1DOTPUSA, TILEGX_OPC_V1DOTPUS, TILEGX_OPC_V1DOTP, + TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1MAXU, + TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MULTU, TILEGX_OPC_V1MULUS, + TILEGX_OPC_V1MULU, TILEGX_OPC_V1MZ, TILEGX_OPC_V1SADAU, TILEGX_OPC_V1SADU, + TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS, + BITFIELD(18, 4) /* index 814 */, + TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC, TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC, + TILEGX_OPC_V2ADD, TILEGX_OPC_V2ADIFFS, TILEGX_OPC_V2AVGS, + TILEGX_OPC_V2CMPEQ, TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU, + TILEGX_OPC_V2CMPLTS, TILEGX_OPC_V2CMPLTU, TILEGX_OPC_V2CMPNE, + TILEGX_OPC_V2DOTPA, TILEGX_OPC_V2DOTP, TILEGX_OPC_V2INT_H, + BITFIELD(18, 4) /* index 831 */, + TILEGX_OPC_V2INT_L, TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ, + TILEGX_OPC_V2MULFSC, TILEGX_OPC_V2MULS, TILEGX_OPC_V2MULTS, TILEGX_OPC_V2MZ, + TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC, + TILEGX_OPC_V2SADAS, TILEGX_OPC_V2SADAU, TILEGX_OPC_V2SADS, + TILEGX_OPC_V2SADU, TILEGX_OPC_V2SHLSC, + BITFIELD(18, 4) /* index 848 */, + TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU, TILEGX_OPC_V2SUBSC, + TILEGX_OPC_V2SUB, TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H, + TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC, + TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC, + TILEGX_OPC_V4SUB, + BITFIELD(18, 3) /* index 865 */, + CHILD(874), CHILD(877), CHILD(880), CHILD(883), CHILD(886), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 874 */, + TILEGX_OPC_XOR, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 877 */, + TILEGX_OPC_V1DDOTPUA, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 880 */, + TILEGX_OPC_V1DDOTPU, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 883 */, + TILEGX_OPC_V1DOTPUA, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 886 */, + TILEGX_OPC_V1DOTPU, TILEGX_OPC_NONE, + BITFIELD(18, 4) /* index 889 */, + TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI, + TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI, + TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI, + TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, + BITFIELD(0, 2) /* index 906 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(911), + BITFIELD(2, 2) /* index 911 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(916), + BITFIELD(4, 2) /* index 916 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(921), + BITFIELD(6, 2) /* index 921 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(926), + BITFIELD(8, 2) /* index 926 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(931), + BITFIELD(10, 2) /* index 931 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + TILEGX_OPC_INFOL, +}; + +static const unsigned short decode_X1_fsm[1206] = +{ + BITFIELD(53, 9) /* index 0 */, + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BEQZT, + TILEGX_OPC_BEQZT, TILEGX_OPC_BEQZ, TILEGX_OPC_BEQZ, TILEGX_OPC_BGEZT, + TILEGX_OPC_BGEZT, TILEGX_OPC_BGEZ, TILEGX_OPC_BGEZ, TILEGX_OPC_BGTZT, + TILEGX_OPC_BGTZT, TILEGX_OPC_BGTZ, TILEGX_OPC_BGTZ, TILEGX_OPC_BLBCT, + TILEGX_OPC_BLBCT, TILEGX_OPC_BLBC, TILEGX_OPC_BLBC, TILEGX_OPC_BLBST, + TILEGX_OPC_BLBST, TILEGX_OPC_BLBS, TILEGX_OPC_BLBS, TILEGX_OPC_BLEZT, + TILEGX_OPC_BLEZT, TILEGX_OPC_BLEZ, TILEGX_OPC_BLEZ, TILEGX_OPC_BLTZT, + TILEGX_OPC_BLTZT, TILEGX_OPC_BLTZ, TILEGX_OPC_BLTZ, TILEGX_OPC_BNEZT, + TILEGX_OPC_BNEZT, TILEGX_OPC_BNEZ, TILEGX_OPC_BNEZ, CHILD(528), CHILD(578), + CHILD(598), CHILD(663), CHILD(683), CHILD(688), CHILD(693), CHILD(698), + CHILD(703), CHILD(708), CHILD(713), CHILD(718), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + CHILD(723), CHILD(740), CHILD(772), CHILD(789), CHILD(1108), CHILD(1125), + CHILD(1142), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1159), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), + BITFIELD(37, 2) /* index 513 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518), + BITFIELD(39, 2) /* index 518 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523), + BITFIELD(41, 2) /* index 523 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI, + BITFIELD(51, 2) /* index 528 */, + TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548), + BITFIELD(37, 2) /* index 533 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538), + BITFIELD(39, 2) /* index 538 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543), + BITFIELD(41, 2) /* index 543 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(31, 2) /* index 548 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553), + BITFIELD(33, 2) /* index 553 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558), + BITFIELD(35, 2) /* index 558 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563), + BITFIELD(37, 2) /* index 563 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568), + BITFIELD(39, 2) /* index 568 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573), + BITFIELD(41, 2) /* index 573 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(51, 2) /* index 578 */, + TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, CHILD(583), + BITFIELD(31, 2) /* index 583 */, + TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(588), + BITFIELD(33, 2) /* index 588 */, + TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(593), + BITFIELD(35, 2) /* index 593 */, + TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, + TILEGX_OPC_PREFETCH_ADD_L1_FAULT, + BITFIELD(51, 2) /* index 598 */, + CHILD(603), CHILD(618), CHILD(633), CHILD(648), + BITFIELD(31, 2) /* index 603 */, + TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(608), + BITFIELD(33, 2) /* index 608 */, + TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(613), + BITFIELD(35, 2) /* index 613 */, + TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, + TILEGX_OPC_PREFETCH_ADD_L1, + BITFIELD(31, 2) /* index 618 */, + TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(623), + BITFIELD(33, 2) /* index 623 */, + TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(628), + BITFIELD(35, 2) /* index 628 */, + TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, + TILEGX_OPC_PREFETCH_ADD_L2_FAULT, + BITFIELD(31, 2) /* index 633 */, + TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(638), + BITFIELD(33, 2) /* index 638 */, + TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(643), + BITFIELD(35, 2) /* index 643 */, + TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, + TILEGX_OPC_PREFETCH_ADD_L2, + BITFIELD(31, 2) /* index 648 */, + TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, CHILD(653), + BITFIELD(33, 2) /* index 653 */, + TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, CHILD(658), + BITFIELD(35, 2) /* index 658 */, + TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, + TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + BITFIELD(51, 2) /* index 663 */, + CHILD(668), TILEGX_OPC_LDNT1S_ADD, TILEGX_OPC_LDNT1U_ADD, + TILEGX_OPC_LDNT2S_ADD, + BITFIELD(31, 2) /* index 668 */, + TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(673), + BITFIELD(33, 2) /* index 673 */, + TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(678), + BITFIELD(35, 2) /* index 678 */, + TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, + TILEGX_OPC_PREFETCH_ADD_L3, + BITFIELD(51, 2) /* index 683 */, + TILEGX_OPC_LDNT2U_ADD, TILEGX_OPC_LDNT4S_ADD, TILEGX_OPC_LDNT4U_ADD, + TILEGX_OPC_LDNT_ADD, + BITFIELD(51, 2) /* index 688 */, + TILEGX_OPC_LD_ADD, TILEGX_OPC_LDNA_ADD, TILEGX_OPC_MFSPR, TILEGX_OPC_MTSPR, + BITFIELD(51, 2) /* index 693 */, + TILEGX_OPC_ORI, TILEGX_OPC_ST1_ADD, TILEGX_OPC_ST2_ADD, TILEGX_OPC_ST4_ADD, + BITFIELD(51, 2) /* index 698 */, + TILEGX_OPC_STNT1_ADD, TILEGX_OPC_STNT2_ADD, TILEGX_OPC_STNT4_ADD, + TILEGX_OPC_STNT_ADD, + BITFIELD(51, 2) /* index 703 */, + TILEGX_OPC_ST_ADD, TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI, + TILEGX_OPC_V1CMPLTSI, + BITFIELD(51, 2) /* index 708 */, + TILEGX_OPC_V1CMPLTUI, TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI, + TILEGX_OPC_V2ADDI, + BITFIELD(51, 2) /* index 713 */, + TILEGX_OPC_V2CMPEQI, TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI, + TILEGX_OPC_V2MAXSI, + BITFIELD(51, 2) /* index 718 */, + TILEGX_OPC_V2MINSI, TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(49, 4) /* index 723 */, + TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD, + TILEGX_OPC_AND, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPEXCH4, TILEGX_OPC_CMPEXCH, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + TILEGX_OPC_CMPNE, TILEGX_OPC_DBLALIGN2, TILEGX_OPC_DBLALIGN4, + TILEGX_OPC_DBLALIGN6, + BITFIELD(49, 4) /* index 740 */, + TILEGX_OPC_EXCH4, TILEGX_OPC_EXCH, TILEGX_OPC_FETCHADD4, + TILEGX_OPC_FETCHADDGEZ4, TILEGX_OPC_FETCHADDGEZ, TILEGX_OPC_FETCHADD, + TILEGX_OPC_FETCHAND4, TILEGX_OPC_FETCHAND, TILEGX_OPC_FETCHOR4, + TILEGX_OPC_FETCHOR, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, TILEGX_OPC_NOR, + CHILD(757), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX, + BITFIELD(43, 2) /* index 757 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(762), + BITFIELD(45, 2) /* index 762 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(767), + BITFIELD(47, 2) /* index 767 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(49, 4) /* index 772 */, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL, + TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_ST1, + TILEGX_OPC_ST2, TILEGX_OPC_ST4, TILEGX_OPC_STNT1, TILEGX_OPC_STNT2, + TILEGX_OPC_STNT4, + BITFIELD(46, 7) /* index 789 */, + TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, + TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, + TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, + TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_SUBXSC, + TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, + TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBX, + TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, + TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUB, + TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, + TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, CHILD(918), CHILD(927), + CHILD(1006), CHILD(1090), CHILD(1099), TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, + TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, + TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, + TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, + TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, + TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, + TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, + TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, + TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, + BITFIELD(43, 3) /* index 918 */, + TILEGX_OPC_NONE, TILEGX_OPC_DRAIN, TILEGX_OPC_DTLBPR, TILEGX_OPC_FINV, + TILEGX_OPC_FLUSHWB, TILEGX_OPC_FLUSH, TILEGX_OPC_FNOP, TILEGX_OPC_ICOH, + BITFIELD(43, 3) /* index 927 */, + CHILD(936), TILEGX_OPC_INV, TILEGX_OPC_IRET, TILEGX_OPC_JALRP, + TILEGX_OPC_JALR, TILEGX_OPC_JRP, TILEGX_OPC_JR, CHILD(991), + BITFIELD(31, 2) /* index 936 */, + CHILD(941), CHILD(966), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(33, 2) /* index 941 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(946), + BITFIELD(35, 2) /* index 946 */, + TILEGX_OPC_ILL, CHILD(951), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(37, 2) /* index 951 */, + TILEGX_OPC_ILL, CHILD(956), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(39, 2) /* index 956 */, + TILEGX_OPC_ILL, CHILD(961), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(41, 2) /* index 961 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_BPT, TILEGX_OPC_ILL, + BITFIELD(33, 2) /* index 966 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(971), + BITFIELD(35, 2) /* index 971 */, + TILEGX_OPC_ILL, CHILD(976), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(37, 2) /* index 976 */, + TILEGX_OPC_ILL, CHILD(981), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(39, 2) /* index 981 */, + TILEGX_OPC_ILL, CHILD(986), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(41, 2) /* index 986 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_RAISE, TILEGX_OPC_ILL, + BITFIELD(31, 2) /* index 991 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(996), + BITFIELD(33, 2) /* index 996 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(1001), + BITFIELD(35, 2) /* index 1001 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, + TILEGX_OPC_PREFETCH_L1_FAULT, + BITFIELD(43, 3) /* index 1006 */, + CHILD(1015), CHILD(1030), CHILD(1045), CHILD(1060), CHILD(1075), + TILEGX_OPC_LDNA, TILEGX_OPC_LDNT1S, TILEGX_OPC_LDNT1U, + BITFIELD(31, 2) /* index 1015 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1020), + BITFIELD(33, 2) /* index 1020 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1025), + BITFIELD(35, 2) /* index 1025 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH, + BITFIELD(31, 2) /* index 1030 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1035), + BITFIELD(33, 2) /* index 1035 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1040), + BITFIELD(35, 2) /* index 1040 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, + TILEGX_OPC_PREFETCH_L2_FAULT, + BITFIELD(31, 2) /* index 1045 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1050), + BITFIELD(33, 2) /* index 1050 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1055), + BITFIELD(35, 2) /* index 1055 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2, + BITFIELD(31, 2) /* index 1060 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1065), + BITFIELD(33, 2) /* index 1065 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1070), + BITFIELD(35, 2) /* index 1070 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, + TILEGX_OPC_PREFETCH_L3_FAULT, + BITFIELD(31, 2) /* index 1075 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1080), + BITFIELD(33, 2) /* index 1080 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1085), + BITFIELD(35, 2) /* index 1085 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3, + BITFIELD(43, 3) /* index 1090 */, + TILEGX_OPC_LDNT2S, TILEGX_OPC_LDNT2U, TILEGX_OPC_LDNT4S, TILEGX_OPC_LDNT4U, + TILEGX_OPC_LDNT, TILEGX_OPC_LD, TILEGX_OPC_LNK, TILEGX_OPC_MF, + BITFIELD(43, 3) /* index 1099 */, + TILEGX_OPC_NAP, TILEGX_OPC_NOP, TILEGX_OPC_SWINT0, TILEGX_OPC_SWINT1, + TILEGX_OPC_SWINT2, TILEGX_OPC_SWINT3, TILEGX_OPC_WH64, TILEGX_OPC_NONE, + BITFIELD(49, 4) /* index 1108 */, + TILEGX_OPC_V1MAXU, TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MZ, + TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS, TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC, + TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC, TILEGX_OPC_V2ADD, TILEGX_OPC_V2CMPEQ, + TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU, TILEGX_OPC_V2CMPLTS, + TILEGX_OPC_V2CMPLTU, + BITFIELD(49, 4) /* index 1125 */, + TILEGX_OPC_V2CMPNE, TILEGX_OPC_V2INT_H, TILEGX_OPC_V2INT_L, + TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ, TILEGX_OPC_V2MZ, + TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC, + TILEGX_OPC_V2SHLSC, TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU, + TILEGX_OPC_V2SUBSC, TILEGX_OPC_V2SUB, + BITFIELD(49, 4) /* index 1142 */, + TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H, + TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC, + TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC, + TILEGX_OPC_V4SUB, TILEGX_OPC_XOR, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(49, 4) /* index 1159 */, + TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI, + TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI, + TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI, + TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, + BITFIELD(31, 2) /* index 1176 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1181), + BITFIELD(33, 2) /* index 1181 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1186), + BITFIELD(35, 2) /* index 1186 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1191), + BITFIELD(37, 2) /* index 1191 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1196), + BITFIELD(39, 2) /* index 1196 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1201), + BITFIELD(41, 2) /* index 1201 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + TILEGX_OPC_INFOL, +}; + +static const unsigned short decode_Y0_fsm[178] = +{ + BITFIELD(27, 4) /* index 0 */, + CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI, + TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(118), CHILD(123), + CHILD(128), CHILD(133), CHILD(153), CHILD(158), CHILD(163), CHILD(168), + CHILD(173), + BITFIELD(6, 2) /* index 17 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22), + BITFIELD(8, 2) /* index 22 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27), + BITFIELD(10, 2) /* index 27 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(0, 2) /* index 32 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37), + BITFIELD(2, 2) /* index 37 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42), + BITFIELD(4, 2) /* index 42 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47), + BITFIELD(6, 2) /* index 47 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52), + BITFIELD(8, 2) /* index 52 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57), + BITFIELD(10, 2) /* index 57 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(18, 2) /* index 62 */, + TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB, + BITFIELD(15, 5) /* index 67 */, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(100), + CHILD(109), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(12, 3) /* index 100 */, + TILEGX_OPC_NONE, TILEGX_OPC_CLZ, TILEGX_OPC_CTZ, TILEGX_OPC_FNOP, + TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NOP, TILEGX_OPC_PCNT, + TILEGX_OPC_REVBITS, + BITFIELD(12, 3) /* index 109 */, + TILEGX_OPC_REVBYTES, TILEGX_OPC_TBLIDXB0, TILEGX_OPC_TBLIDXB1, + TILEGX_OPC_TBLIDXB2, TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, + BITFIELD(18, 2) /* index 118 */, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + BITFIELD(18, 2) /* index 123 */, + TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE, TILEGX_OPC_MULAX, TILEGX_OPC_MULX, + BITFIELD(18, 2) /* index 128 */, + TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, + BITFIELD(18, 2) /* index 133 */, + TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(138), TILEGX_OPC_XOR, + BITFIELD(12, 2) /* index 138 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(143), + BITFIELD(14, 2) /* index 143 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(148), + BITFIELD(16, 2) /* index 148 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(18, 2) /* index 153 */, + TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU, + BITFIELD(18, 2) /* index 158 */, + TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX, + TILEGX_OPC_SHL3ADDX, + BITFIELD(18, 2) /* index 163 */, + TILEGX_OPC_MUL_HS_HS, TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_LS_LS, + TILEGX_OPC_MUL_LU_LU, + BITFIELD(18, 2) /* index 168 */, + TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_LS_LS, + TILEGX_OPC_MULA_LU_LU, + BITFIELD(18, 2) /* index 173 */, + TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, +}; + +static const unsigned short decode_Y1_fsm[167] = +{ + BITFIELD(58, 4) /* index 0 */, + TILEGX_OPC_NONE, CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI, + TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(117), CHILD(122), + CHILD(127), CHILD(132), CHILD(152), CHILD(157), CHILD(162), TILEGX_OPC_NONE, + BITFIELD(37, 2) /* index 17 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22), + BITFIELD(39, 2) /* index 22 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27), + BITFIELD(41, 2) /* index 27 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(31, 2) /* index 32 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37), + BITFIELD(33, 2) /* index 37 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42), + BITFIELD(35, 2) /* index 42 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47), + BITFIELD(37, 2) /* index 47 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52), + BITFIELD(39, 2) /* index 52 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57), + BITFIELD(41, 2) /* index 57 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(49, 2) /* index 62 */, + TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB, + BITFIELD(47, 4) /* index 67 */, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(84), + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(43, 3) /* index 84 */, + CHILD(93), CHILD(96), CHILD(99), CHILD(102), CHILD(105), CHILD(108), + CHILD(111), CHILD(114), + BITFIELD(46, 1) /* index 93 */, + TILEGX_OPC_NONE, TILEGX_OPC_FNOP, + BITFIELD(46, 1) /* index 96 */, + TILEGX_OPC_NONE, TILEGX_OPC_ILL, + BITFIELD(46, 1) /* index 99 */, + TILEGX_OPC_NONE, TILEGX_OPC_JALRP, + BITFIELD(46, 1) /* index 102 */, + TILEGX_OPC_NONE, TILEGX_OPC_JALR, + BITFIELD(46, 1) /* index 105 */, + TILEGX_OPC_NONE, TILEGX_OPC_JRP, + BITFIELD(46, 1) /* index 108 */, + TILEGX_OPC_NONE, TILEGX_OPC_JR, + BITFIELD(46, 1) /* index 111 */, + TILEGX_OPC_NONE, TILEGX_OPC_LNK, + BITFIELD(46, 1) /* index 114 */, + TILEGX_OPC_NONE, TILEGX_OPC_NOP, + BITFIELD(49, 2) /* index 117 */, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + BITFIELD(49, 2) /* index 122 */, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE, + BITFIELD(49, 2) /* index 127 */, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, + BITFIELD(49, 2) /* index 132 */, + TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(137), TILEGX_OPC_XOR, + BITFIELD(43, 2) /* index 137 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(142), + BITFIELD(45, 2) /* index 142 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(147), + BITFIELD(47, 2) /* index 147 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(49, 2) /* index 152 */, + TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU, + BITFIELD(49, 2) /* index 157 */, + TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX, + TILEGX_OPC_SHL3ADDX, + BITFIELD(49, 2) /* index 162 */, + TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, +}; + +static const unsigned short decode_Y2_fsm[118] = +{ + BITFIELD(62, 2) /* index 0 */, + TILEGX_OPC_NONE, CHILD(5), CHILD(66), CHILD(109), + BITFIELD(55, 3) /* index 5 */, + CHILD(14), CHILD(14), CHILD(14), CHILD(17), CHILD(40), CHILD(40), CHILD(40), + CHILD(43), + BITFIELD(26, 1) /* index 14 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1U, + BITFIELD(26, 1) /* index 17 */, + CHILD(20), CHILD(30), + BITFIELD(51, 2) /* index 20 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(25), + BITFIELD(53, 2) /* index 25 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, + TILEGX_OPC_PREFETCH_L1_FAULT, + BITFIELD(51, 2) /* index 30 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(35), + BITFIELD(53, 2) /* index 35 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH, + BITFIELD(26, 1) /* index 40 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2U, + BITFIELD(26, 1) /* index 43 */, + CHILD(46), CHILD(56), + BITFIELD(51, 2) /* index 46 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(51), + BITFIELD(53, 2) /* index 51 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, + TILEGX_OPC_PREFETCH_L2_FAULT, + BITFIELD(51, 2) /* index 56 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(61), + BITFIELD(53, 2) /* index 61 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2, + BITFIELD(56, 2) /* index 66 */, + CHILD(71), CHILD(74), CHILD(90), CHILD(93), + BITFIELD(26, 1) /* index 71 */, + TILEGX_OPC_NONE, TILEGX_OPC_LD4S, + BITFIELD(26, 1) /* index 74 */, + TILEGX_OPC_NONE, CHILD(77), + BITFIELD(51, 2) /* index 77 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(82), + BITFIELD(53, 2) /* index 82 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(87), + BITFIELD(55, 1) /* index 87 */, + TILEGX_OPC_LD4S, TILEGX_OPC_PREFETCH_L3_FAULT, + BITFIELD(26, 1) /* index 90 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD, + BITFIELD(26, 1) /* index 93 */, + CHILD(96), TILEGX_OPC_LD, + BITFIELD(51, 2) /* index 96 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(101), + BITFIELD(53, 2) /* index 101 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(106), + BITFIELD(55, 1) /* index 106 */, + TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3, + BITFIELD(26, 1) /* index 109 */, + CHILD(112), CHILD(115), + BITFIELD(57, 1) /* index 112 */, + TILEGX_OPC_ST1, TILEGX_OPC_ST4, + BITFIELD(57, 1) /* index 115 */, + TILEGX_OPC_ST2, TILEGX_OPC_ST, +}; + +#undef BITFIELD +#undef CHILD +const unsigned short * const +tilegx_bundle_decoder_fsms[TILEGX_NUM_PIPELINE_ENCODINGS] = +{ + decode_X0_fsm, + decode_X1_fsm, + decode_Y0_fsm, + decode_Y1_fsm, + decode_Y2_fsm +}; +const struct tilegx_operand tilegx_operands[35] = +{ + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X0), + 8, 1, 0, 0, 0, 0, + create_Imm8_X0, get_Imm8_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X1), + 8, 1, 0, 0, 0, 0, + create_Imm8_X1, get_Imm8_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y0), + 8, 1, 0, 0, 0, 0, + create_Imm8_Y0, get_Imm8_Y0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y1), + 8, 1, 0, 0, 0, 0, + create_Imm8_Y1, get_Imm8_Y1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X0_HW0_LAST), + 16, 1, 0, 0, 0, 0, + create_Imm16_X0, get_Imm16_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X1_HW0_LAST), + 16, 1, 0, 0, 0, 0, + create_Imm16_X1, get_Imm16_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_X0, get_Dest_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_X0, get_SrcA_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_X1, get_Dest_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_X1, get_SrcA_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_Y0, get_Dest_Y0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y0, get_SrcA_Y0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_Y1, get_Dest_Y1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y1, get_SrcA_Y1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y2, get_SrcA_Y2 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_SrcA_X1, get_SrcA_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_X0, get_SrcB_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_X1, get_SrcB_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_Y0, get_SrcB_Y0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_Y1, get_SrcB_Y1 + }, + { + TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_BROFF_X1), + 17, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_BrOff_X1, get_BrOff_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMSTART_X0), + 6, 0, 0, 0, 0, 0, + create_BFStart_X0, get_BFStart_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMEND_X0), + 6, 0, 0, 0, 0, 0, + create_BFEnd_X0, get_BFEnd_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_Dest_X0, get_Dest_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_Dest_Y0, get_Dest_Y0 + }, + { + TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_JUMPOFF_X1), + 27, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_JumpOff_X1, get_JumpOff_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_SrcBDest_Y2, get_SrcBDest_Y2 + }, + { + TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MF_IMM14_X1), + 14, 0, 0, 0, 0, 0, + create_MF_Imm14_X1, get_MF_Imm14_X1 + }, + { + TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MT_IMM14_X1), + 14, 0, 0, 0, 0, 0, + create_MT_Imm14_X1, get_MT_Imm14_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X0), + 6, 0, 0, 0, 0, 0, + create_ShAmt_X0, get_ShAmt_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X1), + 6, 0, 0, 0, 0, 0, + create_ShAmt_X1, get_ShAmt_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y0), + 6, 0, 0, 0, 0, 0, + create_ShAmt_Y0, get_ShAmt_Y0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y1), + 6, 0, 0, 0, 0, 0, + create_ShAmt_Y1, get_ShAmt_Y1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcBDest_Y2, get_SrcBDest_Y2 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_DEST_IMM8_X1), + 8, 1, 0, 0, 0, 0, + create_Dest_Imm8_X1, get_Dest_Imm8_X1 + } +}; + + + + +/* Given a set of bundle bits and the lookup FSM for a specific pipe, + * returns which instruction the bundle contains in that pipe. + */ +static const struct tilegx_opcode * +find_opcode(tilegx_bundle_bits bits, const unsigned short *table) +{ + int index = 0; + + while (1) + { + unsigned short bitspec = table[index]; + unsigned int bitfield = + ((unsigned int)(bits >> (bitspec & 63))) & (bitspec >> 6); + + unsigned short next = table[index + 1 + bitfield]; + if (next <= TILEGX_OPC_NONE) + return &tilegx_opcodes[next]; + + index = next - TILEGX_OPC_NONE; + } +} + + +int +parse_insn_tilegx(tilegx_bundle_bits bits, + unsigned long long pc, + struct tilegx_decoded_instruction + decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]) +{ + int num_instructions = 0; + int pipe; + + int min_pipe, max_pipe; + if ((bits & TILEGX_BUNDLE_MODE_MASK) == 0) + { + min_pipe = TILEGX_PIPELINE_X0; + max_pipe = TILEGX_PIPELINE_X1; + } + else + { + min_pipe = TILEGX_PIPELINE_Y0; + max_pipe = TILEGX_PIPELINE_Y2; + } + + /* For each pipe, find an instruction that fits. */ + for (pipe = min_pipe; pipe <= max_pipe; pipe++) + { + const struct tilegx_opcode *opc; + struct tilegx_decoded_instruction *d; + int i; + + d = &decoded[num_instructions++]; + opc = find_opcode (bits, tilegx_bundle_decoder_fsms[pipe]); + d->opcode = opc; + + /* Decode each operand, sign extending, etc. as appropriate. */ + for (i = 0; i < opc->num_operands; i++) + { + const struct tilegx_operand *op = + &tilegx_operands[opc->operands[pipe][i]]; + int raw_opval = op->extract (bits); + long long opval; + + if (op->is_signed) + { + /* Sign-extend the operand. */ + int shift = (int)((sizeof(int) * 8) - op->num_bits); + raw_opval = (raw_opval << shift) >> shift; + } + + /* Adjust PC-relative scaled branch offsets. */ + if (op->type == TILEGX_OP_TYPE_ADDRESS) + opval = (raw_opval * TILEGX_BUNDLE_SIZE_IN_BYTES) + pc; + else + opval = raw_opval; + + /* Record the final value. */ + d->operands[i] = op; + d->operand_values[i] = opval; + } + } + + return num_instructions; +} diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c new file mode 100644 index 00000000..f6f50f2a --- /dev/null +++ b/arch/tile/kernel/time.c @@ -0,0 +1,235 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Support the cycle counter clocksource and tile timer clock event device. + */ + +#include <linux/time.h> +#include <linux/timex.h> +#include <linux/clocksource.h> +#include <linux/clockchips.h> +#include <linux/hardirq.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <asm/irq_regs.h> +#include <asm/traps.h> +#include <hv/hypervisor.h> +#include <arch/interrupts.h> +#include <arch/spr_def.h> + + +/* + * Define the cycle counter clock source. + */ + +/* How many cycles per second we are running at. */ +static cycles_t cycles_per_sec __write_once; + +cycles_t get_clock_rate(void) +{ + return cycles_per_sec; +} + +#if CHIP_HAS_SPLIT_CYCLE() +cycles_t get_cycles(void) +{ + unsigned int high = __insn_mfspr(SPR_CYCLE_HIGH); + unsigned int low = __insn_mfspr(SPR_CYCLE_LOW); + unsigned int high2 = __insn_mfspr(SPR_CYCLE_HIGH); + + while (unlikely(high != high2)) { + low = __insn_mfspr(SPR_CYCLE_LOW); + high = high2; + high2 = __insn_mfspr(SPR_CYCLE_HIGH); + } + + return (((cycles_t)high) << 32) | low; +} +EXPORT_SYMBOL(get_cycles); +#endif + +/* + * We use a relatively small shift value so that sched_clock() + * won't wrap around very often. + */ +#define SCHED_CLOCK_SHIFT 10 + +static unsigned long sched_clock_mult __write_once; + +static cycles_t clocksource_get_cycles(struct clocksource *cs) +{ + return get_cycles(); +} + +static struct clocksource cycle_counter_cs = { + .name = "cycle counter", + .rating = 300, + .read = clocksource_get_cycles, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +/* + * Called very early from setup_arch() to set cycles_per_sec. + * We initialize it early so we can use it to set up loops_per_jiffy. + */ +void __init setup_clock(void) +{ + cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED); + sched_clock_mult = + clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT); +} + +void __init calibrate_delay(void) +{ + loops_per_jiffy = get_clock_rate() / HZ; + pr_info("Clock rate yields %lu.%02lu BogoMIPS (lpj=%lu)\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy); +} + +/* Called fairly late in init/main.c, but before we go smp. */ +void __init time_init(void) +{ + /* Initialize and register the clock source. */ + clocksource_register_hz(&cycle_counter_cs, cycles_per_sec); + + /* Start up the tile-timer interrupt source on the boot cpu. */ + setup_tile_timer(); +} + + +/* + * Define the tile timer clock event device. The timer is driven by + * the TILE_TIMER_CONTROL register, which consists of a 31-bit down + * counter, plus bit 31, which signifies that the counter has wrapped + * from zero to (2**31) - 1. The INT_TILE_TIMER interrupt will be + * raised as long as bit 31 is set. + * + * The TILE_MINSEC value represents the largest range of real-time + * we can possibly cover with the timer, based on MAX_TICK combined + * with the slowest reasonable clock rate we might run at. + */ + +#define MAX_TICK 0x7fffffff /* we have 31 bits of countdown timer */ +#define TILE_MINSEC 5 /* timer covers no more than 5 seconds */ + +static int tile_timer_set_next_event(unsigned long ticks, + struct clock_event_device *evt) +{ + BUG_ON(ticks > MAX_TICK); + __insn_mtspr(SPR_TILE_TIMER_CONTROL, ticks); + arch_local_irq_unmask_now(INT_TILE_TIMER); + return 0; +} + +/* + * Whenever anyone tries to change modes, we just mask interrupts + * and wait for the next event to get set. + */ +static void tile_timer_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + arch_local_irq_mask_now(INT_TILE_TIMER); +} + +/* + * Set min_delta_ns to 1 microsecond, since it takes about + * that long to fire the interrupt. + */ +static DEFINE_PER_CPU(struct clock_event_device, tile_timer) = { + .name = "tile timer", + .features = CLOCK_EVT_FEAT_ONESHOT, + .min_delta_ns = 1000, + .rating = 100, + .irq = -1, + .set_next_event = tile_timer_set_next_event, + .set_mode = tile_timer_set_mode, +}; + +void __cpuinit setup_tile_timer(void) +{ + struct clock_event_device *evt = &__get_cpu_var(tile_timer); + + /* Fill in fields that are speed-specific. */ + clockevents_calc_mult_shift(evt, cycles_per_sec, TILE_MINSEC); + evt->max_delta_ns = clockevent_delta2ns(MAX_TICK, evt); + + /* Mark as being for this cpu only. */ + evt->cpumask = cpumask_of(smp_processor_id()); + + /* Start out with timer not firing. */ + arch_local_irq_mask_now(INT_TILE_TIMER); + + /* Register tile timer. */ + clockevents_register_device(evt); +} + +/* Called from the interrupt vector. */ +void do_timer_interrupt(struct pt_regs *regs, int fault_num) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct clock_event_device *evt = &__get_cpu_var(tile_timer); + + /* + * Mask the timer interrupt here, since we are a oneshot timer + * and there are now by definition no events pending. + */ + arch_local_irq_mask(INT_TILE_TIMER); + + /* Track time spent here in an interrupt context */ + irq_enter(); + + /* Track interrupt count. */ + __get_cpu_var(irq_stat).irq_timer_count++; + + /* Call the generic timer handler */ + evt->event_handler(evt); + + /* + * Track time spent against the current process again and + * process any softirqs if they are waiting. + */ + irq_exit(); + + set_irq_regs(old_regs); +} + +/* + * Scheduler clock - returns current time in nanosec units. + * Note that with LOCKDEP, this is called during lockdep_init(), and + * we will claim that sched_clock() is zero for a little while, until + * we run setup_clock(), above. + */ +unsigned long long sched_clock(void) +{ + return clocksource_cyc2ns(get_cycles(), + sched_clock_mult, SCHED_CLOCK_SHIFT); +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +/* + * Use the tile timer to convert nsecs to core clock cycles, relying + * on it having the same frequency as SPR_CYCLE. + */ +cycles_t ns2cycles(unsigned long nsecs) +{ + struct clock_event_device *dev = &__get_cpu_var(tile_timer); + return ((u64)nsecs * dev->mult) >> dev->shift; +} diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c new file mode 100644 index 00000000..a5f241c2 --- /dev/null +++ b/arch/tile/kernel/tlb.c @@ -0,0 +1,97 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +#include <linux/cpumask.h> +#include <linux/module.h> +#include <asm/tlbflush.h> +#include <asm/homecache.h> +#include <hv/hypervisor.h> + +/* From tlbflush.h */ +DEFINE_PER_CPU(int, current_asid); +int min_asid, max_asid; + +/* + * Note that we flush the L1I (for VM_EXEC pages) as well as the TLB + * so that when we are unmapping an executable page, we also flush it. + * Combined with flushing the L1I at context switch time, this means + * we don't have to do any other icache flushes. + */ + +void flush_tlb_mm(struct mm_struct *mm) +{ + HV_Remote_ASID asids[NR_CPUS]; + int i = 0, cpu; + for_each_cpu(cpu, mm_cpumask(mm)) { + HV_Remote_ASID *asid = &asids[i++]; + asid->y = cpu / smp_topology.width; + asid->x = cpu % smp_topology.width; + asid->asid = per_cpu(current_asid, cpu); + } + flush_remote(0, HV_FLUSH_EVICT_L1I, mm_cpumask(mm), + 0, 0, 0, NULL, asids, i); +} + +void flush_tlb_current_task(void) +{ + flush_tlb_mm(current->mm); +} + +void flush_tlb_page_mm(const struct vm_area_struct *vma, struct mm_struct *mm, + unsigned long va) +{ + unsigned long size = hv_page_size(vma); + int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; + flush_remote(0, cache, mm_cpumask(mm), + va, size, size, mm_cpumask(mm), NULL, 0); +} + +void flush_tlb_page(const struct vm_area_struct *vma, unsigned long va) +{ + flush_tlb_page_mm(vma, vma->vm_mm, va); +} +EXPORT_SYMBOL(flush_tlb_page); + +void flush_tlb_range(const struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + unsigned long size = hv_page_size(vma); + struct mm_struct *mm = vma->vm_mm; + int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; + flush_remote(0, cache, mm_cpumask(mm), start, end - start, size, + mm_cpumask(mm), NULL, 0); +} + +void flush_tlb_all(void) +{ + int i; + for (i = 0; ; ++i) { + HV_VirtAddrRange r = hv_inquire_virtual(i); + if (r.size == 0) + break; + flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask, + r.start, r.size, PAGE_SIZE, cpu_online_mask, + NULL, 0); + flush_remote(0, 0, NULL, + r.start, r.size, HPAGE_SIZE, cpu_online_mask, + NULL, 0); + } +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask, + start, end - start, PAGE_SIZE, cpu_online_mask, NULL, 0); +} diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c new file mode 100644 index 00000000..73cff814 --- /dev/null +++ b/arch/tile/kernel/traps.c @@ -0,0 +1,329 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/reboot.h> +#include <linux/uaccess.h> +#include <linux/ptrace.h> +#include <asm/stack.h> +#include <asm/traps.h> +#include <asm/setup.h> + +#include <arch/interrupts.h> +#include <arch/spr_def.h> +#include <arch/opcode.h> + +void __init trap_init(void) +{ + /* Nothing needed here since we link code at .intrpt1 */ +} + +int unaligned_fixup = 1; + +static int __init setup_unaligned_fixup(char *str) +{ + /* + * Say "=-1" to completely disable it. If you just do "=0", we + * will still parse the instruction, then fire a SIGBUS with + * the correct address from inside the single_step code. + */ + long val; + if (strict_strtol(str, 0, &val) != 0) + return 0; + unaligned_fixup = val; + pr_info("Fixups for unaligned data accesses are %s\n", + unaligned_fixup >= 0 ? + (unaligned_fixup ? "enabled" : "disabled") : + "completely disabled"); + return 1; +} +__setup("unaligned_fixup=", setup_unaligned_fixup); + +#if CHIP_HAS_TILE_DMA() + +static int dma_disabled; + +static int __init nodma(char *str) +{ + pr_info("User-space DMA is disabled\n"); + dma_disabled = 1; + return 1; +} +__setup("nodma", nodma); + +/* How to decode SPR_GPV_REASON */ +#define IRET_ERROR (1U << 31) +#define MT_ERROR (1U << 30) +#define MF_ERROR (1U << 29) +#define SPR_INDEX ((1U << 15) - 1) +#define SPR_MPL_SHIFT 9 /* starting bit position for MPL encoded in SPR */ + +/* + * See if this GPV is just to notify the kernel of SPR use and we can + * retry the user instruction after adjusting some MPLs suitably. + */ +static int retry_gpv(unsigned int gpv_reason) +{ + int mpl; + + if (gpv_reason & IRET_ERROR) + return 0; + + BUG_ON((gpv_reason & (MT_ERROR|MF_ERROR)) == 0); + mpl = (gpv_reason & SPR_INDEX) >> SPR_MPL_SHIFT; + if (mpl == INT_DMA_NOTIFY && !dma_disabled) { + /* User is turning on DMA. Allow it and retry. */ + printk(KERN_DEBUG "Process %d/%s is now enabled for DMA\n", + current->pid, current->comm); + BUG_ON(current->thread.tile_dma_state.enabled); + current->thread.tile_dma_state.enabled = 1; + grant_dma_mpls(); + return 1; + } + + return 0; +} + +#endif /* CHIP_HAS_TILE_DMA() */ + +#ifdef __tilegx__ +#define bundle_bits tilegx_bundle_bits +#else +#define bundle_bits tile_bundle_bits +#endif + +extern bundle_bits bpt_code; + +asm(".pushsection .rodata.bpt_code,\"a\";" + ".align 8;" + "bpt_code: bpt;" + ".size bpt_code,.-bpt_code;" + ".popsection"); + +static int special_ill(bundle_bits bundle, int *sigp, int *codep) +{ + int sig, code, maxcode; + + if (bundle == bpt_code) { + *sigp = SIGTRAP; + *codep = TRAP_BRKPT; + return 1; + } + + /* If it's a "raise" bundle, then "ill" must be in pipe X1. */ +#ifdef __tilegx__ + if ((bundle & TILEGX_BUNDLE_MODE_MASK) != 0) + return 0; + if (get_Opcode_X1(bundle) != RRR_0_OPCODE_X1) + return 0; + if (get_RRROpcodeExtension_X1(bundle) != UNARY_RRR_0_OPCODE_X1) + return 0; + if (get_UnaryOpcodeExtension_X1(bundle) != ILL_UNARY_OPCODE_X1) + return 0; +#else + if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) + return 0; + if (get_Opcode_X1(bundle) != SHUN_0_OPCODE_X1) + return 0; + if (get_UnShOpcodeExtension_X1(bundle) != UN_0_SHUN_0_OPCODE_X1) + return 0; + if (get_UnOpcodeExtension_X1(bundle) != ILL_UN_0_SHUN_0_OPCODE_X1) + return 0; +#endif + + /* Check that the magic distinguishers are set to mean "raise". */ + if (get_Dest_X1(bundle) != 29 || get_SrcA_X1(bundle) != 37) + return 0; + + /* There must be an "addli zero, zero, VAL" in X0. */ + if (get_Opcode_X0(bundle) != ADDLI_OPCODE_X0) + return 0; + if (get_Dest_X0(bundle) != TREG_ZERO) + return 0; + if (get_SrcA_X0(bundle) != TREG_ZERO) + return 0; + + /* + * Validate the proposed signal number and si_code value. + * Note that we embed these in the static instruction itself + * so that we perturb the register state as little as possible + * at the time of the actual fault; it's unlikely you'd ever + * need to dynamically choose which kind of fault to raise + * from user space. + */ + sig = get_Imm16_X0(bundle) & 0x3f; + switch (sig) { + case SIGILL: + maxcode = NSIGILL; + break; + case SIGFPE: + maxcode = NSIGFPE; + break; + case SIGSEGV: + maxcode = NSIGSEGV; + break; + case SIGBUS: + maxcode = NSIGBUS; + break; + case SIGTRAP: + maxcode = NSIGTRAP; + break; + default: + return 0; + } + code = (get_Imm16_X0(bundle) >> 6) & 0xf; + if (code <= 0 || code > maxcode) + return 0; + + /* Make it the requested signal. */ + *sigp = sig; + *codep = code | __SI_FAULT; + return 1; +} + +void __kprobes do_trap(struct pt_regs *regs, int fault_num, + unsigned long reason) +{ + siginfo_t info = { 0 }; + int signo, code; + unsigned long address = 0; + bundle_bits instr; + + /* Re-enable interrupts. */ + local_irq_enable(); + + /* + * If it hits in kernel mode and we can't fix it up, just exit the + * current process and hope for the best. + */ + if (!user_mode(regs)) { + if (fixup_exception(regs)) /* only UNALIGN_DATA in practice */ + return; + pr_alert("Kernel took bad trap %d at PC %#lx\n", + fault_num, regs->pc); + if (fault_num == INT_GPV) + pr_alert("GPV_REASON is %#lx\n", reason); + show_regs(regs); + do_exit(SIGKILL); /* FIXME: implement i386 die() */ + return; + } + + switch (fault_num) { + case INT_MEM_ERROR: + signo = SIGBUS; + code = BUS_OBJERR; + break; + case INT_ILL: + if (copy_from_user(&instr, (void __user *)regs->pc, + sizeof(instr))) { + pr_err("Unreadable instruction for INT_ILL:" + " %#lx\n", regs->pc); + do_exit(SIGKILL); + return; + } + if (!special_ill(instr, &signo, &code)) { + signo = SIGILL; + code = ILL_ILLOPC; + } + address = regs->pc; + break; + case INT_GPV: +#if CHIP_HAS_TILE_DMA() + if (retry_gpv(reason)) + return; +#endif + /*FALLTHROUGH*/ + case INT_UDN_ACCESS: + case INT_IDN_ACCESS: +#if CHIP_HAS_SN() + case INT_SN_ACCESS: +#endif + signo = SIGILL; + code = ILL_PRVREG; + address = regs->pc; + break; + case INT_SWINT_3: + case INT_SWINT_2: + case INT_SWINT_0: + signo = SIGILL; + code = ILL_ILLTRP; + address = regs->pc; + break; + case INT_UNALIGN_DATA: +#ifndef __tilegx__ /* Emulated support for single step debugging */ + if (unaligned_fixup >= 0) { + struct single_step_state *state = + current_thread_info()->step_state; + if (!state || + (void __user *)(regs->pc) != state->buffer) { + single_step_once(regs); + return; + } + } +#endif + signo = SIGBUS; + code = BUS_ADRALN; + address = 0; + break; + case INT_DOUBLE_FAULT: + /* + * For double fault, "reason" is actually passed as + * SYSTEM_SAVE_K_2, the hypervisor's double-fault info, so + * we can provide the original fault number rather than + * the uninteresting "INT_DOUBLE_FAULT" so the user can + * learn what actually struck while PL0 ICS was set. + */ + fault_num = reason; + signo = SIGILL; + code = ILL_DBLFLT; + address = regs->pc; + break; +#ifdef __tilegx__ + case INT_ILL_TRANS: { + /* Avoid a hardware erratum with the return address stack. */ + fill_ra_stack(); + + signo = SIGSEGV; + code = SEGV_MAPERR; + if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK) + address = regs->pc; + else + address = 0; /* FIXME: GX: single-step for address */ + break; + } +#endif + default: + panic("Unexpected do_trap interrupt number %d", fault_num); + return; + } + + info.si_signo = signo; + info.si_code = code; + info.si_addr = (void __user *)address; + if (signo == SIGILL) + info.si_trapno = fault_num; + if (signo != SIGTRAP) + trace_unhandled_signal("trap", regs, address, signo); + force_sig_info(signo, &info, current); +} + +void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52) +{ + _dump_stack(dummy, pc, lr, sp, r52); + pr_emerg("Double fault: exiting\n"); + machine_halt(); +} diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S new file mode 100644 index 00000000..631f10de --- /dev/null +++ b/arch/tile/kernel/vmlinux.lds.S @@ -0,0 +1,95 @@ +#include <asm-generic/vmlinux.lds.h> +#include <asm/page.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <hv/hypervisor.h> + +/* Text loads starting from the supervisor interrupt vector address. */ +#define TEXT_OFFSET MEM_SV_INTRPT + +OUTPUT_ARCH(tile) +ENTRY(_start) +jiffies = jiffies_64; + +PHDRS +{ + intrpt1 PT_LOAD ; + text PT_LOAD ; + data PT_LOAD ; +} +SECTIONS +{ + /* Text is loaded with a different VA than data; start with text. */ + #undef LOAD_OFFSET + #define LOAD_OFFSET TEXT_OFFSET + + /* Interrupt vectors */ + .intrpt1 (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */ + { + _text = .; + _stext = .; + *(.intrpt1) + } :intrpt1 =0 + + /* Hypervisor call vectors */ + #include "hvglue.lds" + + /* Now the real code */ + . = ALIGN(0x20000); + .text : AT (ADDR(.text) - LOAD_OFFSET) { + HEAD_TEXT + SCHED_TEXT + LOCK_TEXT + __fix_text_end = .; /* tile-cpack won't rearrange before this */ + TEXT_TEXT + *(.text.*) + *(.coldtext*) + *(.fixup) + *(.gnu.warning) + } :text =0 + _etext = .; + + /* "Init" is divided into two areas with very different virtual addresses. */ + INIT_TEXT_SECTION(PAGE_SIZE) + + /* Now we skip back to PAGE_OFFSET for the data. */ + . = (. - TEXT_OFFSET + PAGE_OFFSET); + #undef LOAD_OFFSET + #define LOAD_OFFSET PAGE_OFFSET + + . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(_sinitdata) = .; + INIT_DATA_SECTION(16) :data =0 + PERCPU_SECTION(L2_CACHE_BYTES) + . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(_einitdata) = .; + + _sdata = .; /* Start of data section */ + + RO_DATA_SECTION(PAGE_SIZE) + + /* initially writeable, then read-only */ + . = ALIGN(PAGE_SIZE); + __w1data_begin = .; + .w1data : AT(ADDR(.w1data) - LOAD_OFFSET) { + VMLINUX_SYMBOL(__w1data_begin) = .; + *(.w1data) + VMLINUX_SYMBOL(__w1data_end) = .; + } + + RW_DATA_SECTION(L2_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + + _edata = .; + + EXCEPTION_TABLE(L2_CACHE_BYTES) + NOTES + + + BSS_SECTION(8, PAGE_SIZE, 1) + _end = . ; + + STABS_DEBUG + DWARF_DEBUG + + DISCARDS +} |