diff options
Diffstat (limited to 'arch/ia64/sn/kernel')
27 files changed, 7277 insertions, 0 deletions
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile new file mode 100644 index 00000000..d27df1d4 --- /dev/null +++ b/arch/ia64/sn/kernel/Makefile @@ -0,0 +1,18 @@ +# arch/ia64/sn/kernel/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1999,2001-2006,2008 Silicon Graphics, Inc. All Rights Reserved. +# + +ccflags-y := -Iarch/ia64/sn/include + +obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ + huberror.o io_acpi_init.o io_common.o \ + io_init.o iomv.o klconflib.o pio_phys.o \ + sn2/ +obj-$(CONFIG_IA64_GENERIC) += machvec.o +obj-$(CONFIG_SGI_TIOCX) += tiocx.o +obj-$(CONFIG_PCI_MSI) += msi_sn.o diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c new file mode 100644 index 00000000..cad775a1 --- /dev/null +++ b/arch/ia64/sn/kernel/bte.c @@ -0,0 +1,471 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2007 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/module.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/addrs.h> +#include <asm/sn/arch.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/pda.h> +#include <asm/sn/shubio.h> +#include <asm/nodedata.h> +#include <asm/delay.h> + +#include <linux/bootmem.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/sn/bte.h> + +#ifndef L1_CACHE_MASK +#define L1_CACHE_MASK (L1_CACHE_BYTES - 1) +#endif + +/* two interfaces on two btes */ +#define MAX_INTERFACES_TO_TRY 4 +#define MAX_NODES_TO_TRY 2 + +static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface) +{ + nodepda_t *tmp_nodepda; + + if (nasid_to_cnodeid(nasid) == -1) + return (struct bteinfo_s *)NULL; + + tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid)); + return &tmp_nodepda->bte_if[interface]; + +} + +static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode) +{ + if (is_shub2()) { + BTE_CTRL_STORE(bte, (IBLS_BUSY | ((len) | (mode) << 24))); + } else { + BTE_LNSTAT_STORE(bte, len); + BTE_CTRL_STORE(bte, mode); + } +} + +/************************************************************************ + * Block Transfer Engine copy related functions. + * + ***********************************************************************/ + +/* + * bte_copy(src, dest, len, mode, notification) + * + * Use the block transfer engine to move kernel memory from src to dest + * using the assigned mode. + * + * Parameters: + * src - physical address of the transfer source. + * dest - physical address of the transfer destination. + * len - number of bytes to transfer from source to dest. + * mode - hardware defined. See reference information + * for IBCT0/1 in the SHUB Programmers Reference + * notification - kernel virtual address of the notification cache + * line. If NULL, the default is used and + * the bte_copy is synchronous. + * + * NOTE: This function requires src, dest, and len to + * be cacheline aligned. + */ +bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) +{ + u64 transfer_size; + u64 transfer_stat; + u64 notif_phys_addr; + struct bteinfo_s *bte; + bte_result_t bte_status; + unsigned long irq_flags; + unsigned long itc_end = 0; + int nasid_to_try[MAX_NODES_TO_TRY]; + int my_nasid = cpuid_to_nasid(raw_smp_processor_id()); + int bte_if_index, nasid_index; + int bte_first, btes_per_node = BTES_PER_NODE; + + BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n", + src, dest, len, mode, notification)); + + if (len == 0) { + return BTE_SUCCESS; + } + + BUG_ON(len & L1_CACHE_MASK); + BUG_ON(src & L1_CACHE_MASK); + BUG_ON(dest & L1_CACHE_MASK); + BUG_ON(len > BTE_MAX_XFER); + + /* + * Start with interface corresponding to cpu number + */ + bte_first = raw_smp_processor_id() % btes_per_node; + + if (mode & BTE_USE_DEST) { + /* try remote then local */ + nasid_to_try[0] = NASID_GET(dest); + if (mode & BTE_USE_ANY) { + nasid_to_try[1] = my_nasid; + } else { + nasid_to_try[1] = (int)NULL; + } + } else { + /* try local then remote */ + nasid_to_try[0] = my_nasid; + if (mode & BTE_USE_ANY) { + nasid_to_try[1] = NASID_GET(dest); + } else { + nasid_to_try[1] = (int)NULL; + } + } + +retry_bteop: + do { + local_irq_save(irq_flags); + + bte_if_index = bte_first; + nasid_index = 0; + + /* Attempt to lock one of the BTE interfaces. */ + while (nasid_index < MAX_NODES_TO_TRY) { + bte = bte_if_on_node(nasid_to_try[nasid_index],bte_if_index); + + if (bte == NULL) { + nasid_index++; + continue; + } + + if (spin_trylock(&bte->spinlock)) { + if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) || + (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) { + /* Got the lock but BTE still busy */ + spin_unlock(&bte->spinlock); + } else { + /* we got the lock and it's not busy */ + break; + } + } + + bte_if_index = (bte_if_index + 1) % btes_per_node; /* Next interface */ + if (bte_if_index == bte_first) { + /* + * We've tried all interfaces on this node + */ + nasid_index++; + } + + bte = NULL; + } + + if (bte != NULL) { + break; + } + + local_irq_restore(irq_flags); + + if (!(mode & BTE_WACQUIRE)) { + return BTEFAIL_NOTAVAIL; + } + } while (1); + + if (notification == NULL) { + /* User does not want to be notified. */ + bte->most_rcnt_na = &bte->notify; + } else { + bte->most_rcnt_na = notification; + } + + /* Calculate the number of cache lines to transfer. */ + transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK); + + /* Initialize the notification to a known value. */ + *bte->most_rcnt_na = BTE_WORD_BUSY; + notif_phys_addr = (u64)bte->most_rcnt_na; + + /* Set the source and destination registers */ + BTE_PRINTKV(("IBSA = 0x%lx)\n", src)); + BTE_SRC_STORE(bte, src); + BTE_PRINTKV(("IBDA = 0x%lx)\n", dest)); + BTE_DEST_STORE(bte, dest); + + /* Set the notification register */ + BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr)); + BTE_NOTIF_STORE(bte, notif_phys_addr); + + /* Initiate the transfer */ + BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode))); + bte_start_transfer(bte, transfer_size, BTE_VALID_MODE(mode)); + + itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec); + + spin_unlock_irqrestore(&bte->spinlock, irq_flags); + + if (notification != NULL) { + return BTE_SUCCESS; + } + + while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) { + cpu_relax(); + if (ia64_get_itc() > itc_end) { + BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n", + NASID_GET(bte->bte_base_addr), bte->bte_num, + BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) ); + bte->bte_error_count++; + bte->bh_error = IBLS_ERROR; + bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode)); + *bte->most_rcnt_na = BTE_WORD_AVAILABLE; + goto retry_bteop; + } + } + + BTE_PRINTKV((" Delay Done. IBLS = 0x%lx, most_rcnt_na = 0x%lx\n", + BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na)); + + if (transfer_stat & IBLS_ERROR) { + bte_status = BTE_GET_ERROR_STATUS(transfer_stat); + } else { + bte_status = BTE_SUCCESS; + } + *bte->most_rcnt_na = BTE_WORD_AVAILABLE; + + BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n", + BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na)); + + return bte_status; +} + +EXPORT_SYMBOL(bte_copy); + +/* + * bte_unaligned_copy(src, dest, len, mode) + * + * use the block transfer engine to move kernel + * memory from src to dest using the assigned mode. + * + * Parameters: + * src - physical address of the transfer source. + * dest - physical address of the transfer destination. + * len - number of bytes to transfer from source to dest. + * mode - hardware defined. See reference information + * for IBCT0/1 in the SGI documentation. + * + * NOTE: If the source, dest, and len are all cache line aligned, + * then it would be _FAR_ preferable to use bte_copy instead. + */ +bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) +{ + int destFirstCacheOffset; + u64 headBteSource; + u64 headBteLen; + u64 headBcopySrcOffset; + u64 headBcopyDest; + u64 headBcopyLen; + u64 footBteSource; + u64 footBteLen; + u64 footBcopyDest; + u64 footBcopyLen; + bte_result_t rv; + char *bteBlock, *bteBlock_unaligned; + + if (len == 0) { + return BTE_SUCCESS; + } + + /* temporary buffer used during unaligned transfers */ + bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, GFP_KERNEL); + if (bteBlock_unaligned == NULL) { + return BTEFAIL_NOTAVAIL; + } + bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned); + + headBcopySrcOffset = src & L1_CACHE_MASK; + destFirstCacheOffset = dest & L1_CACHE_MASK; + + /* + * At this point, the transfer is broken into + * (up to) three sections. The first section is + * from the start address to the first physical + * cache line, the second is from the first physical + * cache line to the last complete cache line, + * and the third is from the last cache line to the + * end of the buffer. The first and third sections + * are handled by bte copying into a temporary buffer + * and then bcopy'ing the necessary section into the + * final location. The middle section is handled with + * a standard bte copy. + * + * One nasty exception to the above rule is when the + * source and destination are not symmetrically + * mis-aligned. If the source offset from the first + * cache line is different from the destination offset, + * we make the first section be the entire transfer + * and the bcopy the entire block into place. + */ + if (headBcopySrcOffset == destFirstCacheOffset) { + + /* + * Both the source and destination are the same + * distance from a cache line boundary so we can + * use the bte to transfer the bulk of the + * data. + */ + headBteSource = src & ~L1_CACHE_MASK; + headBcopyDest = dest; + if (headBcopySrcOffset) { + headBcopyLen = + (len > + (L1_CACHE_BYTES - + headBcopySrcOffset) ? L1_CACHE_BYTES + - headBcopySrcOffset : len); + headBteLen = L1_CACHE_BYTES; + } else { + headBcopyLen = 0; + headBteLen = 0; + } + + if (len > headBcopyLen) { + footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK; + footBteLen = L1_CACHE_BYTES; + + footBteSource = src + len - footBcopyLen; + footBcopyDest = dest + len - footBcopyLen; + + if (footBcopyDest == (headBcopyDest + headBcopyLen)) { + /* + * We have two contiguous bcopy + * blocks. Merge them. + */ + headBcopyLen += footBcopyLen; + headBteLen += footBteLen; + } else if (footBcopyLen > 0) { + rv = bte_copy(footBteSource, + ia64_tpa((unsigned long)bteBlock), + footBteLen, mode, NULL); + if (rv != BTE_SUCCESS) { + kfree(bteBlock_unaligned); + return rv; + } + + memcpy(__va(footBcopyDest), + (char *)bteBlock, footBcopyLen); + } + } else { + footBcopyLen = 0; + footBteLen = 0; + } + + if (len > (headBcopyLen + footBcopyLen)) { + /* now transfer the middle. */ + rv = bte_copy((src + headBcopyLen), + (dest + + headBcopyLen), + (len - headBcopyLen - + footBcopyLen), mode, NULL); + if (rv != BTE_SUCCESS) { + kfree(bteBlock_unaligned); + return rv; + } + + } + } else { + + /* + * The transfer is not symmetric, we will + * allocate a buffer large enough for all the + * data, bte_copy into that buffer and then + * bcopy to the destination. + */ + + headBcopySrcOffset = src & L1_CACHE_MASK; + headBcopyDest = dest; + headBcopyLen = len; + + headBteSource = src - headBcopySrcOffset; + /* Add the leading and trailing bytes from source */ + headBteLen = L1_CACHE_ALIGN(len + headBcopySrcOffset); + } + + if (headBcopyLen > 0) { + rv = bte_copy(headBteSource, + ia64_tpa((unsigned long)bteBlock), headBteLen, + mode, NULL); + if (rv != BTE_SUCCESS) { + kfree(bteBlock_unaligned); + return rv; + } + + memcpy(__va(headBcopyDest), ((char *)bteBlock + + headBcopySrcOffset), headBcopyLen); + } + kfree(bteBlock_unaligned); + return BTE_SUCCESS; +} + +EXPORT_SYMBOL(bte_unaligned_copy); + +/************************************************************************ + * Block Transfer Engine initialization functions. + * + ***********************************************************************/ + +/* + * bte_init_node(nodepda, cnode) + * + * Initialize the nodepda structure with BTE base addresses and + * spinlocks. + */ +void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode) +{ + int i; + + /* + * Indicate that all the block transfer engines on this node + * are available. + */ + + /* + * Allocate one bte_recover_t structure per node. It holds + * the recovery lock for node. All the bte interface structures + * will point at this one bte_recover structure to get the lock. + */ + spin_lock_init(&mynodepda->bte_recovery_lock); + init_timer(&mynodepda->bte_recovery_timer); + mynodepda->bte_recovery_timer.function = bte_error_handler; + mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda; + + for (i = 0; i < BTES_PER_NODE; i++) { + u64 *base_addr; + + /* Which link status register should we use? */ + base_addr = (u64 *) + REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), BTE_BASE_ADDR(i)); + mynodepda->bte_if[i].bte_base_addr = base_addr; + mynodepda->bte_if[i].bte_source_addr = BTE_SOURCE_ADDR(base_addr); + mynodepda->bte_if[i].bte_destination_addr = BTE_DEST_ADDR(base_addr); + mynodepda->bte_if[i].bte_control_addr = BTE_CTRL_ADDR(base_addr); + mynodepda->bte_if[i].bte_notify_addr = BTE_NOTIF_ADDR(base_addr); + + /* + * Initialize the notification and spinlock + * so the first transfer can occur. + */ + mynodepda->bte_if[i].most_rcnt_na = + &(mynodepda->bte_if[i].notify); + mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE; + spin_lock_init(&mynodepda->bte_if[i].spinlock); + + mynodepda->bte_if[i].bte_cnode = cnode; + mynodepda->bte_if[i].bte_error_count = 0; + mynodepda->bte_if[i].bte_num = i; + mynodepda->bte_if[i].cleanup_active = 0; + mynodepda->bte_if[i].bh_error = 0; + } + +} diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c new file mode 100644 index 00000000..4cb09f3f --- /dev/null +++ b/arch/ia64/sn/kernel/bte_error.c @@ -0,0 +1,260 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2007 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/types.h> +#include <asm/sn/sn_sal.h> +#include "ioerror.h" +#include <asm/sn/addrs.h> +#include <asm/sn/shubio.h> +#include <asm/sn/geo.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include <asm/sn/bte.h> +#include <asm/param.h> + +/* + * Bte error handling is done in two parts. The first captures + * any crb related errors. Since there can be multiple crbs per + * interface and multiple interfaces active, we need to wait until + * all active crbs are completed. This is the first job of the + * second part error handler. When all bte related CRBs are cleanly + * completed, it resets the interfaces and gets them ready for new + * transfers to be queued. + */ + +void bte_error_handler(unsigned long); + +/* + * Wait until all BTE related CRBs are completed + * and then reset the interfaces. + */ +int shub1_bte_error_handler(unsigned long _nodepda) +{ + struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; + struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; + nasid_t nasid; + int i; + int valid_crbs; + ii_imem_u_t imem; /* II IMEM Register */ + ii_icrb0_d_u_t icrbd; /* II CRB Register D */ + ii_ibcr_u_t ibcr; + ii_icmr_u_t icmr; + ii_ieclr_u_t ieclr; + + BTE_PRINTK(("shub1_bte_error_handler(%p) - %d\n", err_nodepda, + smp_processor_id())); + + if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) && + (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) { + BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda, + smp_processor_id())); + return 1; + } + + /* Determine information about our hub */ + nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); + + /* + * A BTE transfer can use multiple CRBs. We need to make sure + * that all the BTE CRBs are complete (or timed out) before + * attempting to clean up the error. Resetting the BTE while + * there are still BTE CRBs active will hang the BTE. + * We should look at all the CRBs to see if they are allocated + * to the BTE and see if they are still active. When none + * are active, we can continue with the cleanup. + * + * We also want to make sure that the local NI port is up. + * When a router resets the NI port can go down, while it + * goes through the LLP handshake, but then comes back up. + */ + icmr.ii_icmr_regval = REMOTE_HUB_L(nasid, IIO_ICMR); + if (icmr.ii_icmr_fld_s.i_crb_mark != 0) { + /* + * There are errors which still need to be cleaned up by + * hubiio_crb_error_handler + */ + mod_timer(recovery_timer, jiffies + (HZ * 5)); + BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, + smp_processor_id())); + return 1; + } + if (icmr.ii_icmr_fld_s.i_crb_vld != 0) { + + valid_crbs = icmr.ii_icmr_fld_s.i_crb_vld; + + for (i = 0; i < IIO_NUM_CRBS; i++) { + if (!((1 << i) & valid_crbs)) { + /* This crb was not marked as valid, ignore */ + continue; + } + icrbd.ii_icrb0_d_regval = + REMOTE_HUB_L(nasid, IIO_ICRB_D(i)); + if (icrbd.d_bteop) { + mod_timer(recovery_timer, jiffies + (HZ * 5)); + BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n", + err_nodepda, smp_processor_id(), + i)); + return 1; + } + } + } + + BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id())); + /* Re-enable both bte interfaces */ + imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM); + imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1; + REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval); + + /* Clear BTE0/1 error bits */ + ieclr.ii_ieclr_regval = 0; + if (err_nodepda->bte_if[0].bh_error != BTE_SUCCESS) + ieclr.ii_ieclr_fld_s.i_e_bte_0 = 1; + if (err_nodepda->bte_if[1].bh_error != BTE_SUCCESS) + ieclr.ii_ieclr_fld_s.i_e_bte_1 = 1; + REMOTE_HUB_S(nasid, IIO_IECLR, ieclr.ii_ieclr_regval); + + /* Reinitialize both BTE state machines. */ + ibcr.ii_ibcr_regval = REMOTE_HUB_L(nasid, IIO_IBCR); + ibcr.ii_ibcr_fld_s.i_soft_reset = 1; + REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval); + + del_timer(recovery_timer); + return 0; +} + +/* + * Wait until all BTE related CRBs are completed + * and then reset the interfaces. + */ +int shub2_bte_error_handler(unsigned long _nodepda) +{ + struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; + struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; + struct bteinfo_s *bte; + nasid_t nasid; + u64 status; + int i; + + nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); + + /* + * Verify that all the BTEs are complete + */ + for (i = 0; i < BTES_PER_NODE; i++) { + bte = &err_nodepda->bte_if[i]; + status = BTE_LNSTAT_LOAD(bte); + if (status & IBLS_ERROR) { + bte->bh_error = BTE_SHUB2_ERROR(status); + continue; + } + if (!(status & IBLS_BUSY)) + continue; + mod_timer(recovery_timer, jiffies + (HZ * 5)); + BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, + smp_processor_id())); + return 1; + } + if (ia64_sn_bte_recovery(nasid)) + panic("bte_error_handler(): Fatal BTE Error"); + + del_timer(recovery_timer); + return 0; +} + +/* + * Wait until all BTE related CRBs are completed + * and then reset the interfaces. + */ +void bte_error_handler(unsigned long _nodepda) +{ + struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; + spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock; + int i; + unsigned long irq_flags; + volatile u64 *notify; + bte_result_t bh_error; + + BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda, + smp_processor_id())); + + spin_lock_irqsave(recovery_lock, irq_flags); + + /* + * Lock all interfaces on this node to prevent new transfers + * from being queued. + */ + for (i = 0; i < BTES_PER_NODE; i++) { + if (err_nodepda->bte_if[i].cleanup_active) { + continue; + } + spin_lock(&err_nodepda->bte_if[i].spinlock); + BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda, + smp_processor_id(), i)); + err_nodepda->bte_if[i].cleanup_active = 1; + } + + if (is_shub1()) { + if (shub1_bte_error_handler(_nodepda)) { + spin_unlock_irqrestore(recovery_lock, irq_flags); + return; + } + } else { + if (shub2_bte_error_handler(_nodepda)) { + spin_unlock_irqrestore(recovery_lock, irq_flags); + return; + } + } + + for (i = 0; i < BTES_PER_NODE; i++) { + bh_error = err_nodepda->bte_if[i].bh_error; + if (bh_error != BTE_SUCCESS) { + /* There is an error which needs to be notified */ + notify = err_nodepda->bte_if[i].most_rcnt_na; + BTE_PRINTK(("cnode %d bte %d error=0x%lx\n", + err_nodepda->bte_if[i].bte_cnode, + err_nodepda->bte_if[i].bte_num, + IBLS_ERROR | (u64) bh_error)); + *notify = IBLS_ERROR | bh_error; + err_nodepda->bte_if[i].bh_error = BTE_SUCCESS; + } + + err_nodepda->bte_if[i].cleanup_active = 0; + BTE_PRINTK(("eh:%p:%d Unlocked %d\n", err_nodepda, + smp_processor_id(), i)); + spin_unlock(&err_nodepda->bte_if[i].spinlock); + } + + spin_unlock_irqrestore(recovery_lock, irq_flags); +} + +/* + * First part error handler. This is called whenever any error CRB interrupt + * is generated by the II. + */ +void +bte_crb_error_handler(cnodeid_t cnode, int btenum, + int crbnum, ioerror_t * ioe, int bteop) +{ + struct bteinfo_s *bte; + + + bte = &(NODEPDA(cnode)->bte_if[btenum]); + + /* + * The caller has already figured out the error type, we save that + * in the bte handle structure for the thread exercising the + * interface to consume. + */ + bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET; + bte->bte_error_count++; + + BTE_PRINTK(("Got an error on cnode %d bte %d: HW error type 0x%x\n", + bte->bte_cnode, bte->bte_num, ioe->ie_errortype)); + bte_error_handler((unsigned long) NODEPDA(cnode)); +} + diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c new file mode 100644 index 00000000..f925dec2 --- /dev/null +++ b/arch/ia64/sn/kernel/huberror.c @@ -0,0 +1,220 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000,2002-2007 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <asm/delay.h> +#include <asm/sn/sn_sal.h> +#include "ioerror.h" +#include <asm/sn/addrs.h> +#include <asm/sn/shubio.h> +#include <asm/sn/geo.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include <asm/sn/bte.h> + +void hubiio_crb_error_handler(struct hubdev_info *hubdev_info); +extern void bte_crb_error_handler(cnodeid_t, int, int, ioerror_t *, + int); +static irqreturn_t hub_eint_handler(int irq, void *arg) +{ + struct hubdev_info *hubdev_info; + struct ia64_sal_retval ret_stuff; + nasid_t nasid; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + hubdev_info = (struct hubdev_info *)arg; + nasid = hubdev_info->hdi_nasid; + + if (is_shub1()) { + SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT, + (u64) nasid, 0, 0, 0, 0, 0, 0); + + if ((int)ret_stuff.v0) + panic("%s: Fatal %s Error", __func__, + ((nasid & 1) ? "TIO" : "HUBII")); + + if (!(nasid & 1)) /* Not a TIO, handle CRB errors */ + (void)hubiio_crb_error_handler(hubdev_info); + } else + if (nasid & 1) { /* TIO errors */ + SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT, + (u64) nasid, 0, 0, 0, 0, 0, 0); + + if ((int)ret_stuff.v0) + panic("%s: Fatal TIO Error", __func__); + } else + bte_error_handler((unsigned long)NODEPDA(nasid_to_cnodeid(nasid))); + + return IRQ_HANDLED; +} + +/* + * Free the hub CRB "crbnum" which encountered an error. + * Assumption is, error handling was successfully done, + * and we now want to return the CRB back to Hub for normal usage. + * + * In order to free the CRB, all that's needed is to de-allocate it + * + * Assumption: + * No other processor is mucking around with the hub control register. + * So, upper layer has to single thread this. + */ +void hubiio_crb_free(struct hubdev_info *hubdev_info, int crbnum) +{ + ii_icrb0_b_u_t icrbb; + + /* + * The hardware does NOT clear the mark bit, so it must get cleared + * here to be sure the error is not processed twice. + */ + icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(hubdev_info->hdi_nasid, + IIO_ICRB_B(crbnum)); + icrbb.b_mark = 0; + REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICRB_B(crbnum), + icrbb.ii_icrb0_b_regval); + /* + * Deallocate the register wait till hub indicates it's done. + */ + REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum)); + while (REMOTE_HUB_L(hubdev_info->hdi_nasid, IIO_ICDR) & IIO_ICDR_PND) + cpu_relax(); + +} + +/* + * hubiio_crb_error_handler + * + * This routine gets invoked when a hub gets an error + * interrupt. So, the routine is running in interrupt context + * at error interrupt level. + * Action: + * It's responsible for identifying ALL the CRBs that are marked + * with error, and process them. + * + * If you find the CRB that's marked with error, map this to the + * reason it caused error, and invoke appropriate error handler. + * + * XXX Be aware of the information in the context register. + * + * NOTE: + * Use REMOTE_HUB_* macro instead of LOCAL_HUB_* so that the interrupt + * handler can be run on any node. (not necessarily the node + * corresponding to the hub that encountered error). + */ + +void hubiio_crb_error_handler(struct hubdev_info *hubdev_info) +{ + nasid_t nasid; + ii_icrb0_a_u_t icrba; /* II CRB Register A */ + ii_icrb0_b_u_t icrbb; /* II CRB Register B */ + ii_icrb0_c_u_t icrbc; /* II CRB Register C */ + ii_icrb0_d_u_t icrbd; /* II CRB Register D */ + ii_icrb0_e_u_t icrbe; /* II CRB Register D */ + int i; + int num_errors = 0; /* Num of errors handled */ + ioerror_t ioerror; + + nasid = hubdev_info->hdi_nasid; + + /* + * XXX - Add locking for any recovery actions + */ + /* + * Scan through all CRBs in the Hub, and handle the errors + * in any of the CRBs marked. + */ + for (i = 0; i < IIO_NUM_CRBS; i++) { + /* Check this crb entry to see if it is in error. */ + icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i)); + + if (icrbb.b_mark == 0) { + continue; + } + + icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i)); + + IOERROR_INIT(&ioerror); + + /* read other CRB error registers. */ + icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i)); + icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i)); + icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i)); + + IOERROR_SETVALUE(&ioerror, errortype, icrbb.b_ecode); + + /* Check if this error is due to BTE operation, + * and handle it separately. + */ + if (icrbd.d_bteop || + ((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 || + icrbb.b_initiator == IIO_ICRB_INIT_BTE1) && + (icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE || + icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))) { + + int bte_num; + + if (icrbd.d_bteop) + bte_num = icrbc.c_btenum; + else /* b_initiator bit 2 gives BTE number */ + bte_num = (icrbb.b_initiator & 0x4) >> 2; + + hubiio_crb_free(hubdev_info, i); + + bte_crb_error_handler(nasid_to_cnodeid(nasid), bte_num, + i, &ioerror, icrbd.d_bteop); + num_errors++; + continue; + } + } +} + +/* + * Function : hub_error_init + * Purpose : initialize the error handling requirements for a given hub. + * Parameters : cnode, the compact nodeid. + * Assumptions : Called only once per hub, either by a local cpu. Or by a + * remote cpu, when this hub is headless.(cpuless) + * Returns : None + */ +void hub_error_init(struct hubdev_info *hubdev_info) +{ + + if (request_irq(SGI_II_ERROR, hub_eint_handler, IRQF_SHARED, + "SN_hub_error", hubdev_info)) { + printk(KERN_ERR "hub_error_init: Failed to request_irq for 0x%p\n", + hubdev_info); + return; + } + irq_set_handler(SGI_II_ERROR, handle_level_irq); + sn_set_err_irq_affinity(SGI_II_ERROR); +} + + +/* + * Function : ice_error_init + * Purpose : initialize the error handling requirements for a given tio. + * Parameters : cnode, the compact nodeid. + * Assumptions : Called only once per tio. + * Returns : None + */ +void ice_error_init(struct hubdev_info *hubdev_info) +{ + + if (request_irq + (SGI_TIO_ERROR, (void *)hub_eint_handler, IRQF_SHARED, "SN_TIO_error", + (void *)hubdev_info)) { + printk("ice_error_init: request_irq() error hubdev_info 0x%p\n", + hubdev_info); + return; + } + irq_set_handler(SGI_TIO_ERROR, handle_level_irq); + sn_set_err_irq_affinity(SGI_TIO_ERROR); +} + diff --git a/arch/ia64/sn/kernel/idle.c b/arch/ia64/sn/kernel/idle.c new file mode 100644 index 00000000..49d178f0 --- /dev/null +++ b/arch/ia64/sn/kernel/idle.c @@ -0,0 +1,30 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2001-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <asm/sn/leds.h> + +void snidle(int state) +{ + if (state) { + if (pda->idle_flag == 0) { + /* + * Turn the activity LED off. + */ + set_led_bits(0, LED_CPU_ACTIVITY); + } + + pda->idle_flag = 1; + } else { + /* + * Turn the activity LED on. + */ + set_led_bits(LED_CPU_ACTIVITY, LED_CPU_ACTIVITY); + + pda->idle_flag = 0; + } +} diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c new file mode 100644 index 00000000..b1725398 --- /dev/null +++ b/arch/ia64/sn/kernel/io_acpi_init.c @@ -0,0 +1,510 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <asm/sn/types.h> +#include <asm/sn/addrs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/sn_sal.h> +#include "xtalk/hubdev.h" +#include <linux/acpi.h> +#include <linux/slab.h> +#include <linux/export.h> + + +/* + * The code in this file will only be executed when running with + * a PROM that has ACPI IO support. (i.e., SN_ACPI_BASE_SUPPORT() == 1) + */ + + +/* + * This value must match the UUID the PROM uses + * (io/acpi/defblk.c) when building a vendor descriptor. + */ +struct acpi_vendor_uuid sn_uuid = { + .subtype = 0, + .data = { 0x2c, 0xc6, 0xa6, 0xfe, 0x9c, 0x44, 0xda, 0x11, + 0xa2, 0x7c, 0x08, 0x00, 0x69, 0x13, 0xea, 0x51 }, +}; + +struct sn_pcidev_match { + u8 bus; + unsigned int devfn; + acpi_handle handle; +}; + +/* + * Perform the early IO init in PROM. + */ +static long +sal_ioif_init(u64 *result) +{ + struct ia64_sal_retval isrv = {0,0,0,0}; + + SAL_CALL_NOLOCK(isrv, + SN_SAL_IOIF_INIT, 0, 0, 0, 0, 0, 0, 0); + *result = isrv.v0; + return isrv.status; +} + +/* + * sn_acpi_hubdev_init() - This function is called by acpi_ns_get_device_callback() + * for all SGIHUB and SGITIO acpi devices defined in the + * DSDT. It obtains the hubdev_info pointer from the + * ACPI vendor resource, which the PROM setup, and sets up the + * hubdev_info in the pda. + */ + +static acpi_status __init +sn_acpi_hubdev_init(acpi_handle handle, u32 depth, void *context, void **ret) +{ + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + u64 addr; + struct hubdev_info *hubdev; + struct hubdev_info *hubdev_ptr; + int i; + u64 nasid; + struct acpi_resource *resource; + acpi_status status; + struct acpi_resource_vendor_typed *vendor; + extern void sn_common_hubdev_init(struct hubdev_info *); + + status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, + &sn_uuid, &buffer); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "sn_acpi_hubdev_init: acpi_get_vendor_resource() " + "(0x%x) failed for: %s\n", status, + (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return AE_OK; /* Continue walking namespace */ + } + + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != + sizeof(struct hubdev_info *)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "sn_acpi_hubdev_init: Invalid vendor data length: " + "%d for: %s\n", + vendor->byte_length, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + goto exit; + } + + memcpy(&addr, vendor->byte_data, sizeof(struct hubdev_info *)); + hubdev_ptr = __va((struct hubdev_info *) addr); + + nasid = hubdev_ptr->hdi_nasid; + i = nasid_to_cnodeid(nasid); + hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); + *hubdev = *hubdev_ptr; + sn_common_hubdev_init(hubdev); + +exit: + kfree(buffer.pointer); + return AE_OK; /* Continue walking namespace */ +} + +/* + * sn_get_bussoft_ptr() - The pcibus_bussoft pointer is found in + * the ACPI Vendor resource for this bus. + */ +static struct pcibus_bussoft * +sn_get_bussoft_ptr(struct pci_bus *bus) +{ + u64 addr; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + acpi_handle handle; + struct pcibus_bussoft *prom_bussoft_ptr; + struct acpi_resource *resource; + acpi_status status; + struct acpi_resource_vendor_typed *vendor; + + + handle = PCI_CONTROLLER(bus)->acpi_handle; + status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, + &sn_uuid, &buffer); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR "%s: " + "acpi_get_vendor_resource() failed (0x%x) for: %s\n", + __func__, status, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return NULL; + } + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + + if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != + sizeof(struct pcibus_bussoft *)) { + printk(KERN_ERR + "%s: Invalid vendor data length %d\n", + __func__, vendor->byte_length); + kfree(buffer.pointer); + return NULL; + } + memcpy(&addr, vendor->byte_data, sizeof(struct pcibus_bussoft *)); + prom_bussoft_ptr = __va((struct pcibus_bussoft *) addr); + kfree(buffer.pointer); + + return prom_bussoft_ptr; +} + +/* + * sn_extract_device_info - Extract the pcidev_info and the sn_irq_info + * pointers from the vendor resource using the + * provided acpi handle, and copy the structures + * into the argument buffers. + */ +static int +sn_extract_device_info(acpi_handle handle, struct pcidev_info **pcidev_info, + struct sn_irq_info **sn_irq_info) +{ + u64 addr; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct sn_irq_info *irq_info, *irq_info_prom; + struct pcidev_info *pcidev_ptr, *pcidev_prom_ptr; + struct acpi_resource *resource; + int ret = 0; + acpi_status status; + struct acpi_resource_vendor_typed *vendor; + + /* + * The pointer to this device's pcidev_info structure in + * the PROM, is in the vendor resource. + */ + status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, + &sn_uuid, &buffer); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "%s: acpi_get_vendor_resource() failed (0x%x) for: %s\n", + __func__, status, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return 1; + } + + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != + sizeof(struct pci_devdev_info *)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "%s: Invalid vendor data length: %d for: %s\n", + __func__, vendor->byte_length, + (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + ret = 1; + goto exit; + } + + pcidev_ptr = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL); + if (!pcidev_ptr) + panic("%s: Unable to alloc memory for pcidev_info", __func__); + + memcpy(&addr, vendor->byte_data, sizeof(struct pcidev_info *)); + pcidev_prom_ptr = __va(addr); + memcpy(pcidev_ptr, pcidev_prom_ptr, sizeof(struct pcidev_info)); + + /* Get the IRQ info */ + irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (!irq_info) + panic("%s: Unable to alloc memory for sn_irq_info", __func__); + + if (pcidev_ptr->pdi_sn_irq_info) { + irq_info_prom = __va(pcidev_ptr->pdi_sn_irq_info); + memcpy(irq_info, irq_info_prom, sizeof(struct sn_irq_info)); + } + + *pcidev_info = pcidev_ptr; + *sn_irq_info = irq_info; + +exit: + kfree(buffer.pointer); + return ret; +} + +static unsigned int +get_host_devfn(acpi_handle device_handle, acpi_handle rootbus_handle) +{ + unsigned long long adr; + acpi_handle child; + unsigned int devfn; + int function; + acpi_handle parent; + int slot; + acpi_status status; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + acpi_get_name(device_handle, ACPI_FULL_PATHNAME, &name_buffer); + + /* + * Do an upward search to find the root bus device, and + * obtain the host devfn from the previous child device. + */ + child = device_handle; + while (child) { + status = acpi_get_parent(child, &parent); + if (ACPI_FAILURE(status)) { + printk(KERN_ERR "%s: acpi_get_parent() failed " + "(0x%x) for: %s\n", __func__, status, + (char *)name_buffer.pointer); + panic("%s: Unable to find host devfn\n", __func__); + } + if (parent == rootbus_handle) + break; + child = parent; + } + if (!child) { + printk(KERN_ERR "%s: Unable to find root bus for: %s\n", + __func__, (char *)name_buffer.pointer); + BUG(); + } + + status = acpi_evaluate_integer(child, METHOD_NAME__ADR, NULL, &adr); + if (ACPI_FAILURE(status)) { + printk(KERN_ERR "%s: Unable to get _ADR (0x%x) for: %s\n", + __func__, status, (char *)name_buffer.pointer); + panic("%s: Unable to find host devfn\n", __func__); + } + + kfree(name_buffer.pointer); + + slot = (adr >> 16) & 0xffff; + function = adr & 0xffff; + devfn = PCI_DEVFN(slot, function); + return devfn; +} + +/* + * find_matching_device - Callback routine to find the ACPI device + * that matches up with our pci_dev device. + * Matching is done on bus number and devfn. + * To find the bus number for a particular + * ACPI device, we must look at the _BBN method + * of its parent. + */ +static acpi_status +find_matching_device(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + unsigned long long bbn = -1; + unsigned long long adr; + acpi_handle parent = NULL; + acpi_status status; + unsigned int devfn; + int function; + int slot; + struct sn_pcidev_match *info = context; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, + &adr); + if (ACPI_SUCCESS(status)) { + status = acpi_get_parent(handle, &parent); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "%s: acpi_get_parent() failed (0x%x) for: %s\n", + __func__, status, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return AE_OK; + } + status = acpi_evaluate_integer(parent, METHOD_NAME__BBN, + NULL, &bbn); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "%s: Failed to find _BBN in parent of: %s\n", + __func__, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return AE_OK; + } + + slot = (adr >> 16) & 0xffff; + function = adr & 0xffff; + devfn = PCI_DEVFN(slot, function); + if ((info->devfn == devfn) && (info->bus == bbn)) { + /* We have a match! */ + info->handle = handle; + return 1; + } + } + return AE_OK; +} + +/* + * sn_acpi_get_pcidev_info - Search ACPI namespace for the acpi + * device matching the specified pci_dev, + * and return the pcidev info and irq info. + */ +int +sn_acpi_get_pcidev_info(struct pci_dev *dev, struct pcidev_info **pcidev_info, + struct sn_irq_info **sn_irq_info) +{ + unsigned int host_devfn; + struct sn_pcidev_match pcidev_match; + acpi_handle rootbus_handle; + unsigned long long segment; + acpi_status status; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + rootbus_handle = PCI_CONTROLLER(dev)->acpi_handle; + status = acpi_evaluate_integer(rootbus_handle, METHOD_NAME__SEG, NULL, + &segment); + if (ACPI_SUCCESS(status)) { + if (segment != pci_domain_nr(dev)) { + acpi_get_name(rootbus_handle, ACPI_FULL_PATHNAME, + &name_buffer); + printk(KERN_ERR + "%s: Segment number mismatch, 0x%llx vs 0x%x for: %s\n", + __func__, segment, pci_domain_nr(dev), + (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return 1; + } + } else { + acpi_get_name(rootbus_handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR "%s: Unable to get __SEG from: %s\n", + __func__, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return 1; + } + + /* + * We want to search all devices in this segment/domain + * of the ACPI namespace for the matching ACPI device, + * which holds the pcidev_info pointer in its vendor resource. + */ + pcidev_match.bus = dev->bus->number; + pcidev_match.devfn = dev->devfn; + pcidev_match.handle = NULL; + + acpi_walk_namespace(ACPI_TYPE_DEVICE, rootbus_handle, ACPI_UINT32_MAX, + find_matching_device, NULL, &pcidev_match, NULL); + + if (!pcidev_match.handle) { + printk(KERN_ERR + "%s: Could not find matching ACPI device for %s.\n", + __func__, pci_name(dev)); + return 1; + } + + if (sn_extract_device_info(pcidev_match.handle, pcidev_info, sn_irq_info)) + return 1; + + /* Build up the pcidev_info.pdi_slot_host_handle */ + host_devfn = get_host_devfn(pcidev_match.handle, rootbus_handle); + (*pcidev_info)->pdi_slot_host_handle = + ((unsigned long) pci_domain_nr(dev) << 40) | + /* bus == 0 */ + host_devfn; + return 0; +} + +/* + * sn_acpi_slot_fixup - Obtain the pcidev_info and sn_irq_info. + * Perform any SN specific slot fixup. + * At present there does not appear to be + * any generic way to handle a ROM image + * that has been shadowed by the PROM, so + * we pass a pointer to it within the + * pcidev_info structure. + */ + +void +sn_acpi_slot_fixup(struct pci_dev *dev) +{ + void __iomem *addr; + struct pcidev_info *pcidev_info = NULL; + struct sn_irq_info *sn_irq_info = NULL; + size_t image_size, size; + + if (sn_acpi_get_pcidev_info(dev, &pcidev_info, &sn_irq_info)) { + panic("%s: Failure obtaining pcidev_info for %s\n", + __func__, pci_name(dev)); + } + + if (pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE]) { + /* + * A valid ROM image exists and has been shadowed by the + * PROM. Setup the pci_dev ROM resource with the address + * of the shadowed copy, and the actual length of the ROM image. + */ + size = pci_resource_len(dev, PCI_ROM_RESOURCE); + addr = ioremap(pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE], + size); + image_size = pci_get_rom_size(dev, addr, size); + dev->resource[PCI_ROM_RESOURCE].start = (unsigned long) addr; + dev->resource[PCI_ROM_RESOURCE].end = + (unsigned long) addr + image_size - 1; + dev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_BIOS_COPY; + } + sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info); +} + +EXPORT_SYMBOL(sn_acpi_slot_fixup); + + +/* + * sn_acpi_bus_fixup - Perform SN specific setup of software structs + * (pcibus_bussoft, pcidev_info) and hardware + * registers, for the specified bus and devices under it. + */ +void +sn_acpi_bus_fixup(struct pci_bus *bus) +{ + struct pci_dev *pci_dev = NULL; + struct pcibus_bussoft *prom_bussoft_ptr; + + if (!bus->parent) { /* If root bus */ + prom_bussoft_ptr = sn_get_bussoft_ptr(bus); + if (prom_bussoft_ptr == NULL) { + printk(KERN_ERR + "%s: 0x%04x:0x%02x Unable to " + "obtain prom_bussoft_ptr\n", + __func__, pci_domain_nr(bus), bus->number); + return; + } + sn_common_bus_fixup(bus, prom_bussoft_ptr); + } + list_for_each_entry(pci_dev, &bus->devices, bus_list) { + sn_acpi_slot_fixup(pci_dev); + } +} + +/* + * sn_io_acpi_init - PROM has ACPI support for IO, defining at a minimum the + * nodes and root buses in the DSDT. As a result, bus scanning + * will be initiated by the Linux ACPI code. + */ + +void __init +sn_io_acpi_init(void) +{ + u64 result; + long status; + + /* SN Altix does not follow the IOSAPIC IRQ routing model */ + acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM; + + /* Setup hubdev_info for all SGIHUB/SGITIO devices */ + acpi_get_devices("SGIHUB", sn_acpi_hubdev_init, NULL, NULL); + acpi_get_devices("SGITIO", sn_acpi_hubdev_init, NULL, NULL); + + status = sal_ioif_init(&result); + if (status || result) + panic("sal_ioif_init failed: [%lx] %s\n", + status, ia64_sal_strerror(status)); +} diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c new file mode 100644 index 00000000..fbb5f2f8 --- /dev/null +++ b/arch/ia64/sn/kernel/io_common.c @@ -0,0 +1,567 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/bootmem.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <asm/sn/types.h> +#include <asm/sn/addrs.h> +#include <asm/sn/sn_feature_sets.h> +#include <asm/sn/geo.h> +#include <asm/sn/io.h> +#include <asm/sn/l1.h> +#include <asm/sn/module.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/simulator.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/tioca_provider.h> +#include <asm/sn/tioce_provider.h> +#include "xtalk/hubdev.h" +#include "xtalk/xwidgetdev.h" +#include <linux/acpi.h> +#include <asm/sn/sn2/sn_hwperf.h> +#include <asm/sn/acpi.h> + +extern void sn_init_cpei_timer(void); +extern void register_sn_procfs(void); +extern void sn_io_acpi_init(void); +extern void sn_io_init(void); + + +static struct list_head sn_sysdata_list; + +/* sysdata list struct */ +struct sysdata_el { + struct list_head entry; + void *sysdata; +}; + +int sn_ioif_inited; /* SN I/O infrastructure initialized? */ + +int sn_acpi_rev; /* SN ACPI revision */ +EXPORT_SYMBOL_GPL(sn_acpi_rev); + +struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ + +/* + * Hooks and struct for unsupported pci providers + */ + +static dma_addr_t +sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int type) +{ + return 0; +} + +static void +sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction) +{ + return; +} + +static void * +sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller *controller) +{ + return NULL; +} + +static struct sn_pcibus_provider sn_pci_default_provider = { + .dma_map = sn_default_pci_map, + .dma_map_consistent = sn_default_pci_map, + .dma_unmap = sn_default_pci_unmap, + .bus_fixup = sn_default_pci_bus_fixup, +}; + +/* + * Retrieve the DMA Flush List given nasid, widget, and device. + * This list is needed to implement the WAR - Flush DMA data on PIO Reads. + */ +static inline u64 +sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, + u64 address) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST, + (u64) nasid, (u64) widget_num, + (u64) device_num, (u64) address, 0, 0, 0); + return ret_stuff.status; +} + +/* + * sn_pcidev_info_get() - Retrieve the pcidev_info struct for the specified + * device. + */ +inline struct pcidev_info * +sn_pcidev_info_get(struct pci_dev *dev) +{ + struct pcidev_info *pcidev; + + list_for_each_entry(pcidev, + &(SN_PLATFORM_DATA(dev)->pcidev_info), pdi_list) { + if (pcidev->pdi_linux_pcidev == dev) + return pcidev; + } + return NULL; +} + +/* Older PROM flush WAR + * + * 01/16/06 -- This war will be in place until a new official PROM is released. + * Additionally note that the struct sn_flush_device_war also has to be + * removed from arch/ia64/sn/include/xtalk/hubdev.h + */ + +static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device, + struct sn_flush_device_common *common) +{ + struct sn_flush_device_war *war_list; + struct sn_flush_device_war *dev_entry; + struct ia64_sal_retval isrv = {0,0,0,0}; + + printk_once(KERN_WARNING + "PROM version < 4.50 -- implementing old PROM flush WAR\n"); + + war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL); + BUG_ON(!war_list); + + SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST, + nasid, widget, __pa(war_list), 0, 0, 0 ,0); + if (isrv.status) + panic("sn_device_fixup_war failed: %s\n", + ia64_sal_strerror(isrv.status)); + + dev_entry = war_list + device; + memcpy(common,dev_entry, sizeof(*common)); + kfree(war_list); + + return isrv.status; +} + +/* + * sn_common_hubdev_init() - This routine is called to initialize the HUB data + * structure for each node in the system. + */ +void __init +sn_common_hubdev_init(struct hubdev_info *hubdev) +{ + + struct sn_flush_device_kernel *sn_flush_device_kernel; + struct sn_flush_device_kernel *dev_entry; + s64 status; + int widget, device, size; + + /* Attach the error interrupt handlers */ + if (hubdev->hdi_nasid & 1) /* If TIO */ + ice_error_init(hubdev); + else + hub_error_init(hubdev); + + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) + hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev; + + if (!hubdev->hdi_flush_nasid_list.widget_p) + return; + + size = (HUB_WIDGET_ID_MAX + 1) * + sizeof(struct sn_flush_device_kernel *); + hubdev->hdi_flush_nasid_list.widget_p = + kzalloc(size, GFP_KERNEL); + BUG_ON(!hubdev->hdi_flush_nasid_list.widget_p); + + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) { + size = DEV_PER_WIDGET * + sizeof(struct sn_flush_device_kernel); + sn_flush_device_kernel = kzalloc(size, GFP_KERNEL); + BUG_ON(!sn_flush_device_kernel); + + dev_entry = sn_flush_device_kernel; + for (device = 0; device < DEV_PER_WIDGET; + device++, dev_entry++) { + size = sizeof(struct sn_flush_device_common); + dev_entry->common = kzalloc(size, GFP_KERNEL); + BUG_ON(!dev_entry->common); + if (sn_prom_feature_available(PRF_DEVICE_FLUSH_LIST)) + status = sal_get_device_dmaflush_list( + hubdev->hdi_nasid, widget, device, + (u64)(dev_entry->common)); + else + status = sn_device_fixup_war(hubdev->hdi_nasid, + widget, device, + dev_entry->common); + if (status != SALRET_OK) + panic("SAL call failed: %s\n", + ia64_sal_strerror(status)); + + spin_lock_init(&dev_entry->sfdl_flush_lock); + } + + if (sn_flush_device_kernel) + hubdev->hdi_flush_nasid_list.widget_p[widget] = + sn_flush_device_kernel; + } +} + +void sn_pci_unfixup_slot(struct pci_dev *dev) +{ + struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev; + + sn_irq_unfixup(dev); + pci_dev_put(host_pci_dev); + pci_dev_put(dev); +} + +/* + * sn_pci_fixup_slot() + */ +void sn_pci_fixup_slot(struct pci_dev *dev, struct pcidev_info *pcidev_info, + struct sn_irq_info *sn_irq_info) +{ + int segment = pci_domain_nr(dev->bus); + struct pcibus_bussoft *bs; + struct pci_bus *host_pci_bus; + struct pci_dev *host_pci_dev; + unsigned int bus_no, devfn; + + pci_dev_get(dev); /* for the sysdata pointer */ + + /* Add pcidev_info to list in pci_controller.platform_data */ + list_add_tail(&pcidev_info->pdi_list, + &(SN_PLATFORM_DATA(dev->bus)->pcidev_info)); + /* + * Using the PROMs values for the PCI host bus, get the Linux + * PCI host_pci_dev struct and set up host bus linkages + */ + + bus_no = (pcidev_info->pdi_slot_host_handle >> 32) & 0xff; + devfn = pcidev_info->pdi_slot_host_handle & 0xffffffff; + host_pci_bus = pci_find_bus(segment, bus_no); + host_pci_dev = pci_get_slot(host_pci_bus, devfn); + + pcidev_info->host_pci_dev = host_pci_dev; + pcidev_info->pdi_linux_pcidev = dev; + pcidev_info->pdi_host_pcidev_info = SN_PCIDEV_INFO(host_pci_dev); + bs = SN_PCIBUS_BUSSOFT(dev->bus); + pcidev_info->pdi_pcibus_info = bs; + + if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) { + SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type]; + } else { + SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider; + } + + /* Only set up IRQ stuff if this device has a host bus context */ + if (bs && sn_irq_info->irq_irq) { + pcidev_info->pdi_sn_irq_info = sn_irq_info; + dev->irq = pcidev_info->pdi_sn_irq_info->irq_irq; + sn_irq_fixup(dev, sn_irq_info); + } else { + pcidev_info->pdi_sn_irq_info = NULL; + kfree(sn_irq_info); + } +} + +/* + * sn_common_bus_fixup - Perform platform specific bus fixup. + * Execute the ASIC specific fixup routine + * for this bus. + */ +void +sn_common_bus_fixup(struct pci_bus *bus, + struct pcibus_bussoft *prom_bussoft_ptr) +{ + int cnode; + struct pci_controller *controller; + struct hubdev_info *hubdev_info; + int nasid; + void *provider_soft; + struct sn_pcibus_provider *provider; + struct sn_platform_data *sn_platform_data; + + controller = PCI_CONTROLLER(bus); + /* + * Per-provider fixup. Copies the bus soft structure from prom + * to local area and links SN_PCIBUS_BUSSOFT(). + */ + + if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) { + printk(KERN_WARNING "sn_common_bus_fixup: Unsupported asic type, %d", + prom_bussoft_ptr->bs_asic_type); + return; + } + + if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) + return; /* no further fixup necessary */ + + provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type]; + if (provider == NULL) + panic("sn_common_bus_fixup: No provider registered for this asic type, %d", + prom_bussoft_ptr->bs_asic_type); + + if (provider->bus_fixup) + provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, + controller); + else + provider_soft = NULL; + + /* + * Generic bus fixup goes here. Don't reference prom_bussoft_ptr + * after this point. + */ + controller->platform_data = kzalloc(sizeof(struct sn_platform_data), + GFP_KERNEL); + BUG_ON(controller->platform_data == NULL); + sn_platform_data = + (struct sn_platform_data *) controller->platform_data; + sn_platform_data->provider_soft = provider_soft; + INIT_LIST_HEAD(&((struct sn_platform_data *) + controller->platform_data)->pcidev_info); + nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base); + cnode = nasid_to_cnodeid(nasid); + hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info = + &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]); + + /* + * If the node information we obtained during the fixup phase is + * invalid then set controller->node to -1 (undetermined) + */ + if (controller->node >= num_online_nodes()) { + struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus); + + printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u " + "L_IO=%llx L_MEM=%llx BASE=%llx\n", + b->bs_asic_type, b->bs_xid, b->bs_persist_busnum, + b->bs_legacy_io, b->bs_legacy_mem, b->bs_base); + printk(KERN_WARNING "on node %d but only %d nodes online." + "Association set to undetermined.\n", + controller->node, num_online_nodes()); + controller->node = -1; + } +} + +void sn_bus_store_sysdata(struct pci_dev *dev) +{ + struct sysdata_el *element; + + element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL); + if (!element) { + dev_dbg(&dev->dev, "%s: out of memory!\n", __func__); + return; + } + element->sysdata = SN_PCIDEV_INFO(dev); + list_add(&element->entry, &sn_sysdata_list); +} + +void sn_bus_free_sysdata(void) +{ + struct sysdata_el *element; + struct list_head *list, *safe; + + list_for_each_safe(list, safe, &sn_sysdata_list) { + element = list_entry(list, struct sysdata_el, entry); + list_del(&element->entry); + list_del(&(((struct pcidev_info *) + (element->sysdata))->pdi_list)); + kfree(element->sysdata); + kfree(element); + } + return; +} + +/* + * hubdev_init_node() - Creates the HUB data structure and link them to it's + * own NODE specific data area. + */ +void __init hubdev_init_node(nodepda_t * npda, cnodeid_t node) +{ + struct hubdev_info *hubdev_info; + int size; + pg_data_t *pg; + + size = sizeof(struct hubdev_info); + + if (node >= num_online_nodes()) /* Headless/memless IO nodes */ + pg = NODE_DATA(0); + else + pg = NODE_DATA(node); + + hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size); + + npda->pdinfo = (void *)hubdev_info; +} + +geoid_t +cnodeid_get_geoid(cnodeid_t cnode) +{ + struct hubdev_info *hubdev; + + hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + return hubdev->hdi_geoid; +} + +void sn_generate_path(struct pci_bus *pci_bus, char *address) +{ + nasid_t nasid; + cnodeid_t cnode; + geoid_t geoid; + moduleid_t moduleid; + u16 bricktype; + + nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base); + cnode = nasid_to_cnodeid(nasid); + geoid = cnodeid_get_geoid(cnode); + moduleid = geo_module(geoid); + + sprintf(address, "module_%c%c%c%c%.2d", + '0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)), + '0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)), + '0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)), + MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid)); + + /* Tollhouse requires slot id to be displayed */ + bricktype = MODULE_GET_BTYPE(moduleid); + if ((bricktype == L1_BRICKTYPE_191010) || + (bricktype == L1_BRICKTYPE_1932)) + sprintf(address + strlen(address), "^%d", + geo_slot(geoid)); +} + +void __devinit +sn_pci_fixup_bus(struct pci_bus *bus) +{ + + if (SN_ACPI_BASE_SUPPORT()) + sn_acpi_bus_fixup(bus); + else + sn_bus_fixup(bus); +} + +/* + * sn_io_early_init - Perform early IO (and some non-IO) initialization. + * In particular, setup the sn_pci_provider[] array. + * This needs to be done prior to any bus scanning + * (acpi_scan_init()) in the ACPI case, as the SN + * bus fixup code will reference the array. + */ +static int __init +sn_io_early_init(void) +{ + int i; + + if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) + return 0; + + /* we set the acpi revision to that of the DSDT table OEM rev. */ + { + struct acpi_table_header *header = NULL; + + acpi_get_table(ACPI_SIG_DSDT, 1, &header); + BUG_ON(header == NULL); + sn_acpi_rev = header->oem_revision; + } + + /* + * prime sn_pci_provider[]. Individual provider init routines will + * override their respective default entries. + */ + + for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++) + sn_pci_provider[i] = &sn_pci_default_provider; + + pcibr_init_provider(); + tioca_init_provider(); + tioce_init_provider(); + + /* + * This is needed to avoid bounce limit checks in the blk layer + */ + ia64_max_iommu_merge_mask = ~PAGE_MASK; + + sn_irq_lh_init(); + INIT_LIST_HEAD(&sn_sysdata_list); + sn_init_cpei_timer(); + +#ifdef CONFIG_PROC_FS + register_sn_procfs(); +#endif + + { + struct acpi_table_header *header; + (void)acpi_get_table(ACPI_SIG_DSDT, 1, &header); + printk(KERN_INFO "ACPI DSDT OEM Rev 0x%x\n", + header->oem_revision); + } + if (SN_ACPI_BASE_SUPPORT()) + sn_io_acpi_init(); + else + sn_io_init(); + return 0; +} + +arch_initcall(sn_io_early_init); + +/* + * sn_io_late_init() - Perform any final platform specific IO initialization. + */ + +int __init +sn_io_late_init(void) +{ + struct pci_bus *bus; + struct pcibus_bussoft *bussoft; + cnodeid_t cnode; + nasid_t nasid; + cnodeid_t near_cnode; + + if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) + return 0; + + /* + * Setup closest node in pci_controller->node for + * PIC, TIOCP, TIOCE (TIOCA does it during bus fixup using + * info from the PROM). + */ + bus = NULL; + while ((bus = pci_find_next_bus(bus)) != NULL) { + bussoft = SN_PCIBUS_BUSSOFT(bus); + nasid = NASID_GET(bussoft->bs_base); + cnode = nasid_to_cnodeid(nasid); + if ((bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) || + (bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCE) || + (bussoft->bs_asic_type == PCIIO_ASIC_TYPE_PIC)) { + /* PCI Bridge: find nearest node with CPUs */ + int e = sn_hwperf_get_nearest_node(cnode, NULL, + &near_cnode); + if (e < 0) { + near_cnode = (cnodeid_t)-1; /* use any node */ + printk(KERN_WARNING "sn_io_late_init: failed " + "to find near node with CPUs for " + "node %d, err=%d\n", cnode, e); + } + PCI_CONTROLLER(bus)->node = near_cnode; + } + } + + sn_ioif_inited = 1; /* SN I/O infrastructure now initialized */ + + return 0; +} + +fs_initcall(sn_io_late_init); + +EXPORT_SYMBOL(sn_pci_unfixup_slot); +EXPORT_SYMBOL(sn_bus_store_sysdata); +EXPORT_SYMBOL(sn_bus_free_sysdata); +EXPORT_SYMBOL(sn_generate_path); + diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c new file mode 100644 index 00000000..238e2c51 --- /dev/null +++ b/arch/ia64/sn/kernel/io_init.c @@ -0,0 +1,383 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/slab.h> +#include <linux/export.h> +#include <asm/sn/types.h> +#include <asm/sn/addrs.h> +#include <asm/sn/io.h> +#include <asm/sn/module.h> +#include <asm/sn/intr.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/sn_sal.h> +#include "xtalk/hubdev.h" + +/* + * The code in this file will only be executed when running with + * a PROM that does _not_ have base ACPI IO support. + * (i.e., SN_ACPI_BASE_SUPPORT() == 0) + */ + +static int max_segment_number; /* Default highest segment number */ +static int max_pcibus_number = 255; /* Default highest pci bus number */ + + +/* + * Retrieve the hub device info structure for the given nasid. + */ +static inline u64 sal_get_hubdev_info(u64 handle, u64 address) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_HUBDEV_INFO, + (u64) handle, (u64) address, 0, 0, 0, 0, 0); + return ret_stuff.v0; +} + +/* + * Retrieve the pci bus information given the bus number. + */ +static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_PCIBUS_INFO, + (u64) segment, (u64) busnum, (u64) address, 0, 0, 0, 0); + return ret_stuff.v0; +} + +/* + * Retrieve the pci device information given the bus and device|function number. + */ +static inline u64 +sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev, + u64 sn_irq_info) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_PCIDEV_INFO, + (u64) segment, (u64) bus_number, (u64) devfn, + (u64) pci_dev, + sn_irq_info, 0, 0); + return ret_stuff.v0; +} + + +/* + * sn_fixup_ionodes() - This routine initializes the HUB data structure for + * each node in the system. This function is only + * executed when running with a non-ACPI capable PROM. + */ +static void __init sn_fixup_ionodes(void) +{ + + struct hubdev_info *hubdev; + u64 status; + u64 nasid; + int i; + extern void sn_common_hubdev_init(struct hubdev_info *); + + /* + * Get SGI Specific HUB chipset information. + * Inform Prom that this kernel can support domain bus numbering. + */ + for (i = 0; i < num_cnodes; i++) { + hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); + nasid = cnodeid_to_nasid(i); + hubdev->max_segment_number = 0xffffffff; + hubdev->max_pcibus_number = 0xff; + status = sal_get_hubdev_info(nasid, (u64) __pa(hubdev)); + if (status) + continue; + + /* Save the largest Domain and pcibus numbers found. */ + if (hubdev->max_segment_number) { + /* + * Dealing with a Prom that supports segments. + */ + max_segment_number = hubdev->max_segment_number; + max_pcibus_number = hubdev->max_pcibus_number; + } + sn_common_hubdev_init(hubdev); + } +} + +/* + * sn_pci_legacy_window_fixup - Create PCI controller windows for + * legacy IO and MEM space. This needs to + * be done here, as the PROM does not have + * ACPI support defining the root buses + * and their resources (_CRS), + */ +static void +sn_legacy_pci_window_fixup(struct pci_controller *controller, + u64 legacy_io, u64 legacy_mem) +{ + controller->window = kcalloc(2, sizeof(struct pci_window), + GFP_KERNEL); + BUG_ON(controller->window == NULL); + controller->window[0].offset = legacy_io; + controller->window[0].resource.name = "legacy_io"; + controller->window[0].resource.flags = IORESOURCE_IO; + controller->window[0].resource.start = legacy_io; + controller->window[0].resource.end = + controller->window[0].resource.start + 0xffff; + controller->window[0].resource.parent = &ioport_resource; + controller->window[1].offset = legacy_mem; + controller->window[1].resource.name = "legacy_mem"; + controller->window[1].resource.flags = IORESOURCE_MEM; + controller->window[1].resource.start = legacy_mem; + controller->window[1].resource.end = + controller->window[1].resource.start + (1024 * 1024) - 1; + controller->window[1].resource.parent = &iomem_resource; + controller->windows = 2; +} + +/* + * sn_pci_window_fixup() - Create a pci_window for each device resource. + * It will setup pci_windows for use by + * pcibios_bus_to_resource(), pcibios_resource_to_bus(), + * etc. + */ +static void +sn_pci_window_fixup(struct pci_dev *dev, unsigned int count, + s64 * pci_addrs) +{ + struct pci_controller *controller = PCI_CONTROLLER(dev->bus); + unsigned int i; + unsigned int idx; + unsigned int new_count; + struct pci_window *new_window; + + if (count == 0) + return; + idx = controller->windows; + new_count = controller->windows + count; + new_window = kcalloc(new_count, sizeof(struct pci_window), GFP_KERNEL); + BUG_ON(new_window == NULL); + if (controller->window) { + memcpy(new_window, controller->window, + sizeof(struct pci_window) * controller->windows); + kfree(controller->window); + } + + /* Setup a pci_window for each device resource. */ + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + if (pci_addrs[i] == -1) + continue; + + new_window[idx].offset = dev->resource[i].start - pci_addrs[i]; + new_window[idx].resource = dev->resource[i]; + idx++; + } + + controller->windows = new_count; + controller->window = new_window; +} + +/* + * sn_io_slot_fixup() - We are not running with an ACPI capable PROM, + * and need to convert the pci_dev->resource + * 'start' and 'end' addresses to mapped addresses, + * and setup the pci_controller->window array entries. + */ +void +sn_io_slot_fixup(struct pci_dev *dev) +{ + unsigned int count = 0; + int idx; + s64 pci_addrs[PCI_ROM_RESOURCE + 1]; + unsigned long addr, end, size, start; + struct pcidev_info *pcidev_info; + struct sn_irq_info *sn_irq_info; + int status; + + pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL); + if (!pcidev_info) + panic("%s: Unable to alloc memory for pcidev_info", __func__); + + sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (!sn_irq_info) + panic("%s: Unable to alloc memory for sn_irq_info", __func__); + + /* Call to retrieve pci device information needed by kernel. */ + status = sal_get_pcidev_info((u64) pci_domain_nr(dev), + (u64) dev->bus->number, + dev->devfn, + (u64) __pa(pcidev_info), + (u64) __pa(sn_irq_info)); + + BUG_ON(status); /* Cannot get platform pci device information */ + + + /* Copy over PIO Mapped Addresses */ + for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) { + + if (!pcidev_info->pdi_pio_mapped_addr[idx]) { + pci_addrs[idx] = -1; + continue; + } + + start = dev->resource[idx].start; + end = dev->resource[idx].end; + size = end - start; + if (size == 0) { + pci_addrs[idx] = -1; + continue; + } + pci_addrs[idx] = start; + count++; + addr = pcidev_info->pdi_pio_mapped_addr[idx]; + addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET; + dev->resource[idx].start = addr; + dev->resource[idx].end = addr + size; + + /* + * if it's already in the device structure, remove it before + * inserting + */ + if (dev->resource[idx].parent && dev->resource[idx].parent->child) + release_resource(&dev->resource[idx]); + + if (dev->resource[idx].flags & IORESOURCE_IO) + insert_resource(&ioport_resource, &dev->resource[idx]); + else + insert_resource(&iomem_resource, &dev->resource[idx]); + /* + * If ROM, set the actual ROM image size, and mark as + * shadowed in PROM. + */ + if (idx == PCI_ROM_RESOURCE) { + size_t image_size; + void __iomem *rom; + + rom = ioremap(pci_resource_start(dev, PCI_ROM_RESOURCE), + size + 1); + image_size = pci_get_rom_size(dev, rom, size + 1); + dev->resource[PCI_ROM_RESOURCE].end = + dev->resource[PCI_ROM_RESOURCE].start + + image_size - 1; + dev->resource[PCI_ROM_RESOURCE].flags |= + IORESOURCE_ROM_BIOS_COPY; + } + } + /* Create a pci_window in the pci_controller struct for + * each device resource. + */ + if (count > 0) + sn_pci_window_fixup(dev, count, pci_addrs); + + sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info); +} + +EXPORT_SYMBOL(sn_io_slot_fixup); + +/* + * sn_pci_controller_fixup() - This routine sets up a bus's resources + * consistent with the Linux PCI abstraction layer. + */ +static void __init +sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) +{ + s64 status = 0; + struct pci_controller *controller; + struct pcibus_bussoft *prom_bussoft_ptr; + LIST_HEAD(resources); + int i; + + status = sal_get_pcibus_info((u64) segment, (u64) busnum, + (u64) ia64_tpa(&prom_bussoft_ptr)); + if (status > 0) + return; /*bus # does not exist */ + prom_bussoft_ptr = __va(prom_bussoft_ptr); + + controller = kzalloc(sizeof(*controller), GFP_KERNEL); + BUG_ON(!controller); + controller->segment = segment; + + /* + * Temporarily save the prom_bussoft_ptr for use by sn_bus_fixup(). + * (platform_data will be overwritten later in sn_common_bus_fixup()) + */ + controller->platform_data = prom_bussoft_ptr; + + sn_legacy_pci_window_fixup(controller, + prom_bussoft_ptr->bs_legacy_io, + prom_bussoft_ptr->bs_legacy_mem); + for (i = 0; i < controller->windows; i++) + pci_add_resource_offset(&resources, + &controller->window[i].resource, + controller->window[i].offset); + bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, controller, + &resources); + if (bus == NULL) + goto error_return; /* error, or bus already scanned */ + + bus->sysdata = controller; + + return; + +error_return: + + kfree(controller); + return; +} + +/* + * sn_bus_fixup + */ +void +sn_bus_fixup(struct pci_bus *bus) +{ + struct pci_dev *pci_dev = NULL; + struct pcibus_bussoft *prom_bussoft_ptr; + + if (!bus->parent) { /* If root bus */ + prom_bussoft_ptr = PCI_CONTROLLER(bus)->platform_data; + if (prom_bussoft_ptr == NULL) { + printk(KERN_ERR + "sn_bus_fixup: 0x%04x:0x%02x Unable to " + "obtain prom_bussoft_ptr\n", + pci_domain_nr(bus), bus->number); + return; + } + sn_common_bus_fixup(bus, prom_bussoft_ptr); + } + list_for_each_entry(pci_dev, &bus->devices, bus_list) { + sn_io_slot_fixup(pci_dev); + } + +} + +/* + * sn_io_init - PROM does not have ACPI support to define nodes or root buses, + * so we need to do things the hard way, including initiating the + * bus scanning ourselves. + */ + +void __init sn_io_init(void) +{ + int i, j; + + sn_fixup_ionodes(); + + /* busses are not known yet ... */ + for (i = 0; i <= max_segment_number; i++) + for (j = 0; j <= max_pcibus_number; j++) + sn_pci_controller_fixup(i, j, NULL); +} diff --git a/arch/ia64/sn/kernel/iomv.c b/arch/ia64/sn/kernel/iomv.c new file mode 100644 index 00000000..c77ebdf9 --- /dev/null +++ b/arch/ia64/sn/kernel/iomv.c @@ -0,0 +1,82 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2003, 2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/module.h> +#include <linux/acpi.h> +#include <asm/io.h> +#include <asm/delay.h> +#include <asm/vga.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/simulator.h> +#include <asm/sn/pda.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/acpi.h> + +#define IS_LEGACY_VGA_IOPORT(p) \ + (((p) >= 0x3b0 && (p) <= 0x3bb) || ((p) >= 0x3c0 && (p) <= 0x3df)) + +/** + * sn_io_addr - convert an in/out port to an i/o address + * @port: port to convert + * + * Legacy in/out instructions are converted to ld/st instructions + * on IA64. This routine will convert a port number into a valid + * SN i/o address. Used by sn_in*() and sn_out*(). + */ + +void *sn_io_addr(unsigned long port) +{ + if (!IS_RUNNING_ON_SIMULATOR()) { + if (IS_LEGACY_VGA_IOPORT(port)) + return (__ia64_mk_io_addr(port)); + /* On sn2, legacy I/O ports don't point at anything */ + if (port < (64 * 1024)) + return NULL; + if (SN_ACPI_BASE_SUPPORT()) + return (__ia64_mk_io_addr(port)); + else + return ((void *)(port | __IA64_UNCACHED_OFFSET)); + } else { + /* but the simulator uses them... */ + unsigned long addr; + + /* + * word align port, but need more than 10 bits + * for accessing registers in bedrock local block + * (so we don't do port&0xfff) + */ + addr = (is_shub2() ? 0xc00000028c000000UL : 0xc0000087cc000000UL) | ((port >> 2) << 12); + if ((port >= 0x1f0 && port <= 0x1f7) || port == 0x3f6 || port == 0x3f7) + addr |= port; + return (void *)addr; + } +} + +EXPORT_SYMBOL(sn_io_addr); + +/** + * __sn_mmiowb - I/O space memory barrier + * + * See arch/ia64/include/asm/io.h and Documentation/DocBook/deviceiobook.tmpl + * for details. + * + * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear. + * See PV 871084 for details about the WAR about zero value. + * + */ +void __sn_mmiowb(void) +{ + volatile unsigned long *adr = pda->pio_write_status_addr; + unsigned long val = pda->pio_write_status_val; + + while ((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != val) + cpu_relax(); +} + +EXPORT_SYMBOL(__sn_mmiowb); diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c new file mode 100644 index 00000000..62cf4dde --- /dev/null +++ b/arch/ia64/sn/kernel/irq.c @@ -0,0 +1,488 @@ +/* + * Platform dependent support for SGI SN + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/irq.h> +#include <linux/spinlock.h> +#include <linux/init.h> +#include <linux/rculist.h> +#include <linux/slab.h> +#include <asm/sn/addrs.h> +#include <asm/sn/arch.h> +#include <asm/sn/intr.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/sn_feature_sets.h> + +static void register_intr_pda(struct sn_irq_info *sn_irq_info); +static void unregister_intr_pda(struct sn_irq_info *sn_irq_info); + +extern int sn_ioif_inited; +struct list_head **sn_irq_lh; +static DEFINE_SPINLOCK(sn_irq_info_lock); /* non-IRQ lock */ + +u64 sn_intr_alloc(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info, + int req_irq, nasid_t req_nasid, + int req_slice) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, + (u64) SAL_INTR_ALLOC, (u64) local_nasid, + (u64) local_widget, __pa(sn_irq_info), (u64) req_irq, + (u64) req_nasid, (u64) req_slice); + + return ret_stuff.status; +} + +void sn_intr_free(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, + (u64) SAL_INTR_FREE, (u64) local_nasid, + (u64) local_widget, (u64) sn_irq_info->irq_irq, + (u64) sn_irq_info->irq_cookie, 0, 0); +} + +u64 sn_intr_redirect(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info, + nasid_t req_nasid, int req_slice) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, + (u64) SAL_INTR_REDIRECT, (u64) local_nasid, + (u64) local_widget, __pa(sn_irq_info), + (u64) req_nasid, (u64) req_slice, 0); + + return ret_stuff.status; +} + +static unsigned int sn_startup_irq(struct irq_data *data) +{ + return 0; +} + +static void sn_shutdown_irq(struct irq_data *data) +{ +} + +extern void ia64_mca_register_cpev(int); + +static void sn_disable_irq(struct irq_data *data) +{ + if (data->irq == local_vector_to_irq(IA64_CPE_VECTOR)) + ia64_mca_register_cpev(0); +} + +static void sn_enable_irq(struct irq_data *data) +{ + if (data->irq == local_vector_to_irq(IA64_CPE_VECTOR)) + ia64_mca_register_cpev(data->irq); +} + +static void sn_ack_irq(struct irq_data *data) +{ + u64 event_occurred, mask; + unsigned int irq = data->irq & 0xff; + + event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED)); + mask = event_occurred & SH_ALL_INT_MASK; + HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), mask); + __set_bit(irq, (volatile void *)pda->sn_in_service_ivecs); + + irq_move_irq(data); +} + +struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, + nasid_t nasid, int slice) +{ + int vector; + int cpuid; +#ifdef CONFIG_SMP + int cpuphys; +#endif + int64_t bridge; + int local_widget, status; + nasid_t local_nasid; + struct sn_irq_info *new_irq_info; + struct sn_pcibus_provider *pci_provider; + + bridge = (u64) sn_irq_info->irq_bridge; + if (!bridge) { + return NULL; /* irq is not a device interrupt */ + } + + local_nasid = NASID_GET(bridge); + + if (local_nasid & 1) + local_widget = TIO_SWIN_WIDGETNUM(bridge); + else + local_widget = SWIN_WIDGETNUM(bridge); + vector = sn_irq_info->irq_irq; + + /* Make use of SAL_INTR_REDIRECT if PROM supports it */ + status = sn_intr_redirect(local_nasid, local_widget, sn_irq_info, nasid, slice); + if (!status) { + new_irq_info = sn_irq_info; + goto finish_up; + } + + /* + * PROM does not support SAL_INTR_REDIRECT, or it failed. + * Revert to old method. + */ + new_irq_info = kmemdup(sn_irq_info, sizeof(struct sn_irq_info), + GFP_ATOMIC); + if (new_irq_info == NULL) + return NULL; + + /* Free the old PROM new_irq_info structure */ + sn_intr_free(local_nasid, local_widget, new_irq_info); + unregister_intr_pda(new_irq_info); + + /* allocate a new PROM new_irq_info struct */ + status = sn_intr_alloc(local_nasid, local_widget, + new_irq_info, vector, + nasid, slice); + + /* SAL call failed */ + if (status) { + kfree(new_irq_info); + return NULL; + } + + register_intr_pda(new_irq_info); + spin_lock(&sn_irq_info_lock); + list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); + spin_unlock(&sn_irq_info_lock); + kfree_rcu(sn_irq_info, rcu); + + +finish_up: + /* Update kernels new_irq_info with new target info */ + cpuid = nasid_slice_to_cpuid(new_irq_info->irq_nasid, + new_irq_info->irq_slice); + new_irq_info->irq_cpuid = cpuid; + + pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; + + /* + * If this represents a line interrupt, target it. If it's + * an msi (irq_int_bit < 0), it's already targeted. + */ + if (new_irq_info->irq_int_bit >= 0 && + pci_provider && pci_provider->target_interrupt) + (pci_provider->target_interrupt)(new_irq_info); + +#ifdef CONFIG_SMP + cpuphys = cpu_physical_id(cpuid); + set_irq_affinity_info((vector & 0xff), cpuphys, 0); +#endif + + return new_irq_info; +} + +static int sn_set_affinity_irq(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; + unsigned int irq = data->irq; + nasid_t nasid; + int slice; + + nasid = cpuid_to_nasid(cpumask_first(mask)); + slice = cpuid_to_slice(cpumask_first(mask)); + + list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, + sn_irq_lh[irq], list) + (void)sn_retarget_vector(sn_irq_info, nasid, slice); + + return 0; +} + +#ifdef CONFIG_SMP +void sn_set_err_irq_affinity(unsigned int irq) +{ + /* + * On systems which support CPU disabling (SHub2), all error interrupts + * are targeted at the boot CPU. + */ + if (is_shub2() && sn_prom_feature_available(PRF_CPU_DISABLE_SUPPORT)) + set_irq_affinity_info(irq, cpu_physical_id(0), 0); +} +#else +void sn_set_err_irq_affinity(unsigned int irq) { } +#endif + +static void +sn_mask_irq(struct irq_data *data) +{ +} + +static void +sn_unmask_irq(struct irq_data *data) +{ +} + +struct irq_chip irq_type_sn = { + .name = "SN hub", + .irq_startup = sn_startup_irq, + .irq_shutdown = sn_shutdown_irq, + .irq_enable = sn_enable_irq, + .irq_disable = sn_disable_irq, + .irq_ack = sn_ack_irq, + .irq_mask = sn_mask_irq, + .irq_unmask = sn_unmask_irq, + .irq_set_affinity = sn_set_affinity_irq +}; + +ia64_vector sn_irq_to_vector(int irq) +{ + if (irq >= IA64_NUM_VECTORS) + return 0; + return (ia64_vector)irq; +} + +unsigned int sn_local_vector_to_irq(u8 vector) +{ + return (CPU_VECTOR_TO_IRQ(smp_processor_id(), vector)); +} + +void sn_irq_init(void) +{ + int i; + + ia64_first_device_vector = IA64_SN2_FIRST_DEVICE_VECTOR; + ia64_last_device_vector = IA64_SN2_LAST_DEVICE_VECTOR; + + for (i = 0; i < NR_IRQS; i++) { + if (irq_get_chip(i) == &no_irq_chip) + irq_set_chip(i, &irq_type_sn); + } +} + +static void register_intr_pda(struct sn_irq_info *sn_irq_info) +{ + int irq = sn_irq_info->irq_irq; + int cpu = sn_irq_info->irq_cpuid; + + if (pdacpu(cpu)->sn_last_irq < irq) { + pdacpu(cpu)->sn_last_irq = irq; + } + + if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq) + pdacpu(cpu)->sn_first_irq = irq; +} + +static void unregister_intr_pda(struct sn_irq_info *sn_irq_info) +{ + int irq = sn_irq_info->irq_irq; + int cpu = sn_irq_info->irq_cpuid; + struct sn_irq_info *tmp_irq_info; + int i, foundmatch; + + rcu_read_lock(); + if (pdacpu(cpu)->sn_last_irq == irq) { + foundmatch = 0; + for (i = pdacpu(cpu)->sn_last_irq - 1; + i && !foundmatch; i--) { + list_for_each_entry_rcu(tmp_irq_info, + sn_irq_lh[i], + list) { + if (tmp_irq_info->irq_cpuid == cpu) { + foundmatch = 1; + break; + } + } + } + pdacpu(cpu)->sn_last_irq = i; + } + + if (pdacpu(cpu)->sn_first_irq == irq) { + foundmatch = 0; + for (i = pdacpu(cpu)->sn_first_irq + 1; + i < NR_IRQS && !foundmatch; i++) { + list_for_each_entry_rcu(tmp_irq_info, + sn_irq_lh[i], + list) { + if (tmp_irq_info->irq_cpuid == cpu) { + foundmatch = 1; + break; + } + } + } + pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i); + } + rcu_read_unlock(); +} + +void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) +{ + nasid_t nasid = sn_irq_info->irq_nasid; + int slice = sn_irq_info->irq_slice; + int cpu = nasid_slice_to_cpuid(nasid, slice); +#ifdef CONFIG_SMP + int cpuphys; +#endif + + pci_dev_get(pci_dev); + sn_irq_info->irq_cpuid = cpu; + sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev); + + /* link it into the sn_irq[irq] list */ + spin_lock(&sn_irq_info_lock); + list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]); + reserve_irq_vector(sn_irq_info->irq_irq); + if (sn_irq_info->irq_int_bit != -1) + irq_set_handler(sn_irq_info->irq_irq, handle_level_irq); + spin_unlock(&sn_irq_info_lock); + + register_intr_pda(sn_irq_info); +#ifdef CONFIG_SMP + cpuphys = cpu_physical_id(cpu); + set_irq_affinity_info(sn_irq_info->irq_irq, cpuphys, 0); + /* + * Affinity was set by the PROM, prevent it from + * being reset by the request_irq() path. + */ + irqd_mark_affinity_was_set(irq_get_irq_data(sn_irq_info->irq_irq)); +#endif +} + +void sn_irq_unfixup(struct pci_dev *pci_dev) +{ + struct sn_irq_info *sn_irq_info; + + /* Only cleanup IRQ stuff if this device has a host bus context */ + if (!SN_PCIDEV_BUSSOFT(pci_dev)) + return; + + sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info; + if (!sn_irq_info) + return; + if (!sn_irq_info->irq_irq) { + kfree(sn_irq_info); + return; + } + + unregister_intr_pda(sn_irq_info); + spin_lock(&sn_irq_info_lock); + list_del_rcu(&sn_irq_info->list); + spin_unlock(&sn_irq_info_lock); + if (list_empty(sn_irq_lh[sn_irq_info->irq_irq])) + free_irq_vector(sn_irq_info->irq_irq); + kfree_rcu(sn_irq_info, rcu); + pci_dev_put(pci_dev); + +} + +static inline void +sn_call_force_intr_provider(struct sn_irq_info *sn_irq_info) +{ + struct sn_pcibus_provider *pci_provider; + + pci_provider = sn_pci_provider[sn_irq_info->irq_bridge_type]; + + /* Don't force an interrupt if the irq has been disabled */ + if (!irqd_irq_disabled(irq_get_irq_data(sn_irq_info->irq_irq)) && + pci_provider && pci_provider->force_interrupt) + (*pci_provider->force_interrupt)(sn_irq_info); +} + +/* + * Check for lost interrupts. If the PIC int_status reg. says that + * an interrupt has been sent, but not handled, and the interrupt + * is not pending in either the cpu irr regs or in the soft irr regs, + * and the interrupt is not in service, then the interrupt may have + * been lost. Force an interrupt on that pin. It is possible that + * the interrupt is in flight, so we may generate a spurious interrupt, + * but we should never miss a real lost interrupt. + */ +static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) +{ + u64 regval; + struct pcidev_info *pcidev_info; + struct pcibus_info *pcibus_info; + + /* + * Bridge types attached to TIO (anything but PIC) do not need this WAR + * since they do not target Shub II interrupt registers. If that + * ever changes, this check needs to accommodate. + */ + if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_PIC) + return; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (!pcidev_info) + return; + + pcibus_info = + (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info-> + pdi_pcibus_info; + regval = pcireg_intr_status_get(pcibus_info); + + if (!ia64_get_irr(irq_to_vector(irq))) { + if (!test_bit(irq, pda->sn_in_service_ivecs)) { + regval &= 0xff; + if (sn_irq_info->irq_int_bit & regval & + sn_irq_info->irq_last_intr) { + regval &= ~(sn_irq_info->irq_int_bit & regval); + sn_call_force_intr_provider(sn_irq_info); + } + } + } + sn_irq_info->irq_last_intr = regval; +} + +void sn_lb_int_war_check(void) +{ + struct sn_irq_info *sn_irq_info; + int i; + + if (!sn_ioif_inited || pda->sn_first_irq == 0) + return; + + rcu_read_lock(); + for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) { + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) { + sn_check_intr(i, sn_irq_info); + } + } + rcu_read_unlock(); +} + +void __init sn_irq_lh_init(void) +{ + int i; + + sn_irq_lh = kmalloc(sizeof(struct list_head *) * NR_IRQS, GFP_KERNEL); + if (!sn_irq_lh) + panic("SN PCI INIT: Failed to allocate memory for PCI init\n"); + + for (i = 0; i < NR_IRQS; i++) { + sn_irq_lh[i] = kmalloc(sizeof(struct list_head), GFP_KERNEL); + if (!sn_irq_lh[i]) + panic("SN PCI INIT: Failed IRQ memory allocation\n"); + + INIT_LIST_HEAD(sn_irq_lh[i]); + } +} diff --git a/arch/ia64/sn/kernel/klconflib.c b/arch/ia64/sn/kernel/klconflib.c new file mode 100644 index 00000000..87682b48 --- /dev/null +++ b/arch/ia64/sn/kernel/klconflib.c @@ -0,0 +1,107 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/types.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <asm/sn/types.h> +#include <asm/sn/module.h> +#include <asm/sn/l1.h> + +char brick_types[MAX_BRICK_TYPES + 1] = "cri.xdpn%#=vo^kjbf890123456789..."; +/* + * Format a module id for printing. + * + * There are three possible formats: + * + * MODULE_FORMAT_BRIEF is the brief 6-character format, including + * the actual brick-type as recorded in the + * moduleid_t, eg. 002c15 for a C-brick, or + * 101#17 for a PX-brick. + * + * MODULE_FORMAT_LONG is the hwgraph format, eg. rack/002/bay/15 + * of rack/101/bay/17 (note that the brick + * type does not appear in this format). + * + * MODULE_FORMAT_LCD is like MODULE_FORMAT_BRIEF, except that it + * ensures that the module id provided appears + * exactly as it would on the LCD display of + * the corresponding brick, eg. still 002c15 + * for a C-brick, but 101p17 for a PX-brick. + * + * maule (9/13/04): Removed top-level check for (fmt == MODULE_FORMAT_LCD) + * making MODULE_FORMAT_LCD equivalent to MODULE_FORMAT_BRIEF. It was + * decided that all callers should assume the returned string should be what + * is displayed on the brick L1 LCD. + */ +void +format_module_id(char *buffer, moduleid_t m, int fmt) +{ + int rack, position; + unsigned char brickchar; + + rack = MODULE_GET_RACK(m); + brickchar = MODULE_GET_BTCHAR(m); + + /* Be sure we use the same brick type character as displayed + * on the brick's LCD + */ + switch (brickchar) + { + case L1_BRICKTYPE_GA: + case L1_BRICKTYPE_OPUS_TIO: + brickchar = L1_BRICKTYPE_C; + break; + + case L1_BRICKTYPE_PX: + case L1_BRICKTYPE_PE: + case L1_BRICKTYPE_PA: + case L1_BRICKTYPE_SA: /* we can move this to the "I's" later + * if that makes more sense + */ + brickchar = L1_BRICKTYPE_P; + break; + + case L1_BRICKTYPE_IX: + case L1_BRICKTYPE_IA: + + brickchar = L1_BRICKTYPE_I; + break; + } + + position = MODULE_GET_BPOS(m); + + if ((fmt == MODULE_FORMAT_BRIEF) || (fmt == MODULE_FORMAT_LCD)) { + /* Brief module number format, eg. 002c15 */ + + /* Decompress the rack number */ + *buffer++ = '0' + RACK_GET_CLASS(rack); + *buffer++ = '0' + RACK_GET_GROUP(rack); + *buffer++ = '0' + RACK_GET_NUM(rack); + + /* Add the brick type */ + *buffer++ = brickchar; + } + else if (fmt == MODULE_FORMAT_LONG) { + /* Fuller hwgraph format, eg. rack/002/bay/15 */ + + strcpy(buffer, "rack" "/"); buffer += strlen(buffer); + + *buffer++ = '0' + RACK_GET_CLASS(rack); + *buffer++ = '0' + RACK_GET_GROUP(rack); + *buffer++ = '0' + RACK_GET_NUM(rack); + + strcpy(buffer, "/" "bay" "/"); buffer += strlen(buffer); + } + + /* Add the bay position, using at least two digits */ + if (position < 10) + *buffer++ = '0'; + sprintf(buffer, "%d", position); +} diff --git a/arch/ia64/sn/kernel/machvec.c b/arch/ia64/sn/kernel/machvec.c new file mode 100644 index 00000000..02bb9155 --- /dev/null +++ b/arch/ia64/sn/kernel/machvec.c @@ -0,0 +1,11 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2002-2003 Silicon Graphics, Inc. All Rights Reserved. + */ + +#define MACHVEC_PLATFORM_NAME sn2 +#define MACHVEC_PLATFORM_HEADER <asm/machvec_sn2.h> +#include <asm/machvec_init.h> diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c new file mode 100644 index 00000000..27793f7a --- /dev/null +++ b/arch/ia64/sn/kernel/mca.c @@ -0,0 +1,146 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2006 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/vmalloc.h> +#include <linux/mutex.h> +#include <asm/mca.h> +#include <asm/sal.h> +#include <asm/sn/sn_sal.h> + +/* + * Interval for calling SAL to poll for errors that do NOT cause error + * interrupts. SAL will raise a CPEI if any errors are present that + * need to be logged. + */ +#define CPEI_INTERVAL (5*HZ) + +struct timer_list sn_cpei_timer; +void sn_init_cpei_timer(void); + +/* Printing oemdata from mca uses data that is not passed through SAL, it is + * global. Only one user at a time. + */ +static DEFINE_MUTEX(sn_oemdata_mutex); +static u8 **sn_oemdata; +static u64 *sn_oemdata_size, sn_oemdata_bufsize; + +/* + * print_hook + * + * This function is the callback routine that SAL calls to log error + * info for platform errors. buf is appended to sn_oemdata, resizing as + * required. + * Note: this is a SAL to OS callback, running under the same rules as the SAL + * code. SAL calls are run with preempt disabled so this routine must not + * sleep. vmalloc can sleep so print_hook cannot resize the output buffer + * itself, instead it must set the required size and return to let the caller + * resize the buffer then redrive the SAL call. + */ +static int print_hook(const char *fmt, ...) +{ + char buf[400]; + int len; + va_list args; + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + len = strlen(buf); + if (*sn_oemdata_size + len <= sn_oemdata_bufsize) + memcpy(*sn_oemdata + *sn_oemdata_size, buf, len); + *sn_oemdata_size += len; + return 0; +} + +static void sn_cpei_handler(int irq, void *devid, struct pt_regs *regs) +{ + /* + * this function's sole purpose is to call SAL when we receive + * a CE interrupt from SHUB or when the timer routine decides + * we need to call SAL to check for CEs. + */ + + /* CALL SAL_LOG_CE */ + + ia64_sn_plat_cpei_handler(); +} + +static void sn_cpei_timer_handler(unsigned long dummy) +{ + sn_cpei_handler(-1, NULL, NULL); + mod_timer(&sn_cpei_timer, jiffies + CPEI_INTERVAL); +} + +void sn_init_cpei_timer(void) +{ + init_timer(&sn_cpei_timer); + sn_cpei_timer.expires = jiffies + CPEI_INTERVAL; + sn_cpei_timer.function = sn_cpei_timer_handler; + add_timer(&sn_cpei_timer); +} + +static int +sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata, + u64 * oemdata_size) +{ + mutex_lock(&sn_oemdata_mutex); + sn_oemdata = oemdata; + sn_oemdata_size = oemdata_size; + sn_oemdata_bufsize = 0; + *sn_oemdata_size = PAGE_SIZE; /* first guess at how much data will be generated */ + while (*sn_oemdata_size > sn_oemdata_bufsize) { + u8 *newbuf = vmalloc(*sn_oemdata_size); + if (!newbuf) { + mutex_unlock(&sn_oemdata_mutex); + printk(KERN_ERR "%s: unable to extend sn_oemdata\n", + __func__); + return 1; + } + vfree(*sn_oemdata); + *sn_oemdata = newbuf; + sn_oemdata_bufsize = *sn_oemdata_size; + *sn_oemdata_size = 0; + ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header); + } + mutex_unlock(&sn_oemdata_mutex); + return 0; +} + +/* Callback when userspace salinfo wants to decode oem data via the platform + * kernel and/or prom. + */ +int sn_salinfo_platform_oemdata(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size) +{ + efi_guid_t guid = *(efi_guid_t *)sect_header; + int valid = 0; + *oemdata_size = 0; + vfree(*oemdata); + *oemdata = NULL; + if (efi_guidcmp(guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID) == 0) { + sal_log_plat_specific_err_info_t *psei = (sal_log_plat_specific_err_info_t *)sect_header; + valid = psei->valid.oem_data; + } else if (efi_guidcmp(guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) { + sal_log_mem_dev_err_info_t *mdei = (sal_log_mem_dev_err_info_t *)sect_header; + valid = mdei->valid.oem_data; + } + if (valid) + return sn_platform_plat_specific_err_print(sect_header, oemdata, oemdata_size); + else + return 0; +} + +static int __init sn_salinfo_init(void) +{ + if (ia64_platform_is("sn2")) + salinfo_platform_oemdata = &sn_salinfo_platform_oemdata; + return 0; +} + +module_init(sn_salinfo_init) diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c new file mode 100644 index 00000000..2b98b9e0 --- /dev/null +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -0,0 +1,238 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/types.h> +#include <linux/irq.h> +#include <linux/pci.h> +#include <linux/cpumask.h> +#include <linux/msi.h> +#include <linux/slab.h> + +#include <asm/sn/addrs.h> +#include <asm/sn/intr.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/nodepda.h> + +struct sn_msi_info { + u64 pci_addr; + struct sn_irq_info *sn_irq_info; +}; + +static struct sn_msi_info sn_msi_info[NR_IRQS]; + +static struct irq_chip sn_msi_chip; + +void sn_teardown_msi_irq(unsigned int irq) +{ + nasid_t nasid; + int widget; + struct pci_dev *pdev; + struct pcidev_info *sn_pdev; + struct sn_irq_info *sn_irq_info; + struct pcibus_bussoft *bussoft; + struct sn_pcibus_provider *provider; + + sn_irq_info = sn_msi_info[irq].sn_irq_info; + if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) + return; + + sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + pdev = sn_pdev->pdi_linux_pcidev; + provider = SN_PCIDEV_BUSPROVIDER(pdev); + + (*provider->dma_unmap)(pdev, + sn_msi_info[irq].pci_addr, + PCI_DMA_FROMDEVICE); + sn_msi_info[irq].pci_addr = 0; + + bussoft = SN_PCIDEV_BUSSOFT(pdev); + nasid = NASID_GET(bussoft->bs_base); + widget = (nasid & 1) ? + TIO_SWIN_WIDGETNUM(bussoft->bs_base) : + SWIN_WIDGETNUM(bussoft->bs_base); + + sn_intr_free(nasid, widget, sn_irq_info); + sn_msi_info[irq].sn_irq_info = NULL; + + destroy_irq(irq); +} + +int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) +{ + struct msi_msg msg; + int widget; + int status; + nasid_t nasid; + u64 bus_addr; + struct sn_irq_info *sn_irq_info; + struct pcibus_bussoft *bussoft = SN_PCIDEV_BUSSOFT(pdev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + int irq; + + if (!entry->msi_attrib.is_64) + return -EINVAL; + + if (bussoft == NULL) + return -EINVAL; + + if (provider == NULL || provider->dma_map_consistent == NULL) + return -EINVAL; + + irq = create_irq(); + if (irq < 0) + return irq; + + /* + * Set up the vector plumbing. Let the prom (via sn_intr_alloc) + * decide which cpu to direct this msi at by default. + */ + + nasid = NASID_GET(bussoft->bs_base); + widget = (nasid & 1) ? + TIO_SWIN_WIDGETNUM(bussoft->bs_base) : + SWIN_WIDGETNUM(bussoft->bs_base); + + sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (! sn_irq_info) { + destroy_irq(irq); + return -ENOMEM; + } + + status = sn_intr_alloc(nasid, widget, sn_irq_info, irq, -1, -1); + if (status) { + kfree(sn_irq_info); + destroy_irq(irq); + return -ENOMEM; + } + + sn_irq_info->irq_int_bit = -1; /* mark this as an MSI irq */ + sn_irq_fixup(pdev, sn_irq_info); + + /* Prom probably should fill these in, but doesn't ... */ + sn_irq_info->irq_bridge_type = bussoft->bs_asic_type; + sn_irq_info->irq_bridge = (void *)bussoft->bs_base; + + /* + * Map the xio address into bus space + */ + bus_addr = (*provider->dma_map_consistent)(pdev, + sn_irq_info->irq_xtalkaddr, + sizeof(sn_irq_info->irq_xtalkaddr), + SN_DMA_MSI|SN_DMA_ADDR_XIO); + if (! bus_addr) { + sn_intr_free(nasid, widget, sn_irq_info); + kfree(sn_irq_info); + destroy_irq(irq); + return -ENOMEM; + } + + sn_msi_info[irq].sn_irq_info = sn_irq_info; + sn_msi_info[irq].pci_addr = bus_addr; + + msg.address_hi = (u32)(bus_addr >> 32); + msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); + + /* + * In the SN platform, bit 16 is a "send vector" bit which + * must be present in order to move the vector through the system. + */ + msg.data = 0x100 + irq; + + irq_set_msi_desc(irq, entry); + write_msi_msg(irq, &msg); + irq_set_chip_and_handler(irq, &sn_msi_chip, handle_edge_irq); + + return 0; +} + +#ifdef CONFIG_SMP +static int sn_set_msi_irq_affinity(struct irq_data *data, + const struct cpumask *cpu_mask, bool force) +{ + struct msi_msg msg; + int slice; + nasid_t nasid; + u64 bus_addr; + struct pci_dev *pdev; + struct pcidev_info *sn_pdev; + struct sn_irq_info *sn_irq_info; + struct sn_irq_info *new_irq_info; + struct sn_pcibus_provider *provider; + unsigned int cpu, irq = data->irq; + + cpu = cpumask_first(cpu_mask); + sn_irq_info = sn_msi_info[irq].sn_irq_info; + if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) + return -1; + + /* + * Release XIO resources for the old MSI PCI address + */ + + get_cached_msi_msg(irq, &msg); + sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + pdev = sn_pdev->pdi_linux_pcidev; + provider = SN_PCIDEV_BUSPROVIDER(pdev); + + bus_addr = (u64)(msg.address_hi) << 32 | (u64)(msg.address_lo); + (*provider->dma_unmap)(pdev, bus_addr, PCI_DMA_FROMDEVICE); + sn_msi_info[irq].pci_addr = 0; + + nasid = cpuid_to_nasid(cpu); + slice = cpuid_to_slice(cpu); + + new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice); + sn_msi_info[irq].sn_irq_info = new_irq_info; + if (new_irq_info == NULL) + return -1; + + /* + * Map the xio address into bus space + */ + + bus_addr = (*provider->dma_map_consistent)(pdev, + new_irq_info->irq_xtalkaddr, + sizeof(new_irq_info->irq_xtalkaddr), + SN_DMA_MSI|SN_DMA_ADDR_XIO); + + sn_msi_info[irq].pci_addr = bus_addr; + msg.address_hi = (u32)(bus_addr >> 32); + msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); + + write_msi_msg(irq, &msg); + cpumask_copy(data->affinity, cpu_mask); + + return 0; +} +#endif /* CONFIG_SMP */ + +static void sn_ack_msi_irq(struct irq_data *data) +{ + irq_move_irq(data); + ia64_eoi(); +} + +static int sn_msi_retrigger_irq(struct irq_data *data) +{ + unsigned int vector = data->irq; + ia64_resend_irq(vector); + + return 1; +} + +static struct irq_chip sn_msi_chip = { + .name = "PCI-MSI", + .irq_mask = mask_msi_irq, + .irq_unmask = unmask_msi_irq, + .irq_ack = sn_ack_msi_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = sn_set_msi_irq_affinity, +#endif + .irq_retrigger = sn_msi_retrigger_irq, +}; diff --git a/arch/ia64/sn/kernel/pio_phys.S b/arch/ia64/sn/kernel/pio_phys.S new file mode 100644 index 00000000..3c7d48d6 --- /dev/null +++ b/arch/ia64/sn/kernel/pio_phys.S @@ -0,0 +1,71 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. + * + * This file contains macros used to access MMR registers via + * uncached physical addresses. + * pio_phys_read_mmr - read an MMR + * pio_phys_write_mmr - write an MMR + * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0 + * Second MMR will be skipped if address is NULL + * + * Addresses passed to these routines should be uncached physical addresses + * ie., 0x80000.... + */ + + + +#include <asm/asmmacro.h> +#include <asm/page.h> + +GLOBAL_ENTRY(pio_phys_read_mmr) + .prologue + .regstk 1,0,0,0 + .body + mov r2=psr + rsm psr.i | psr.dt + ;; + srlz.d + ld8.acq r8=[r32] + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_phys_read_mmr) + +GLOBAL_ENTRY(pio_phys_write_mmr) + .prologue + .regstk 2,0,0,0 + .body + mov r2=psr + rsm psr.i | psr.dt + ;; + srlz.d + st8.rel [r32]=r33 + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_phys_write_mmr) + +GLOBAL_ENTRY(pio_atomic_phys_write_mmrs) + .prologue + .regstk 4,0,0,0 + .body + mov r2=psr + cmp.ne p9,p0=r34,r0; + rsm psr.i | psr.dt | psr.ic + ;; + srlz.d + st8.rel [r32]=r33 +(p9) st8.rel [r34]=r35 + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_atomic_phys_write_mmrs) + + diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c new file mode 100644 index 00000000..f82e7b46 --- /dev/null +++ b/arch/ia64/sn/kernel/setup.c @@ -0,0 +1,775 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/kdev_t.h> +#include <linux/string.h> +#include <linux/screen_info.h> +#include <linux/console.h> +#include <linux/timex.h> +#include <linux/sched.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/serial.h> +#include <linux/irq.h> +#include <linux/bootmem.h> +#include <linux/mmzone.h> +#include <linux/interrupt.h> +#include <linux/acpi.h> +#include <linux/compiler.h> +#include <linux/root_dev.h> +#include <linux/nodemask.h> +#include <linux/pm.h> +#include <linux/efi.h> + +#include <asm/io.h> +#include <asm/sal.h> +#include <asm/machvec.h> +#include <asm/processor.h> +#include <asm/vga.h> +#include <asm/setup.h> +#include <asm/sn/arch.h> +#include <asm/sn/addrs.h> +#include <asm/sn/pda.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/simulator.h> +#include <asm/sn/leds.h> +#include <asm/sn/bte.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/clksupport.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/geo.h> +#include <asm/sn/sn_feature_sets.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include <asm/sn/klconfig.h> + + +DEFINE_PER_CPU(struct pda_s, pda_percpu); + +#define MAX_PHYS_MEMORY (1UL << IA64_MAX_PHYS_BITS) /* Max physical address supported */ + +extern void bte_init_node(nodepda_t *, cnodeid_t); + +extern void sn_timer_init(void); +extern unsigned long last_time_offset; +extern void (*ia64_mark_idle) (int); +extern void snidle(int); + +unsigned long sn_rtc_cycles_per_second; +EXPORT_SYMBOL(sn_rtc_cycles_per_second); + +DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); +EXPORT_PER_CPU_SYMBOL(__sn_hub_info); + +DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); +EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); + +DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); +EXPORT_PER_CPU_SYMBOL(__sn_nodepda); + +char sn_system_serial_number_string[128]; +EXPORT_SYMBOL(sn_system_serial_number_string); +u64 sn_partition_serial_number; +EXPORT_SYMBOL(sn_partition_serial_number); +u8 sn_partition_id; +EXPORT_SYMBOL(sn_partition_id); +u8 sn_system_size; +EXPORT_SYMBOL(sn_system_size); +u8 sn_sharing_domain_size; +EXPORT_SYMBOL(sn_sharing_domain_size); +u8 sn_coherency_id; +EXPORT_SYMBOL(sn_coherency_id); +u8 sn_region_size; +EXPORT_SYMBOL(sn_region_size); +int sn_prom_type; /* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */ + +short physical_node_map[MAX_NUMALINK_NODES]; +static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS]; + +EXPORT_SYMBOL(physical_node_map); + +int num_cnodes; + +static void sn_init_pdas(char **); +static void build_cnode_tables(void); + +static nodepda_t *nodepdaindr[MAX_COMPACT_NODES]; + +/* + * The format of "screen_info" is strange, and due to early i386-setup + * code. This is just enough to make the console code think we're on a + * VGA color display. + */ +struct screen_info sn_screen_info = { + .orig_x = 0, + .orig_y = 0, + .orig_video_mode = 3, + .orig_video_cols = 80, + .orig_video_ega_bx = 3, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16 +}; + +/* + * This routine can only be used during init, since + * smp_boot_data is an init data structure. + * We have to use smp_boot_data.cpu_phys_id to find + * the physical id of the processor because the normal + * cpu_physical_id() relies on data structures that + * may not be initialized yet. + */ + +static int __init pxm_to_nasid(int pxm) +{ + int i; + int nid; + + nid = pxm_to_node(pxm); + for (i = 0; i < num_node_memblks; i++) { + if (node_memblk[i].nid == nid) { + return NASID_GET(node_memblk[i].start_paddr); + } + } + return -1; +} + +/** + * early_sn_setup - early setup routine for SN platforms + * + * Sets up an initial console to aid debugging. Intended primarily + * for bringup. See start_kernel() in init/main.c. + */ + +void __init early_sn_setup(void) +{ + efi_system_table_t *efi_systab; + efi_config_table_t *config_tables; + struct ia64_sal_systab *sal_systab; + struct ia64_sal_desc_entry_point *ep; + char *p; + int i, j; + + /* + * Parse enough of the SAL tables to locate the SAL entry point. Since, console + * IO on SN2 is done via SAL calls, early_printk won't work without this. + * + * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c. + * Any changes to those file may have to be made here as well. + */ + efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab); + config_tables = __va(efi_systab->tables); + for (i = 0; i < efi_systab->nr_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == + 0) { + sal_systab = __va(config_tables[i].table); + p = (char *)(sal_systab + 1); + for (j = 0; j < sal_systab->entry_count; j++) { + if (*p == SAL_DESC_ENTRY_POINT) { + ep = (struct ia64_sal_desc_entry_point + *)p; + ia64_sal_handler_init(__va + (ep->sal_proc), + __va(ep->gp)); + return; + } + p += SAL_DESC_SIZE(*p); + } + } + } + /* Uh-oh, SAL not available?? */ + printk(KERN_ERR "failed to find SAL entry point\n"); +} + +extern int platform_intr_list[]; +static int __cpuinitdata shub_1_1_found; + +/* + * sn_check_for_wars + * + * Set flag for enabling shub specific wars + */ + +static inline int __cpuinit is_shub_1_1(int nasid) +{ + unsigned long id; + int rev; + + if (is_shub2()) + return 0; + id = REMOTE_HUB_L(nasid, SH1_SHUB_ID); + rev = (id & SH1_SHUB_ID_REVISION_MASK) >> SH1_SHUB_ID_REVISION_SHFT; + return rev <= 2; +} + +static void __cpuinit sn_check_for_wars(void) +{ + int cnode; + + if (is_shub2()) { + /* none yet */ + } else { + for_each_online_node(cnode) { + if (is_shub_1_1(cnodeid_to_nasid(cnode))) + shub_1_1_found = 1; + } + } +} + +/* + * Scan the EFI PCDP table (if it exists) for an acceptable VGA console + * output device. If one exists, pick it and set sn_legacy_{io,mem} to + * reflect the bus offsets needed to address it. + * + * Since pcdp support in SN is not supported in the 2.4 kernel (or at least + * the one lbs is based on) just declare the needed structs here. + * + * Reference spec http://www.dig64.org/specifications/DIG64_PCDPv20.pdf + * + * Returns 0 if no acceptable vga is found, !0 otherwise. + * + * Note: This stuff is duped here because Altix requires the PCDP to + * locate a usable VGA device due to lack of proper ACPI support. Structures + * could be used from drivers/firmware/pcdp.h, but it was decided that moving + * this file to a more public location just for Altix use was undesirable. + */ + +struct hcdp_uart_desc { + u8 pad[45]; +}; + +struct pcdp { + u8 signature[4]; /* should be 'HCDP' */ + u32 length; + u8 rev; /* should be >=3 for pcdp, <3 for hcdp */ + u8 sum; + u8 oem_id[6]; + u64 oem_tableid; + u32 oem_rev; + u32 creator_id; + u32 creator_rev; + u32 num_type0; + struct hcdp_uart_desc uart[0]; /* num_type0 of these */ + /* pcdp descriptors follow */ +} __attribute__((packed)); + +struct pcdp_device_desc { + u8 type; + u8 primary; + u16 length; + u16 index; + /* interconnect specific structure follows */ + /* device specific structure follows that */ +} __attribute__((packed)); + +struct pcdp_interface_pci { + u8 type; /* 1 == pci */ + u8 reserved; + u16 length; + u8 segment; + u8 bus; + u8 dev; + u8 fun; + u16 devid; + u16 vendid; + u32 acpi_interrupt; + u64 mmio_tra; + u64 ioport_tra; + u8 flags; + u8 translation; +} __attribute__((packed)); + +struct pcdp_vga_device { + u8 num_eas_desc; + /* ACPI Extended Address Space Desc follows */ +} __attribute__((packed)); + +/* from pcdp_device_desc.primary */ +#define PCDP_PRIMARY_CONSOLE 0x01 + +/* from pcdp_device_desc.type */ +#define PCDP_CONSOLE_INOUT 0x0 +#define PCDP_CONSOLE_DEBUG 0x1 +#define PCDP_CONSOLE_OUT 0x2 +#define PCDP_CONSOLE_IN 0x3 +#define PCDP_CONSOLE_TYPE_VGA 0x8 + +#define PCDP_CONSOLE_VGA (PCDP_CONSOLE_TYPE_VGA | PCDP_CONSOLE_OUT) + +/* from pcdp_interface_pci.type */ +#define PCDP_IF_PCI 1 + +/* from pcdp_interface_pci.translation */ +#define PCDP_PCI_TRANS_IOPORT 0x02 +#define PCDP_PCI_TRANS_MMIO 0x01 + +#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) +static void +sn_scan_pcdp(void) +{ + u8 *bp; + struct pcdp *pcdp; + struct pcdp_device_desc device; + struct pcdp_interface_pci if_pci; + extern struct efi efi; + + if (efi.hcdp == EFI_INVALID_TABLE_ADDR) + return; /* no hcdp/pcdp table */ + + pcdp = __va(efi.hcdp); + + if (pcdp->rev < 3) + return; /* only support PCDP (rev >= 3) */ + + for (bp = (u8 *)&pcdp->uart[pcdp->num_type0]; + bp < (u8 *)pcdp + pcdp->length; + bp += device.length) { + memcpy(&device, bp, sizeof(device)); + if (! (device.primary & PCDP_PRIMARY_CONSOLE)) + continue; /* not primary console */ + + if (device.type != PCDP_CONSOLE_VGA) + continue; /* not VGA descriptor */ + + memcpy(&if_pci, bp+sizeof(device), sizeof(if_pci)); + if (if_pci.type != PCDP_IF_PCI) + continue; /* not PCI interconnect */ + + if (if_pci.translation & PCDP_PCI_TRANS_IOPORT) + vga_console_iobase = if_pci.ioport_tra; + + if (if_pci.translation & PCDP_PCI_TRANS_MMIO) + vga_console_membase = + if_pci.mmio_tra | __IA64_UNCACHED_OFFSET; + + break; /* once we find the primary, we're done */ + } +} +#endif + +static unsigned long sn2_rtc_initial; + +/** + * sn_setup - SN platform setup routine + * @cmdline_p: kernel command line + * + * Handles platform setup for SN machines. This includes determining + * the RTC frequency (via a SAL call), initializing secondary CPUs, and + * setting up per-node data areas. The console is also initialized here. + */ +void __init sn_setup(char **cmdline_p) +{ + long status, ticks_per_sec, drift; + u32 version = sn_sal_rev(); + extern void sn_cpu_init(void); + + sn2_rtc_initial = rtc_time(); + ia64_sn_plat_set_error_handling_features(); // obsolete + ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV); + ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES); + /* + * Note: The calls to notify the PROM of ACPI and PCI Segment + * support must be done prior to acpi_load_tables(), as + * an ACPI capable PROM will rebuild the DSDT as result + * of the call. + */ + ia64_sn_set_os_feature(OSF_PCISEGMENT_ENABLE); + ia64_sn_set_os_feature(OSF_ACPI_ENABLE); + + /* Load the new DSDT and SSDT tables into the global table list. */ + acpi_table_init(); + +#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) + /* + * Handle SN vga console. + * + * SN systems do not have enough ACPI table information + * being passed from prom to identify VGA adapters and the legacy + * addresses to access them. Until that is done, SN systems rely + * on the PCDP table to identify the primary VGA console if one + * exists. + * + * However, kernel PCDP support is optional, and even if it is built + * into the kernel, it will not be used if the boot cmdline contains + * console= directives. + * + * So, to work around this mess, we duplicate some of the PCDP code + * here so that the primary VGA console (as defined by PCDP) will + * work on SN systems even if a different console (e.g. serial) is + * selected on the boot line (or CONFIG_EFI_PCDP is off). + */ + + if (! vga_console_membase) + sn_scan_pcdp(); + + /* + * Setup legacy IO space. + * vga_console_iobase maps to PCI IO Space address 0 on the + * bus containing the VGA console. + */ + if (vga_console_iobase) { + io_space[0].mmio_base = + (unsigned long) ioremap(vga_console_iobase, 0); + io_space[0].sparse = 0; + } + + if (vga_console_membase) { + /* usable vga ... make tty0 the preferred default console */ + if (!strstr(*cmdline_p, "console=")) + add_preferred_console("tty", 0, NULL); + } else { + printk(KERN_DEBUG "SGI: Disabling VGA console\n"); + if (!strstr(*cmdline_p, "console=")) + add_preferred_console("ttySG", 0, NULL); +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#else + conswitchp = NULL; +#endif /* CONFIG_DUMMY_CONSOLE */ + } +#endif /* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */ + + MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY; + + /* + * Build the tables for managing cnodes. + */ + build_cnode_tables(); + + status = + ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, + &drift); + if (status != 0 || ticks_per_sec < 100000) { + printk(KERN_WARNING + "unable to determine platform RTC clock frequency, guessing.\n"); + /* PROM gives wrong value for clock freq. so guess */ + sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; + } else + sn_rtc_cycles_per_second = ticks_per_sec; + + platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR; + + printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF); + + /* + * we set the default root device to /dev/hda + * to make simulation easy + */ + ROOT_DEV = Root_HDA1; + + /* + * Create the PDAs and NODEPDAs for all the cpus. + */ + sn_init_pdas(cmdline_p); + + ia64_mark_idle = &snidle; + + /* + * For the bootcpu, we do this here. All other cpus will make the + * call as part of cpu_init in slave cpu initialization. + */ + sn_cpu_init(); + +#ifdef CONFIG_SMP + init_smp_config(); +#endif + screen_info = sn_screen_info; + + sn_timer_init(); + + /* + * set pm_power_off to a SAL call to allow + * sn machines to power off. The SAL call can be replaced + * by an ACPI interface call when ACPI is fully implemented + * for sn. + */ + pm_power_off = ia64_sn_power_down; + current->thread.flags |= IA64_THREAD_MIGRATION; +} + +/** + * sn_init_pdas - setup node data areas + * + * One time setup for Node Data Area. Called by sn_setup(). + */ +static void __init sn_init_pdas(char **cmdline_p) +{ + cnodeid_t cnode; + + /* + * Allocate & initialize the nodepda for each node. + */ + for_each_online_node(cnode) { + nodepdaindr[cnode] = + alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t)); + memset(nodepdaindr[cnode]->phys_cpuid, -1, + sizeof(nodepdaindr[cnode]->phys_cpuid)); + spin_lock_init(&nodepdaindr[cnode]->ptc_lock); + } + + /* + * Allocate & initialize nodepda for TIOs. For now, put them on node 0. + */ + for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) + nodepdaindr[cnode] = + alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t)); + + /* + * Now copy the array of nodepda pointers to each nodepda. + */ + for (cnode = 0; cnode < num_cnodes; cnode++) + memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr, + sizeof(nodepdaindr)); + + /* + * Set up IO related platform-dependent nodepda fields. + * The following routine actually sets up the hubinfo struct + * in nodepda. + */ + for_each_online_node(cnode) { + bte_init_node(nodepdaindr[cnode], cnode); + } + + /* + * Initialize the per node hubdev. This includes IO Nodes and + * headless/memless nodes. + */ + for (cnode = 0; cnode < num_cnodes; cnode++) { + hubdev_init_node(nodepdaindr[cnode], cnode); + } +} + +/** + * sn_cpu_init - initialize per-cpu data areas + * @cpuid: cpuid of the caller + * + * Called during cpu initialization on each cpu as it starts. + * Currently, initializes the per-cpu data area for SNIA. + * Also sets up a few fields in the nodepda. Also known as + * platform_cpu_init() by the ia64 machvec code. + */ +void __cpuinit sn_cpu_init(void) +{ + int cpuid; + int cpuphyid; + int nasid; + int subnode; + int slice; + int cnode; + int i; + static int wars_have_been_checked, set_cpu0_number; + + cpuid = smp_processor_id(); + if (cpuid == 0 && IS_MEDUSA()) { + if (ia64_sn_is_fake_prom()) + sn_prom_type = 2; + else + sn_prom_type = 1; + printk(KERN_INFO "Running on medusa with %s PROM\n", + (sn_prom_type == 1) ? "real" : "fake"); + } + + memset(pda, 0, sizeof(pda)); + if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, + &sn_hub_info->nasid_bitmask, + &sn_hub_info->nasid_shift, + &sn_system_size, &sn_sharing_domain_size, + &sn_partition_id, &sn_coherency_id, + &sn_region_size)) + BUG(); + sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2; + + /* + * Don't check status. The SAL call is not supported on all PROMs + * but a failure is harmless. + * Architecturally, cpu_init is always called twice on cpu 0. We + * should set cpu_number on cpu 0 once. + */ + if (cpuid == 0) { + if (!set_cpu0_number) { + (void) ia64_sn_set_cpu_number(cpuid); + set_cpu0_number = 1; + } + } else + (void) ia64_sn_set_cpu_number(cpuid); + + /* + * The boot cpu makes this call again after platform initialization is + * complete. + */ + if (nodepdaindr[0] == NULL) + return; + + for (i = 0; i < MAX_PROM_FEATURE_SETS; i++) + if (ia64_sn_get_prom_feature_set(i, &sn_prom_features[i]) != 0) + break; + + cpuphyid = get_sapicid(); + + if (ia64_sn_get_sapic_info(cpuphyid, &nasid, &subnode, &slice)) + BUG(); + + for (i=0; i < MAX_NUMNODES; i++) { + if (nodepdaindr[i]) { + nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid; + nodepdaindr[i]->phys_cpuid[cpuid].slice = slice; + nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode; + } + } + + cnode = nasid_to_cnodeid(nasid); + + sn_nodepda = nodepdaindr[cnode]; + + pda->led_address = + (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT)); + pda->led_state = LED_ALWAYS_SET; + pda->hb_count = HZ / 2; + pda->hb_state = 0; + pda->idle_flag = 0; + + if (cpuid != 0) { + /* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */ + memcpy(sn_cnodeid_to_nasid, + (&per_cpu(__sn_cnodeid_to_nasid, 0)), + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); + } + + /* + * Check for WARs. + * Only needs to be done once, on BSP. + * Has to be done after loop above, because it uses this cpu's + * sn_cnodeid_to_nasid table which was just initialized if this + * isn't cpu 0. + * Has to be done before assignment below. + */ + if (!wars_have_been_checked) { + sn_check_for_wars(); + wars_have_been_checked = 1; + } + sn_hub_info->shub_1_1_found = shub_1_1_found; + + /* + * Set up addresses of PIO/MEM write status registers. + */ + { + u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0}; + u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_2, + SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; + u64 *pio; + pio = is_shub1() ? pio1 : pio2; + pda->pio_write_status_addr = + (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]); + pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0; + } + + /* + * WAR addresses for SHUB 1.x. + */ + if (local_node_data->active_cpu_count++ == 0 && is_shub1()) { + int buddy_nasid; + buddy_nasid = + cnodeid_to_nasid(numa_node_id() == + num_online_nodes() - 1 ? 0 : numa_node_id() + 1); + pda->pio_shub_war_cam_addr = + (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, + SH1_PI_CAM_CONTROL); + } +} + +/* + * Build tables for converting between NASIDs and cnodes. + */ +static inline int __init board_needs_cnode(int type) +{ + return (type == KLTYPE_SNIA || type == KLTYPE_TIO); +} + +void __init build_cnode_tables(void) +{ + int nasid; + int node; + lboard_t *brd; + + memset(physical_node_map, -1, sizeof(physical_node_map)); + memset(sn_cnodeid_to_nasid, -1, + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); + + /* + * First populate the tables with C/M bricks. This ensures that + * cnode == node for all C & M bricks. + */ + for_each_online_node(node) { + nasid = pxm_to_nasid(node_to_pxm(node)); + sn_cnodeid_to_nasid[node] = nasid; + physical_node_map[nasid] = node; + } + + /* + * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node + * limit on the number of nodes, we can't use the generic node numbers + * for this. Note that num_cnodes is incremented below as TIOs or + * headless/memoryless nodes are discovered. + */ + num_cnodes = num_online_nodes(); + + /* fakeprom does not support klgraph */ + if (IS_RUNNING_ON_FAKE_PROM()) + return; + + /* Find TIOs & headless/memoryless nodes and add them to the tables */ + for_each_online_node(node) { + kl_config_hdr_t *klgraph_header; + nasid = cnodeid_to_nasid(node); + klgraph_header = ia64_sn_get_klconfig_addr(nasid); + BUG_ON(klgraph_header == NULL); + brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info); + while (brd) { + if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) { + sn_cnodeid_to_nasid[num_cnodes] = brd->brd_nasid; + physical_node_map[brd->brd_nasid] = num_cnodes++; + } + brd = find_lboard_next(brd); + } + } +} + +int +nasid_slice_to_cpuid(int nasid, int slice) +{ + long cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (cpuid_to_nasid(cpu) == nasid && + cpuid_to_slice(cpu) == slice) + return cpu; + + return -1; +} + +int sn_prom_feature_available(int id) +{ + if (id >= BITS_PER_LONG * MAX_PROM_FEATURE_SETS) + return 0; + return test_bit(id, sn_prom_features); +} + +void +sn_kernel_launch_event(void) +{ + /* ignore status until we understand possible failure, if any*/ + if (ia64_sn_kernel_launch_event()) + printk(KERN_ERR "KEXEC is not supported in this PROM, Please update the PROM.\n"); +} +EXPORT_SYMBOL(sn_prom_feature_available); + diff --git a/arch/ia64/sn/kernel/sn2/Makefile b/arch/ia64/sn/kernel/sn2/Makefile new file mode 100644 index 00000000..3d09108d --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/Makefile @@ -0,0 +1,15 @@ +# arch/ia64/sn/kernel/sn2/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1999,2001-2002 Silicon Graphics, Inc. All rights reserved. +# +# sn2 specific kernel files +# + +ccflags-y := -Iarch/ia64/sn/include + +obj-y += cache.o io.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o \ + prominfo_proc.o timer.o timer_interrupt.o sn_hwperf.o diff --git a/arch/ia64/sn/kernel/sn2/cache.c b/arch/ia64/sn/kernel/sn2/cache.c new file mode 100644 index 00000000..2862cb33 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/cache.c @@ -0,0 +1,41 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2003, 2006 Silicon Graphics, Inc. All rights reserved. + * + */ +#include <linux/module.h> +#include <asm/pgalloc.h> +#include <asm/sn/arch.h> + +/** + * sn_flush_all_caches - flush a range of address from all caches (incl. L4) + * @flush_addr: identity mapped region 7 address to start flushing + * @bytes: number of bytes to flush + * + * Flush a range of addresses from all caches including L4. + * All addresses fully or partially contained within + * @flush_addr to @flush_addr + @bytes are flushed + * from all caches. + */ +void +sn_flush_all_caches(long flush_addr, long bytes) +{ + unsigned long addr = flush_addr; + + /* SHub1 requires a cached address */ + if (is_shub1() && (addr & RGN_BITS) == RGN_BASE(RGN_UNCACHED)) + addr = (addr - RGN_BASE(RGN_UNCACHED)) + RGN_BASE(RGN_KERNEL); + + flush_icache_range(addr, addr + bytes); + /* + * The last call may have returned before the caches + * were actually flushed, so we call it again to make + * sure. + */ + flush_icache_range(addr, addr + bytes); + mb(); +} +EXPORT_SYMBOL(sn_flush_all_caches); diff --git a/arch/ia64/sn/kernel/sn2/io.c b/arch/ia64/sn/kernel/sn2/io.c new file mode 100644 index 00000000..a12c0586 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/io.c @@ -0,0 +1,101 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved. + * + * The generic kernel requires function pointers to these routines, so + * we wrap the inlines from asm/ia64/sn/sn2/io.h here. + */ + +#include <asm/sn/io.h> + +#ifdef CONFIG_IA64_GENERIC + +#undef __sn_inb +#undef __sn_inw +#undef __sn_inl +#undef __sn_outb +#undef __sn_outw +#undef __sn_outl +#undef __sn_readb +#undef __sn_readw +#undef __sn_readl +#undef __sn_readq +#undef __sn_readb_relaxed +#undef __sn_readw_relaxed +#undef __sn_readl_relaxed +#undef __sn_readq_relaxed + +unsigned int __sn_inb(unsigned long port) +{ + return ___sn_inb(port); +} + +unsigned int __sn_inw(unsigned long port) +{ + return ___sn_inw(port); +} + +unsigned int __sn_inl(unsigned long port) +{ + return ___sn_inl(port); +} + +void __sn_outb(unsigned char val, unsigned long port) +{ + ___sn_outb(val, port); +} + +void __sn_outw(unsigned short val, unsigned long port) +{ + ___sn_outw(val, port); +} + +void __sn_outl(unsigned int val, unsigned long port) +{ + ___sn_outl(val, port); +} + +unsigned char __sn_readb(void __iomem *addr) +{ + return ___sn_readb(addr); +} + +unsigned short __sn_readw(void __iomem *addr) +{ + return ___sn_readw(addr); +} + +unsigned int __sn_readl(void __iomem *addr) +{ + return ___sn_readl(addr); +} + +unsigned long __sn_readq(void __iomem *addr) +{ + return ___sn_readq(addr); +} + +unsigned char __sn_readb_relaxed(void __iomem *addr) +{ + return ___sn_readb_relaxed(addr); +} + +unsigned short __sn_readw_relaxed(void __iomem *addr) +{ + return ___sn_readw_relaxed(addr); +} + +unsigned int __sn_readl_relaxed(void __iomem *addr) +{ + return ___sn_readl_relaxed(addr); +} + +unsigned long __sn_readq_relaxed(void __iomem *addr) +{ + return ___sn_readq_relaxed(addr); +} + +#endif diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c new file mode 100644 index 00000000..20b88cb1 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c @@ -0,0 +1,273 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999,2001-2004, 2006 Silicon Graphics, Inc. All Rights Reserved. + * + * Module to export the system's Firmware Interface Tables, including + * PROM revision numbers and banners, in /proc + */ +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/proc_fs.h> +#include <linux/nodemask.h> +#include <asm/io.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/addrs.h> + +MODULE_DESCRIPTION("PROM version reporting for /proc"); +MODULE_AUTHOR("Chad Talbott"); +MODULE_LICENSE("GPL"); + +/* Standard Intel FIT entry types */ +#define FIT_ENTRY_FIT_HEADER 0x00 /* FIT header entry */ +#define FIT_ENTRY_PAL_B 0x01 /* PAL_B entry */ +/* Entries 0x02 through 0x0D reserved by Intel */ +#define FIT_ENTRY_PAL_A_PROC 0x0E /* Processor-specific PAL_A entry */ +#define FIT_ENTRY_PAL_A 0x0F /* PAL_A entry, same as... */ +#define FIT_ENTRY_PAL_A_GEN 0x0F /* ...Generic PAL_A entry */ +#define FIT_ENTRY_UNUSED 0x7F /* Unused (reserved by Intel?) */ +/* OEM-defined entries range from 0x10 to 0x7E. */ +#define FIT_ENTRY_SAL_A 0x10 /* SAL_A entry */ +#define FIT_ENTRY_SAL_B 0x11 /* SAL_B entry */ +#define FIT_ENTRY_SALRUNTIME 0x12 /* SAL runtime entry */ +#define FIT_ENTRY_EFI 0x1F /* EFI entry */ +#define FIT_ENTRY_FPSWA 0x20 /* embedded fpswa entry */ +#define FIT_ENTRY_VMLINUX 0x21 /* embedded vmlinux entry */ + +#define FIT_MAJOR_SHIFT (32 + 8) +#define FIT_MAJOR_MASK ((1 << 8) - 1) +#define FIT_MINOR_SHIFT 32 +#define FIT_MINOR_MASK ((1 << 8) - 1) + +#define FIT_MAJOR(q) \ + ((unsigned) ((q) >> FIT_MAJOR_SHIFT) & FIT_MAJOR_MASK) +#define FIT_MINOR(q) \ + ((unsigned) ((q) >> FIT_MINOR_SHIFT) & FIT_MINOR_MASK) + +#define FIT_TYPE_SHIFT (32 + 16) +#define FIT_TYPE_MASK ((1 << 7) - 1) + +#define FIT_TYPE(q) \ + ((unsigned) ((q) >> FIT_TYPE_SHIFT) & FIT_TYPE_MASK) + +struct fit_type_map_t { + unsigned char type; + const char *name; +}; + +static const struct fit_type_map_t fit_entry_types[] = { + {FIT_ENTRY_FIT_HEADER, "FIT Header"}, + {FIT_ENTRY_PAL_A_GEN, "Generic PAL_A"}, + {FIT_ENTRY_PAL_A_PROC, "Processor-specific PAL_A"}, + {FIT_ENTRY_PAL_A, "PAL_A"}, + {FIT_ENTRY_PAL_B, "PAL_B"}, + {FIT_ENTRY_SAL_A, "SAL_A"}, + {FIT_ENTRY_SAL_B, "SAL_B"}, + {FIT_ENTRY_SALRUNTIME, "SAL runtime"}, + {FIT_ENTRY_EFI, "EFI"}, + {FIT_ENTRY_VMLINUX, "Embedded Linux"}, + {FIT_ENTRY_FPSWA, "Embedded FPSWA"}, + {FIT_ENTRY_UNUSED, "Unused"}, + {0xff, "Error"}, +}; + +static const char *fit_type_name(unsigned char type) +{ + struct fit_type_map_t const *mapp; + + for (mapp = fit_entry_types; mapp->type != 0xff; mapp++) + if (type == mapp->type) + return mapp->name; + + if ((type > FIT_ENTRY_PAL_A) && (type < FIT_ENTRY_UNUSED)) + return "OEM type"; + if ((type > FIT_ENTRY_PAL_B) && (type < FIT_ENTRY_PAL_A)) + return "Reserved"; + + return "Unknown type"; +} + +static int +get_fit_entry(unsigned long nasid, int index, unsigned long *fentry, + char *banner, int banlen) +{ + return ia64_sn_get_fit_compt(nasid, index, fentry, banner, banlen); +} + + +/* + * These two routines display the FIT table for each node. + */ +static int dump_fit_entry(char *page, unsigned long *fentry) +{ + unsigned type; + + type = FIT_TYPE(fentry[1]); + return sprintf(page, "%02x %-25s %x.%02x %016lx %u\n", + type, + fit_type_name(type), + FIT_MAJOR(fentry[1]), FIT_MINOR(fentry[1]), + fentry[0], + /* mult by sixteen to get size in bytes */ + (unsigned)(fentry[1] & 0xffffff) * 16); +} + + +/* + * We assume that the fit table will be small enough that we can print + * the whole thing into one page. (This is true for our default 16kB + * pages -- each entry is about 60 chars wide when printed.) I read + * somewhere that the maximum size of the FIT is 128 entries, so we're + * OK except for 4kB pages (and no one is going to do that on SN + * anyway). + */ +static int +dump_fit(char *page, unsigned long nasid) +{ + unsigned long fentry[2]; + int index; + char *p; + + p = page; + for (index=0;;index++) { + BUG_ON(index * 60 > PAGE_SIZE); + if (get_fit_entry(nasid, index, fentry, NULL, 0)) + break; + p += dump_fit_entry(p, fentry); + } + + return p - page; +} + +static int +dump_version(char *page, unsigned long nasid) +{ + unsigned long fentry[2]; + char banner[128]; + int index; + int len; + + for (index = 0; ; index++) { + if (get_fit_entry(nasid, index, fentry, banner, + sizeof(banner))) + return 0; + if (FIT_TYPE(fentry[1]) == FIT_ENTRY_SAL_A) + break; + } + + len = sprintf(page, "%x.%02x\n", FIT_MAJOR(fentry[1]), + FIT_MINOR(fentry[1])); + page += len; + + if (banner[0]) + len += snprintf(page, PAGE_SIZE-len, "%s\n", banner); + + return len; +} + +/* same as in proc_misc.c */ +static int +proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof, + int len) +{ + if (len <= off + count) + *eof = 1; + *start = page + off; + len -= off; + if (len > count) + len = count; + if (len < 0) + len = 0; + return len; +} + +static int +read_version_entry(char *page, char **start, off_t off, int count, int *eof, + void *data) +{ + int len; + + /* data holds the NASID of the node */ + len = dump_version(page, (unsigned long)data); + len = proc_calc_metrics(page, start, off, count, eof, len); + return len; +} + +static int +read_fit_entry(char *page, char **start, off_t off, int count, int *eof, + void *data) +{ + int len; + + /* data holds the NASID of the node */ + len = dump_fit(page, (unsigned long)data); + len = proc_calc_metrics(page, start, off, count, eof, len); + + return len; +} + +/* module entry points */ +int __init prominfo_init(void); +void __exit prominfo_exit(void); + +module_init(prominfo_init); +module_exit(prominfo_exit); + +static struct proc_dir_entry **proc_entries; +static struct proc_dir_entry *sgi_prominfo_entry; + +#define NODE_NAME_LEN 11 + +int __init prominfo_init(void) +{ + struct proc_dir_entry **entp; + cnodeid_t cnodeid; + unsigned long nasid; + int size; + char name[NODE_NAME_LEN]; + + if (!ia64_platform_is("sn2")) + return 0; + + size = num_online_nodes() * sizeof(struct proc_dir_entry *); + proc_entries = kzalloc(size, GFP_KERNEL); + if (!proc_entries) + return -ENOMEM; + + sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL); + + entp = proc_entries; + for_each_online_node(cnodeid) { + sprintf(name, "node%d", cnodeid); + *entp = proc_mkdir(name, sgi_prominfo_entry); + nasid = cnodeid_to_nasid(cnodeid); + create_proc_read_entry("fit", 0, *entp, read_fit_entry, + (void *)nasid); + create_proc_read_entry("version", 0, *entp, + read_version_entry, (void *)nasid); + entp++; + } + + return 0; +} + +void __exit prominfo_exit(void) +{ + struct proc_dir_entry **entp; + unsigned int cnodeid; + char name[NODE_NAME_LEN]; + + entp = proc_entries; + for_each_online_node(cnodeid) { + remove_proc_entry("fit", *entp); + remove_proc_entry("version", *entp); + sprintf(name, "node%d", cnodeid); + remove_proc_entry(name, sgi_prominfo_entry); + entp++; + } + remove_proc_entry("sgi_prominfo", NULL); + kfree(proc_entries); +} diff --git a/arch/ia64/sn/kernel/sn2/ptc_deadlock.S b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S new file mode 100644 index 00000000..bebbcc4f --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S @@ -0,0 +1,92 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#include <asm/types.h> +#include <asm/sn/shub_mmr.h> + +#define DEADLOCKBIT SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT +#define WRITECOUNTMASK SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK +#define ALIAS_OFFSET 8 + + + .global sn2_ptc_deadlock_recovery_core + .proc sn2_ptc_deadlock_recovery_core + +sn2_ptc_deadlock_recovery_core: + .regstk 6,0,0,0 + + ptc0 = in0 + data0 = in1 + ptc1 = in2 + data1 = in3 + piowc = in4 + zeroval = in5 + piowcphy = r30 + psrsave = r2 + scr1 = r16 + scr2 = r17 + mask = r18 + + + extr.u piowcphy=piowc,0,61;; // Convert piowc to uncached physical address + dep piowcphy=-1,piowcphy,63,1 + movl mask=WRITECOUNTMASK + mov r8=r0 + +1: + cmp.ne p8,p9=r0,ptc1 // Test for shub type (ptc1 non-null on shub1) + // p8 = 1 if shub1, p9 = 1 if shub2 + + add scr2=ALIAS_OFFSET,piowc // Address of WRITE_STATUS alias register + mov scr1=7;; // Clear DEADLOCK, WRITE_ERROR, MULTI_WRITE_ERROR +(p8) st8.rel [scr2]=scr1;; +(p9) ld8.acq scr1=[scr2];; + +5: ld8.acq scr1=[piowc];; // Wait for PIOs to complete. + hint @pause + and scr2=scr1,mask;; // mask of writecount bits + cmp.ne p6,p0=zeroval,scr2 +(p6) br.cond.sptk 5b + + + + ////////////// BEGIN PHYSICAL MODE //////////////////// + mov psrsave=psr // Disable IC (no PMIs) + rsm psr.i | psr.dt | psr.ic;; + srlz.i;; + + st8.rel [ptc0]=data0 // Write PTC0 & wait for completion. + +5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + hint @pause + and scr2=scr1,mask;; // mask of writecount bits + cmp.ne p6,p0=zeroval,scr2 +(p6) br.cond.sptk 5b;; + + tbit.nz p8,p7=scr1,DEADLOCKBIT;;// Test for DEADLOCK +(p7) cmp.ne p7,p0=r0,ptc1;; // Test for non-null ptc1 + +(p7) st8.rel [ptc1]=data1;; // Now write PTC1. + +5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + hint @pause + and scr2=scr1,mask;; // mask of writecount bits + cmp.ne p6,p0=zeroval,scr2 +(p6) br.cond.sptk 5b + + tbit.nz p8,p0=scr1,DEADLOCKBIT;;// Test for DEADLOCK + + mov psr.l=psrsave;; // Reenable IC + srlz.i;; + ////////////// END PHYSICAL MODE //////////////////// + +(p8) add r8=1,r8 +(p8) br.cond.spnt 1b;; // Repeat if DEADLOCK occurred. + + br.ret.sptk rp + .endp sn2_ptc_deadlock_recovery_core diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c new file mode 100644 index 00000000..68c84541 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -0,0 +1,572 @@ +/* + * SN2 Platform specific SMP Support + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/threads.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/bitops.h> +#include <linux/nodemask.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> + +#include <asm/processor.h> +#include <asm/irq.h> +#include <asm/sal.h> +#include <asm/delay.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/tlb.h> +#include <asm/numa.h> +#include <asm/hw_irq.h> +#include <asm/current.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/rw_mmr.h> +#include <asm/sn/sn_feature_sets.h> + +DEFINE_PER_CPU(struct ptc_stats, ptcstats); +DECLARE_PER_CPU(struct ptc_stats, ptcstats); + +static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); + +/* 0 = old algorithm (no IPI flushes), 1 = ipi deadlock flush, 2 = ipi instead of SHUB ptc, >2 = always ipi */ +static int sn2_flush_opt = 0; + +extern unsigned long +sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); +void +sn2_ptc_deadlock_recovery(short *, short, short, int, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); + +/* + * Note: some is the following is captured here to make degugging easier + * (the macros make more sense if you see the debug patch - not posted) + */ +#define sn2_ptctest 0 +#define local_node_uses_ptc_ga(sh1) ((sh1) ? 1 : 0) +#define max_active_pio(sh1) ((sh1) ? 32 : 7) +#define reset_max_active_on_deadlock() 1 +#define PTC_LOCK(sh1) ((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock) + +struct ptc_stats { + unsigned long ptc_l; + unsigned long change_rid; + unsigned long shub_ptc_flushes; + unsigned long nodes_flushed; + unsigned long deadlocks; + unsigned long deadlocks2; + unsigned long lock_itc_clocks; + unsigned long shub_itc_clocks; + unsigned long shub_itc_clocks_max; + unsigned long shub_ptc_flushes_not_my_mm; + unsigned long shub_ipi_flushes; + unsigned long shub_ipi_flushes_itc_clocks; +}; + +#define sn2_ptctest 0 + +static inline unsigned long wait_piowc(void) +{ + volatile unsigned long *piows; + unsigned long zeroval, ws; + + piows = pda->pio_write_status_addr; + zeroval = pda->pio_write_status_val; + do { + cpu_relax(); + } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval); + return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0; +} + +/** + * sn_migrate - SN-specific task migration actions + * @task: Task being migrated to new CPU + * + * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order. + * Context switching user threads which have memory-mapped MMIO may cause + * PIOs to issue from separate CPUs, thus the PIO writes must be drained + * from the previous CPU's Shub before execution resumes on the new CPU. + */ +void sn_migrate(struct task_struct *task) +{ + pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu); + volatile unsigned long *adr = last_pda->pio_write_status_addr; + unsigned long val = last_pda->pio_write_status_val; + + /* Drain PIO writes from old CPU's Shub */ + while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) + != val)) + cpu_relax(); +} + +void sn_tlb_migrate_finish(struct mm_struct *mm) +{ + /* flush_tlb_mm is inefficient if more than 1 users of mm */ + if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1) + flush_tlb_mm(mm); +} + +static void +sn2_ipi_flush_all_tlb(struct mm_struct *mm) +{ + unsigned long itc; + + itc = ia64_get_itc(); + smp_flush_tlb_cpumask(*mm_cpumask(mm)); + itc = ia64_get_itc() - itc; + __get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc; + __get_cpu_var(ptcstats).shub_ipi_flushes++; +} + +/** + * sn2_global_tlb_purge - globally purge translation cache of virtual address range + * @mm: mm_struct containing virtual address range + * @start: start of virtual address range + * @end: end of virtual address range + * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) + * + * Purges the translation caches of all processors of the given virtual address + * range. + * + * Note: + * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. + * - cpu_vm_mask is converted into a nodemask of the nodes containing the + * cpus in cpu_vm_mask. + * - if only one bit is set in cpu_vm_mask & it is the current cpu & the + * process is purging its own virtual address range, then only the + * local TLB needs to be flushed. This flushing can be done using + * ptc.l. This is the common case & avoids the global spinlock. + * - if multiple cpus have loaded the context, then flushing has to be + * done with ptc.g/MMRs under protection of the global ptc_lock. + */ + +void +sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned long nbits) +{ + int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid; + int mymm = (mm == current->active_mm && mm == current->mm); + int use_cpu_ptcga; + volatile unsigned long *ptc0, *ptc1; + unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; + short nasids[MAX_NUMNODES], nix; + nodemask_t nodes_flushed; + int active, max_active, deadlock, flush_opt = sn2_flush_opt; + + if (flush_opt > 2) { + sn2_ipi_flush_all_tlb(mm); + return; + } + + nodes_clear(nodes_flushed); + i = 0; + + for_each_cpu(cpu, mm_cpumask(mm)) { + cnode = cpu_to_node(cpu); + node_set(cnode, nodes_flushed); + lcpu = cpu; + i++; + } + + if (i == 0) + return; + + preempt_disable(); + + if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) { + do { + ia64_ptcl(start, nbits << 2); + start += (1UL << nbits); + } while (start < end); + ia64_srlz_i(); + __get_cpu_var(ptcstats).ptc_l++; + preempt_enable(); + return; + } + + if (atomic_read(&mm->mm_users) == 1 && mymm) { + flush_tlb_mm(mm); + __get_cpu_var(ptcstats).change_rid++; + preempt_enable(); + return; + } + + if (flush_opt == 2) { + sn2_ipi_flush_all_tlb(mm); + preempt_enable(); + return; + } + + itc = ia64_get_itc(); + nix = 0; + for_each_node_mask(cnode, nodes_flushed) + nasids[nix++] = cnodeid_to_nasid(cnode); + + rr_value = (mm->context << 3) | REGION_NUMBER(start); + + shub1 = is_shub1(); + if (shub1) { + data0 = (1UL << SH1_PTC_0_A_SHFT) | + (nbits << SH1_PTC_0_PS_SHFT) | + (rr_value << SH1_PTC_0_RID_SHFT) | + (1UL << SH1_PTC_0_START_SHFT); + ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); + ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); + } else { + data0 = (1UL << SH2_PTC_A_SHFT) | + (nbits << SH2_PTC_PS_SHFT) | + (1UL << SH2_PTC_START_SHFT); + ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + + (rr_value << SH2_PTC_RID_SHFT)); + ptc1 = NULL; + } + + + mynasid = get_nasid(); + use_cpu_ptcga = local_node_uses_ptc_ga(shub1); + max_active = max_active_pio(shub1); + + itc = ia64_get_itc(); + spin_lock_irqsave(PTC_LOCK(shub1), flags); + itc2 = ia64_get_itc(); + + __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc; + __get_cpu_var(ptcstats).shub_ptc_flushes++; + __get_cpu_var(ptcstats).nodes_flushed += nix; + if (!mymm) + __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++; + + if (use_cpu_ptcga && !mymm) { + old_rr = ia64_get_rr(start); + ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8)); + ia64_srlz_d(); + } + + wait_piowc(); + do { + if (shub1) + data1 = start | (1UL << SH1_PTC_1_START_SHFT); + else + data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); + deadlock = 0; + active = 0; + for (ibegin = 0, i = 0; i < nix; i++) { + nasid = nasids[i]; + if (use_cpu_ptcga && unlikely(nasid == mynasid)) { + ia64_ptcga(start, nbits << 2); + ia64_srlz_i(); + } else { + ptc0 = CHANGE_NASID(nasid, ptc0); + if (ptc1) + ptc1 = CHANGE_NASID(nasid, ptc1); + pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1); + active++; + } + if (active >= max_active || i == (nix - 1)) { + if ((deadlock = wait_piowc())) { + if (flush_opt == 1) + goto done; + sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); + if (reset_max_active_on_deadlock()) + max_active = 1; + } + active = 0; + ibegin = i + 1; + } + } + start += (1UL << nbits); + } while (start < end); + +done: + itc2 = ia64_get_itc() - itc2; + __get_cpu_var(ptcstats).shub_itc_clocks += itc2; + if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) + __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2; + + if (old_rr) { + ia64_set_rr(start, old_rr); + ia64_srlz_d(); + } + + spin_unlock_irqrestore(PTC_LOCK(shub1), flags); + + if (flush_opt == 1 && deadlock) { + __get_cpu_var(ptcstats).deadlocks++; + sn2_ipi_flush_all_tlb(mm); + } + + preempt_enable(); +} + +/* + * sn2_ptc_deadlock_recovery + * + * Recover from PTC deadlocks conditions. Recovery requires stepping thru each + * TLB flush transaction. The recovery sequence is somewhat tricky & is + * coded in assembly language. + */ + +void +sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, + volatile unsigned long *ptc0, unsigned long data0, + volatile unsigned long *ptc1, unsigned long data1) +{ + short nasid, i; + unsigned long *piows, zeroval, n; + + __get_cpu_var(ptcstats).deadlocks++; + + piows = (unsigned long *) pda->pio_write_status_addr; + zeroval = pda->pio_write_status_val; + + + for (i=ib; i <= ie; i++) { + nasid = nasids[i]; + if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid) + continue; + ptc0 = CHANGE_NASID(nasid, ptc0); + if (ptc1) + ptc1 = CHANGE_NASID(nasid, ptc1); + + n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); + __get_cpu_var(ptcstats).deadlocks2 += n; + } + +} + +/** + * sn_send_IPI_phys - send an IPI to a Nasid and slice + * @nasid: nasid to receive the interrupt (may be outside partition) + * @physid: physical cpuid to receive the interrupt. + * @vector: command to send + * @delivery_mode: delivery mechanism + * + * Sends an IPI (interprocessor interrupt) to the processor specified by + * @physid + * + * @delivery_mode can be one of the following + * + * %IA64_IPI_DM_INT - pend an interrupt + * %IA64_IPI_DM_PMI - pend a PMI + * %IA64_IPI_DM_NMI - pend an NMI + * %IA64_IPI_DM_INIT - pend an INIT interrupt + */ +void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode) +{ + long val; + unsigned long flags = 0; + volatile long *p; + + p = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT); + val = (1UL << SH_IPI_INT_SEND_SHFT) | + (physid << SH_IPI_INT_PID_SHFT) | + ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) | + ((long)vector << SH_IPI_INT_IDX_SHFT) | + (0x000feeUL << SH_IPI_INT_BASE_SHFT); + + mb(); + if (enable_shub_wars_1_1()) { + spin_lock_irqsave(&sn2_global_ptc_lock, flags); + } + pio_phys_write_mmr(p, val); + if (enable_shub_wars_1_1()) { + wait_piowc(); + spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); + } + +} + +EXPORT_SYMBOL(sn_send_IPI_phys); + +/** + * sn2_send_IPI - send an IPI to a processor + * @cpuid: target of the IPI + * @vector: command to send + * @delivery_mode: delivery mechanism + * @redirect: redirect the IPI? + * + * Sends an IPI (InterProcessor Interrupt) to the processor specified by + * @cpuid. @vector specifies the command to send, while @delivery_mode can + * be one of the following + * + * %IA64_IPI_DM_INT - pend an interrupt + * %IA64_IPI_DM_PMI - pend a PMI + * %IA64_IPI_DM_NMI - pend an NMI + * %IA64_IPI_DM_INIT - pend an INIT interrupt + */ +void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect) +{ + long physid; + int nasid; + + physid = cpu_physical_id(cpuid); + nasid = cpuid_to_nasid(cpuid); + + /* the following is used only when starting cpus at boot time */ + if (unlikely(nasid == -1)) + ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL); + + sn_send_IPI_phys(nasid, physid, vector, delivery_mode); +} + +#ifdef CONFIG_HOTPLUG_CPU +/** + * sn_cpu_disable_allowed - Determine if a CPU can be disabled. + * @cpu - CPU that is requested to be disabled. + * + * CPU disable is only allowed on SHub2 systems running with a PROM + * that supports CPU disable. It is not permitted to disable the boot processor. + */ +bool sn_cpu_disable_allowed(int cpu) +{ + if (is_shub2() && sn_prom_feature_available(PRF_CPU_DISABLE_SUPPORT)) { + if (cpu != 0) + return true; + else + printk(KERN_WARNING + "Disabling the boot processor is not allowed.\n"); + + } else + printk(KERN_WARNING + "CPU disable is not supported on this system.\n"); + + return false; +} +#endif /* CONFIG_HOTPLUG_CPU */ + +#ifdef CONFIG_PROC_FS + +#define PTC_BASENAME "sgi_sn/ptc_statistics" + +static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset) +{ + if (*offset < nr_cpu_ids) + return offset; + return NULL; +} + +static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset) +{ + (*offset)++; + if (*offset < nr_cpu_ids) + return offset; + return NULL; +} + +static void sn2_ptc_seq_stop(struct seq_file *file, void *data) +{ +} + +static int sn2_ptc_seq_show(struct seq_file *file, void *data) +{ + struct ptc_stats *stat; + int cpu; + + cpu = *(loff_t *) data; + + if (!cpu) { + seq_printf(file, + "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2 ipi_fluches ipi_nsec\n"); + seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt); + } + + if (cpu < nr_cpu_ids && cpu_online(cpu)) { + stat = &per_cpu(ptcstats, cpu); + seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, + stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, + stat->deadlocks, + 1000 * stat->lock_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks_max / per_cpu(ia64_cpu_info, cpu).cyc_per_usec, + stat->shub_ptc_flushes_not_my_mm, + stat->deadlocks2, + stat->shub_ipi_flushes, + 1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec); + } + return 0; +} + +static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, size_t count, loff_t *data) +{ + int cpu; + char optstr[64]; + + if (count == 0 || count > sizeof(optstr)) + return -EINVAL; + if (copy_from_user(optstr, user, count)) + return -EFAULT; + optstr[count - 1] = '\0'; + sn2_flush_opt = simple_strtoul(optstr, NULL, 0); + + for_each_online_cpu(cpu) + memset(&per_cpu(ptcstats, cpu), 0, sizeof(struct ptc_stats)); + + return count; +} + +static const struct seq_operations sn2_ptc_seq_ops = { + .start = sn2_ptc_seq_start, + .next = sn2_ptc_seq_next, + .stop = sn2_ptc_seq_stop, + .show = sn2_ptc_seq_show +}; + +static int sn2_ptc_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &sn2_ptc_seq_ops); +} + +static const struct file_operations proc_sn2_ptc_operations = { + .open = sn2_ptc_proc_open, + .read = seq_read, + .write = sn2_ptc_proc_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static struct proc_dir_entry *proc_sn2_ptc; + +static int __init sn2_ptc_init(void) +{ + if (!ia64_platform_is("sn2")) + return 0; + + proc_sn2_ptc = proc_create(PTC_BASENAME, 0444, + NULL, &proc_sn2_ptc_operations); + if (!proc_sn2_ptc) { + printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME); + return -EINVAL; + } + spin_lock_init(&sn2_global_ptc_lock); + return 0; +} + +static void __exit sn2_ptc_exit(void) +{ + remove_proc_entry(PTC_BASENAME, NULL); +} + +module_init(sn2_ptc_init); +module_exit(sn2_ptc_exit); +#endif /* CONFIG_PROC_FS */ + diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c new file mode 100644 index 00000000..4554f68b --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -0,0 +1,1003 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved. + * + * SGI Altix topology and hardware performance monitoring API. + * Mark Goodwin <markgw@sgi.com>. + * + * Creates /proc/sgi_sn/sn_topology (read-only) to export + * info about Altix nodes, routers, CPUs and NumaLink + * interconnection/topology. + * + * Also creates a dynamic misc device named "sn_hwperf" + * that supports an ioctl interface to call down into SAL + * to discover hw objects, topology and to read/write + * memory mapped registers, e.g. for performance monitoring. + * The "sn_hwperf" device is registered only after the procfs + * file is first opened, i.e. only if/when it's needed. + * + * This API is used by SGI Performance Co-Pilot and other + * tools, see http://oss.sgi.com/projects/pcp + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/vmalloc.h> +#include <linux/seq_file.h> +#include <linux/miscdevice.h> +#include <linux/utsname.h> +#include <linux/cpumask.h> +#include <linux/nodemask.h> +#include <linux/smp.h> +#include <linux/mutex.h> + +#include <asm/processor.h> +#include <asm/topology.h> +#include <asm/uaccess.h> +#include <asm/sal.h> +#include <asm/sn/io.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/module.h> +#include <asm/sn/geo.h> +#include <asm/sn/sn2/sn_hwperf.h> +#include <asm/sn/addrs.h> + +static void *sn_hwperf_salheap = NULL; +static int sn_hwperf_obj_cnt = 0; +static nasid_t sn_hwperf_master_nasid = INVALID_NASID; +static int sn_hwperf_init(void); +static DEFINE_MUTEX(sn_hwperf_init_mutex); + +#define cnode_possible(n) ((n) < num_cnodes) + +static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret) +{ + int e; + u64 sz; + struct sn_hwperf_object_info *objbuf = NULL; + + if ((e = sn_hwperf_init()) < 0) { + printk(KERN_ERR "sn_hwperf_init failed: err %d\n", e); + goto out; + } + + sz = sn_hwperf_obj_cnt * sizeof(struct sn_hwperf_object_info); + objbuf = vmalloc(sz); + if (objbuf == NULL) { + printk("sn_hwperf_enum_objects: vmalloc(%d) failed\n", (int)sz); + e = -ENOMEM; + goto out; + } + + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_OBJECTS, + 0, sz, (u64) objbuf, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + vfree(objbuf); + } + +out: + *nobj = sn_hwperf_obj_cnt; + *ret = objbuf; + return e; +} + +static int sn_hwperf_location_to_bpos(char *location, + int *rack, int *bay, int *slot, int *slab) +{ + char type; + + /* first scan for an old style geoid string */ + if (sscanf(location, "%03d%c%02d#%d", + rack, &type, bay, slab) == 4) + *slot = 0; + else /* scan for a new bladed geoid string */ + if (sscanf(location, "%03d%c%02d^%02d#%d", + rack, &type, bay, slot, slab) != 5) + return -1; + /* success */ + return 0; +} + +static int sn_hwperf_geoid_to_cnode(char *location) +{ + int cnode; + geoid_t geoid; + moduleid_t module_id; + int rack, bay, slot, slab; + int this_rack, this_bay, this_slot, this_slab; + + if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab)) + return -1; + + /* + * FIXME: replace with cleaner for_each_XXX macro which addresses + * both compute and IO nodes once ACPI3.0 is available. + */ + for (cnode = 0; cnode < num_cnodes; cnode++) { + geoid = cnodeid_get_geoid(cnode); + module_id = geo_module(geoid); + this_rack = MODULE_GET_RACK(module_id); + this_bay = MODULE_GET_BPOS(module_id); + this_slot = geo_slot(geoid); + this_slab = geo_slab(geoid); + if (rack == this_rack && bay == this_bay && + slot == this_slot && slab == this_slab) { + break; + } + } + + return cnode_possible(cnode) ? cnode : -1; +} + +static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj) +{ + if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) + BUG(); + if (SN_HWPERF_FOREIGN(obj)) + return -1; + return sn_hwperf_geoid_to_cnode(obj->location); +} + +static int sn_hwperf_generic_ordinal(struct sn_hwperf_object_info *obj, + struct sn_hwperf_object_info *objs) +{ + int ordinal; + struct sn_hwperf_object_info *p; + + for (ordinal=0, p=objs; p != obj; p++) { + if (SN_HWPERF_FOREIGN(p)) + continue; + if (SN_HWPERF_SAME_OBJTYPE(p, obj)) + ordinal++; + } + + return ordinal; +} + +static const char *slabname_node = "node"; /* SHub asic */ +static const char *slabname_ionode = "ionode"; /* TIO asic */ +static const char *slabname_router = "router"; /* NL3R or NL4R */ +static const char *slabname_other = "other"; /* unknown asic */ + +static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj, + struct sn_hwperf_object_info *objs, int *ordinal) +{ + int isnode; + const char *slabname = slabname_other; + + if ((isnode = SN_HWPERF_IS_NODE(obj)) || SN_HWPERF_IS_IONODE(obj)) { + slabname = isnode ? slabname_node : slabname_ionode; + *ordinal = sn_hwperf_obj_to_cnode(obj); + } + else { + *ordinal = sn_hwperf_generic_ordinal(obj, objs); + if (SN_HWPERF_IS_ROUTER(obj)) + slabname = slabname_router; + } + + return slabname; +} + +static void print_pci_topology(struct seq_file *s) +{ + char *p; + size_t sz; + int e; + + for (sz = PAGE_SIZE; sz < 16 * PAGE_SIZE; sz += PAGE_SIZE) { + if (!(p = kmalloc(sz, GFP_KERNEL))) + break; + e = ia64_sn_ioif_get_pci_topology(__pa(p), sz); + if (e == SALRET_OK) + seq_puts(s, p); + kfree(p); + if (e == SALRET_OK || e == SALRET_NOT_IMPLEMENTED) + break; + } +} + +static inline int sn_hwperf_has_cpus(cnodeid_t node) +{ + return node < MAX_NUMNODES && node_online(node) && nr_cpus_node(node); +} + +static inline int sn_hwperf_has_mem(cnodeid_t node) +{ + return node < MAX_NUMNODES && node_online(node) && NODE_DATA(node)->node_present_pages; +} + +static struct sn_hwperf_object_info * +sn_hwperf_findobj_id(struct sn_hwperf_object_info *objbuf, + int nobj, int id) +{ + int i; + struct sn_hwperf_object_info *p = objbuf; + + for (i=0; i < nobj; i++, p++) { + if (p->id == id) + return p; + } + + return NULL; + +} + +static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objbuf, + int nobj, cnodeid_t node, cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + struct sn_hwperf_object_info *nodeobj = NULL; + struct sn_hwperf_object_info *op; + struct sn_hwperf_object_info *dest; + struct sn_hwperf_object_info *router; + struct sn_hwperf_port_info ptdata[16]; + int sz, i, j; + cnodeid_t c; + int found_mem = 0; + int found_cpu = 0; + + if (!cnode_possible(node)) + return -EINVAL; + + if (sn_hwperf_has_cpus(node)) { + if (near_cpu_node) + *near_cpu_node = node; + found_cpu++; + } + + if (sn_hwperf_has_mem(node)) { + if (near_mem_node) + *near_mem_node = node; + found_mem++; + } + + if (found_cpu && found_mem) + return 0; /* trivially successful */ + + /* find the argument node object */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (!SN_HWPERF_IS_NODE(op) && !SN_HWPERF_IS_IONODE(op)) + continue; + if (node == sn_hwperf_obj_to_cnode(op)) { + nodeobj = op; + break; + } + } + if (!nodeobj) { + e = -ENOENT; + goto err; + } + + /* get it's interconnect topology */ + sz = op->ports * sizeof(struct sn_hwperf_port_info); + BUG_ON(sz > sizeof(ptdata)); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, nodeobj->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + + /* find nearest node with cpus and nearest memory */ + for (router=NULL, j=0; j < op->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, ptdata[j].conn_id); + if (dest && SN_HWPERF_IS_ROUTER(dest)) + router = dest; + if (!dest || SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + } + + if (router && (!found_cpu || !found_mem)) { + /* search for a node connected to the same router */ + sz = router->ports * sizeof(struct sn_hwperf_port_info); + BUG_ON(sz > sizeof(ptdata)); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, router->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + for (j=0; j < router->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, + ptdata[j].conn_id); + if (!dest || dest->id == node || + SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || + SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) + break; + } + } + + if (!found_cpu || !found_mem) { + /* resort to _any_ node with CPUs and memory */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (SN_HWPERF_FOREIGN(op) || + SN_HWPERF_IS_IONODE(op) || + !SN_HWPERF_IS_NODE(op)) { + continue; + } + c = sn_hwperf_obj_to_cnode(op); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) + break; + } + } + + if (!found_cpu || !found_mem) + e = -ENODATA; + +err: + return e; +} + + +static int sn_topology_show(struct seq_file *s, void *d) +{ + int sz; + int pt; + int e = 0; + int i; + int j; + const char *slabname; + int ordinal; + char slice; + struct cpuinfo_ia64 *c; + struct sn_hwperf_port_info *ptdata; + struct sn_hwperf_object_info *p; + struct sn_hwperf_object_info *obj = d; /* this object */ + struct sn_hwperf_object_info *objs = s->private; /* all objects */ + u8 shubtype; + u8 system_size; + u8 sharing_size; + u8 partid; + u8 coher; + u8 nasid_shift; + u8 region_size; + u16 nasid_mask; + int nasid_msb; + + if (obj == objs) { + seq_printf(s, "# sn_topology version 2\n"); + seq_printf(s, "# objtype ordinal location partition" + " [attribute value [, ...]]\n"); + + if (ia64_sn_get_sn_info(0, + &shubtype, &nasid_mask, &nasid_shift, &system_size, + &sharing_size, &partid, &coher, ®ion_size)) + BUG(); + for (nasid_msb=63; nasid_msb > 0; nasid_msb--) { + if (((u64)nasid_mask << nasid_shift) & (1ULL << nasid_msb)) + break; + } + seq_printf(s, "partition %u %s local " + "shubtype %s, " + "nasid_mask 0x%016llx, " + "nasid_bits %d:%d, " + "system_size %d, " + "sharing_size %d, " + "coherency_domain %d, " + "region_size %d\n", + + partid, utsname()->nodename, + shubtype ? "shub2" : "shub1", + (u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift, + system_size, sharing_size, coher, region_size); + + print_pci_topology(s); + } + + if (SN_HWPERF_FOREIGN(obj)) { + /* private in another partition: not interesting */ + return 0; + } + + for (i = 0; i < SN_HWPERF_MAXSTRING && obj->name[i]; i++) { + if (obj->name[i] == ' ') + obj->name[i] = '_'; + } + + slabname = sn_hwperf_get_slabname(obj, objs, &ordinal); + seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location, + obj->sn_hwp_this_part ? "local" : "shared", obj->name); + + if (ordinal < 0 || (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))) + seq_putc(s, '\n'); + else { + cnodeid_t near_mem = -1; + cnodeid_t near_cpu = -1; + + seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal)); + + if (sn_hwperf_get_nearest_node_objdata(objs, sn_hwperf_obj_cnt, + ordinal, &near_mem, &near_cpu) == 0) { + seq_printf(s, ", near_mem_nodeid %d, near_cpu_nodeid %d", + near_mem, near_cpu); + } + + if (!SN_HWPERF_IS_IONODE(obj)) { + for_each_online_node(i) { + seq_printf(s, i ? ":%d" : ", dist %d", + node_distance(ordinal, i)); + } + } + + seq_putc(s, '\n'); + + /* + * CPUs on this node, if any + */ + if (!SN_HWPERF_IS_IONODE(obj)) { + for_each_cpu_and(i, cpu_online_mask, + cpumask_of_node(ordinal)) { + slice = 'a' + cpuid_to_slice(i); + c = cpu_data(i); + seq_printf(s, "cpu %d %s%c local" + " freq %luMHz, arch ia64", + i, obj->location, slice, + c->proc_freq / 1000000); + for_each_online_cpu(j) { + seq_printf(s, j ? ":%d" : ", dist %d", + node_distance( + cpu_to_node(i), + cpu_to_node(j))); + } + seq_putc(s, '\n'); + } + } + } + + if (obj->ports) { + /* + * numalink ports + */ + sz = obj->ports * sizeof(struct sn_hwperf_port_info); + if ((ptdata = kmalloc(sz, GFP_KERNEL)) == NULL) + return -ENOMEM; + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, obj->id, sz, + (u64) ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) + return -EINVAL; + for (ordinal=0, p=objs; p != obj; p++) { + if (!SN_HWPERF_FOREIGN(p)) + ordinal += p->ports; + } + for (pt = 0; pt < obj->ports; pt++) { + for (p = objs, i = 0; i < sn_hwperf_obj_cnt; i++, p++) { + if (ptdata[pt].conn_id == p->id) { + break; + } + } + seq_printf(s, "numalink %d %s-%d", + ordinal+pt, obj->location, ptdata[pt].port); + + if (i >= sn_hwperf_obj_cnt) { + /* no connection */ + seq_puts(s, " local endpoint disconnected" + ", protocol unknown\n"); + continue; + } + + if (obj->sn_hwp_this_part && p->sn_hwp_this_part) + /* both ends local to this partition */ + seq_puts(s, " local"); + else if (SN_HWPERF_FOREIGN(p)) + /* both ends of the link in foreign partiton */ + seq_puts(s, " foreign"); + else + /* link straddles a partition */ + seq_puts(s, " shared"); + + /* + * Unlikely, but strictly should query the LLP config + * registers because an NL4R can be configured to run + * NL3 protocol, even when not talking to an NL3 router. + * Ditto for node-node. + */ + seq_printf(s, " endpoint %s-%d, protocol %s\n", + p->location, ptdata[pt].conn_port, + (SN_HWPERF_IS_NL3ROUTER(obj) || + SN_HWPERF_IS_NL3ROUTER(p)) ? "LLP3" : "LLP4"); + } + kfree(ptdata); + } + + return 0; +} + +static void *sn_topology_start(struct seq_file *s, loff_t * pos) +{ + struct sn_hwperf_object_info *objs = s->private; + + if (*pos < sn_hwperf_obj_cnt) + return (void *)(objs + *pos); + + return NULL; +} + +static void *sn_topology_next(struct seq_file *s, void *v, loff_t * pos) +{ + ++*pos; + return sn_topology_start(s, pos); +} + +static void sn_topology_stop(struct seq_file *m, void *v) +{ + return; +} + +/* + * /proc/sgi_sn/sn_topology, read-only using seq_file + */ +static const struct seq_operations sn_topology_seq_ops = { + .start = sn_topology_start, + .next = sn_topology_next, + .stop = sn_topology_stop, + .show = sn_topology_show +}; + +struct sn_hwperf_op_info { + u64 op; + struct sn_hwperf_ioctl_args *a; + void *p; + int *v0; + int ret; +}; + +static void sn_hwperf_call_sal(void *info) +{ + struct sn_hwperf_op_info *op_info = info; + int r; + + r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op_info->op, + op_info->a->arg, op_info->a->sz, + (u64) op_info->p, 0, 0, op_info->v0); + op_info->ret = r; +} + +static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info) +{ + u32 cpu; + u32 use_ipi; + int r = 0; + cpumask_t save_allowed; + + cpu = (op_info->a->arg & SN_HWPERF_ARG_CPU_MASK) >> 32; + use_ipi = op_info->a->arg & SN_HWPERF_ARG_USE_IPI_MASK; + op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK; + + if (cpu != SN_HWPERF_ARG_ANY_CPU) { + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { + r = -EINVAL; + goto out; + } + } + + if (cpu == SN_HWPERF_ARG_ANY_CPU) { + /* don't care which cpu */ + sn_hwperf_call_sal(op_info); + } else if (cpu == get_cpu()) { + /* already on correct cpu */ + sn_hwperf_call_sal(op_info); + put_cpu(); + } else { + put_cpu(); + if (use_ipi) { + /* use an interprocessor interrupt to call SAL */ + smp_call_function_single(cpu, sn_hwperf_call_sal, + op_info, 1); + } + else { + /* migrate the task before calling SAL */ + save_allowed = current->cpus_allowed; + set_cpus_allowed_ptr(current, cpumask_of(cpu)); + sn_hwperf_call_sal(op_info); + set_cpus_allowed_ptr(current, &save_allowed); + } + } + r = op_info->ret; + +out: + return r; +} + +/* map SAL hwperf error code to system error code */ +static int sn_hwperf_map_err(int hwperf_err) +{ + int e; + + switch(hwperf_err) { + case SN_HWPERF_OP_OK: + e = 0; + break; + + case SN_HWPERF_OP_NOMEM: + e = -ENOMEM; + break; + + case SN_HWPERF_OP_NO_PERM: + e = -EPERM; + break; + + case SN_HWPERF_OP_IO_ERROR: + e = -EIO; + break; + + case SN_HWPERF_OP_BUSY: + e = -EBUSY; + break; + + case SN_HWPERF_OP_RECONFIGURE: + e = -EAGAIN; + break; + + case SN_HWPERF_OP_INVAL: + default: + e = -EINVAL; + break; + } + + return e; +} + +/* + * ioctl for "sn_hwperf" misc device + */ +static long sn_hwperf_ioctl(struct file *fp, u32 op, unsigned long arg) +{ + struct sn_hwperf_ioctl_args a; + struct cpuinfo_ia64 *cdata; + struct sn_hwperf_object_info *objs; + struct sn_hwperf_object_info *cpuobj; + struct sn_hwperf_op_info op_info; + void *p = NULL; + int nobj; + char slice; + int node; + int r; + int v0; + int i; + int j; + + /* only user requests are allowed here */ + if ((op & SN_HWPERF_OP_MASK) < 10) { + r = -EINVAL; + goto error; + } + r = copy_from_user(&a, (const void __user *)arg, + sizeof(struct sn_hwperf_ioctl_args)); + if (r != 0) { + r = -EFAULT; + goto error; + } + + /* + * Allocate memory to hold a kernel copy of the user buffer. The + * buffer contents are either copied in or out (or both) of user + * space depending on the flags encoded in the requested operation. + */ + if (a.ptr) { + p = vmalloc(a.sz); + if (!p) { + r = -ENOMEM; + goto error; + } + } + + if (op & SN_HWPERF_OP_MEM_COPYIN) { + r = copy_from_user(p, (const void __user *)a.ptr, a.sz); + if (r != 0) { + r = -EFAULT; + goto error; + } + } + + switch (op) { + case SN_HWPERF_GET_CPU_INFO: + if (a.sz == sizeof(u64)) { + /* special case to get size needed */ + *(u64 *) p = (u64) num_online_cpus() * + sizeof(struct sn_hwperf_object_info); + } else + if (a.sz < num_online_cpus() * sizeof(struct sn_hwperf_object_info)) { + r = -ENOMEM; + goto error; + } else + if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { + int cpuobj_index = 0; + + memset(p, 0, a.sz); + for (i = 0; i < nobj; i++) { + if (!SN_HWPERF_IS_NODE(objs + i)) + continue; + node = sn_hwperf_obj_to_cnode(objs + i); + for_each_online_cpu(j) { + if (node != cpu_to_node(j)) + continue; + cpuobj = (struct sn_hwperf_object_info *) p + cpuobj_index++; + slice = 'a' + cpuid_to_slice(j); + cdata = cpu_data(j); + cpuobj->id = j; + snprintf(cpuobj->name, + sizeof(cpuobj->name), + "CPU %luMHz %s", + cdata->proc_freq / 1000000, + cdata->vendor); + snprintf(cpuobj->location, + sizeof(cpuobj->location), + "%s%c", objs[i].location, + slice); + } + } + + vfree(objs); + } + break; + + case SN_HWPERF_GET_NODE_NASID: + if (a.sz != sizeof(u64) || + (node = a.arg) < 0 || !cnode_possible(node)) { + r = -EINVAL; + goto error; + } + *(u64 *)p = (u64)cnodeid_to_nasid(node); + break; + + case SN_HWPERF_GET_OBJ_NODE: + i = a.arg; + if (a.sz != sizeof(u64) || i < 0) { + r = -EINVAL; + goto error; + } + if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { + if (i >= nobj) { + r = -EINVAL; + vfree(objs); + goto error; + } + if (objs[i].id != a.arg) { + for (i = 0; i < nobj; i++) { + if (objs[i].id == a.arg) + break; + } + } + if (i == nobj) { + r = -EINVAL; + vfree(objs); + goto error; + } + + if (!SN_HWPERF_IS_NODE(objs + i) && + !SN_HWPERF_IS_IONODE(objs + i)) { + r = -ENOENT; + vfree(objs); + goto error; + } + + *(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i); + vfree(objs); + } + break; + + case SN_HWPERF_GET_MMRS: + case SN_HWPERF_SET_MMRS: + case SN_HWPERF_OBJECT_DISTANCE: + op_info.p = p; + op_info.a = &a; + op_info.v0 = &v0; + op_info.op = op; + r = sn_hwperf_op_cpu(&op_info); + if (r) { + r = sn_hwperf_map_err(r); + a.v0 = v0; + goto error; + } + break; + + default: + /* all other ops are a direct SAL call */ + r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op, + a.arg, a.sz, (u64) p, 0, 0, &v0); + if (r) { + r = sn_hwperf_map_err(r); + goto error; + } + a.v0 = v0; + break; + } + + if (op & SN_HWPERF_OP_MEM_COPYOUT) { + r = copy_to_user((void __user *)a.ptr, p, a.sz); + if (r != 0) { + r = -EFAULT; + goto error; + } + } + +error: + vfree(p); + + return r; +} + +static const struct file_operations sn_hwperf_fops = { + .unlocked_ioctl = sn_hwperf_ioctl, + .llseek = noop_llseek, +}; + +static struct miscdevice sn_hwperf_dev = { + MISC_DYNAMIC_MINOR, + "sn_hwperf", + &sn_hwperf_fops +}; + +static int sn_hwperf_init(void) +{ + u64 v; + int salr; + int e = 0; + + /* single threaded, once-only initialization */ + mutex_lock(&sn_hwperf_init_mutex); + + if (sn_hwperf_salheap) { + mutex_unlock(&sn_hwperf_init_mutex); + return e; + } + + /* + * The PROM code needs a fixed reference node. For convenience the + * same node as the console I/O is used. + */ + sn_hwperf_master_nasid = (nasid_t) ia64_sn_get_console_nasid(); + + /* + * Request the needed size and install the PROM scratch area. + * The PROM keeps various tracking bits in this memory area. + */ + salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + (u64) SN_HWPERF_GET_HEAPSIZE, 0, + (u64) sizeof(u64), (u64) &v, 0, 0, NULL); + if (salr != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto out; + } + + if ((sn_hwperf_salheap = vmalloc(v)) == NULL) { + e = -ENOMEM; + goto out; + } + salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_INSTALL_HEAP, 0, v, + (u64) sn_hwperf_salheap, 0, 0, NULL); + if (salr != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto out; + } + + salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_OBJECT_COUNT, 0, + sizeof(u64), (u64) &v, 0, 0, NULL); + if (salr != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto out; + } + sn_hwperf_obj_cnt = (int)v; + +out: + if (e < 0 && sn_hwperf_salheap) { + vfree(sn_hwperf_salheap); + sn_hwperf_salheap = NULL; + sn_hwperf_obj_cnt = 0; + } + mutex_unlock(&sn_hwperf_init_mutex); + return e; +} + +int sn_topology_open(struct inode *inode, struct file *file) +{ + int e; + struct seq_file *seq; + struct sn_hwperf_object_info *objbuf; + int nobj; + + if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) { + e = seq_open(file, &sn_topology_seq_ops); + seq = file->private_data; + seq->private = objbuf; + } + + return e; +} + +int sn_topology_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + + vfree(seq->private); + return seq_release(inode, file); +} + +int sn_hwperf_get_nearest_node(cnodeid_t node, + cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + int nobj; + struct sn_hwperf_object_info *objbuf; + + if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) { + e = sn_hwperf_get_nearest_node_objdata(objbuf, nobj, + node, near_mem_node, near_cpu_node); + vfree(objbuf); + } + + return e; +} + +static int __devinit sn_hwperf_misc_register_init(void) +{ + int e; + + if (!ia64_platform_is("sn2")) + return 0; + + sn_hwperf_init(); + + /* + * Register a dynamic misc device for hwperf ioctls. Platforms + * supporting hotplug will create /dev/sn_hwperf, else user + * can to look up the minor number in /proc/misc. + */ + if ((e = misc_register(&sn_hwperf_dev)) != 0) { + printk(KERN_ERR "sn_hwperf_misc_register_init: failed to " + "register misc device for \"%s\"\n", sn_hwperf_dev.name); + } + + return e; +} + +device_initcall(sn_hwperf_misc_register_init); /* after misc_init() */ +EXPORT_SYMBOL(sn_hwperf_get_nearest_node); diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c new file mode 100644 index 00000000..7aab87f4 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -0,0 +1,117 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#ifdef CONFIG_PROC_FS +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <asm/uaccess.h> +#include <asm/sn/sn_sal.h> + +static int partition_id_show(struct seq_file *s, void *p) +{ + seq_printf(s, "%d\n", sn_partition_id); + return 0; +} + +static int partition_id_open(struct inode *inode, struct file *file) +{ + return single_open(file, partition_id_show, NULL); +} + +static int system_serial_number_show(struct seq_file *s, void *p) +{ + seq_printf(s, "%s\n", sn_system_serial_number()); + return 0; +} + +static int system_serial_number_open(struct inode *inode, struct file *file) +{ + return single_open(file, system_serial_number_show, NULL); +} + +static int licenseID_show(struct seq_file *s, void *p) +{ + seq_printf(s, "0x%llx\n", sn_partition_serial_number_val()); + return 0; +} + +static int licenseID_open(struct inode *inode, struct file *file) +{ + return single_open(file, licenseID_show, NULL); +} + +static int coherence_id_show(struct seq_file *s, void *p) +{ + seq_printf(s, "%d\n", partition_coherence_id()); + + return 0; +} + +static int coherence_id_open(struct inode *inode, struct file *file) +{ + return single_open(file, coherence_id_show, NULL); +} + +/* /proc/sgi_sn/sn_topology uses seq_file, see sn_hwperf.c */ +extern int sn_topology_open(struct inode *, struct file *); +extern int sn_topology_release(struct inode *, struct file *); + +static const struct file_operations proc_partition_id_fops = { + .open = partition_id_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations proc_system_sn_fops = { + .open = system_serial_number_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations proc_license_id_fops = { + .open = licenseID_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations proc_coherence_id_fops = { + .open = coherence_id_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations proc_sn_topo_fops = { + .open = sn_topology_open, + .read = seq_read, + .llseek = seq_lseek, + .release = sn_topology_release, +}; + +void register_sn_procfs(void) +{ + static struct proc_dir_entry *sgi_proc_dir = NULL; + + BUG_ON(sgi_proc_dir != NULL); + if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL))) + return; + + proc_create("partition_id", 0444, sgi_proc_dir, + &proc_partition_id_fops); + proc_create("system_serial_number", 0444, sgi_proc_dir, + &proc_system_sn_fops); + proc_create("licenseID", 0444, sgi_proc_dir, &proc_license_id_fops); + proc_create("coherence_id", 0444, sgi_proc_dir, + &proc_coherence_id_fops); + proc_create("sn_topology", 0444, sgi_proc_dir, &proc_sn_topo_fops); +} + +#endif /* CONFIG_PROC_FS */ diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c new file mode 100644 index 00000000..abab8f99 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/timer.c @@ -0,0 +1,60 @@ +/* + * linux/arch/ia64/sn/kernel/sn2/timer.c + * + * Copyright (C) 2003 Silicon Graphics, Inc. + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger <davidm@hpl.hp.com>: updated for new timer-interpolation infrastructure + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <linux/interrupt.h> +#include <linux/clocksource.h> + +#include <asm/hw_irq.h> +#include <asm/timex.h> + +#include <asm/sn/leds.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/clksupport.h> + +extern unsigned long sn_rtc_cycles_per_second; + +static cycle_t read_sn2(struct clocksource *cs) +{ + return (cycle_t)readq(RTC_COUNTER_ADDR); +} + +static struct clocksource clocksource_sn2 = { + .name = "sn2_rtc", + .rating = 450, + .read = read_sn2, + .mask = (1LL << 55) - 1, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +/* + * sn udelay uses the RTC instead of the ITC because the ITC is not + * synchronized across all CPUs, and the thread may migrate to another CPU + * if preemption is enabled. + */ +static void +ia64_sn_udelay (unsigned long usecs) +{ + unsigned long start = rtc_time(); + unsigned long end = start + + usecs * sn_rtc_cycles_per_second / 1000000; + + while (time_before((unsigned long)rtc_time(), end)) + cpu_relax(); +} + +void __init sn_timer_init(void) +{ + clocksource_sn2.archdata.fsys_mmio = RTC_COUNTER_ADDR; + clocksource_register_hz(&clocksource_sn2, sn_rtc_cycles_per_second); + + ia64_udelay = &ia64_sn_udelay; +} diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c new file mode 100644 index 00000000..103d6ea8 --- /dev/null +++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c @@ -0,0 +1,60 @@ +/* + * + * + * Copyright (c) 2005, 2006 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/NoticeExplan + */ + +#include <linux/interrupt.h> +#include <asm/sn/pda.h> +#include <asm/sn/leds.h> + +extern void sn_lb_int_war_check(void); +extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs); + +#define SN_LB_INT_WAR_INTERVAL 100 + +void sn_timer_interrupt(int irq, void *dev_id) +{ + /* LED blinking */ + if (!pda->hb_count--) { + pda->hb_count = HZ / 2; + set_led_bits(pda->hb_state ^= + LED_CPU_HEARTBEAT, LED_CPU_HEARTBEAT); + } + + if (is_shub1()) { + if (enable_shub_wars_1_1()) { + /* Bugfix code for SHUB 1.1 */ + if (pda->pio_shub_war_cam_addr) + *pda->pio_shub_war_cam_addr = 0x8000000000000010UL; + } + if (pda->sn_lb_int_war_ticks == 0) + sn_lb_int_war_check(); + pda->sn_lb_int_war_ticks++; + if (pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL) + pda->sn_lb_int_war_ticks = 0; + } +} diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c new file mode 100644 index 00000000..14c17112 --- /dev/null +++ b/arch/ia64/sn/kernel/tiocx.c @@ -0,0 +1,566 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/proc_fs.h> +#include <linux/capability.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <asm/uaccess.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> +#include <asm/sn/io.h> +#include <asm/sn/types.h> +#include <asm/sn/shubio.h> +#include <asm/sn/tiocx.h> +#include <asm/sn/l1.h> +#include <asm/sn/module.h> +#include "tio.h" +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" + +#define CX_DEV_NONE 0 +#define DEVICE_NAME "tiocx" +#define WIDGET_ID 0 +#define TIOCX_DEBUG 0 + +#if TIOCX_DEBUG +#define DBG(fmt...) printk(KERN_ALERT fmt) +#else +#define DBG(fmt...) +#endif + +struct device_attribute dev_attr_cxdev_control; + +/** + * tiocx_match - Try to match driver id list with device. + * @dev: device pointer + * @drv: driver pointer + * + * Returns 1 if match, 0 otherwise. + */ +static int tiocx_match(struct device *dev, struct device_driver *drv) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + struct cx_drv *cx_drv = to_cx_driver(drv); + const struct cx_device_id *ids = cx_drv->id_table; + + if (!ids) + return 0; + + while (ids->part_num) { + if (ids->part_num == cx_dev->cx_id.part_num) + return 1; + ids++; + } + return 0; + +} + +static int tiocx_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return -ENODEV; +} + +static void tiocx_bus_release(struct device *dev) +{ + kfree(to_cx_dev(dev)); +} + +/** + * cx_device_match - Find cx_device in the id table. + * @ids: id table from driver + * @cx_device: part/mfg id for the device + * + */ +static const struct cx_device_id *cx_device_match(const struct cx_device_id + *ids, + struct cx_dev *cx_device) +{ + /* + * NOTES: We may want to check for CX_ANY_ID too. + * Do we want to match against nasid too? + * CX_DEV_NONE == 0, if the driver tries to register for + * part/mfg == 0 we should return no-match (NULL) here. + */ + while (ids->part_num && ids->mfg_num) { + if (ids->part_num == cx_device->cx_id.part_num && + ids->mfg_num == cx_device->cx_id.mfg_num) + return ids; + ids++; + } + + return NULL; +} + +/** + * cx_device_probe - Look for matching device. + * Call driver probe routine if found. + * @cx_driver: driver table (cx_drv struct) from driver + * @cx_device: part/mfg id for the device + */ +static int cx_device_probe(struct device *dev) +{ + const struct cx_device_id *id; + struct cx_drv *cx_drv = to_cx_driver(dev->driver); + struct cx_dev *cx_dev = to_cx_dev(dev); + int error = 0; + + if (!cx_dev->driver && cx_drv->probe) { + id = cx_device_match(cx_drv->id_table, cx_dev); + if (id) { + if ((error = cx_drv->probe(cx_dev, id)) < 0) + return error; + else + cx_dev->driver = cx_drv; + } + } + + return error; +} + +/** + * cx_driver_remove - Remove driver from device struct. + * @dev: device + */ +static int cx_driver_remove(struct device *dev) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + struct cx_drv *cx_drv = cx_dev->driver; + if (cx_drv->remove) + cx_drv->remove(cx_dev); + cx_dev->driver = NULL; + return 0; +} + +struct bus_type tiocx_bus_type = { + .name = "tiocx", + .match = tiocx_match, + .uevent = tiocx_uevent, + .probe = cx_device_probe, + .remove = cx_driver_remove, +}; + +/** + * cx_driver_register - Register the driver. + * @cx_driver: driver table (cx_drv struct) from driver + * + * Called from the driver init routine to register a driver. + * The cx_drv struct contains the driver name, a pointer to + * a table of part/mfg numbers and a pointer to the driver's + * probe/attach routine. + */ +int cx_driver_register(struct cx_drv *cx_driver) +{ + cx_driver->driver.name = cx_driver->name; + cx_driver->driver.bus = &tiocx_bus_type; + + return driver_register(&cx_driver->driver); +} + +/** + * cx_driver_unregister - Unregister the driver. + * @cx_driver: driver table (cx_drv struct) from driver + */ +int cx_driver_unregister(struct cx_drv *cx_driver) +{ + driver_unregister(&cx_driver->driver); + return 0; +} + +/** + * cx_device_register - Register a device. + * @nasid: device's nasid + * @part_num: device's part number + * @mfg_num: device's manufacturer number + * @hubdev: hub info associated with this device + * @bt: board type of the device + * + */ +int +cx_device_register(nasid_t nasid, int part_num, int mfg_num, + struct hubdev_info *hubdev, int bt) +{ + struct cx_dev *cx_dev; + int r; + + cx_dev = kzalloc(sizeof(struct cx_dev), GFP_KERNEL); + DBG("cx_dev= 0x%p\n", cx_dev); + if (cx_dev == NULL) + return -ENOMEM; + + cx_dev->cx_id.part_num = part_num; + cx_dev->cx_id.mfg_num = mfg_num; + cx_dev->cx_id.nasid = nasid; + cx_dev->hubdev = hubdev; + cx_dev->bt = bt; + + cx_dev->dev.parent = NULL; + cx_dev->dev.bus = &tiocx_bus_type; + cx_dev->dev.release = tiocx_bus_release; + dev_set_name(&cx_dev->dev, "%d", cx_dev->cx_id.nasid); + r = device_register(&cx_dev->dev); + if (r) { + kfree(cx_dev); + return r; + } + get_device(&cx_dev->dev); + + device_create_file(&cx_dev->dev, &dev_attr_cxdev_control); + + return 0; +} + +/** + * cx_device_unregister - Unregister a device. + * @cx_dev: part/mfg id for the device + */ +int cx_device_unregister(struct cx_dev *cx_dev) +{ + put_device(&cx_dev->dev); + device_unregister(&cx_dev->dev); + return 0; +} + +/** + * cx_device_reload - Reload the device. + * @nasid: device's nasid + * @part_num: device's part number + * @mfg_num: device's manufacturer number + * + * Remove the device associated with 'nasid' from device list and then + * call device-register with the given part/mfg numbers. + */ +static int cx_device_reload(struct cx_dev *cx_dev) +{ + cx_device_unregister(cx_dev); + return cx_device_register(cx_dev->cx_id.nasid, cx_dev->cx_id.part_num, + cx_dev->cx_id.mfg_num, cx_dev->hubdev, + cx_dev->bt); +} + +static inline u64 tiocx_intr_alloc(nasid_t nasid, int widget, + u64 sn_irq_info, + int req_irq, nasid_t req_nasid, + int req_slice) +{ + struct ia64_sal_retval rv; + rv.status = 0; + rv.v0 = 0; + + ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT, + SAL_INTR_ALLOC, nasid, + widget, sn_irq_info, req_irq, + req_nasid, req_slice); + return rv.status; +} + +static inline void tiocx_intr_free(nasid_t nasid, int widget, + struct sn_irq_info *sn_irq_info) +{ + struct ia64_sal_retval rv; + rv.status = 0; + rv.v0 = 0; + + ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT, + SAL_INTR_FREE, nasid, + widget, sn_irq_info->irq_irq, + sn_irq_info->irq_cookie, 0, 0); +} + +struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq, + nasid_t req_nasid, int slice) +{ + struct sn_irq_info *sn_irq_info; + int status; + int sn_irq_size = sizeof(struct sn_irq_info); + + if ((nasid & 1) == 0) + return NULL; + + sn_irq_info = kzalloc(sn_irq_size, GFP_KERNEL); + if (sn_irq_info == NULL) + return NULL; + + status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq, + req_nasid, slice); + if (status) { + kfree(sn_irq_info); + return NULL; + } else { + return sn_irq_info; + } +} + +void tiocx_irq_free(struct sn_irq_info *sn_irq_info) +{ + u64 bridge = (u64) sn_irq_info->irq_bridge; + nasid_t nasid = NASID_GET(bridge); + int widget; + + if (nasid & 1) { + widget = TIO_SWIN_WIDGETNUM(bridge); + tiocx_intr_free(nasid, widget, sn_irq_info); + kfree(sn_irq_info); + } +} + +u64 tiocx_dma_addr(u64 addr) +{ + return PHYS_TO_TIODMA(addr); +} + +u64 tiocx_swin_base(int nasid) +{ + return TIO_SWIN_BASE(nasid, TIOCX_CORELET); +} + +EXPORT_SYMBOL(cx_driver_register); +EXPORT_SYMBOL(cx_driver_unregister); +EXPORT_SYMBOL(cx_device_register); +EXPORT_SYMBOL(cx_device_unregister); +EXPORT_SYMBOL(tiocx_irq_alloc); +EXPORT_SYMBOL(tiocx_irq_free); +EXPORT_SYMBOL(tiocx_bus_type); +EXPORT_SYMBOL(tiocx_dma_addr); +EXPORT_SYMBOL(tiocx_swin_base); + +static void tio_conveyor_set(nasid_t nasid, int enable_flag) +{ + u64 ice_frz; + u64 disable_cb = (1ull << 61); + + if (!(nasid & 1)) + return; + + ice_frz = REMOTE_HUB_L(nasid, TIO_ICE_FRZ_CFG); + if (enable_flag) { + if (!(ice_frz & disable_cb)) /* already enabled */ + return; + ice_frz &= ~disable_cb; + } else { + if (ice_frz & disable_cb) /* already disabled */ + return; + ice_frz |= disable_cb; + } + DBG(KERN_ALERT "TIO_ICE_FRZ_CFG= 0x%lx\n", ice_frz); + REMOTE_HUB_S(nasid, TIO_ICE_FRZ_CFG, ice_frz); +} + +#define tio_conveyor_enable(nasid) tio_conveyor_set(nasid, 1) +#define tio_conveyor_disable(nasid) tio_conveyor_set(nasid, 0) + +static void tio_corelet_reset(nasid_t nasid, int corelet) +{ + if (!(nasid & 1)) + return; + + REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 1 << corelet); + udelay(2000); + REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 0); + udelay(2000); +} + +static int is_fpga_tio(int nasid, int *bt) +{ + u16 uninitialized_var(ioboard_type); /* GCC be quiet */ + long rc; + + rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard_type); + if (rc) { + printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n", + rc); + return 0; + } + + switch (ioboard_type) { + case L1_BRICKTYPE_SA: + case L1_BRICKTYPE_ATHENA: + case L1_BOARDTYPE_DAYTONA: + *bt = ioboard_type; + return 1; + } + + return 0; +} + +static int bitstream_loaded(nasid_t nasid) +{ + u64 cx_credits; + + cx_credits = REMOTE_HUB_L(nasid, TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3); + cx_credits &= TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK; + DBG("cx_credits= 0x%lx\n", cx_credits); + + return (cx_credits == 0xf) ? 1 : 0; +} + +static int tiocx_reload(struct cx_dev *cx_dev) +{ + int part_num = CX_DEV_NONE; + int mfg_num = CX_DEV_NONE; + nasid_t nasid = cx_dev->cx_id.nasid; + + if (bitstream_loaded(nasid)) { + u64 cx_id; + int rv; + + rv = ia64_sn_sysctl_tio_clock_reset(nasid); + if (rv) { + printk(KERN_ALERT "CX port JTAG reset failed.\n"); + } else { + cx_id = *(volatile u64 *) + (TIO_SWIN_BASE(nasid, TIOCX_CORELET) + + WIDGET_ID); + part_num = XWIDGET_PART_NUM(cx_id); + mfg_num = XWIDGET_MFG_NUM(cx_id); + DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num); + /* just ignore it if it's a CE */ + if (part_num == TIO_CE_ASIC_PARTNUM) + return 0; + } + } + + cx_dev->cx_id.part_num = part_num; + cx_dev->cx_id.mfg_num = mfg_num; + + /* + * Delete old device and register the new one. It's ok if + * part_num/mfg_num == CX_DEV_NONE. We want to register + * devices in the table even if a bitstream isn't loaded. + * That allows use to see that a bitstream isn't loaded via + * TIOCX_IOCTL_DEV_LIST. + */ + return cx_device_reload(cx_dev); +} + +static ssize_t show_cxdev_control(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + + return sprintf(buf, "0x%x 0x%x 0x%x 0x%x\n", + cx_dev->cx_id.nasid, + cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num, + cx_dev->bt); +} + +static ssize_t store_cxdev_control(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) +{ + int n; + struct cx_dev *cx_dev = to_cx_dev(dev); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (count <= 0) + return 0; + + n = simple_strtoul(buf, NULL, 0); + + switch (n) { + case 1: + tio_corelet_reset(cx_dev->cx_id.nasid, TIOCX_CORELET); + tiocx_reload(cx_dev); + break; + case 2: + tiocx_reload(cx_dev); + break; + case 3: + tio_corelet_reset(cx_dev->cx_id.nasid, TIOCX_CORELET); + break; + default: + break; + } + + return count; +} + +DEVICE_ATTR(cxdev_control, 0644, show_cxdev_control, store_cxdev_control); + +static int __init tiocx_init(void) +{ + cnodeid_t cnodeid; + int found_tiocx_device = 0; + + if (!ia64_platform_is("sn2")) + return 0; + + bus_register(&tiocx_bus_type); + + for (cnodeid = 0; cnodeid < num_cnodes; cnodeid++) { + nasid_t nasid; + int bt; + + nasid = cnodeid_to_nasid(cnodeid); + + if ((nasid & 0x1) && is_fpga_tio(nasid, &bt)) { + struct hubdev_info *hubdev; + struct xwidget_info *widgetp; + + DBG("Found TIO at nasid 0x%x\n", nasid); + + hubdev = + (struct hubdev_info *)(NODEPDA(cnodeid)->pdinfo); + + widgetp = &hubdev->hdi_xwidget_info[TIOCX_CORELET]; + + /* The CE hangs off of the CX port but is not an FPGA */ + if (widgetp->xwi_hwid.part_num == TIO_CE_ASIC_PARTNUM) + continue; + + tio_corelet_reset(nasid, TIOCX_CORELET); + tio_conveyor_enable(nasid); + + if (cx_device_register + (nasid, widgetp->xwi_hwid.part_num, + widgetp->xwi_hwid.mfg_num, hubdev, bt) < 0) + return -ENXIO; + else + found_tiocx_device++; + } + } + + /* It's ok if we find zero devices. */ + DBG("found_tiocx_device= %d\n", found_tiocx_device); + + return 0; +} + +static int cx_remove_device(struct device * dev, void * data) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + device_remove_file(dev, &dev_attr_cxdev_control); + cx_device_unregister(cx_dev); + return 0; +} + +static void __exit tiocx_exit(void) +{ + DBG("tiocx_exit\n"); + + /* + * Unregister devices. + */ + bus_for_each_dev(&tiocx_bus_type, NULL, NULL, cx_remove_device); + bus_unregister(&tiocx_bus_type); +} + +fs_initcall(tiocx_init); +module_exit(tiocx_exit); + +/************************************************************************ + * Module licensing and description + ************************************************************************/ +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Bruce Losure <blosure@sgi.com>"); +MODULE_DESCRIPTION("TIOCX module"); +MODULE_SUPPORTED_DEVICE(DEVICE_NAME); |