diff options
Diffstat (limited to 'gcell/src/lib/runtime/spu')
-rw-r--r-- | gcell/src/lib/runtime/spu/gc_delay.c | 58 | ||||
-rw-r--r-- | gcell/src/lib/runtime/spu/gc_logging.c | 77 | ||||
-rw-r--r-- | gcell/src/lib/runtime/spu/gc_main.c | 682 | ||||
-rw-r--r-- | gcell/src/lib/runtime/spu/gc_spu_config.h | 33 | ||||
-rw-r--r-- | gcell/src/lib/runtime/spu/gc_spu_jd_queue.c | 103 | ||||
-rw-r--r-- | gcell/src/lib/runtime/spu/gcell_qa.c | 109 | ||||
-rw-r--r-- | gcell/src/lib/runtime/spu/spu_buffers.c | 35 | ||||
-rw-r--r-- | gcell/src/lib/runtime/spu/spu_buffers.h | 32 |
8 files changed, 1129 insertions, 0 deletions
diff --git a/gcell/src/lib/runtime/spu/gc_delay.c b/gcell/src/lib/runtime/spu/gc_delay.c new file mode 100644 index 000000000..21ee587f8 --- /dev/null +++ b/gcell/src/lib/runtime/spu/gc_delay.c @@ -0,0 +1,58 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "gc_delay.h" +#include <compiler.h> + +inline static void +gc_udelay_1us(void) +{ + unsigned int i = 158; + + do { // 20 clocks per iteration of the loop + asm ("nop $127; nop $127; nop $127; nop $127; nop $127"); + asm ("nop $127; nop $127; nop $127; nop $127; nop $127"); + asm ("nop $127; nop $127; nop $127; nop $127; nop $127"); + asm ("nop $127; nop $127"); + } while(--i != 0); +} + +void +gc_udelay(unsigned int usecs) +{ + unsigned int i; + for (i = 0; i < usecs; i++) + gc_udelay_1us(); +} + +void +gc_cdelay(unsigned int cpu_cycles) +{ + if (cpu_cycles < 40) // roughly the amount of overhead + return; + + cpu_cycles >>= 2; // about 4 cycles / loop + + while (cpu_cycles-- != 0){ + asm ("nop $127"); // keeps compiler from removing the loop + } +} + diff --git a/gcell/src/lib/runtime/spu/gc_logging.c b/gcell/src/lib/runtime/spu/gc_logging.c new file mode 100644 index 000000000..65a537504 --- /dev/null +++ b/gcell/src/lib/runtime/spu/gc_logging.c @@ -0,0 +1,77 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <gc_logging.h> +#include <spu_intrinsics.h> +#include <spu_mfcio.h> +#include <gc_spu_args.h> + +static gc_eaddr_t log_base_ea; // base address of log entries in EA +static uint32_t log_idx_mask; // nentries - 1 +static uint32_t log_idx; // current log entry index +static uint32_t log_seqno; + +static int log_tags; // two consecutive tags +static int tmp_buffer_busy; // bitmask: buffer busy state +static int tmp_buffer_idx; // 0 or 1 +static gc_log_entry_t tmp_buffer[2]; + +void +_gc_log_init(gc_log_t info) +{ + spu_write_decrementer(~0); + + log_base_ea = info.base; + log_idx_mask = info.nentries - 1; + log_idx = 0; + log_seqno = 0; + + log_tags = mfc_multi_tag_reserve(2); + tmp_buffer_busy = 0; + tmp_buffer_idx = 0; + + gc_log_write0(GCL_SS_SYS, 0); +} + +void +_gc_log_write(gc_log_entry_t entry) +{ + if (log_base_ea == 0) + return; + + entry.seqno = log_seqno++; + entry.timestamp = spu_read_decrementer(); + + if (tmp_buffer_busy & (1 << tmp_buffer_idx)){ + mfc_write_tag_mask(1 << (log_tags + tmp_buffer_idx)); + mfc_read_tag_status_all(); + } + + tmp_buffer[tmp_buffer_idx] = entry; // save local copy + + mfc_put(&tmp_buffer[tmp_buffer_idx], + log_base_ea + log_idx * sizeof(entry), sizeof(entry), + log_tags + tmp_buffer_idx, 0, 0); + + tmp_buffer_busy |= (1 << tmp_buffer_idx); + tmp_buffer_idx ^= 0x1; + log_idx = (log_idx + 1) & log_idx_mask; +} diff --git a/gcell/src/lib/runtime/spu/gc_main.c b/gcell/src/lib/runtime/spu/gc_main.c new file mode 100644 index 000000000..96381fa66 --- /dev/null +++ b/gcell/src/lib/runtime/spu/gc_main.c @@ -0,0 +1,682 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007,2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#define ENABLE_GC_LOGGING // define to enable logging + +#include <spu_intrinsics.h> +#include <spu_mfcio.h> +#include <sync_utils.h> +#include "gc_spu_config.h" +#include "gc_spu_args.h" +#include "gc_job_desc.h" +#include "gc_mbox.h" +#include "gc_jd_queue.h" +#include "gc_delay.h" +#include "gc_declare_proc.h" +#include "spu_buffers.h" +#include <string.h> +#include <assert.h> +#include <stdio.h> + + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +//! round x down to p2 boundary (p2 must be a power-of-2) +#define ROUND_DN(x, p2) ((x) & ~((p2)-1)) + +//! 
round x up to p2 boundary (p2 must be a power-of-2) +#define ROUND_UP(x, p2) (((x)+((p2)-1)) & ~((p2)-1)) + + +#define USE_LLR_LOST_EVENT 0 // define to 0 or 1 + +int gc_sys_tag; // tag for misc DMA operations +static gc_spu_args_t spu_args; + +static struct gc_proc_def *gc_proc_def; // procedure entry points + +// ------------------------------------------------------------------------ + +// state for DMA'ing arguments in and out + +static int get_tag; // 1 tag for job arg gets +static int put_tags; // 2 tags for job arg puts + +static int pb_idx = 0; // current put buffer index (0 or 1) + +// bitmask (bit per put buffer): bit is set if DMA is started but not complete +static int put_in_progress = 0; +#define PBI_MASK(_pbi_) (1 << (_pbi_)) + +// ------------------------------------------------------------------------ + +// our working copy of the completion info +static gc_comp_info_t comp_info = { + .in_use = 1, + .ncomplete = 0 +}; + +static int ci_idx = 0; // index of current comp_info +static int ci_tags; // two consecutive dma tags + +// ------------------------------------------------------------------------ + +/* + * Wait until EA copy of comp_info[idx].in_use is 0 + */ +static void +wait_for_ppe_to_be_done_with_comp_info(int idx) +{ + char _tmp[256]; + char *buf = (char *) ALIGN(_tmp, 128); // get cache-aligned buffer + gc_comp_info_t *p = (gc_comp_info_t *) buf; + + assert(sizeof(gc_comp_info_t) == 128); + + do { + mfc_get(buf, spu_args.comp_info[idx], 128, gc_sys_tag, 0, 0); + mfc_write_tag_mask(1 << gc_sys_tag); + mfc_read_tag_status_all(); + if (p->in_use == 0) + return; + + gc_udelay(5); + + } while (1); +} + +static void +flush_completion_info(void) +{ + // events: 0x3X + + static int total_complete = 0; + + if (comp_info.ncomplete == 0) + return; + + // ensure that PPE is done with the buffer we're about to overwrite + wait_for_ppe_to_be_done_with_comp_info(ci_idx); + + // dma the comp_info out to PPE + int tag = ci_tags + ci_idx; + 
mfc_put(&comp_info, spu_args.comp_info[ci_idx], sizeof(gc_comp_info_t), tag, 0, 0); + + // we need to wait for the completion info to finish, as well as + // any EA argument puts. + + int tag_mask = 1 << tag; // the comp_info tag + if (put_in_progress & PBI_MASK(0)) + tag_mask |= (1 << (put_tags + 0)); + if (put_in_progress & PBI_MASK(1)) + tag_mask |= (1 << (put_tags + 1)); + + gc_log_write2(GCL_SS_SYS, 0x30, put_in_progress, tag_mask); + + mfc_write_tag_mask(tag_mask); // the tags we're interested in + mfc_read_tag_status_all(); // wait for DMA to complete + put_in_progress = 0; // mark them all complete + + total_complete += comp_info.ncomplete; + gc_log_write4(GCL_SS_SYS, 0x31, + put_in_progress, ci_idx, comp_info.ncomplete, total_complete); + + // send PPE a message + spu_writech(SPU_WrOutIntrMbox, MK_MBOX_MSG(OP_JOBS_DONE, ci_idx)); + + ci_idx ^= 0x1; // switch buffers + comp_info.in_use = 1; + comp_info.ncomplete = 0; +} + +// ------------------------------------------------------------------------ + +static unsigned int backoff; // current backoff value in clock cycles +static unsigned int _backoff_start; +static unsigned int _backoff_cap; + +/* + * For 3.2 GHz SPE + * + * 12 4095 cycles 1.3 us + * 13 8191 cycles 2.6 us + * 14 16383 cycles 5.1 us + * 15 32767 cycles 10.2 us + * 16 20.4 us + * 17 40.8 us + * 18 81.9 us + * 19 163.8 us + * 20 327.7 us + * 21 655.4 us + */ +static unsigned char log2_backoff_start[16] = { +// 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 +// ------------------------------------------------------------- + 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16 +}; + +static unsigned char log2_backoff_cap[16] = { +// 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 +// ------------------------------------------------------------- + 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 21, 21 +}; + +static void +backoff_init(void) +{ + _backoff_cap = (1 << (log2_backoff_cap[(spu_args.nspus - 1) & 0xf])) - 1; + _backoff_start = (1 << 
(log2_backoff_start[(spu_args.nspus - 1) & 0xf])) - 1; + + backoff = _backoff_start; +} + +static void +backoff_reset(void) +{ + backoff = _backoff_start; +} + +static void +backoff_delay(void) +{ + gc_cdelay(backoff); + + // capped exponential backoff + backoff = ((backoff << 1) + 1) & _backoff_cap; +} + +// ------------------------------------------------------------------------ + +static inline unsigned int +make_mask(int nbits) +{ + return ~(~0 << nbits); +} + +static unsigned int dc_work; +static int dc_put_tag; +static unsigned char *dc_ls_base; +static gc_eaddr_t dc_ea_base; + +// divide and conquer +static void +d_and_c(unsigned int offset, unsigned int len) +{ + unsigned int mask = make_mask(len) << offset; + unsigned int t = mask & dc_work; + if (t == 0) // nothing to do + return; + if (t == mask){ // got a match, generate dma + mfc_put(dc_ls_base + offset, dc_ea_base + offset, len, dc_put_tag, 0, 0); + } + else { // bisect + len >>= 1; + d_and_c(offset, len); + d_and_c(offset + len, len); + } +} + +// Handle the nasty case of a dma xfer that's less than 16 bytes long. 
+// len is guaranteed to be in [1, 15] + +static void +handle_slow_and_tedious_dma(gc_eaddr_t ea, unsigned char *ls, + unsigned int len, int put_tag) +{ + // Set up for divide and conquer + unsigned int alignment = ((uintptr_t) ls) & 0x7; + dc_work = make_mask(len) << alignment; + dc_ls_base = (unsigned char *) ROUND_DN((uintptr_t) ls, 8); + dc_ea_base = ROUND_DN(ea, (gc_eaddr_t) 8); + dc_put_tag = put_tag; + + d_and_c( 0, 8); + d_and_c( 8, 8); + d_and_c(16, 8); +} + + +static void +process_job(gc_eaddr_t jd_ea, gc_job_desc_t *jd) +{ + // events: 0x2X + + jd->status = JS_OK; // assume success + + if (jd->proc_id >= spu_args.nproc_defs) + jd->status = JS_UNKNOWN_PROC; + + else { + + if (jd->eaa.nargs == 0) + (*gc_proc_def[jd->proc_id].proc)(&jd->input, &jd->output, &jd->eaa); + + else { // handle EA args that must be DMA'd in/out + + gc_job_ea_args_t *eaa = &jd->eaa; + + int NELMS = + MAX(MAX_ARGS_EA, + (GC_SPU_BUFSIZE + MFC_MAX_DMA_SIZE - 1) / MFC_MAX_DMA_SIZE); + + mfc_list_element_t dma_get_list[NELMS]; + //mfc_list_element_t dma_put_list[NELMS]; + + memset(dma_get_list, 0, sizeof(dma_get_list)); + //memset(dma_put_list, 0, sizeof(dma_put_list)); + + int gli = 0; // get list index + //int pli = 0; // put list index + + unsigned char *get_base = _gci_getbuf[0]; + unsigned char *get_t = get_base; + unsigned int total_get_dma_len = 0; + + unsigned char *put_base = _gci_putbuf[pb_idx]; + unsigned char *put_t = put_base; + unsigned int total_put_alloc = 0; + int put_tag = put_tags + pb_idx; + + // Do we have any "put" args? 
If so ensure that previous + // dma from this buffer is complete + + gc_log_write2(GCL_SS_SYS, 0x24, put_in_progress, jd->sys.direction_union); + + if ((jd->sys.direction_union & GCJD_DMA_PUT) + && (put_in_progress & PBI_MASK(pb_idx))){ + + gc_log_write2(GCL_SS_SYS, 0x25, put_in_progress, 1 << put_tag); + + mfc_write_tag_mask(1 << put_tag); // the tag we're interested in + mfc_read_tag_status_all(); // wait for DMA to complete + put_in_progress &= ~(PBI_MASK(pb_idx)); + + gc_log_write1(GCL_SS_SYS, 0x26, put_in_progress); + } + + + // for now, all EA's must have the same high 32-bits + gc_eaddr_t common_ea = eaa->arg[0].ea_addr; + + + // assign LS addresses for buffers + + for (unsigned int i = 0; i < eaa->nargs; i++){ + + gc_eaddr_t ea_base = 0; + unsigned char *ls_base; + int offset; + unsigned int dma_len; + + if (eaa->arg[i].direction == GCJD_DMA_GET){ + ea_base = ROUND_DN(eaa->arg[i].ea_addr, (gc_eaddr_t) CACHE_LINE_SIZE); + offset = eaa->arg[i].ea_addr & (CACHE_LINE_SIZE-1); + dma_len = ROUND_UP(eaa->arg[i].get_size + offset, CACHE_LINE_SIZE); + total_get_dma_len += dma_len; + + if (total_get_dma_len > GC_SPU_BUFSIZE){ + jd->status = JS_ARGS_TOO_LONG; + goto wrap_up; + } + + ls_base = get_t; + get_t += dma_len; + eaa->arg[i].ls_addr = ls_base + offset; + + if (0){ + assert((mfc_ea2l(eaa->arg[i].ea_addr) & 0x7f) == ((intptr_t)eaa->arg[i].ls_addr & 0x7f)); + assert((ea_base & 0x7f) == 0); + assert(((intptr_t)ls_base & 0x7f) == 0); + assert((dma_len & 0x7f) == 0); + assert((eaa->arg[i].get_size <= dma_len) + && dma_len <= (eaa->arg[i].get_size + offset + CACHE_LINE_SIZE - 1)); + } + + // add to dma get list + // FIXME (someday) the dma list is where the JS_BAD_EAH limitation comes from + + while (dma_len != 0){ + int n = MIN(dma_len, MFC_MAX_DMA_SIZE); + dma_get_list[gli].size = n; + dma_get_list[gli].eal = mfc_ea2l(ea_base); + dma_len -= n; + ea_base += n; + gli++; + } + } + + else if (eaa->arg[i].direction == GCJD_DMA_PUT){ + // + // This case is trickier
than the GET case since we can't + // write outside of the bounds of the user provided buffer. + // We still align the buffers to 128-bytes for good performance + // in the middle portion of the xfers. + // + ea_base = ROUND_DN(eaa->arg[i].ea_addr, (gc_eaddr_t) CACHE_LINE_SIZE); + offset = eaa->arg[i].ea_addr & (CACHE_LINE_SIZE-1); + + uint32_t ls_alloc_len = + ROUND_UP(eaa->arg[i].put_size + offset, CACHE_LINE_SIZE); + + total_put_alloc += ls_alloc_len; + + if (total_put_alloc > GC_SPU_BUFSIZE){ + jd->status = JS_ARGS_TOO_LONG; + goto wrap_up; + } + + ls_base = put_t; + put_t += ls_alloc_len; + eaa->arg[i].ls_addr = ls_base + offset; + + if (1){ + assert((mfc_ea2l(eaa->arg[i].ea_addr) & 0x7f) + == ((intptr_t)eaa->arg[i].ls_addr & 0x7f)); + assert((ea_base & 0x7f) == 0); + assert(((intptr_t)ls_base & 0x7f) == 0); + } + } + + else + assert(0); + } + + // fire off the dma to fetch the args and wait for it to complete + mfc_getl(get_base, common_ea, dma_get_list, gli*sizeof(dma_get_list[0]), get_tag, 0, 0); + mfc_write_tag_mask(1 << get_tag); // the tag we're interested in + mfc_read_tag_status_all(); // wait for DMA to complete + + // do the work + (*gc_proc_def[jd->proc_id].proc)(&jd->input, &jd->output, &jd->eaa); + + + // Do we have any "put" args? If so copy them out + if (jd->sys.direction_union & GCJD_DMA_PUT){ + + // Do the copy out using single DMA xfers. The LS ranges + // aren't generally contiguous. 
+ + bool started_dma = false; + + for (unsigned int i = 0; i < eaa->nargs; i++){ + if (eaa->arg[i].direction == GCJD_DMA_PUT && eaa->arg[i].put_size != 0){ + + started_dma = true; + + gc_eaddr_t ea; + unsigned char *ls; + unsigned int len; + + ea = eaa->arg[i].ea_addr; + ls = (unsigned char *) eaa->arg[i].ls_addr; + len = eaa->arg[i].put_size; + + if (len < 16) + handle_slow_and_tedious_dma(ea, ls, len, put_tag); + + else { + if ((ea & 0xf) != 0){ + + // printf("1: ea = 0x%x len = %5d\n", (int) ea, len); + + // handle the "pre-multiple-of-16" portion + // do 1, 2, 4, or 8 byte xfers as required + + if (ea & 0x1){ // do a 1-byte xfer + mfc_put(ls, ea, 1, put_tag, 0, 0); + ea += 1; + ls += 1; + len -= 1; + } + if (ea & 0x2){ // do a 2-byte xfer + mfc_put(ls, ea, 2, put_tag, 0, 0); + ea += 2; + ls += 2; + len -= 2; + } + if (ea & 0x4){ // do a 4-byte xfer + mfc_put(ls, ea, 4, put_tag, 0, 0); + ea += 4; + ls += 4; + len -= 4; + } + if (ea & 0x8){ // do an 8-byte xfer + mfc_put(ls, ea, 8, put_tag, 0, 0); + ea += 8; + ls += 8; + len -= 8; + } + } + + if (1){ + // printf("2: ea = 0x%x len = %5d\n", (int) ea, len); + assert((ea & 0xf) == 0); + assert((((intptr_t) ls) & 0xf) == 0); + } + + // handle the "multiple-of-16" portion + + int aligned_len = ROUND_DN(len, 16); + len = len & (16 - 1); + + while (aligned_len != 0){ + int dma_len = MIN(aligned_len, MFC_MAX_DMA_SIZE); + mfc_put(ls, ea, dma_len, put_tag, 0, 0); + ea += dma_len; + ls += dma_len; + aligned_len -= dma_len; + } + + if (1){ + // printf("3: ea = 0x%x len = %5d\n", (int)ea, len); + assert((ea & 0xf) == 0); + assert((((intptr_t) ls) & 0xf) == 0); + } + + // handle "post-multiple-of-16" portion + + if (len != 0){ + + if (len >= 8){ // do an 8-byte xfer + mfc_put(ls, ea, 8, put_tag, 0, 0); + ea += 8; + ls += 8; + len -= 8; + } + if (len >= 4){ // do a 4-byte xfer + mfc_put(ls, ea, 4, put_tag, 0, 0); + ea += 4; + ls += 4; + len -= 4; + } + if (len >= 2){ // do a 2-byte xfer + mfc_put(ls, ea, 2, put_tag, 0, 0); + ea 
+= 2; + ls += 2; + len -= 2; + } + if (len >= 1){ // do a 1-byte xfer + mfc_put(ls, ea, 1, put_tag, 0, 0); + ea += 1; + ls += 1; + len -= 1; + } + if (1) + assert(len == 0); + } + } + } + } + if (started_dma){ + put_in_progress |= PBI_MASK(pb_idx); // note it's running + gc_log_write2(GCL_SS_SYS, 0x27, put_in_progress, pb_idx); + pb_idx ^= 1; // toggle current buffer + } + } + } + } + + wrap_up:; // semicolon creates null statement for C99 compliance + + // Copy job descriptor back out to EA. + // (The dma will be waited on in flush_completion_info) + int tag = ci_tags + ci_idx; // use the current completion tag + mfc_put(jd, jd_ea, sizeof(*jd), tag, 0, 0); + + mfc_sync(tag); // FIXME this makes it work, but is expensive + + + // Tell PPE we're done with the job. + // + // We queue these up until we run out of room, or until we can send + // the info to the PPE w/o blocking. The blocking check is in + // main_loop + + comp_info.job_id[comp_info.ncomplete++] = jd->sys.job_id; + + if (comp_info.ncomplete == GC_CI_NJOBS){ + gc_log_write0(GCL_SS_SYS, 0x28); + flush_completion_info(); + } +} + +static void +main_loop(void) +{ + // events: 0x1X + + static gc_job_desc_t jd; // static gets us proper alignment + gc_eaddr_t jd_ea; + int total_jobs = 0; + + // setup events + spu_writech(SPU_WrEventMask, MFC_LLR_LOST_EVENT); + gc_jd_queue_getllar(spu_args.queue); // get a line reservation on the queue + + while (1){ + +#if (USE_LLR_LOST_EVENT) + + if (unlikely(spu_readchcnt(SPU_RdEventStat))){ + // + // execute standard event handling prologue + // + int status = spu_readch(SPU_RdEventStat); + int mask = spu_readch(SPU_RdEventMask); + spu_writech(SPU_WrEventMask, mask & ~status); // disable active events + spu_writech(SPU_WrEventAck, status); // ack active events + + // execute per-event actions + + if (status & MFC_LLR_LOST_EVENT){ + // + // We've lost a line reservation. This is most likely caused + // by somebody doing something to the queue. 
Go look and see + // if there's anything for us. + // + if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, &jd)) + process_job(jd_ea, &jd); + + gc_jd_queue_getllar(spu_args.queue); // get a new reservation + } + + // + // execute standard event handling epilogue + // + spu_writech(SPU_WrEventMask, mask); // restore event mask + } + +#else + + // try to get a job from the job queue + if (gc_jd_queue_dequeue(spu_args.queue, &jd_ea, &jd)){ + total_jobs++; + gc_log_write2(GCL_SS_SYS, 0x10, jd.sys.job_id, total_jobs); + + process_job(jd_ea, &jd); + + gc_log_write2(GCL_SS_SYS, 0x11, jd.sys.job_id, total_jobs); + backoff_reset(); + } + else + backoff_delay(); + +#endif + + // any msgs for us? + + if (unlikely(spu_readchcnt(SPU_RdInMbox))){ + int msg = spu_readch(SPU_RdInMbox); + // printf("spu[%d] mbox_msg: 0x%08x\n", spu_args.spu_idx, msg); + if (MBOX_MSG_OP(msg) == OP_EXIT){ + flush_completion_info(); + return; + } + if (MBOX_MSG_OP(msg) == OP_GET_SPU_BUFSIZE){ + spu_writech(SPU_WrOutIntrMbox, MK_MBOX_MSG(OP_SPU_BUFSIZE, GC_SPU_BUFSIZE_BASE)); + } + } + + // If we've got job completion info for the PPE and we can send a + // message without blocking, do it. 
+ + if (comp_info.ncomplete != 0 && spu_readchcnt(SPU_WrOutIntrMbox) != 0){ + gc_log_write0(GCL_SS_SYS, 0x12); + flush_completion_info(); + } + } +} + + +int +main(unsigned long long spe_id __attribute__((unused)), + unsigned long long argp, + unsigned long long envp __attribute__((unused))) +{ + gc_sys_tag = mfc_tag_reserve(); // allocate a tag for our misc DMA operations + get_tag = mfc_tag_reserve(); + ci_tags = mfc_multi_tag_reserve(2); + put_tags = mfc_multi_tag_reserve(2); + +#if 0 + printf("gc_sys_tag = %d\n", gc_sys_tag); + printf("get_tag = %d\n", get_tag); + printf("ci_tags = %d\n", ci_tags); + printf("put_tags = %d\n", put_tags); +#endif + + // dma the args in + mfc_get(&spu_args, argp, sizeof(spu_args), gc_sys_tag, 0, 0); + mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in + mfc_read_tag_status_all(); // wait for DMA to complete + + // initialize pointer to procedure entry table + gc_proc_def = (gc_proc_def_t *) spu_args.proc_def_ls_addr; + + // initialize logging + _gc_log_init(spu_args.log); + + backoff_init(); // initialize backoff parameters + + main_loop(); + return 0; +} diff --git a/gcell/src/lib/runtime/spu/gc_spu_config.h b/gcell/src/lib/runtime/spu/gc_spu_config.h new file mode 100644 index 000000000..997645e68 --- /dev/null +++ b/gcell/src/lib/runtime/spu/gc_spu_config.h @@ -0,0 +1,33 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_GC_SPU_CONFIG_H +#define INCLUDED_GC_SPU_CONFIG_H + +#include <gc_job_desc.h> + +#define CACHE_LINE_SIZE 128 // in bytes +#define GC_SPU_BUFSIZE_BASE (32 * 1024) // must be multiple of CACHE_LINE_SIZE +#define GC_SPU_BUFSIZE (GC_SPU_BUFSIZE_BASE + MAX_ARGS_EA * CACHE_LINE_SIZE) + +#define NGETBUFS 1 // single buffer job arg gets +#define NPUTBUFS 2 // double buffer job arg puts + +#endif /* INCLUDED_GC_SPU_CONFIG_H */ diff --git a/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c new file mode 100644 index 000000000..ba4a1b9d2 --- /dev/null +++ b/gcell/src/lib/runtime/spu/gc_spu_jd_queue.c @@ -0,0 +1,103 @@ +/* -*- c++ -*- */ +/* + * Copyright 2007 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "gc_jd_queue.h" +#include "mutex_lock.h" +#include "mutex_unlock.h" + +extern int gc_sys_tag; + +bool +gc_jd_queue_dequeue(gc_eaddr_t q, gc_eaddr_t *item_ea, gc_job_desc_t *item) +{ + gc_jd_queue_t local_q; + + // Before acquiring the lock, see if it's possible that there's + // something in the queue. Checking in this way makes it easier + // for the PPE to insert things, since we're not contending for + // the lock unless there is something in the queue. + + // copy in the queue structure + mfc_get(&local_q, q, sizeof(gc_jd_queue_t), gc_sys_tag, 0, 0); + mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in + mfc_read_tag_status_all(); // wait for DMA to complete + + if (local_q.head == 0){ // empty + return false; + } + + // When we peeked, head was non-zero. Now grab the + // lock and do it for real. + + _mutex_lock(q + offsetof(gc_jd_queue_t, mutex)); + + // copy in the queue structure + mfc_get(&local_q, q, sizeof(gc_jd_queue_t), gc_sys_tag, 0, 0); + mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in + mfc_read_tag_status_all(); // wait for DMA to complete + + if (local_q.head == 0){ // empty + _mutex_unlock(q + offsetof(gc_jd_queue_t, mutex)); + return false; + } + + // copy in job descriptor at head of queue + *item_ea = local_q.head; + mfc_get(item, local_q.head, sizeof(gc_job_desc_t), gc_sys_tag, 0, 0); + mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in + mfc_read_tag_status_all(); // wait for DMA to complete + + local_q.head = item->sys.next; + item->sys.next = 0; + if (local_q.head == 0) // now empty? 
+ local_q.tail = 0; + + + // copy the queue structure back out + mfc_put(&local_q, q, sizeof(gc_jd_queue_t), gc_sys_tag, 0, 0); + mfc_write_tag_mask(1 << gc_sys_tag); // the tag we're interested in + mfc_read_tag_status_all(); // wait for DMA to complete + + // Q: FIXME do we need to order stores in EA or can we just clear the + // local copy of the mutex above and blast it out, removing the need + // for this explicit unlock? + // + // A: Manual says it's better to use an atomic op rather than + // a normal DMA, and that a putlluc is better than a putllc if + // you can use it. + + _mutex_unlock(q + offsetof(gc_jd_queue_t, mutex)); + return true; +} + + +void +gc_jd_queue_getllar(gc_eaddr_t q) +{ + // get reservation that includes the tail of the queue + gc_eaddr_t tail = q + offsetof(gc_jd_queue_t, tail); + + char _tmp[256]; + char *buf = (char *) ALIGN(_tmp, 128); // get cache-aligned buffer + + mfc_getllar(buf, ALIGN128_EA(tail), 0, 0); + spu_readch(MFC_RdAtomicStat); +} diff --git a/gcell/src/lib/runtime/spu/gcell_qa.c b/gcell/src/lib/runtime/spu/gcell_qa.c new file mode 100644 index 000000000..51bf38a6a --- /dev/null +++ b/gcell/src/lib/runtime/spu/gcell_qa.c @@ -0,0 +1,109 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <gc_delay.h> +#include <gc_declare_proc.h> +#include <string.h> + + +#define _UNUSED __attribute__((unused)) + +// FIXME move these out of here; only for QA usage + +static void +qa_nop(const gc_job_direct_args_t *input _UNUSED, + gc_job_direct_args_t *output _UNUSED, + const gc_job_ea_args_t *eaa _UNUSED) +{ +} + +GC_DECLARE_PROC(qa_nop, "qa_nop"); + +static int +sum_shorts(short *p, int nshorts) +{ + int total = 0; + for (int i = 0; i < nshorts; i++) + total += p[i]; + + return total; +} + +static void +qa_sum_shorts(const gc_job_direct_args_t *input _UNUSED, + gc_job_direct_args_t *output, + const gc_job_ea_args_t *eaa) +{ + for (unsigned int i = 0; i < eaa->nargs; i++){ + short *p = eaa->arg[i].ls_addr; + int n = eaa->arg[i].get_size / sizeof(short); + output->arg[i].s32 = sum_shorts(p, n); + //printf("qa_sum_shorts(%p, %d) = %d\n", p, n, output->arg[i].s32); + } +} + +GC_DECLARE_PROC(qa_sum_shorts, "qa_sum_shorts"); + +static void +write_seq(unsigned char *p, int nbytes, int counter) +{ + for (int i = 0; i < nbytes; i++) + p[i] = counter++; +} + +static void +qa_put_seq(const gc_job_direct_args_t *input, + gc_job_direct_args_t *output _UNUSED, + const gc_job_ea_args_t *eaa) +{ + int counter = input->arg[0].s32; + + for (unsigned int i = 0; i < eaa->nargs; i++){ + unsigned char *p = eaa->arg[i].ls_addr; + int n = eaa->arg[i].put_size; + write_seq(p, n, counter); + counter += n; + } +} + +GC_DECLARE_PROC(qa_put_seq, "qa_put_seq"); + +static void +qa_copy(const gc_job_direct_args_t *input _UNUSED, + gc_job_direct_args_t *output, + const gc_job_ea_args_t *eaa) +{ + if (eaa->nargs != 2 + || eaa->arg[0].direction != GCJD_DMA_PUT + || eaa->arg[1].direction != GCJD_DMA_GET){ + output->arg[0].s32 = -1; + return; + } + + 
output->arg[0].s32 = 0; + unsigned n = eaa->arg[0].put_size; + if (eaa->arg[1].get_size < n) + n = eaa->arg[1].get_size; + + memcpy(eaa->arg[0].ls_addr, eaa->arg[1].ls_addr, n); +} + +GC_DECLARE_PROC(qa_copy, "qa_copy"); diff --git a/gcell/src/lib/runtime/spu/spu_buffers.c b/gcell/src/lib/runtime/spu/spu_buffers.c new file mode 100644 index 000000000..58b405931 --- /dev/null +++ b/gcell/src/lib/runtime/spu/spu_buffers.c @@ -0,0 +1,35 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <spu_buffers.h> +#include <compiler.h> + +static unsigned char _getbuf[NGETBUFS][GC_SPU_BUFSIZE] _AL128; +static unsigned char _putbuf[NPUTBUFS][GC_SPU_BUFSIZE] _AL128; + +unsigned char *_gci_getbuf[NGETBUFS] = { + _getbuf[0] +}; + +unsigned char *_gci_putbuf[NPUTBUFS] = { + _putbuf[0], + _putbuf[1] +}; diff --git a/gcell/src/lib/runtime/spu/spu_buffers.h b/gcell/src/lib/runtime/spu/spu_buffers.h new file mode 100644 index 000000000..24811dc38 --- /dev/null +++ b/gcell/src/lib/runtime/spu/spu_buffers.h @@ -0,0 +1,32 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. 
+ * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#ifndef INCLUDED_SPU_BUFFERS_H +#define INCLUDED_SPU_BUFFERS_H + +#include "gc_spu_config.h" + +//! pointer to input buffer +extern unsigned char *_gci_getbuf[NGETBUFS]; + +//! pointers to output buffers +extern unsigned char *_gci_putbuf[NPUTBUFS]; + +#endif /* INCLUDED_SPU_BUFFERS_H */ |