diff options
Diffstat (limited to 'gcell/lib/general/spu')
-rw-r--r-- | gcell/lib/general/spu/fft_1d_r2.c | 35 | ||||
-rw-r--r-- | gcell/lib/general/spu/memset.S | 185 | ||||
-rw-r--r-- | gcell/lib/general/spu/qa_memset.c | 201 |
3 files changed, 421 insertions, 0 deletions
diff --git a/gcell/lib/general/spu/fft_1d_r2.c b/gcell/lib/general/spu/fft_1d_r2.c new file mode 100644 index 000000000..0a87e74a8 --- /dev/null +++ b/gcell/lib/general/spu/fft_1d_r2.c @@ -0,0 +1,35 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <gcell/spu/libfft.h> +#include <gcell/spu/fft_1d_r2.h> +#include <assert.h> + +/* + * Out-of-line wrapper: asserts (1 << log2_size) <= MAX_FFT_1D_SIZE, then invokes the inline version (_fft_1d_r2) with the same arguments + */ +void +fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size) +{ + assert((1 << log2_size) <= MAX_FFT_1D_SIZE); + + _fft_1d_r2(out, in, W, log2_size); +} diff --git a/gcell/lib/general/spu/memset.S b/gcell/lib/general/spu/memset.S new file mode 100644 index 000000000..39eabce02 --- /dev/null +++ b/gcell/lib/general/spu/memset.S @@ -0,0 +1,185 @@ +/* -*- asm -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <gcell/spu/gc_spu_macs.h> + + .file "memset.S" + + /* + * Computes this, only a lot faster... + * + * void * + * memset(void *pv, int c, size_t n) + * { + * unsigned char *p = (unsigned char *) pv; + * size_t i; + * for (i = 0; i < n; i++) + * p[i] = c; + * + * return pv; + * } + */ + +#define p_arg arg1 // we're going to clobber arg1 w/ the return value +#define c arg2 // the constant we're writing +#define n arg3 // how many bytes to write + +#define p r13 // where we're writing +#define t0 r14 +#define t1 r15 +#define mask r16 +#define old r17 +#define an r18 // aligned n (n rounded down to mod 16 boundary) +#define next_p r19 +#define cond1 r20 +#define cond2 r21 +#define m r22 +#define r r23 + + PROC_ENTRY(memset) + + // Hint the return from do_head, in case we go that way. + // There's pretty much nothing we can do to hint the branch to it. + hbrr do_head_br, head_complete + + MR(p, p_arg) // leaves p, the return value, in the correct reg (r3) + BRZ_RETURN(n) + + MODULO(t0, p, 16) // is p%16 == 0? + VSPLTB(c, c, 3) // splat byte in preferred slot of c into all slots + brnz t0, do_head // no, handle it +head_complete: + + /* + * preconditions: + * p%16 == 0, n > 0 + */ + hbrr middle_loop_br, middle_loop + + ROUND_DOWN(an, n, 16) // an is "aligned n" + MODULO(n, n, 16) // what's left over in the last quad + brz an, do_tail // no whole quad words; skip to tail + clgti t0, an, 127 // an >= 128? 
+ brz t0, middle2 // nope, go handle the cases between 0 and 112 + + /* + * 128 bytes / iteration + */ + .p2align 4 +middle_loop: + ai an, an, -128 + stqd c, 0*16(p) + ai next_p, p, 128 + stqd c, 1*16(p) + cgti cond1, an, 127 + stqd c, 2*16(p) + + stqd c, 3*16(p) + stqd c, 4*16(p) + stqd c, 5*16(p) + stqd c, 6*16(p) + + MR(p, next_p) + stqd c, 7*16-128(next_p) + or cond2, n, an +middle_loop_br: + brnz cond1, middle_loop + + /* + * if an and n are both zero, return now + */ + BRZ_RETURN(cond2) + + /* + * otherwise handle last of full quad words + * + * 0 <= an < 128, p%16 == 0 + */ +middle2: + /* + * if an == 0, go handle the final non-full quadword + */ + brz an, do_tail + hbrr middle2_loop_br, middle2_loop + + .p2align 3 +middle2_loop: + ai next_p, p, 16 + stqd c, 0(p) + ai an, an, -16 + LMR(p, next_p) +middle2_loop_br: + brnz an, middle2_loop + + /* We're done with the full quadwords. */ + + /* + * Handle the final partial quadword. + * We'll be modifying only the left hand portion of the quad. 
+ * + * preconditions: + * an == 0, 0 <= n < 16, p%16 == 0 + */ +do_tail: + HINT_RETURN(do_tail_ret) + il mask, -1 + sfi t1, n, 16 // t1 = 16 - n + lqd old, 0(p) + shlqby mask, mask, t1 + selb t0, old, c, mask + stqd t0, 0(p) +do_tail_ret: + RETURN() + + /* + * ---------------------------------------------------------------- + * Handle the first partial quadword + * + * preconditions: + * p%16 != 0 + * + * postconditions: + * p%16 == 0 or n == 0 + * + * |-- m --| + * +----------------+----------------+ + * | //////// | | + * +----------------+----------------+ + * |----- r -----| + * p + * ---------------------------------------------------------------- + */ +do_head: + lqd old, 0(p) + MODULO_NEG(r, p, 16) + il mask, -1 + UMIN(m, r, n) + shlqby mask, mask, m // 1's in the top, m*8 0's in the bottom + MR(t1, p) + sf t0, m, r // t0 = r - m + a p, p, m // p += m + rotqby mask, mask, t0 // rotate 0's to the right place + sf n, m, n // n -= m + selb t0, c, old, mask // merge + stqd t0, 0(t1) + BRZ_RETURN(n) +do_head_br: + br head_complete diff --git a/gcell/lib/general/spu/qa_memset.c b/gcell/lib/general/spu/qa_memset.c new file mode 100644 index 000000000..e51b02c9a --- /dev/null +++ b/gcell/lib/general/spu/qa_memset.c @@ -0,0 +1,201 @@ +/* -*- c++ -*- */ +/* + * Copyright 2008 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <gcell/gc_declare_proc.h> +#include <spu_intrinsics.h> +#include <spu_mfcio.h> +#include <string.h> +#include <stdio.h> + + +#define MAX_QA_BYTES 1024 +#define MAX_OFFSET 32 +#define ALIGNMENT 16 +#define K 0xA5 + +// FIXME should be passed at gcell init time +//static const int TIMEBASE = 79800000; // ps3 +static const int TIMEBASE = 26666666; // qs21 + +typedef void* (*memset_fptr)(void *s, int val, size_t n); +/* Reference implementation: stores c into each of the n bytes at sv, one byte at a time, and returns sv. */ +void * +memset_ref(void *sv, int c, size_t n) +{ + unsigned char *s = (unsigned char *) sv; + size_t i; + for (i = 0; i < n; i++) + s[i] = c; + + return sv; +} +/* Checks that the 16 bytes just before the working pointer still hold (index in buf) & 0xff; reports each mismatch and returns false if any. */ +static bool +check_before(unsigned char *buf, size_t len, size_t offset) +{ + unsigned char *p = buf + sizeof(vector unsigned char) + offset; + bool ok = true; + int i; + + for (i = -16; i < 0; i++){ + unsigned char expected = (&p[i] - buf) & 0xff; + if (p[i] != expected){ + printf("b:memset(%p, 0x%x, %zu) <offset %2zu> [%3d] expected %02x, got %02x\n", + p, K, len, offset, i, expected, p[i]); + ok = false; + } + } + return ok; +} +/* Checks that the len bytes starting at the working pointer all equal K; reports each mismatch and returns false if any. */ +static bool +check_middle(unsigned char *buf, size_t len, size_t offset) +{ + unsigned char *p = buf + sizeof(vector unsigned char) + offset; + bool ok = true; + size_t i; + + for (i = 0; i < len; i++){ + unsigned char expected = K; + if (p[i] != expected){ + printf("m:memset(%p, 0x%x, %zu) <offset %2zu> [%3zu] expected %02x, got %02x\n", + p, K, len, offset, i, expected, p[i]); + ok = false; + } + } + return ok; +} +/* Checks that the 16 bytes following the len-byte region still hold (index in buf) & 0xff; reports each mismatch and returns false if any. */ +static bool +check_after(unsigned char *buf, size_t len, size_t offset) +{ + unsigned char *p = buf + sizeof(vector unsigned char) + offset; + bool ok = true; + size_t i; + + for (i = len; i < len + 16; i++){ + unsigned char expected = (&p[i] - buf) & 0xff; + if (p[i] != expected){ + printf("a:memset(%p, 0x%x, %zu) <offset 
%2zu> [%3zu] expected %02x, got %02x\n", + p, K, len, offset, i, expected, p[i]); + ok = false; + } + } + return ok; +} + +/* Fills buf with buf[i] = i, calls f on len bytes at (16 + offset) into buf, then runs the before/middle/after checks. */ +static bool +test_memset_aux(memset_fptr f, + unsigned char *buf, size_t buflen, size_t len, size_t offset) +{ + size_t i; + + // init buffer to non-zero known state + for (i = 0; i < buflen; i++) + buf[i] = i; + + // Our working buffer. Starts 16 bytes + offset into buf. + // We offset by 16 so that we can see if data before is getting damaged. + unsigned char *p = buf + sizeof(vector unsigned char) + offset; + + (*f)(p, K, len); + + bool ok = true; + ok &= check_before(buf, len, offset); + ok &= check_middle(buf, len, offset); + ok &= check_after(buf, len, offset); + + return ok; +} +/* Runs test_memset_aux for every len in [0, MAX_QA_BYTES) and every offset in [0, MAX_OFFSET] on an ALIGNMENT-aligned buffer; true only if all pass. */ +bool +test_memset(memset_fptr f) +{ + size_t BUFLEN = MAX_QA_BYTES + 2*sizeof(vector unsigned char) + MAX_OFFSET; + unsigned char unaligned_buf[BUFLEN + ALIGNMENT -1]; + unsigned char *aligned_buf = + (unsigned char *)((((intptr_t) unaligned_buf) + ALIGNMENT - 1) & -ALIGNMENT); + + // printf("unaligned = %p\n", unaligned_buf); + // printf("aligned = %p\n", aligned_buf); + + size_t len; + size_t offset; + bool ok = true; + + for (len = 0; len < MAX_QA_BYTES; len++){ + for (offset = 0; offset <= MAX_OFFSET; offset++){ + ok &= test_memset_aux(f, aligned_buf, BUFLEN, len, offset); + } + } + + return ok; +} + +// returns bytes/s +float +benchmark_memset(memset_fptr f, bool aligned) +{ + static const int SIZE = 32768; + unsigned char buf[SIZE]; + uint32_t t0, t1; + int nbytes; + + spu_write_decrementer(0xffffffff); + + if (aligned){ + nbytes = SIZE; + t0 = spu_read_decrementer(); + (*f)(buf, 0x55, nbytes); + (*f)(buf, 0x55, nbytes); + (*f)(buf, 0x55, nbytes); + (*f)(buf, 0x55, nbytes); + t1 = spu_read_decrementer(); + } + else { + nbytes = SIZE - 2; + t0 = spu_read_decrementer(); + (*f)(buf + 1, 0x55, nbytes); + (*f)(buf + 1, 0x55, nbytes); + (*f)(buf + 1, 0x55, nbytes); + (*f)(buf + 1, 0x55, nbytes); + t1 = spu_read_decrementer(); + } + + //printf("delta ticks: 
%d\n", t0 - t1); + return (float) nbytes * 4 / ((t0 - t1) * 1.0/TIMEBASE); +} + +/* + * Implement the standard QA stub. + * No input arguments, 1 bool output. + */ +static void +gcs_qa_memset(const gc_job_direct_args_t *input _UNUSED, + gc_job_direct_args_t *output, + const gc_job_ea_args_t *eaa _UNUSED) +{ + bool ok = test_memset(memset); + output->arg[0].u32 = ok; +} + +GC_DECLARE_PROC(gcs_qa_memset, "qa_memset"); |