summaryrefslogtreecommitdiff
path: root/gcell/lib/general/spu
diff options
context:
space:
mode:
Diffstat (limited to 'gcell/lib/general/spu')
-rw-r--r--gcell/lib/general/spu/fft_1d_r2.c35
-rw-r--r--gcell/lib/general/spu/memset.S185
-rw-r--r--gcell/lib/general/spu/qa_memset.c201
3 files changed, 421 insertions, 0 deletions
diff --git a/gcell/lib/general/spu/fft_1d_r2.c b/gcell/lib/general/spu/fft_1d_r2.c
new file mode 100644
index 000000000..0a87e74a8
--- /dev/null
+++ b/gcell/lib/general/spu/fft_1d_r2.c
@@ -0,0 +1,35 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/spu/libfft.h>
+#include <gcell/spu/fft_1d_r2.h>
+#include <assert.h>
+
+/*
+ * Out-of-line entry point for the 1-D FFT: checks the requested size and
+ * then invokes the inline version, _fft_1d_r2.
+ *
+ * out, in and W are handed to _fft_1d_r2 unchanged.
+ * log2_size is used as a shift count: the transform size (1 << log2_size)
+ * must not exceed MAX_FFT_1D_SIZE.
+ */
+void
+fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size)
+{
+  // Validate the shift count before shifting.  A negative count, or one that
+  // reaches the sign bit or the width of int, is undefined behaviour and
+  // could let the size check below pass for a bogus log2_size.
+  assert(log2_size >= 0 && log2_size < (int) (sizeof(int) * 8 - 1));
+  assert((1 << log2_size) <= MAX_FFT_1D_SIZE);
+
+  _fft_1d_r2(out, in, W, log2_size);
+}
diff --git a/gcell/lib/general/spu/memset.S b/gcell/lib/general/spu/memset.S
new file mode 100644
index 000000000..39eabce02
--- /dev/null
+++ b/gcell/lib/general/spu/memset.S
@@ -0,0 +1,185 @@
+/* -*- asm -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/spu/gc_spu_macs.h>
+
+ .file "memset.S"
+
+ /*
+ * Computes this, only a lot faster...
+ *
+ * void *
+ * memset(void *pv, int c, size_t n)
+ * {
+ * unsigned char *p = (unsigned char *) pv;
+ * size_t i;
+ * for (i = 0; i < n; i++)
+ * p[i] = c;
+ *
+ * return pv;
+ * }
+ */
+
+// Incoming arguments.  c and n are overwritten in place by the code below.
+#define p_arg arg1 // destination pointer argument (pv); not written by the code below
+#define c arg2 // the constant we're writing
+#define n arg3 // how many bytes to write
+
+// Working registers.
+#define p r13 // where we're writing
+#define t0 r14 // scratch
+#define t1 r15 // scratch
+#define mask r16 // byte-select mask handed to selb for partial quadwords
+#define old r17 // existing quadword loaded from memory before a partial store
+#define an r18 // aligned n (n rounded down to mod 16 boundary)
+#define next_p r19 // p advanced past the quadword(s) being stored
+#define cond1 r20 // middle_loop: continue condition (an > 127)
+#define cond2 r21 // middle_loop: n | an, zero when nothing is left to write
+#define m r22 // do_head: number of bytes written in the first quadword, min(r, n)
+#define r r23 // do_head: bytes from p up to the next 16-byte boundary
+
+ PROC_ENTRY(memset)
+
+ // Hint the return from do_head, in case we go that way.
+ // There's pretty much nothing we can do to hint the branch to it.
+ hbrr do_head_br, head_complete
+
+ MR(p, p_arg) // work on a copy; p_arg is never written below, so it still holds pv (the return value) on exit
+ BRZ_RETURN(n) // presumably returns right away when n == 0 (macro comes from gc_spu_macs.h)
+
+ MODULO(t0, p, 16) // is p%16 == 0?
+ VSPLTB(c, c, 3) // splat byte in preferred slot of c into all slots
+ brnz t0, do_head // no, handle it
+head_complete:
+
+ /*
+ * preconditions:
+ * p%16 == 0, n > 0
+ */
+ hbrr middle_loop_br, middle_loop
+
+ ROUND_DOWN(an, n, 16) // an is "aligned n"
+ MODULO(n, n, 16) // what's left over in the last quad
+ brz an, do_tail // no whole quad words; skip to tail
+ clgti t0, an, 127 // an >= 128?
+ brz t0, middle2 // nope, go handle the cases between 0 and 112
+
+ /*
+ * 128 bytes / iteration
+ *
+ * an is decremented before the compare, so cond1 says whether another
+ * full 128 bytes remain after the current iteration's stores.
+ * NOTE(review): the entry test above is unsigned (clgti) while the
+ * in-loop test is signed (cgti); they agree as long as an < 2^31.
+ */
+ .p2align 4
+middle_loop:
+ ai an, an, -128
+ stqd c, 0*16(p)
+ ai next_p, p, 128
+ stqd c, 1*16(p)
+ cgti cond1, an, 127
+ stqd c, 2*16(p)
+
+ stqd c, 3*16(p)
+ stqd c, 4*16(p)
+ stqd c, 5*16(p)
+ stqd c, 6*16(p)
+
+ MR(p, next_p)
+ // next_p == old p + 128, so this is the eighth quadword, 7*16(old p)
+ stqd c, 7*16-128(next_p)
+ or cond2, n, an
+middle_loop_br:
+ brnz cond1, middle_loop
+
+ /*
+ * if an and n are both zero, return now
+ */
+ BRZ_RETURN(cond2)
+
+ /*
+ * otherwise handle last of full quad words
+ *
+ * 0 <= an < 128, p%16 == 0
+ */
+middle2:
+ /*
+ * if an == 0, go handle the final non-full quadword
+ */
+ brz an, do_tail
+ hbrr middle2_loop_br, middle2_loop
+
+ // one quadword (16 bytes) per iteration until an reaches zero
+ .p2align 3
+middle2_loop:
+ ai next_p, p, 16
+ stqd c, 0(p)
+ ai an, an, -16
+ LMR(p, next_p)
+middle2_loop_br:
+ brnz an, middle2_loop
+
+ /* We're done with the full quadwords. */
+
+ /*
+ * Handle the final partial quadword.
+ * We'll be modifying only the left hand portion of the quad.
+ *
+ * preconditions:
+ * an == 0, 0 <= n < 16, p%16 == 0
+ *
+ * This is reached by fall-through from middle2_loop even when n == 0.
+ * In that case the shift count is 16 bytes, which per the SPU ISA
+ * clears the whole mask, so the old quadword is stored back unchanged.
+ */
+do_tail:
+ HINT_RETURN(do_tail_ret)
+ il mask, -1
+ sfi t1, n, 16 // t1 = 16 - n
+ lqd old, 0(p)
+ shlqby mask, mask, t1 // 1's in the leftmost n bytes, 0's in the rest
+ selb t0, old, c, mask // take c where mask is 1, keep old elsewhere
+ stqd t0, 0(p)
+do_tail_ret:
+ RETURN()
+
+ /*
+ * ----------------------------------------------------------------
+ * Handle the first partial quadword
+ *
+ * preconditions:
+ * p%16 != 0
+ *
+ * postconditions:
+ * p%16 == 0 or n == 0
+ *
+ * |-- m --|
+ * +----------------+----------------+
+ * | //////// | |
+ * +----------------+----------------+
+ * |----- r -----|
+ * p
+ * ----------------------------------------------------------------
+ */
+do_head:
+ lqd old, 0(p)
+ MODULO_NEG(r, p, 16)
+ il mask, -1
+ UMIN(m, r, n)
+ shlqby mask, mask, m // 1's in the top, m*8 0's in the bottom
+ MR(t1, p) // remember the original p for the store below
+ sf t0, m, r // t0 = r - m
+ a p, p, m // p += m
+ rotqby mask, mask, t0 // rotate 0's to the right place
+ sf n, m, n // n -= m
+ selb t0, c, old, mask // merge
+ stqd t0, 0(t1)
+ BRZ_RETURN(n)
+do_head_br:
+ br head_complete
diff --git a/gcell/lib/general/spu/qa_memset.c b/gcell/lib/general/spu/qa_memset.c
new file mode 100644
index 000000000..e51b02c9a
--- /dev/null
+++ b/gcell/lib/general/spu/qa_memset.c
@@ -0,0 +1,201 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gcell/gc_declare_proc.h>
+#include <spu_intrinsics.h>
+#include <spu_mfcio.h>
+#include <string.h>
+#include <stdio.h>
+
+
+#define MAX_QA_BYTES 1024 // test_memset tries every length below this value
+#define MAX_OFFSET 32 // test_memset tries every start offset from 0 through this value
+#define ALIGNMENT 16 // the test buffer is rounded up to a multiple of this
+#define K 0xA5 // fill value handed to the memset under test
+
+// Divisor used by benchmark_memset to turn a decrementer tick delta into seconds.
+// FIXME should be passed at gcell init time
+//static const int TIMEBASE = 79800000; // ps3
+static const int TIMEBASE = 26666666; // qs21
+
+// Pointer to a function with the same signature as memset.
+typedef void* (*memset_fptr)(void *s, int val, size_t n);
+
+/*
+ * Straightforward byte-at-a-time memset.
+ *
+ * Stores the low 8 bits of c into each of the first n bytes at sv and
+ * returns sv.
+ */
+void *
+memset_ref(void *sv, int c, size_t n)
+{
+  unsigned char *dst = (unsigned char *) sv;
+  const unsigned char fill = (unsigned char) c;
+
+  for (; n != 0; n--)
+    *dst++ = fill;
+
+  return sv;
+}
+
+/*
+ * Check that the 16 bytes immediately before the region handed to memset
+ * were left alone, i.e. each still equals the low 8 bits of its index in
+ * buf (the pattern test_memset_aux writes before calling memset).
+ *
+ * buf is the start of the test buffer; the memset region starts
+ * sizeof(vector unsigned char) + offset bytes into it.  len is only used
+ * in the diagnostic.  Prints one line per damaged byte and returns true
+ * when none were damaged.
+ */
+static bool
+check_before(unsigned char *buf, size_t len, size_t offset)
+{
+  unsigned char *p = buf + sizeof(vector unsigned char) + offset;
+  bool ok = true;
+  int i;
+
+  for (i = -16; i < 0; i++){
+    unsigned char expected = (&p[i] - buf) & 0xff;
+    if (p[i] != expected){
+      // Report the pattern byte that should be here, not the fill value K.
+      printf("b:memset(%p, 0x%x, %zu) <offset %2zu> [%3d] expected %02x, got %02x\n",
+             (void *) p, K, len, offset, i, expected, p[i]);
+      ok = false;
+    }
+  }
+  return ok;
+}
+
+/*
+ * Check that every one of the len bytes handed to memset now equals K.
+ *
+ * buf is the start of the test buffer; the memset region starts
+ * sizeof(vector unsigned char) + offset bytes into it.  Prints one line
+ * per wrong byte and returns true when all bytes match.
+ */
+static bool
+check_middle(unsigned char *buf, size_t len, size_t offset)
+{
+  unsigned char *p = buf + sizeof(vector unsigned char) + offset;
+  bool ok = true;
+  size_t i;
+
+  for (i = 0; i < len; i++){
+    unsigned char expected = K;
+    if (p[i] != expected){
+      // offset and i are size_t, so they are printed with %zu; %p takes a void *.
+      printf("m:memset(%p, 0x%x, %zu) <offset %2zu> [%3zu] expected %02x, got %02x\n",
+             (void *) p, K, len, offset, i, expected, p[i]);
+      ok = false;
+    }
+  }
+  return ok;
+}
+
+/*
+ * Check that the 16 bytes immediately after the region handed to memset
+ * were left alone, i.e. each still equals the low 8 bits of its index in
+ * buf (the pattern test_memset_aux writes before calling memset).
+ *
+ * buf is the start of the test buffer; the memset region starts
+ * sizeof(vector unsigned char) + offset bytes into it and is len bytes
+ * long.  Prints one line per damaged byte and returns true when none
+ * were damaged.
+ */
+static bool
+check_after(unsigned char *buf, size_t len, size_t offset)
+{
+  unsigned char *p = buf + sizeof(vector unsigned char) + offset;
+  bool ok = true;
+  size_t i;
+
+  for (i = len; i < len + 16; i++){
+    unsigned char expected = (&p[i] - buf) & 0xff;
+    if (p[i] != expected){
+      // offset and i are size_t, so they are printed with %zu; %p takes a void *.
+      printf("a:memset(%p, 0x%x, %zu) <offset %2zu> [%3zu] expected %02x, got %02x\n",
+             (void *) p, K, len, offset, i, expected, p[i]);
+      ok = false;
+    }
+  }
+  return ok;
+}
+
+
+/*
+ * Run one memset trial and verify its effect.
+ *
+ * Stamps all buflen bytes of buf with the low 8 bits of their index, calls
+ * f to set len bytes to K starting sizeof(vector unsigned char) + offset
+ * bytes into buf, then checks the bytes before, inside and after that
+ * region.  All three checks always run so that every problem is reported.
+ * Returns true when all three pass.
+ */
+static bool
+test_memset_aux(memset_fptr f,
+                unsigned char *buf, size_t buflen, size_t len, size_t offset)
+{
+  // Known, non-constant background so stray writes are detectable.
+  unsigned char *cursor = buf;
+  size_t idx;
+  for (idx = 0; idx != buflen; idx++)
+    *cursor++ = (unsigned char) idx;
+
+  // The region under test sits one quadword plus offset into buf, leaving
+  // room in front of it to detect damage to preceding bytes.
+  unsigned char *target = buf + sizeof(vector unsigned char) + offset;
+
+  (*f)(target, K, len);
+
+  const bool before_ok = check_before(buf, len, offset);
+  const bool middle_ok = check_middle(buf, len, offset);
+  const bool after_ok = check_after(buf, len, offset);
+
+  return before_ok && middle_ok && after_ok;
+}
+
+/*
+ * Exercise the memset implementation f over every length in
+ * [0, MAX_QA_BYTES) combined with every start offset in [0, MAX_OFFSET].
+ *
+ * The working buffer lives on the stack and is rounded up to an ALIGNMENT
+ * boundary, so that offset alone determines the alignment of the region
+ * handed to f.  Returns true when every trial passes.
+ */
+bool
+test_memset(memset_fptr f)
+{
+  size_t BUFLEN = MAX_QA_BYTES + 2*sizeof(vector unsigned char) + MAX_OFFSET;
+
+  // Over-allocate by ALIGNMENT - 1 so an aligned window of BUFLEN bytes fits.
+  unsigned char raw_storage[BUFLEN + ALIGNMENT -1];
+  intptr_t addr = (intptr_t) raw_storage;
+  addr = (addr + (ALIGNMENT - 1)) & ~(intptr_t) (ALIGNMENT - 1);
+  unsigned char *aligned_buf = (unsigned char *) addr;
+
+  bool all_passed = true;
+  size_t len;
+  size_t offset;
+
+  for (len = 0; len < MAX_QA_BYTES; len++){
+    for (offset = 0; offset <= MAX_OFFSET; offset++){
+      // Keep going after a failure so every bad combination gets reported.
+      if (!test_memset_aux(f, aligned_buf, BUFLEN, len, offset))
+        all_passed = false;
+    }
+  }
+
+  return all_passed;
+}
+
+// Times four back-to-back calls of f over a 32 KiB stack buffer and returns
+// the throughput in bytes/s.  When aligned is true the calls cover the whole
+// buffer from its start; otherwise they start one byte in and cover two
+// bytes fewer.
+float
+benchmark_memset(memset_fptr f, bool aligned)
+{
+  static const int SIZE = 32768;
+  unsigned char buf[SIZE];
+  unsigned char *dst = aligned ? buf : buf + 1;
+  int nbytes = aligned ? SIZE : SIZE - 2;
+  uint32_t start, stop;
+
+  spu_write_decrementer(0xffffffff);
+
+  // The four calls are written out rather than looped so that only the
+  // calls themselves sit between the two timer reads.
+  start = spu_read_decrementer();
+  (*f)(dst, 0x55, nbytes);
+  (*f)(dst, 0x55, nbytes);
+  (*f)(dst, 0x55, nbytes);
+  (*f)(dst, 0x55, nbytes);
+  stop = spu_read_decrementer();
+
+  // The elapsed tick count is taken as start - stop and converted to
+  // seconds by dividing by TIMEBASE.
+  double elapsed_s = (start - stop) * 1.0/TIMEBASE;
+  return (float) nbytes * 4 / elapsed_s;
+}
+
+/*
+ * Standard QA stub for memset.
+ *
+ * Takes no input arguments.  Runs test_memset against memset and stores
+ * the boolean result in output->arg[0].u32.
+ */
+static void
+gcs_qa_memset(const gc_job_direct_args_t *input _UNUSED,
+              gc_job_direct_args_t *output,
+              const gc_job_ea_args_t *eaa _UNUSED)
+{
+  output->arg[0].u32 = test_memset(memset);
+}
+
+GC_DECLARE_PROC(gcs_qa_memset, "qa_memset");