summaryrefslogtreecommitdiff
path: root/gcell/src/lib
diff options
context:
space:
mode:
author eb 2008-04-22 22:24:16 +0000
committer eb 2008-04-22 22:24:16 +0000
commit b9ba2711addfc9057c136b520afc9e121ec19be9 (patch)
tree 059e93ef559bb837c9ca46549688c86d2e153f1c /gcell/src/lib
parent 2ae538ed6a5d18615fb9eea280d861ed3a8600e5 (diff)
download gnuradio-b9ba2711addfc9057c136b520afc9e121ec19be9.tar.gz
gnuradio-b9ba2711addfc9057c136b520afc9e121ec19be9.tar.bz2
gnuradio-b9ba2711addfc9057c136b520afc9e121ec19be9.zip
Merged eb/gcell -r8215:8243 into trunk. This adds gr-gcell, the GNU
Radio interface to the Cell Broadband Engine. git-svn-id: http://gnuradio.org/svn/gnuradio/trunk@8244 221aa14e-8319-0410-a670-987f0aec2ac5
Diffstat (limited to 'gcell/src/lib')
-rw-r--r-- gcell/src/lib/runtime/Makefile.am 2
-rw-r--r-- gcell/src/lib/runtime/gc_aligned_alloc.cc 54
-rw-r--r-- gcell/src/lib/runtime/gc_aligned_alloc.h 52
-rw-r--r-- gcell/src/lib/runtime/gc_job_manager.cc 26
-rw-r--r-- gcell/src/lib/runtime/gc_job_manager.h 27
-rw-r--r-- gcell/src/lib/runtime/gc_job_manager_impl.cc 33
-rw-r--r-- gcell/src/lib/wrapper/gcp_fft_1d_r2.cc 59
-rw-r--r-- gcell/src/lib/wrapper/gcp_fft_1d_r2.h 19
-rw-r--r-- gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.cc 12
-rw-r--r-- gcell/src/lib/wrapper/spu/gcs_fft_1d_r2.c 64
10 files changed, 273 insertions, 75 deletions
diff --git a/gcell/src/lib/runtime/Makefile.am b/gcell/src/lib/runtime/Makefile.am
index a68d2bcd0..89d6f1bb9 100644
--- a/gcell/src/lib/runtime/Makefile.am
+++ b/gcell/src/lib/runtime/Makefile.am
@@ -32,6 +32,7 @@ dist_bin_SCRIPTS = gcell-embedspu-libtool
noinst_LTLIBRARIES = libruntime.la libruntime-qa.la
libruntime_la_SOURCES = \
+ gc_aligned_alloc.cc \
gc_job_manager.cc \
gc_job_manager_impl.cc \
gc_jd_queue.c \
@@ -46,6 +47,7 @@ libruntime_qa_la_SOURCES = \
gcellinclude_HEADERS = \
+ gc_aligned_alloc.h \
gc_job_manager.h
noinst_HEADERS = \
diff --git a/gcell/src/lib/runtime/gc_aligned_alloc.cc b/gcell/src/lib/runtime/gc_aligned_alloc.cc
new file mode 100644
index 000000000..fa20a6443
--- /dev/null
+++ b/gcell/src/lib/runtime/gc_aligned_alloc.cc
@@ -0,0 +1,54 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <gc_aligned_alloc.h>
+#include <stdlib.h>
+#include <stdexcept>
+
+// custom deleter of anything that can be freed with "free"
+class free_deleter {
+public:
+ void operator()(void *p) {
+ free(p);
+ }
+};
+
+void *
+gc_aligned_alloc(size_t size, size_t alignment)
+{
+ void *p = 0;
+ if (posix_memalign(&p, alignment, size) != 0){
+ perror("posix_memalign");
+ throw std::runtime_error("memory");
+ }
+ memset(p, 0, size); // zero the memory
+ return p;
+}
+
+boost::shared_ptr<void>
+gc_aligned_alloc_sptr(size_t size, size_t alignment)
+{
+ return boost::shared_ptr<void>(gc_aligned_alloc(size, alignment),
+ free_deleter());
+}
diff --git a/gcell/src/lib/runtime/gc_aligned_alloc.h b/gcell/src/lib/runtime/gc_aligned_alloc.h
new file mode 100644
index 000000000..bdc21c278
--- /dev/null
+++ b/gcell/src/lib/runtime/gc_aligned_alloc.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_GC_ALIGNED_ALLOC_H
+#define INCLUDED_GC_ALIGNED_ALLOC_H
+
+#include <boost/shared_ptr.hpp>
+
+/*!
+ * \brief Return pointer to a chunk of storage \p size bytes long.
+ * The allocation will be aligned to an \p alignment boundary.
+ *
+ * \param size is the number of bytes to allocate
+ * \param alignment is the minimum storage alignment in bytes; must be a power of 2.
+ *
+ * Throws if can't allocate memory. The storage should be freed
+ * with "free" when done. The memory is initialized to zero.
+ */
+void *
+gc_aligned_alloc(size_t size, size_t alignment = 128);
+
+/*!
+ * \brief Return boost::shared_ptr to a chunk of storage \p size bytes long.
+ * The allocation will be aligned to an \p alignment boundary.
+ *
+ * \param size is the number of bytes to allocate
+ * \param alignment is the minimum storage alignment in bytes; must be a power of 2.
+ *
+ * Throws if can't allocate memory. The storage is released with "free"
+ * by the shared_ptr's deleter; do not free it by hand. The memory is initialized to zero.
+ */
+boost::shared_ptr<void>
+gc_aligned_alloc_sptr(size_t size, size_t alignment = 128);
+
+#endif /* INCLUDED_GC_ALIGNED_ALLOC_H */
diff --git a/gcell/src/lib/runtime/gc_job_manager.cc b/gcell/src/lib/runtime/gc_job_manager.cc
index 9ede5e156..ac2e989a4 100644
--- a/gcell/src/lib/runtime/gc_job_manager.cc
+++ b/gcell/src/lib/runtime/gc_job_manager.cc
@@ -31,6 +31,19 @@
static boost::weak_ptr<gc_job_manager> s_singleton;
+// custom deleter for a gc_job_desc wrapped by gc_job_manager::make_jd_sptr
+class job_desc_deleter {
+ gc_job_manager_sptr d_mgr;
+public:
+ job_desc_deleter(gc_job_manager_sptr mgr) : d_mgr(mgr) {}
+
+ void operator()(gc_job_desc *jd) {
+ d_mgr->free_job_desc(jd);
+ }
+};
+
+
+
gc_job_manager_sptr
gc_make_job_manager(const gc_jm_options *options)
{
@@ -71,6 +84,19 @@ gc_job_manager::singleton()
return gc_job_manager_sptr(s_singleton);
}
+gc_job_desc_sptr
+gc_job_manager::make_jd_sptr(gc_job_manager_sptr mgr, gc_job_desc *jd)
+{
+ return gc_job_desc_sptr(jd, job_desc_deleter(mgr));
+}
+
+gc_job_desc_sptr
+gc_job_manager::alloc_job_desc(gc_job_manager_sptr mgr)
+{
+ return make_jd_sptr(mgr, mgr->alloc_job_desc());
+}
+
+
// ------------------------------------------------------------------------
diff --git a/gcell/src/lib/runtime/gc_job_manager.h b/gcell/src/lib/runtime/gc_job_manager.h
index aa30dc24b..67abce7ed 100644
--- a/gcell/src/lib/runtime/gc_job_manager.h
+++ b/gcell/src/lib/runtime/gc_job_manager.h
@@ -33,6 +33,7 @@
class gc_job_manager;
typedef boost::shared_ptr<gc_job_manager> gc_job_manager_sptr;
typedef boost::shared_ptr<spe_program_handle_t> spe_program_handle_sptr;
+typedef boost::shared_ptr<gc_job_desc> gc_job_desc_sptr;
/*!
* \brief Return a boost::shared_ptr to an spe_program_handle_t
@@ -86,10 +87,19 @@ struct gc_jm_options {
gc_jm_options() :
max_jobs(0), max_client_threads(0), nspes(0),
- gang_schedule(true), use_affinity(false),
+ gang_schedule(false), use_affinity(false),
enable_logging(false), log2_nlog_entries(12)
{
}
+
+ gc_jm_options(spe_program_handle_sptr program_handle_,
+ unsigned int nspes_ = 0) :
+ max_jobs(0), max_client_threads(0), nspes(nspes_),
+ gang_schedule(false), use_affinity(false),
+ enable_logging(false), log2_nlog_entries(12),
+ program_handle(program_handle_)
+ {
+ }
};
enum gc_wait_mode {
@@ -236,6 +246,11 @@ public:
*/
virtual std::vector<std::string> proc_names() = 0;
+ virtual void set_debug(int debug);
+ virtual int debug();
+
+ /* ----- static methods ----- */
+
/*!
* \brief Set the singleton gc_job_manager instance.
* \param mgr is the job manager instance.
@@ -256,9 +271,15 @@ public:
*/
static gc_job_manager_sptr singleton();
+ /*!
+ * \brief return a boost::shared_ptr to \p jd that calls mgr->free_job_desc(jd) when released.
+ */
+ static gc_job_desc_sptr make_jd_sptr(gc_job_manager_sptr mgr, gc_job_desc *jd);
- virtual void set_debug(int debug);
- virtual int debug();
+ /*!
+ * \brief allocate a job descriptor and return a boost::shared_ptr to it.
+ */
+ static gc_job_desc_sptr alloc_job_desc(gc_job_manager_sptr mgr);
};
diff --git a/gcell/src/lib/runtime/gc_job_manager_impl.cc b/gcell/src/lib/runtime/gc_job_manager_impl.cc
index 59deb4ae5..9c859511b 100644
--- a/gcell/src/lib/runtime/gc_job_manager_impl.cc
+++ b/gcell/src/lib/runtime/gc_job_manager_impl.cc
@@ -25,7 +25,7 @@
#include <gc_job_manager_impl.h>
#include <gc_mbox.h>
#include <gc_proc_def_utils.h>
-
+#include <gc_aligned_alloc.h>
#include <stdio.h>
#include <stdexcept>
#include <stdlib.h>
@@ -85,23 +85,6 @@ client_key_destructor(void *p)
((gc_client_thread_info *) p)->d_free = 1;
}
-/*
- * Return pointer to cache-aligned chunk of storage of size size bytes.
- * Throw if can't allocate memory. The storage should be freed
- * with "free" when done. The memory is initialized to zero.
- */
-static void *
-aligned_alloc(size_t size, size_t alignment = CACHE_LINE_SIZE)
-{
- void *p = 0;
- if (posix_memalign(&p, alignment, size) != 0){
- perror("posix_memalign");
- throw std::runtime_error("memory");
- }
- memset(p, 0, size); // zero the memory
- return p;
-}
-
static bool
is_power_of_2(uint32_t x)
{
@@ -196,7 +179,7 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
// ----------------------------------------------------------------
// initialize the job queue
- d_queue = (gc_jd_queue_t *) aligned_alloc(sizeof(gc_jd_queue_t));
+ d_queue = (gc_jd_queue_t *) gc_aligned_alloc(sizeof(gc_jd_queue_t), CACHE_LINE_SIZE);
_d_queue_boost =
boost::shared_ptr<void>((void *) d_queue, free_deleter());
gc_jd_queue_init(d_queue);
@@ -208,15 +191,15 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
// 1 spu_arg struct for each SPE
assert(sizeof(gc_spu_args_t) % 16 == 0);
d_spu_args =
- (gc_spu_args_t *) aligned_alloc(MAX_SPES * sizeof(gc_spu_args_t), 16);
+ (gc_spu_args_t *) gc_aligned_alloc(MAX_SPES * sizeof(gc_spu_args_t), 16);
_d_spu_args_boost =
boost::shared_ptr<void>((void *) d_spu_args, free_deleter());
// 2 completion info structs for each SPE (we double buffer them)
assert(sizeof(gc_comp_info_t) % CACHE_LINE_SIZE == 0);
d_comp_info =
- (gc_comp_info_t *) aligned_alloc(2 * MAX_SPES * sizeof(gc_comp_info_t),
- CACHE_LINE_SIZE);
+ (gc_comp_info_t *) gc_aligned_alloc(2 * MAX_SPES * sizeof(gc_comp_info_t),
+ CACHE_LINE_SIZE);
_d_comp_info_boost =
boost::shared_ptr<void>((void *) d_comp_info, free_deleter());
@@ -269,7 +252,7 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
// ----------------------------------------------------------------
// initialize the free list of job descriptors
- d_free_list = (gc_jd_stack_t *) aligned_alloc(sizeof(gc_jd_stack_t));
+ d_free_list = (gc_jd_stack_t *) gc_aligned_alloc(sizeof(gc_jd_stack_t), CACHE_LINE_SIZE);
// This ensures that the memory associated with d_free_list is
// automatically freed in the destructor or if an exception occurs
// here in the constructor.
@@ -283,7 +266,7 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
}
// Initialize the array of job descriptors.
- d_jd = (gc_job_desc_t *) aligned_alloc(sizeof(d_jd[0]) * d_options.max_jobs);
+ d_jd = (gc_job_desc_t *) gc_aligned_alloc(sizeof(d_jd[0]) * d_options.max_jobs, CACHE_LINE_SIZE);
_d_jd_boost = boost::shared_ptr<void>((void *) d_jd, free_deleter());
@@ -317,7 +300,7 @@ gc_job_manager_impl::gc_job_manager_impl(const gc_jm_options *options)
// allocate all bitvectors in a single cache-aligned chunk
size_t nlongs = d_bvlen * d_options.max_client_threads;
- void *p = aligned_alloc(nlongs * sizeof(unsigned long));
+ void *p = gc_aligned_alloc(nlongs * sizeof(unsigned long), CACHE_LINE_SIZE);
_d_all_bitvectors = boost::shared_ptr<void>(p, free_deleter());
// Now point the gc_client_thread_info bitvectors into this storage
diff --git a/gcell/src/lib/wrapper/gcp_fft_1d_r2.cc b/gcell/src/lib/wrapper/gcp_fft_1d_r2.cc
index f92ee42c8..07267e303 100644
--- a/gcell/src/lib/wrapper/gcp_fft_1d_r2.cc
+++ b/gcell/src/lib/wrapper/gcp_fft_1d_r2.cc
@@ -30,18 +30,19 @@ static void
init_jd(gc_job_desc *jd,
gc_proc_id_t proc_id,
unsigned log2_fft_length,
- bool forward,
+ bool shift,
std::complex<float> *out,
const std::complex<float> *in,
- const std::complex<float> *W)
+ const std::complex<float> *twiddle,
+ const float *window)
{
jd->proc_id = proc_id;
jd->input.nargs = 2;
jd->output.nargs = 0;
- jd->eaa.nargs = 3;
+ jd->eaa.nargs = 4;
jd->input.arg[0].u32 = log2_fft_length;
- jd->input.arg[1].u32 = forward;
+ jd->input.arg[1].u32 = shift;
unsigned int fft_length = 1 << log2_fft_length;
jd->eaa.arg[0].ea_addr = ptr_to_ea(out);
@@ -52,19 +53,28 @@ init_jd(gc_job_desc *jd,
jd->eaa.arg[1].direction = GCJD_DMA_GET;
jd->eaa.arg[1].get_size = sizeof(std::complex<float>) * fft_length;
- jd->eaa.arg[2].ea_addr = ptr_to_ea(const_cast<std::complex<float>*>(W));
+ jd->eaa.arg[2].ea_addr = ptr_to_ea(const_cast<std::complex<float>*>(twiddle));
jd->eaa.arg[2].direction = GCJD_DMA_GET;
jd->eaa.arg[2].get_size = sizeof(std::complex<float>) * fft_length / 4;
-}
+ jd->eaa.arg[3].ea_addr = ptr_to_ea(const_cast<float*>(window));
+ jd->eaa.arg[3].direction = GCJD_DMA_GET;
+ if (window == 0)
+ jd->eaa.arg[3].get_size = 0;
+ else
+ jd->eaa.arg[3].get_size = sizeof(float) * fft_length;
+}
-gc_job_desc *
+
+gc_job_desc_sptr
gcp_fft_1d_r2_submit(gc_job_manager_sptr mgr,
unsigned int log2_fft_length,
bool forward,
+ bool shift,
std::complex<float> *out,
const std::complex<float> *in,
- const std::complex<float> *W)
+ const std::complex<float> *twiddle,
+ const float *window)
{
unsigned int fft_length = 1 << log2_fft_length;
if (fft_length > 4096)
@@ -74,29 +84,36 @@ gcp_fft_1d_r2_submit(gc_job_manager_sptr mgr,
throw gc_bad_align("out");
if ((intptr_t)in & 0xf)
throw gc_bad_align("in");
- if ((intptr_t)W & 0xf)
- throw gc_bad_align("W");
+ if ((intptr_t)twiddle & 0xf)
+ throw gc_bad_align("twiddle");
+ if ((intptr_t)window & 0xf)
+ throw gc_bad_align("window");
+
+ std::string proc_name;
+ if (forward)
+ proc_name = "fwd_fft_1d_r2";
+ else
+ proc_name = "inv_fft_1d_r2";
- gc_proc_id_t fft_id = mgr->lookup_proc("fft_1d_r2");
- gc_job_desc *jd = mgr->alloc_job_desc();
- init_jd(jd, fft_id, log2_fft_length, forward, out, in, W);
- if (!mgr->submit_job(jd)){
+ gc_proc_id_t fft_id = mgr->lookup_proc(proc_name);
+ gc_job_desc_sptr jd = gc_job_manager::alloc_job_desc(mgr);
+ init_jd(jd.get(), fft_id, log2_fft_length, shift, out, in, twiddle, window);
+ if (!mgr->submit_job(jd.get())){
gc_job_status_t s = jd->status;
- mgr->free_job_desc(jd);
- throw gc_bad_submit("fft_1d_r2", s);
+ throw gc_bad_submit(proc_name, s);
}
return jd;
}
void
-gcp_fft_1d_r2_twiddle(unsigned int log2_fft_length, std::complex<float> *W)
+gcp_fft_1d_r2_twiddle(unsigned int log2_fft_length, std::complex<float> *twiddle)
{
unsigned int n = 1 << log2_fft_length;
- W[0].real() = 1.0;
- W[0].imag() = 0.0;
+ twiddle[0].real() = 1.0;
+ twiddle[0].imag() = 0.0;
for (unsigned i=1; i < n/4; i++){
- W[i].real() = cos(i * 2*M_PI/n);
- W[n/4 - i].imag() = -W[i].real();
+ twiddle[i].real() = cos(i * 2*M_PI/n);
+ twiddle[n/4 - i].imag() = -twiddle[i].real();
}
}
diff --git a/gcell/src/lib/wrapper/gcp_fft_1d_r2.h b/gcell/src/lib/wrapper/gcp_fft_1d_r2.h
index ed1d9e783..1207a5f36 100644
--- a/gcell/src/lib/wrapper/gcp_fft_1d_r2.h
+++ b/gcell/src/lib/wrapper/gcp_fft_1d_r2.h
@@ -25,25 +25,32 @@
#include <complex>
/*!
- * \brief Submit a job that computes the forward or reverse FFT.
+ * \brief Submit a job that computes the forward or inverse FFT.
*
* \param mgr is the job manager instance
* \param log2_fft_length is the log2 of the fft_length (4 <= x <= 12).
- * \param forward is true to compute the forward xform
+ * \param forward is true to compute the forward transform, else the inverse.
+ * \param shift indicates if an "fftshift" should be applied to the output data
* \param out is the fft_length output from FFT (must be 16-byte aligned).
* \param in is the fft_length input to FFT (must be 16-byte aligned).
- * \param W is fft_length/4 twiddle factor input to FFT (must be 16-byte aligned).
+ * \param twiddle is fft_length/4 twiddle factor input to FFT (must be 16-byte aligned).
+ * \param window is the fft_length-element window to be applied to the input data
+ * (must be 16-byte aligned), or 0 (null pointer) if no window is to be applied.
*
- * Returns a job descriptor which should be passed to wait_job*.
+ * Returns a shared_ptr to a job descriptor; pass its raw pointer (jd.get()) to wait_job*.
* Throws an exception in the event of a problem.
+ * This uses the FFTW conventions for scaling. That is, neither the forward nor the inverse
+ * transform is scaled by 1/fft_length.
*/
-gc_job_desc *
+gc_job_desc_sptr
gcp_fft_1d_r2_submit(gc_job_manager_sptr mgr,
unsigned int log2_fft_length,
bool forward,
+ bool shift,
std::complex<float> *out,
const std::complex<float> *in,
- const std::complex<float> *W);
+ const std::complex<float> *twiddle,
+ const float *window);
/*!
* \brief Compute twiddle factors
diff --git a/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.cc b/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.cc
index b177edede..404f83657 100644
--- a/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.cc
+++ b/gcell/src/lib/wrapper/qa_gcp_fft_1d_r2.cc
@@ -80,7 +80,7 @@ qa_gcp_fft_1d_r2::t1()
#endif
}
-// test reverse FFT
+// test inverse FFT
void
qa_gcp_fft_1d_r2::t2()
{
@@ -101,11 +101,13 @@ qa_gcp_fft_1d_r2::t2()
void
qa_gcp_fft_1d_r2::t3()
{
+ // FIXME Test fwd and inv with windowing option
}
void
qa_gcp_fft_1d_r2::t4()
{
+ // FIXME Test fwd and inv with shift option
}
static inline float
@@ -178,14 +180,12 @@ qa_gcp_fft_1d_r2::test(gc_job_manager_sptr mgr, int log2_fft_size, bool forward)
// ------------------------------------------------------------------------
// compute the answer on the cell
- gc_job_desc *jd = gcp_fft_1d_r2_submit(mgr, log2_fft_size, forward,
- cell_out, cell_in, cell_twiddle);
- if (!mgr->wait_job(jd)){
+ gc_job_desc_sptr jd = gcp_fft_1d_r2_submit(mgr, log2_fft_size, forward, false,
+ cell_out, cell_in, cell_twiddle, 0);
+ if (!mgr->wait_job(jd.get())){
fprintf(stderr, "wait_job failed: %s\n", gc_job_status_string(jd->status).c_str());
- mgr->free_job_desc(jd);
CPPUNIT_ASSERT(0);
}
- mgr->free_job_desc(jd);
// ------------------------------------------------------------------------
// compute the maximum of the relative error
diff --git a/gcell/src/lib/wrapper/spu/gcs_fft_1d_r2.c b/gcell/src/lib/wrapper/spu/gcs_fft_1d_r2.c
index bf4bdfd20..81e5dfd87 100644
--- a/gcell/src/lib/wrapper/spu/gcs_fft_1d_r2.c
+++ b/gcell/src/lib/wrapper/spu/gcs_fft_1d_r2.c
@@ -21,6 +21,7 @@
#include <gc_declare_proc.h>
#include <libfft.h>
+#include <assert.h>
/*
* v is really vector complex<float>
@@ -35,24 +36,59 @@ conjugate_vector(vector float *v, int nelements)
}
static void
-gcs_fft_1d_r2(const gc_job_direct_args_t *input,
- gc_job_direct_args_t *output __attribute__((unused)),
- const gc_job_ea_args_t *eaa)
+gcs_fwd_fft_1d_r2(const gc_job_direct_args_t *input,
+ gc_job_direct_args_t *output __attribute__((unused)),
+ const gc_job_ea_args_t *eaa)
{
- vector float *out = (vector float *) eaa->arg[0].ls_addr;
- vector float *in = (vector float *) eaa->arg[1].ls_addr;
- vector float *W = (vector float *) eaa->arg[2].ls_addr;
+ vector float *out = (vector float *) eaa->arg[0].ls_addr; // complex
+ vector float *in = (vector float *) eaa->arg[1].ls_addr; // complex
+ vector float *twiddle = (vector float *) eaa->arg[2].ls_addr; // complex
+ vector float *window = (vector float *) eaa->arg[3].ls_addr; // float
+
int log2_fft_length = input->arg[0].u32;
- int forward = input->arg[1].u32; // non-zero if forward xform
+ int shift = input->arg[1].u32; // non-zero if we should apply fftshift
- if (forward){
- fft_1d_r2(out, in, W, log2_fft_length);
+ if (eaa->arg[3].get_size){ // apply window
+ // FIXME pointwise multiply in *= window
+ assert(0);
}
- else {
- conjugate_vector(in, 1 << (log2_fft_length - 1));
- fft_1d_r2(out, in, W, log2_fft_length);
- conjugate_vector(out, 1 << (log2_fft_length - 1));
+
+ fft_1d_r2(out, in, twiddle, log2_fft_length);
+
+ if (shift){
+ // FIXME apply "fftshift" to output data in-place
+ assert(0);
}
}
-GC_DECLARE_PROC(gcs_fft_1d_r2, "fft_1d_r2");
+GC_DECLARE_PROC(gcs_fwd_fft_1d_r2, "fwd_fft_1d_r2");
+
+static void
+gcs_inv_fft_1d_r2(const gc_job_direct_args_t *input,
+ gc_job_direct_args_t *output __attribute__((unused)),
+ const gc_job_ea_args_t *eaa)
+{
+ vector float *out = (vector float *) eaa->arg[0].ls_addr; // complex
+ vector float *in = (vector float *) eaa->arg[1].ls_addr; // complex
+ vector float *twiddle = (vector float *) eaa->arg[2].ls_addr; // complex
+ vector float *window = (vector float *) eaa->arg[3].ls_addr; // float
+
+ int log2_fft_length = input->arg[0].u32;
+ int shift = input->arg[1].u32; // non-zero if we should apply fftshift
+
+ if (eaa->arg[3].get_size){ // apply window
+ // FIXME pointwise multiply in *= window
+ assert(0);
+ }
+
+ if (shift){
+ // FIXME apply "fftshift" to input data in-place
+ assert(0);
+ }
+
+ conjugate_vector(in, 1 << (log2_fft_length - 1));
+ fft_1d_r2(out, in, twiddle, log2_fft_length);
+ conjugate_vector(out, 1 << (log2_fft_length - 1));
+}
+
+GC_DECLARE_PROC(gcs_inv_fft_1d_r2, "inv_fft_1d_r2");