summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/doxygen/other/main_page.dox10
-rw-r--r--docs/doxygen/other/volk_guide.dox161
-rw-r--r--gnuradio-core/src/lib/CMakeLists.txt4
-rw-r--r--gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc3
-rw-r--r--gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc2
-rw-r--r--gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc5
-rw-r--r--gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc5
-rw-r--r--gnuradio-core/src/lib/general/CMakeLists.txt10
-rw-r--r--gnuradio-core/src/lib/general/general.i16
-rw-r--r--gnuradio-core/src/lib/general/general_generated.i10
-rw-r--r--gnuradio-core/src/lib/general/gr_add_ff.cc66
-rw-r--r--gnuradio-core/src/lib/general/gr_add_ff.h56
-rw-r--r--gnuradio-core/src/lib/general/gr_add_ff.i32
-rw-r--r--gnuradio-core/src/lib/general/gr_char_to_float.cc39
-rw-r--r--gnuradio-core/src/lib/general/gr_char_to_float.h14
-rw-r--r--gnuradio-core/src/lib/general/gr_char_to_float.i7
-rw-r--r--gnuradio-core/src/lib/general/gr_char_to_short.cc64
-rw-r--r--gnuradio-core/src/lib/general/gr_char_to_short.h56
-rw-r--r--gnuradio-core/src/lib/general/gr_char_to_short.i30
-rw-r--r--gnuradio-core/src/lib/general/gr_complex_to_xxx.cc79
-rw-r--r--gnuradio-core/src/lib/general/gr_complex_to_xxx.h5
-rw-r--r--gnuradio-core/src/lib/general/gr_conjugate_cc.cc27
-rw-r--r--gnuradio-core/src/lib/general/gr_float_to_char.cc39
-rw-r--r--gnuradio-core/src/lib/general/gr_float_to_char.h16
-rw-r--r--gnuradio-core/src/lib/general/gr_float_to_char.i9
-rw-r--r--gnuradio-core/src/lib/general/gr_float_to_int.cc55
-rw-r--r--gnuradio-core/src/lib/general/gr_float_to_int.h16
-rw-r--r--gnuradio-core/src/lib/general/gr_float_to_int.i9
-rw-r--r--gnuradio-core/src/lib/general/gr_float_to_short.cc39
-rw-r--r--gnuradio-core/src/lib/general/gr_float_to_short.h15
-rw-r--r--gnuradio-core/src/lib/general/gr_float_to_short.i9
-rw-r--r--gnuradio-core/src/lib/general/gr_int_to_float.cc27
-rw-r--r--gnuradio-core/src/lib/general/gr_int_to_float.h16
-rw-r--r--gnuradio-core/src/lib/general/gr_int_to_float.i8
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_cc.cc69
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_cc.h56
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_cc.i32
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.cc69
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.h57
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.i32
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_const_cc.cc80
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_const_cc.h60
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_const_cc.i33
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_const_ff.cc80
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_const_ff.h60
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_const_ff.i33
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_ff.cc69
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_ff.h56
-rw-r--r--gnuradio-core/src/lib/general/gr_multiply_ff.i32
-rw-r--r--gnuradio-core/src/lib/general/gr_short_to_char.cc67
-rw-r--r--gnuradio-core/src/lib/general/gr_short_to_char.h56
-rw-r--r--gnuradio-core/src/lib/general/gr_short_to_char.i31
-rw-r--r--gnuradio-core/src/lib/general/gr_short_to_float.cc38
-rw-r--r--gnuradio-core/src/lib/general/gr_short_to_float.h18
-rw-r--r--gnuradio-core/src/lib/general/gr_short_to_float.i9
-rw-r--r--gnuradio-core/src/lib/general/gri_float_to_int.cc4
-rw-r--r--gnuradio-core/src/lib/general/gri_float_to_int.h2
-rw-r--r--gnuradio-core/src/lib/gengen/CMakeLists.txt6
-rw-r--r--gnuradio-core/src/lib/gengen/Makefile.gen15
-rwxr-xr-xgnuradio-core/src/lib/gengen/generate_common.py8
-rw-r--r--gnuradio-core/src/lib/runtime/gr_block.cc30
-rw-r--r--gnuradio-core/src/lib/runtime/gr_block.h28
-rw-r--r--gnuradio-core/src/lib/runtime/gr_block_executor.cc55
-rwxr-xr-xgnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py34
-rwxr-xr-xgnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py2
-rw-r--r--gnuradio-core/src/python/gnuradio/gr/qa_conjugate.py53
-rwxr-xr-xgnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py82
-rwxr-xr-x[-rw-r--r--]gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py22
-rwxr-xr-xgnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py86
-rwxr-xr-xgnuradio-core/src/python/gnuradio/gr/qa_float_to_uchar.py64
-rwxr-xr-xgnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py20
-rw-r--r--gnuradio-core/src/python/gnuradio/gr/qa_multiply_conjugate.py57
-rwxr-xr-xgnuradio-core/src/python/gnuradio/gr/qa_short_to_char.py69
-rwxr-xr-xgnuradio-core/src/python/gnuradio/gr/qa_short_to_float.py70
-rw-r--r--gnuradio-examples/python/volk_benchmark/README252
-rwxr-xr-xgnuradio-examples/python/volk_benchmark/volk_math.py152
-rwxr-xr-xgnuradio-examples/python/volk_benchmark/volk_plot.py169
-rw-r--r--gnuradio-examples/python/volk_benchmark/volk_test_funcs.py171
-rwxr-xr-xgnuradio-examples/python/volk_benchmark/volk_types.py182
-rw-r--r--volk/apps/CMakeLists.txt7
-rw-r--r--volk/apps/volk_profile.cc18
-rw-r--r--volk/include/volk/volk_32f_s32f_convert_16i_a.h61
-rw-r--r--volk/include/volk/volk_32f_s32f_convert_16i_u.h61
-rw-r--r--volk/include/volk/volk_32f_s32f_convert_32i_a.h60
-rw-r--r--volk/include/volk/volk_32f_s32f_convert_32i_u.h45
-rw-r--r--volk/include/volk/volk_32f_s32f_convert_8i_a.h53
-rw-r--r--volk/include/volk/volk_32f_s32f_convert_8i_u.h53
-rw-r--r--volk/include/volk/volk_32f_s32f_multiply_32f_a.h75
-rw-r--r--volk/include/volk/volk_32f_s32f_multiply_32f_u.h102
-rw-r--r--volk/include/volk/volk_32f_x2_add_32f_u.h66
-rw-r--r--volk/include/volk/volk_32f_x2_multiply_32f_u.h106
-rw-r--r--volk/include/volk/volk_32fc_conjugate_32fc_a.h64
-rw-r--r--volk/include/volk/volk_32fc_conjugate_32fc_u.h64
-rw-r--r--volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h68
-rw-r--r--volk/include/volk/volk_32fc_magnitude_32f_u.h118
-rw-r--r--volk/include/volk/volk_32fc_magnitude_squared_32f_a.h114
-rw-r--r--volk/include/volk/volk_32fc_magnitude_squared_32f_u.h114
-rw-r--r--volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h71
-rw-r--r--volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h87
-rw-r--r--volk/include/volk/volk_32fc_x2_multiply_32fc_a.h1
-rw-r--r--volk/include/volk/volk_32fc_x2_multiply_32fc_u.h77
-rw-r--r--volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h81
-rw-r--r--volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h81
-rw-r--r--volk/include/volk/volk_64u_popcnt_a.h8
-rw-r--r--volk/lib/qa_utils.cc32
-rw-r--r--volk/lib/qa_utils.h5
-rw-r--r--volk/lib/testqa.cc15
107 files changed, 5075 insertions, 261 deletions
diff --git a/docs/doxygen/other/main_page.dox b/docs/doxygen/other/main_page.dox
index 0caa0b20f..68b098943 100644
--- a/docs/doxygen/other/main_page.dox
+++ b/docs/doxygen/other/main_page.dox
@@ -38,4 +38,14 @@ More details on packages in GNU Radio:
\li \ref page_uhd
\li \ref page_vocoder
\li \ref page_pfb
+
+\section volk_main Using Volk in GNU Radio
+
+The \ref volk_guide page provides an overview of how to incorporate
+and use Volk in GNU Radio blocks.
+
+Many blocks have already been converted to use Volk in their calls, so
+they can also serve as examples. See the gr_complex_to_xxx.h file for
+examples of various blocks that make use of Volk.
+
*/
diff --git a/docs/doxygen/other/volk_guide.dox b/docs/doxygen/other/volk_guide.dox
new file mode 100644
index 000000000..d898f3864
--- /dev/null
+++ b/docs/doxygen/other/volk_guide.dox
@@ -0,0 +1,161 @@
+/*! \page volk_guide Instructions for using Volk in GNU Radio
+
+\section volk_intro Introduction
+
+Volk is the Vector-Optimized Library of Kernels. It is a library that
+contains kernels of hand-written SIMD code for different mathematical
+operations. Since each SIMD architecture can be greatly different and
+no compiler has yet come along to handle vectorization properly or
+highly efficiently, Volk approaches the problem differently. For each
+architecture or platform that a developer wishes to vectorize for, a
+new proto-kernel is added to Volk. At runtime, Volk will select the
+correct proto-kernel. In this way, the users of Volk call a kernel for
+performing the operation that is platform/architecture agnostic. This
+allows us to write portable SIMD code.
+
+Volk kernels are always defined with a 'generic' proto-kernel, which
+is written in plain C. With the generic kernel, the kernel becomes
+portable to any platform. Kernels are then extended by adding
+proto-kernels for new platforms in which they are desired.
+
+A good example of a Volk kernel with multiple proto-kernels defined is
+the volk_32f_s32f_multiply_32f_a. This kernel implements a scalar
+multiplication of a vector of floating point numbers (each item in the
+vector is multiplied by the same value). This kernel has the following
+proto-kernels that are defined for 'generic,' 'avx,' 'sse,' and 'orc.'
+
+\code
+ void volk_32f_s32f_multiply_32f_a_generic
+ void volk_32f_s32f_multiply_32f_a_sse
+ void volk_32f_s32f_multiply_32f_a_avx
+ void volk_32f_s32f_multiply_32f_a_orc
+\endcode
+
+These proto-kernels means that on platforms with AVX support, Volk can
+select this option or the SSE option, depending on which is faster. On
+other platforms, the ORC SIMD compiler might provide a solution. If
+all else fails, Volk can fall back on the generic proto-kernel, which
+will always work.
+
+Just a note on ORC. ORC is a SIMD compiler library that uses a generic
+assembly-like language for SIMD commands. Based on the available SIMD
+architecture of a system, it will try and compile a good
+solution. Tests show that the results of ORC proto-kernels are
+generally better than the generic versions but often not as good as
+the hand-tuned proto-kernels for a specific SIMD architecture. This
+is, of course, to be expected, and ORC provides a nice intermediary
+step to performance improvements until a specific hand-tuned
+proto-kernel can be made for a given platform.
+
+See <a
+href="http://gnuradio.org/redmine/projects/gnuradio/wiki/Volk">Volk on
+gnuradio.org</a> for details on the Volk naming scheme.
+
+
+\section volk_alignment Setting and Using Memory Alignment Information
+
+For Volk to work as best as possible, we want to use memory-aligned
+SIMD calls, which means we have to have some way of knowing and
+controlling the alignment of the buffers passed to gr_block's work
+function. We set the alignment requirement for SIMD aligned memory
+calls with:
+
+\code
+ const int alignment_multiple =
+ volk_get_alignment() / output_item_size;
+ set_alignment(alignment_multiple);
+\endcode
+
+The Volk function 'volk_get_alignment' provides the alignment of the
+the machine architecture. We then base the alignment on the number of
+output items required to maintain the alignment, so we divide the
+number of alignment bytes by the number of bytes in an output items
+(sizeof(float), sizeof(gr_complex), etc.). This value is then set per
+block with the 'set_alignment' function.
+
+Because the scheduler tries to optimize throughput, the number of
+items available per call to work will change and depends on the
+availability of the read and write buffers. This means that it
+sometimes cannot produce a buffer that is properly memory
+aligned. This is an inevitable consequence of the scheduler
+system. Instead of requiring alignment, the scheduler enforces the
+alignment as much as possible, and when a buffer becomes unaligned,
+the scheduler will work to correct it as much as possible. If a
+block's buffers are unaligned, then, the scheduler sets a flag to
+indicate as much so that the block can then decide what best to
+do. The next section discusses the use of the aligned/unaligned
+information in a gr_block's work function.
+
+
+\section volk_work Using Alignment Properties in Work()
+
+The buffers passed to work/general_work in a gr_block are not
+guaranteed to be aligned, but they will mostly be aligned whenever
+possible. When not aligned, the 'is_unaligned()' flag will be set. So
+a block can know if its buffers are aligned and make the right
+decisions. This looks like:
+
+\code
+int
+gr_some_block::work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items)
+{
+ const float *in = (const float *) input_items[0];
+ float *out = (float *) output_items[0];
+
+ if(is_unaligned()) {
+ // do something with unaligned data. This can either be a manual
+ // handling of the items or a call to an unaligned Volk function.
+ volk_32f_something_32f_u(out, in, noutput_items);
+ }
+ else {
+ // Buffers are aligned; can call the aligned Volk function.
+ volk_32f_something_32f_a(out, in, noutput_items);
+ }
+
+ return noutput_items;
+}
+\endcode
+
+
+
+\section volk_tuning Tuning Volk Performance
+
+VOLK comes with a profiler that will build a config file for the best
+SIMD architecture for your processor. Run volk_profile that is
+installed into $PREFIX/bin. This program tests all known VOLK kernels
+for each architecture supported by the processor. When finished, it
+will write to $HOME/.volk/volk_config the best architecture for the
+VOLK function. This file is read when using a function to know the
+best version of the function to execute.
+
+\subsection volk_hand_tuning Hand-Tuning Performance
+
+If you know a particular architecture works best for your processor,
+you can specify the particular architecture to use in the VOLK
+preferences file: $HOME/.volk/volk_config
+
+The file looks like:
+
+\code
+ volk_<FUNCTION_NAME> <ARCHITECTURE>
+\endcode
+
+Where the "FUNCTION_NAME" is the particular function that you want to
+over-ride the default value and "ARCHITECTURE" is the VOLK SIMD
+architecture to use (generic, sse, sse2, sse3, avx, etc.). For
+example, the following config file tells VOLK to use SSE3 for the
+aligned and unaligned versions of a function that multiplies two
+complex streams together.
+
+\code
+ volk_32fc_x2_multiply_32fc_a sse3
+ volk_32fc_x2_multiply_32fc_u sse3
+\endcode
+
+\b Tip: if benchmarking GNU Radio blocks, it can be useful to have a
+volk_config file that sets all architectures to 'generic' as a way to
+test the vectorized versus non-vectorized implementations.
+
+*/
diff --git a/gnuradio-core/src/lib/CMakeLists.txt b/gnuradio-core/src/lib/CMakeLists.txt
index 3fbe8f807..86f88242c 100644
--- a/gnuradio-core/src/lib/CMakeLists.txt
+++ b/gnuradio-core/src/lib/CMakeLists.txt
@@ -42,6 +42,7 @@ list(APPEND test_gnuradio_core_sources bug_work_around_6.cc)
# Setup the include and linker paths
########################################################################
include_directories(${GNURADIO_CORE_INCLUDE_DIRS})
+include_directories(${VOLK_INCLUDE_DIRS})
include_directories(${Boost_INCLUDE_DIRS})
link_directories(${Boost_LIBRARY_DIRS})
@@ -73,6 +74,9 @@ if(LINUX)
list(APPEND gnuradio_core_libs rt)
endif()
+# Link against libvolk
+list(APPEND gnuradio_core_libs volk)
+
add_library(gnuradio-core SHARED ${gnuradio_core_sources})
target_link_libraries(gnuradio-core ${gnuradio_core_libs})
GR_LIBRARY_FOO(gnuradio-core RUNTIME_COMPONENT "core_runtime" DEVEL_COMPONENT "core_devel")
diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc
index 63b52411e..5968e487e 100644
--- a/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc
+++ b/gnuradio-core/src/lib/filter/gr_fft_filter_ccc.cc
@@ -30,7 +30,6 @@
#endif
#include <gr_fft_filter_ccc.h>
-//#include <gri_fft_filter_ccc_sse.h>
#include <gri_fft_filter_ccc_generic.h>
#include <gr_io_signature.h>
#include <gri_fft.h>
@@ -61,11 +60,13 @@ gr_fft_filter_ccc::gr_fft_filter_ccc (int decimation,
d_updated(false)
{
set_history(1);
+
#if 1 // don't enable the sse version until handling it is worked out
d_filter = new gri_fft_filter_ccc_generic(decimation, taps, nthreads);
#else
d_filter = new gri_fft_filter_ccc_sse(decimation, taps);
#endif
+
d_new_taps = taps;
d_nsamples = d_filter->set_taps(taps);
set_output_multiple(d_nsamples);
diff --git a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc
index 0e4072f63..e4a669150 100644
--- a/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc
+++ b/gnuradio-core/src/lib/filter/gr_fft_filter_fff.cc
@@ -26,7 +26,6 @@
#include <gr_fft_filter_fff.h>
#include <gri_fft_filter_fff_generic.h>
-//#include <gri_fft_filter_fff_sse.h>
#include <gr_io_signature.h>
#include <assert.h>
#include <stdexcept>
@@ -59,6 +58,7 @@ gr_fft_filter_fff::gr_fft_filter_fff (int decimation,
#else
d_filter = new gri_fft_filter_fff_sse(decimation, taps);
#endif
+
d_new_taps = taps;
d_nsamples = d_filter->set_taps(taps);
set_output_multiple(d_nsamples);
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc
index 47e18b3e3..c894d62aa 100644
--- a/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_ccc_generic.cc
@@ -26,6 +26,7 @@
#include <gri_fft_filter_ccc_generic.h>
#include <gri_fft.h>
+#include <volk/volk.h>
#include <assert.h>
#include <stdexcept>
#include <cstdio>
@@ -154,9 +155,7 @@ gri_fft_filter_ccc_generic::filter (int nitems, const gr_complex *input, gr_comp
gr_complex *b = &d_xformed_taps[0];
gr_complex *c = d_invfft->get_inbuf();
- for (j = 0; j < d_fftsize; j+=1) { // filter in the freq domain
- c[j] = a[j] * b[j];
- }
+ volk_32fc_x2_multiply_32fc_a(c, a, b, d_fftsize);
d_invfft->execute(); // compute inv xform
diff --git a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc
index 598bf268c..e7f66b714 100644
--- a/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc
+++ b/gnuradio-core/src/lib/filter/gri_fft_filter_fff_generic.cc
@@ -26,6 +26,7 @@
#include <gri_fft_filter_fff_generic.h>
#include <gri_fft.h>
+#include <volk/volk.h>
#include <assert.h>
#include <stdexcept>
#include <cstdio>
@@ -141,9 +142,7 @@ gri_fft_filter_fff_generic::filter (int nitems, const float *input, float *outpu
gr_complex *b = &d_xformed_taps[0];
gr_complex *c = d_invfft->get_inbuf();
- for (j = 0; j < d_fftsize/2+1; j++) { // filter in the freq domain
- c[j] = a[j] * b[j];
- }
+ volk_32fc_x2_multiply_32fc_a(c, a, b, d_fftsize/2+1);
d_invfft->execute(); // compute inv xform
diff --git a/gnuradio-core/src/lib/general/CMakeLists.txt b/gnuradio-core/src/lib/general/CMakeLists.txt
index 6ecaa930a..2bc639cd3 100644
--- a/gnuradio-core/src/lib/general/CMakeLists.txt
+++ b/gnuradio-core/src/lib/general/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2010-2011 Free Software Foundation, Inc.
+# Copyright 2010-2012 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
@@ -178,6 +178,7 @@ endif(ENABLE_PYTHON)
########################################################################
set(gr_core_general_triple_threats
complex_vec_test
+ gr_add_ff
gr_additive_scrambler_bb
gr_agc_cc
gr_agc_ff
@@ -187,6 +188,7 @@ set(gr_core_general_triple_threats
gr_bin_statistics_f
gr_bytes_to_syms
gr_char_to_float
+ gr_char_to_short
gr_check_counting_s
gr_check_lfsr_32k_s
gr_complex_to_interleaved_short
@@ -229,6 +231,11 @@ set(gr_core_general_triple_threats
gr_kludge_copy
gr_lfsr_32k_source_s
gr_map_bb
+ gr_multiply_cc
+ gr_multiply_ff
+ gr_multiply_const_cc
+ gr_multiply_const_ff
+ gr_multiply_conjugate_cc
gr_nlog10_ff
gr_nop
gr_null_sink
@@ -256,6 +263,7 @@ set(gr_core_general_triple_threats
gr_rms_ff
gr_repeat
gr_short_to_float
+ gr_short_to_char
gr_simple_correlator
gr_simple_framer
gr_simple_squelch_cc
diff --git a/gnuradio-core/src/lib/general/general.i b/gnuradio-core/src/lib/general/general.i
index 5a701bf80..89738b01a 100644
--- a/gnuradio-core/src/lib/general/general.i
+++ b/gnuradio-core/src/lib/general/general.i
@@ -44,8 +44,10 @@
#include <gr_float_to_char.h>
#include <gr_float_to_uchar.h>
#include <gr_short_to_float.h>
+#include <gr_short_to_char.h>
#include <gr_int_to_float.h>
#include <gr_char_to_float.h>
+#include <gr_char_to_short.h>
#include <gr_uchar_to_float.h>
#include <gr_frequency_modulator_fc.h>
#include <gr_phase_modulator_fc.h>
@@ -104,6 +106,11 @@
#include <gr_diff_decoder_bb.h>
#include <gr_framer_sink_1.h>
#include <gr_map_bb.h>
+#include <gr_multiply_cc.h>
+#include <gr_multiply_ff.h>
+#include <gr_multiply_const_cc.h>
+#include <gr_multiply_const_ff.h>
+#include <gr_multiply_conjugate_cc.h>
#include <gr_feval.h>
#include <gr_pwr_squelch_cc.h>
#include <gr_pwr_squelch_ff.h>
@@ -134,6 +141,7 @@
#include <gr_burst_tagger.h>
#include <gr_cpm.h>
#include <gr_correlate_access_code_tag_bb.h>
+#include <gr_add_ff.h>
%}
%include "gri_control_loop.i"
@@ -158,8 +166,10 @@
%include "gr_float_to_char.i"
%include "gr_float_to_uchar.i"
%include "gr_short_to_float.i"
+%include "gr_short_to_char.i"
%include "gr_int_to_float.i"
%include "gr_char_to_float.i"
+%include "gr_char_to_short.i"
%include "gr_uchar_to_float.i"
%include "gr_frequency_modulator_fc.i"
%include "gr_phase_modulator_fc.i"
@@ -218,6 +228,11 @@
%include "gr_diff_decoder_bb.i"
%include "gr_framer_sink_1.i"
%include "gr_map_bb.i"
+%include "gr_multiply_cc.i"
+%include "gr_multiply_ff.i"
+%include "gr_multiply_const_cc.i"
+%include "gr_multiply_const_ff.i"
+%include "gr_multiply_conjugate_cc.i"
%include "gr_feval.i"
%include "gr_pwr_squelch_cc.i"
%include "gr_pwr_squelch_ff.i"
@@ -248,3 +263,4 @@
%include "gr_burst_tagger.i"
%include "gr_cpm.i"
%include "gr_correlate_access_code_tag_bb.i"
+%include "gr_add_ff.i"
diff --git a/gnuradio-core/src/lib/general/general_generated.i b/gnuradio-core/src/lib/general/general_generated.i
index a41f30a3d..e12f2b0ec 100644
--- a/gnuradio-core/src/lib/general/general_generated.i
+++ b/gnuradio-core/src/lib/general/general_generated.i
@@ -12,7 +12,6 @@
#include <gr_add_const_vff.h>
#include <gr_add_const_vii.h>
#include <gr_add_const_vss.h>
-#include <gr_add_ff.h>
#include <gr_add_ii.h>
#include <gr_add_ss.h>
#include <gr_add_vcc.h>
@@ -29,16 +28,12 @@
#include <gr_divide_ff.h>
#include <gr_divide_ii.h>
#include <gr_divide_ss.h>
-#include <gr_multiply_cc.h>
-#include <gr_multiply_const_cc.h>
-#include <gr_multiply_const_ff.h>
#include <gr_multiply_const_ii.h>
#include <gr_multiply_const_ss.h>
#include <gr_multiply_const_vcc.h>
#include <gr_multiply_const_vff.h>
#include <gr_multiply_const_vii.h>
#include <gr_multiply_const_vss.h>
-#include <gr_multiply_ff.h>
#include <gr_multiply_ii.h>
#include <gr_multiply_ss.h>
#include <gr_multiply_vcc.h>
@@ -89,7 +84,6 @@
%include <gr_add_const_vff.i>
%include <gr_add_const_vii.i>
%include <gr_add_const_vss.i>
-%include <gr_add_ff.i>
%include <gr_add_ii.i>
%include <gr_add_ss.i>
%include <gr_add_vcc.i>
@@ -106,16 +100,12 @@
%include <gr_divide_ff.i>
%include <gr_divide_ii.i>
%include <gr_divide_ss.i>
-%include <gr_multiply_cc.i>
-%include <gr_multiply_const_cc.i>
-%include <gr_multiply_const_ff.i>
%include <gr_multiply_const_ii.i>
%include <gr_multiply_const_ss.i>
%include <gr_multiply_const_vcc.i>
%include <gr_multiply_const_vff.i>
%include <gr_multiply_const_vii.i>
%include <gr_multiply_const_vss.i>
-%include <gr_multiply_ff.i>
%include <gr_multiply_ii.i>
%include <gr_multiply_ss.i>
%include <gr_multiply_vcc.i>
diff --git a/gnuradio-core/src/lib/general/gr_add_ff.cc b/gnuradio-core/src/lib/general/gr_add_ff.cc
new file mode 100644
index 000000000..fc5455c98
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_add_ff.cc
@@ -0,0 +1,66 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_add_ff.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_add_ff_sptr
+gr_make_add_ff(size_t vlen)
+{
+ return gnuradio::get_initial_sptr(new gr_add_ff(vlen));
+}
+
+gr_add_ff::gr_add_ff (size_t vlen)
+ : gr_sync_block("add_ff",
+ gr_make_io_signature (1, -1, sizeof(float)*vlen),
+ gr_make_io_signature (1, 1, sizeof(float)*vlen)),
+ d_vlen (vlen)
+{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(alignment_multiple);
+}
+
+int
+gr_add_ff::work(int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items)
+{
+ float *out = (float *) output_items[0];
+ int noi = d_vlen*noutput_items;
+
+ memcpy(out, input_items[0], noi*sizeof(float));
+ if(is_unaligned()) {
+ for(size_t i = 1; i < input_items.size(); i++)
+ volk_32f_x2_add_32f_u(out, out, (const float*)input_items[i], noi);
+ }
+ else {
+ for(size_t i = 1; i < input_items.size(); i++)
+ volk_32f_x2_add_32f_a(out, out, (const float*)input_items[i], noi);
+ }
+ return noutput_items;
+}
diff --git a/gnuradio-core/src/lib/general/gr_add_ff.h b/gnuradio-core/src/lib/general/gr_add_ff.h
new file mode 100644
index 000000000..6421f8da2
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_add_ff.h
@@ -0,0 +1,56 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_ADD_FF_H
+#define INCLUDED_GR_ADD_FF_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_add_ff;
+typedef boost::shared_ptr<gr_add_ff> gr_add_ff_sptr;
+
+GR_CORE_API gr_add_ff_sptr
+gr_make_add_ff (size_t vlen=1);
+
+/*!
+ * \brief Add streams of complex values
+ * \ingroup math_blk
+ */
+
+class GR_CORE_API gr_add_ff : public gr_sync_block
+{
+ private:
+ friend GR_CORE_API gr_add_ff_sptr
+ gr_make_add_ff (size_t vlen);
+ gr_add_ff (size_t vlen);
+
+ size_t d_vlen;
+
+ public:
+ virtual int work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_ADD_FF_H */
diff --git a/gnuradio-core/src/lib/general/gr_add_ff.i b/gnuradio-core/src/lib/general/gr_add_ff.i
new file mode 100644
index 000000000..3c30640b1
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_add_ff.i
@@ -0,0 +1,32 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,add_ff)
+
+gr_add_ff_sptr
+gr_make_add_ff (size_t vlen=1);
+
+class gr_add_ff : public gr_sync_block
+{
+public:
+
+};
diff --git a/gnuradio-core/src/lib/general/gr_char_to_float.cc b/gnuradio-core/src/lib/general/gr_char_to_float.cc
index e68f8d208..ffe8ee4a1 100644
--- a/gnuradio-core/src/lib/general/gr_char_to_float.cc
+++ b/gnuradio-core/src/lib/general/gr_char_to_float.cc
@@ -26,30 +26,47 @@
#include <gr_char_to_float.h>
#include <gr_io_signature.h>
-#include <gri_char_to_float.h>
+#include <volk/volk.h>
gr_char_to_float_sptr
-gr_make_char_to_float ()
+gr_make_char_to_float (size_t vlen, float scale)
{
- return gnuradio::get_initial_sptr(new gr_char_to_float ());
+ return gnuradio::get_initial_sptr(new gr_char_to_float (vlen, scale));
}
-gr_char_to_float::gr_char_to_float ()
+gr_char_to_float::gr_char_to_float (size_t vlen, float scale)
: gr_sync_block ("gr_char_to_float",
- gr_make_io_signature (1, 1, sizeof (char)),
- gr_make_io_signature (1, 1, sizeof (float)))
+ gr_make_io_signature (1, 1, sizeof (char)*vlen),
+ gr_make_io_signature (1, 1, sizeof (float)*vlen)),
+ d_vlen(vlen), d_scale(scale)
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(alignment_multiple);
+}
+
+float
+gr_char_to_float::scale() const
+{
+ return d_scale;
+}
+
+void
+gr_char_to_float::set_scale(float scale)
+{
+ d_scale = scale;
}
int
gr_char_to_float::work (int noutput_items,
- gr_vector_const_void_star &input_items,
- gr_vector_void_star &output_items)
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items)
{
- const char *in = (const char *) input_items[0];
+ const int8_t *in = (const int8_t *) input_items[0];
float *out = (float *) output_items[0];
- gri_char_to_float (in, out, noutput_items);
-
+ // Note: the unaligned benchmarked much faster than the aligned
+ volk_8i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items);
+
return noutput_items;
}
diff --git a/gnuradio-core/src/lib/general/gr_char_to_float.h b/gnuradio-core/src/lib/general/gr_char_to_float.h
index b20d2066f..4ad8e59a8 100644
--- a/gnuradio-core/src/lib/general/gr_char_to_float.h
+++ b/gnuradio-core/src/lib/general/gr_char_to_float.h
@@ -30,7 +30,7 @@ class gr_char_to_float;
typedef boost::shared_ptr<gr_char_to_float> gr_char_to_float_sptr;
GR_CORE_API gr_char_to_float_sptr
-gr_make_char_to_float ();
+gr_make_char_to_float (size_t vlen=1, float scale=1);
/*!
* \brief Convert stream of chars to a stream of float
@@ -39,10 +39,18 @@ gr_make_char_to_float ();
class GR_CORE_API gr_char_to_float : public gr_sync_block
{
- friend GR_CORE_API gr_char_to_float_sptr gr_make_char_to_float ();
- gr_char_to_float ();
+ private:
+ friend GR_CORE_API gr_char_to_float_sptr
+ gr_make_char_to_float (size_t vlen, float scale);
+ gr_char_to_float (size_t vlen, float scale);
+
+ size_t d_vlen;
+ float d_scale;
public:
+ float scale() const;
+ void set_scale(float scale);
+
virtual int work (int noutput_items,
gr_vector_const_void_star &input_items,
gr_vector_void_star &output_items);
diff --git a/gnuradio-core/src/lib/general/gr_char_to_float.i b/gnuradio-core/src/lib/general/gr_char_to_float.i
index 0403b621d..65ad861f2 100644
--- a/gnuradio-core/src/lib/general/gr_char_to_float.i
+++ b/gnuradio-core/src/lib/general/gr_char_to_float.i
@@ -22,9 +22,12 @@
GR_SWIG_BLOCK_MAGIC(gr,char_to_float)
-gr_char_to_float_sptr gr_make_char_to_float ();
+gr_char_to_float_sptr
+gr_make_char_to_float (size_t vlen=1, float scale=1);
class gr_char_to_float : public gr_sync_block
{
- gr_char_to_float ();
+public:
+ float scale() const;
+ void set_scale(float scale);
};
diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.cc b/gnuradio-core/src/lib/general/gr_char_to_short.cc
new file mode 100644
index 000000000..8b6cd0be1
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_char_to_short.cc
@@ -0,0 +1,64 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2011,2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_char_to_short.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_char_to_short_sptr
+gr_make_char_to_short (size_t vlen)
+{
+ return gnuradio::get_initial_sptr(new gr_char_to_short (vlen));
+}
+
+gr_char_to_short::gr_char_to_short (size_t vlen)
+ : gr_sync_block ("gr_char_to_short",
+ gr_make_io_signature (1, 1, sizeof (char)*vlen),
+ gr_make_io_signature (1, 1, sizeof (short)*vlen)),
+ d_vlen(vlen)
+{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(char);
+ set_alignment(alignment_multiple);
+}
+
+int
+gr_char_to_short::work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items)
+{
+ const int8_t *in = (const int8_t *) input_items[0];
+ int16_t *out = (int16_t *) output_items[0];
+
+ if(is_unaligned()) {
+ volk_8i_convert_16i_u(out, in, d_vlen*noutput_items);
+ }
+ else {
+ volk_8i_convert_16i_a(out, in, d_vlen*noutput_items);
+ }
+
+ return noutput_items;
+}
diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.h b/gnuradio-core/src/lib/general/gr_char_to_short.h
new file mode 100644
index 000000000..58f9a62b0
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_char_to_short.h
@@ -0,0 +1,56 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2011,2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_CHAR_TO_SHORT_H
+#define INCLUDED_GR_CHAR_TO_SHORT_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_char_to_short;
+typedef boost::shared_ptr<gr_char_to_short> gr_char_to_short_sptr;
+
+GR_CORE_API gr_char_to_short_sptr
+gr_make_char_to_short (size_t vlen=1);
+
+/*!
+ * \brief Convert stream of chars to a stream of float
+ * \ingroup converter_blk
+ */
+
+class GR_CORE_API gr_char_to_short : public gr_sync_block
+{
+ private:
+ friend GR_CORE_API gr_char_to_short_sptr
+ gr_make_char_to_short (size_t vlen);
+ gr_char_to_short (size_t vlen);
+
+ size_t d_vlen;
+
+ public:
+ virtual int work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_CHAR_TO_SHORT_H */
diff --git a/gnuradio-core/src/lib/general/gr_char_to_short.i b/gnuradio-core/src/lib/general/gr_char_to_short.i
new file mode 100644
index 000000000..48ddbf26b
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_char_to_short.i
@@ -0,0 +1,30 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,char_to_short)
+
+gr_char_to_short_sptr gr_make_char_to_short (size_t vlen=1);
+
+class gr_char_to_short : public gr_sync_block
+{
+
+};
diff --git a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc
index a59c127f3..108a92835 100644
--- a/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc
+++ b/gnuradio-core/src/lib/general/gr_complex_to_xxx.cc
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2004,2008,2010 Free Software Foundation, Inc.
+ * Copyright 2004,2008,2010,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -27,6 +27,7 @@
#include <gr_complex_to_xxx.h>
#include <gr_io_signature.h>
#include <gr_math.h>
+#include <volk/volk.h>
// ----------------------------------------------------------------
@@ -42,6 +43,9 @@ gr_complex_to_float::gr_complex_to_float (unsigned int vlen)
gr_make_io_signature (1, 2, sizeof (float) * vlen)),
d_vlen(vlen)
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(alignment_multiple);
}
int
@@ -56,16 +60,26 @@ gr_complex_to_float::work (int noutput_items,
switch (output_items.size ()){
case 1:
- for (int i = 0; i < noi; i++){
- out0[i] = in[i].real ();
+ if(is_unaligned()) {
+ for (int i = 0; i < noi; i++){
+ out0[i] = in[i].real ();
+ }
+ }
+ else {
+ volk_32fc_deinterleave_real_32f_a(out0, in, noi);
}
break;
case 2:
out1 = (float *) output_items[1];
- for (int i = 0; i < noi; i++){
- out0[i] = in[i].real ();
- out1[i] = in[i].imag ();
+ if(is_unaligned()) {
+ for (int i = 0; i < noi; i++){
+ out0[i] = in[i].real ();
+ out1[i] = in[i].imag ();
+ }
+ }
+ else {
+ volk_32fc_deinterleave_32f_x2_a(out0, out1, in, noi);
}
break;
@@ -90,6 +104,9 @@ gr_complex_to_real::gr_complex_to_real (unsigned int vlen)
gr_make_io_signature (1, 1, sizeof (float) * vlen)),
d_vlen(vlen)
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(alignment_multiple);
}
int
@@ -101,9 +118,15 @@ gr_complex_to_real::work (int noutput_items,
float *out = (float *) output_items[0];
int noi = noutput_items * d_vlen;
- for (int i = 0; i < noi; i++){
- out[i] = in[i].real ();
+ if(is_unaligned()) {
+ for (int i = 0; i < noi; i++){
+ out[i] = in[i].real ();
+ }
+ }
+ else {
+ volk_32fc_deinterleave_real_32f_a(out, in, noi);
}
+
return noutput_items;
}
@@ -121,6 +144,9 @@ gr_complex_to_imag::gr_complex_to_imag (unsigned int vlen)
gr_make_io_signature (1, 1, sizeof (float) * vlen)),
d_vlen(vlen)
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(alignment_multiple);
}
int
@@ -132,9 +158,15 @@ gr_complex_to_imag::work (int noutput_items,
float *out = (float *) output_items[0];
int noi = noutput_items * d_vlen;
- for (int i = 0; i < noi; i++){
- out[i] = in[i].imag ();
+ if(is_unaligned()) {
+ for (int i = 0; i < noi; i++){
+ out[i] = in[i].imag ();
+ }
+ }
+ else {
+ volk_32fc_deinterleave_imag_32f_a(out, in, noi);
}
+
return noutput_items;
}
@@ -152,6 +184,9 @@ gr_complex_to_mag::gr_complex_to_mag (unsigned int vlen)
gr_make_io_signature (1, 1, sizeof (float) * vlen)),
d_vlen(vlen)
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(alignment_multiple);
}
int
@@ -163,9 +198,9 @@ gr_complex_to_mag::work (int noutput_items,
float *out = (float *) output_items[0];
int noi = noutput_items * d_vlen;
- for (int i = 0; i < noi; i++){
- out[i] = std::abs (in[i]);
- }
+ // turned out to be faster than aligned/unaligned switching
+ volk_32fc_magnitude_32f_u(out, in, noi);
+
return noutput_items;
}
@@ -183,6 +218,9 @@ gr_complex_to_mag_squared::gr_complex_to_mag_squared (unsigned int vlen)
gr_make_io_signature (1, 1, sizeof (float) * vlen)),
d_vlen(vlen)
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(alignment_multiple);
}
int
@@ -194,11 +232,13 @@ gr_complex_to_mag_squared::work (int noutput_items,
float *out = (float *) output_items[0];
int noi = noutput_items * d_vlen;
- for (int i = 0; i < noi; i++){
- const float __x = in[i].real();
- const float __y = in[i].imag();
- out[i] = __x * __x + __y * __y;
+ if(is_unaligned()) {
+ volk_32fc_magnitude_squared_32f_u(out, in, noi);
+ }
+ else {
+ volk_32fc_magnitude_squared_32f_a(out, in, noi);
}
+
return noutput_items;
}
@@ -216,6 +256,9 @@ gr_complex_to_arg::gr_complex_to_arg (unsigned int vlen)
gr_make_io_signature (1, 1, sizeof (float) * vlen)),
d_vlen(vlen)
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(alignment_multiple);
}
int
@@ -227,9 +270,11 @@ gr_complex_to_arg::work (int noutput_items,
float *out = (float *) output_items[0];
int noi = noutput_items * d_vlen;
+ // The fast_atan2f is faster than Volk
for (int i = 0; i < noi; i++){
// out[i] = std::arg (in[i]);
out[i] = gr_fast_atan2f(in[i]);
}
+
return noutput_items;
}
diff --git a/gnuradio-core/src/lib/general/gr_complex_to_xxx.h b/gnuradio-core/src/lib/general/gr_complex_to_xxx.h
index 166403259..232071323 100644
--- a/gnuradio-core/src/lib/general/gr_complex_to_xxx.h
+++ b/gnuradio-core/src/lib/general/gr_complex_to_xxx.h
@@ -109,10 +109,11 @@ class GR_CORE_API gr_complex_to_imag : public gr_sync_block
*/
class GR_CORE_API gr_complex_to_mag : public gr_sync_block
{
- friend GR_CORE_API gr_complex_to_mag_sptr gr_make_complex_to_mag (unsigned int vlen);
+ friend GR_CORE_API gr_complex_to_mag_sptr
+ gr_make_complex_to_mag (unsigned int vlen);
gr_complex_to_mag (unsigned int vlen);
- unsigned int d_vlen;
+ unsigned int d_vlen;
public:
virtual int work (int noutput_items,
diff --git a/gnuradio-core/src/lib/general/gr_conjugate_cc.cc b/gnuradio-core/src/lib/general/gr_conjugate_cc.cc
index 59c3bae89..d2b20ffe6 100644
--- a/gnuradio-core/src/lib/general/gr_conjugate_cc.cc
+++ b/gnuradio-core/src/lib/general/gr_conjugate_cc.cc
@@ -28,6 +28,7 @@
#include <gr_conjugate_cc.h>
#include <gr_io_signature.h>
+#include <volk/volk.h>
gr_conjugate_cc_sptr
gr_make_conjugate_cc ()
@@ -40,6 +41,9 @@ gr_conjugate_cc::gr_conjugate_cc ()
gr_make_io_signature (1, 1, sizeof (gr_complex)),
gr_make_io_signature (1, 1, sizeof (gr_complex)))
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(gr_complex);
+ set_alignment(alignment_multiple);
}
int
@@ -50,26 +54,11 @@ gr_conjugate_cc::work (int noutput_items,
gr_complex *iptr = (gr_complex *) input_items[0];
gr_complex *optr = (gr_complex *) output_items[0];
- int size = noutput_items;
-
- while (size >= 8){
- optr[0] = conj(iptr[0]);
- optr[1] = conj(iptr[1]);
- optr[2] = conj(iptr[2]);
- optr[3] = conj(iptr[3]);
- optr[4] = conj(iptr[4]);
- optr[5] = conj(iptr[5]);
- optr[6] = conj(iptr[6]);
- optr[7] = conj(iptr[7]);
- size -= 8;
- optr += 8;
- iptr += 8;
+ if(is_unaligned()) {
+ volk_32fc_conjugate_32fc_u(optr, iptr, noutput_items);
}
-
- while (size-- > 0) {
- *optr = conj(*iptr);
- iptr++;
- optr++;
+ else {
+ volk_32fc_conjugate_32fc_a(optr, iptr, noutput_items);
}
return noutput_items;
diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.cc b/gnuradio-core/src/lib/general/gr_float_to_char.cc
index 88b9d276e..14635ff71 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_char.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_char.cc
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2004,2010 Free Software Foundation, Inc.
+ * Copyright 2004,2010,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -26,19 +26,35 @@
#include <gr_float_to_char.h>
#include <gr_io_signature.h>
-#include <gri_float_to_char.h>
+#include <volk/volk.h>
gr_float_to_char_sptr
-gr_make_float_to_char ()
+gr_make_float_to_char (size_t vlen, float scale)
{
- return gnuradio::get_initial_sptr(new gr_float_to_char ());
+ return gnuradio::get_initial_sptr(new gr_float_to_char (vlen, scale));
}
-gr_float_to_char::gr_float_to_char ()
+gr_float_to_char::gr_float_to_char (size_t vlen, float scale)
: gr_sync_block ("gr_float_to_char",
- gr_make_io_signature (1, 1, sizeof (float)),
- gr_make_io_signature (1, 1, sizeof (char)))
+ gr_make_io_signature (1, 1, sizeof (float)*vlen),
+ gr_make_io_signature (1, 1, sizeof (char)*vlen)),
+ d_vlen(vlen), d_scale(scale)
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(char);
+ set_alignment(alignment_multiple);
+}
+
+float
+gr_float_to_char::scale() const
+{
+ return d_scale;
+}
+
+void
+gr_float_to_char::set_scale(float scale)
+{
+ d_scale = scale;
}
int
@@ -47,9 +63,14 @@ gr_float_to_char::work (int noutput_items,
gr_vector_void_star &output_items)
{
const float *in = (const float *) input_items[0];
- char *out = (char *) output_items[0];
+ int8_t *out = (int8_t *) output_items[0];
- gri_float_to_char (in, out, noutput_items);
+ if(is_unaligned()) {
+ volk_32f_s32f_convert_8i_u(out, in, d_scale, d_vlen*noutput_items);
+ }
+ else {
+ volk_32f_s32f_convert_8i_a(out, in, d_scale, d_vlen*noutput_items);
+ }
return noutput_items;
}
diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.h b/gnuradio-core/src/lib/general/gr_float_to_char.h
index 434e2e9d0..c88645a18 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_char.h
+++ b/gnuradio-core/src/lib/general/gr_float_to_char.h
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2004 Free Software Foundation, Inc.
+ * Copyright 2004,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -30,7 +30,7 @@ class gr_float_to_char;
typedef boost::shared_ptr<gr_float_to_char> gr_float_to_char_sptr;
GR_CORE_API gr_float_to_char_sptr
-gr_make_float_to_char ();
+gr_make_float_to_char (size_t vlen=1, float scale=1);
/*!
* \brief Convert stream of float to a stream of char
@@ -39,10 +39,18 @@ gr_make_float_to_char ();
class GR_CORE_API gr_float_to_char : public gr_sync_block
{
- friend GR_CORE_API gr_float_to_char_sptr gr_make_float_to_char ();
- gr_float_to_char ();
+ private:
+ friend GR_CORE_API gr_float_to_char_sptr gr_make_float_to_char
+ (size_t vlen, float scale);
+ gr_float_to_char (size_t vlen, float scale);
+
+ size_t d_vlen;
+ float d_scale;
public:
+ float scale() const;
+ void set_scale(float scale);
+
virtual int work (int noutput_items,
gr_vector_const_void_star &input_items,
gr_vector_void_star &output_items);
diff --git a/gnuradio-core/src/lib/general/gr_float_to_char.i b/gnuradio-core/src/lib/general/gr_float_to_char.i
index 05b206554..a1c88750f 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_char.i
+++ b/gnuradio-core/src/lib/general/gr_float_to_char.i
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2004 Free Software Foundation, Inc.
+ * Copyright 2004,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -22,9 +22,12 @@
GR_SWIG_BLOCK_MAGIC(gr,float_to_char)
-gr_float_to_char_sptr gr_make_float_to_char ();
+gr_float_to_char_sptr
+gr_make_float_to_char (size_t vlen=1, float scale=1);
class gr_float_to_char : public gr_sync_block
{
- gr_float_to_char ();
+public:
+ float scale() const;
+ void set_scale(float scale);
};
diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.cc b/gnuradio-core/src/lib/general/gr_float_to_int.cc
index 2349de8cb..b69591043 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_int.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_int.cc
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2011 Free Software Foundation, Inc.
+ * Copyright 2011,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -27,32 +27,63 @@
#include <gr_float_to_int.h>
#include <gr_io_signature.h>
#include <gri_float_to_int.h>
+#include <volk/volk.h>
gr_float_to_int_sptr
-gr_make_float_to_int ()
+gr_make_float_to_int (size_t vlen, float scale)
{
- return gnuradio::get_initial_sptr(new gr_float_to_int ());
+ return gnuradio::get_initial_sptr(new gr_float_to_int (vlen, scale));
}
-gr_float_to_int::gr_float_to_int ()
+gr_float_to_int::gr_float_to_int (size_t vlen, float scale)
: gr_sync_block ("gr_float_to_int",
- gr_make_io_signature (1, 1, sizeof (float)),
- gr_make_io_signature (1, 1, sizeof (int)))
+ gr_make_io_signature (1, 1, sizeof (float)*vlen),
+ gr_make_io_signature (1, 1, sizeof (int)*vlen)),
+ d_vlen(vlen), d_scale(scale)
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(int);
+ set_alignment(alignment_multiple);
}
+float
+gr_float_to_int::scale() const
+{
+ return d_scale;
+}
+
+void
+gr_float_to_int::set_scale(float scale)
+{
+ d_scale = scale;
+}
int
gr_float_to_int::work (int noutput_items,
- gr_vector_const_void_star &input_items,
- gr_vector_void_star &output_items)
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items)
{
+ // Disable the Volk for now. There is a problem for large 32-bit ints that
+ // are not properly represented by the precisions of a single float, which
+ // can cause wrapping from large, positive numbers to negative.
+ // In gri_float_to_int, the value is first promoted to a 64-bit
+ // value, clipped, then converted to a float.
+#if 0
+ const float *in = (const float *) input_items[0];
+ int32_t *out = (int32_t *) output_items[0];
+
+ if(is_unaligned()) {
+ volk_32f_s32f_convert_32i_u(out, in, d_scale, d_vlen*noutput_items);
+ }
+ else {
+ volk_32f_s32f_convert_32i_a(out, in, d_scale, d_vlen*noutput_items);
+ }
+#else
const float *in = (const float *) input_items[0];
int *out = (int *) output_items[0];
- gri_float_to_int (in, out, noutput_items);
+ gri_float_to_int (in, out, d_scale, d_vlen*noutput_items);
+
+#endif
return noutput_items;
}
-
-
-
diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.h b/gnuradio-core/src/lib/general/gr_float_to_int.h
index 3324ed110..0b42c0aab 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_int.h
+++ b/gnuradio-core/src/lib/general/gr_float_to_int.h
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2011 Free Software Foundation, Inc.
+ * Copyright 2011,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -30,7 +30,7 @@ class gr_float_to_int;
typedef boost::shared_ptr<gr_float_to_int> gr_float_to_int_sptr;
GR_CORE_API gr_float_to_int_sptr
-gr_make_float_to_int ();
+gr_make_float_to_int (size_t vlen=1, float scale=1);
/*!
* \brief Convert stream of float to a stream of short
@@ -39,10 +39,18 @@ gr_make_float_to_int ();
class GR_CORE_API gr_float_to_int : public gr_sync_block
{
- friend GR_CORE_API gr_float_to_int_sptr gr_make_float_to_int ();
- gr_float_to_int ();
+ private:
+ friend GR_CORE_API
+ gr_float_to_int_sptr gr_make_float_to_int (size_t vlen, float scale);
+ gr_float_to_int (size_t vlen, float scale);
+
+ size_t d_vlen;
+ float d_scale;
public:
+ float scale() const;
+ void set_scale(float scale);
+
virtual int work (int noutput_items,
gr_vector_const_void_star &input_items,
gr_vector_void_star &output_items);
diff --git a/gnuradio-core/src/lib/general/gr_float_to_int.i b/gnuradio-core/src/lib/general/gr_float_to_int.i
index 4ab04cbf2..6e71f54a9 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_int.i
+++ b/gnuradio-core/src/lib/general/gr_float_to_int.i
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2011 Free Software Foundation, Inc.
+ * Copyright 2011,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -22,9 +22,12 @@
GR_SWIG_BLOCK_MAGIC(gr,float_to_int)
-gr_float_to_int_sptr gr_make_float_to_int ();
+gr_float_to_int_sptr
+gr_make_float_to_int (size_t vlen=1, float scale=1);
class gr_float_to_int : public gr_sync_block
{
- gr_float_to_int ();
+public:
+ float scale() const;
+ void set_scale(float scale);
};
diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.cc b/gnuradio-core/src/lib/general/gr_float_to_short.cc
index 084f76f9c..188bfdae3 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_short.cc
+++ b/gnuradio-core/src/lib/general/gr_float_to_short.cc
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2004,2010 Free Software Foundation, Inc.
+ * Copyright 2004,2010,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -26,19 +26,35 @@
#include <gr_float_to_short.h>
#include <gr_io_signature.h>
-#include <gri_float_to_short.h>
+#include <volk/volk.h>
gr_float_to_short_sptr
-gr_make_float_to_short ()
+gr_make_float_to_short (size_t vlen, float scale)
{
- return gnuradio::get_initial_sptr(new gr_float_to_short ());
+ return gnuradio::get_initial_sptr(new gr_float_to_short (vlen, scale));
}
-gr_float_to_short::gr_float_to_short ()
+gr_float_to_short::gr_float_to_short (size_t vlen, float scale)
: gr_sync_block ("gr_float_to_short",
- gr_make_io_signature (1, 1, sizeof (float)),
- gr_make_io_signature (1, 1, sizeof (short)))
+ gr_make_io_signature (1, 1, sizeof (float)*vlen),
+ gr_make_io_signature (1, 1, sizeof (short)*vlen)),
+ d_vlen(vlen), d_scale(scale)
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(short);
+ set_alignment(alignment_multiple);
+}
+
+float
+gr_float_to_short::scale() const
+{
+ return d_scale;
+}
+
+void
+gr_float_to_short::set_scale(float scale)
+{
+ d_scale = scale;
}
int
@@ -49,8 +65,13 @@ gr_float_to_short::work (int noutput_items,
const float *in = (const float *) input_items[0];
short *out = (short *) output_items[0];
- gri_float_to_short (in, out, noutput_items);
-
+ if(is_unaligned()) {
+ volk_32f_s32f_convert_16i_u(out, in, d_scale, d_vlen*noutput_items);
+ }
+ else {
+ volk_32f_s32f_convert_16i_a(out, in, d_scale, d_vlen*noutput_items);
+ }
+
return noutput_items;
}
diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.h b/gnuradio-core/src/lib/general/gr_float_to_short.h
index 010d61141..93e441f41 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_short.h
+++ b/gnuradio-core/src/lib/general/gr_float_to_short.h
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2004 Free Software Foundation, Inc.
+ * Copyright 2004,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -30,7 +30,7 @@ class gr_float_to_short;
typedef boost::shared_ptr<gr_float_to_short> gr_float_to_short_sptr;
GR_CORE_API gr_float_to_short_sptr
-gr_make_float_to_short ();
+gr_make_float_to_short (size_t vlen=1, float scale=1);
/*!
* \brief Convert stream of float to a stream of short
@@ -39,10 +39,17 @@ gr_make_float_to_short ();
class GR_CORE_API gr_float_to_short : public gr_sync_block
{
- friend GR_CORE_API gr_float_to_short_sptr gr_make_float_to_short ();
- gr_float_to_short ();
+ friend GR_CORE_API
+ gr_float_to_short_sptr gr_make_float_to_short (size_t vlen, float scale);
+ gr_float_to_short (size_t vlen, float scale);
+
+ size_t d_vlen;
+ float d_scale;
public:
+ float scale() const;
+ void set_scale(float scale);
+
virtual int work (int noutput_items,
gr_vector_const_void_star &input_items,
gr_vector_void_star &output_items);
diff --git a/gnuradio-core/src/lib/general/gr_float_to_short.i b/gnuradio-core/src/lib/general/gr_float_to_short.i
index ad059c453..072da5213 100644
--- a/gnuradio-core/src/lib/general/gr_float_to_short.i
+++ b/gnuradio-core/src/lib/general/gr_float_to_short.i
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2004 Free Software Foundation, Inc.
+ * Copyright 2004,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -22,9 +22,12 @@
GR_SWIG_BLOCK_MAGIC(gr,float_to_short)
-gr_float_to_short_sptr gr_make_float_to_short ();
+gr_float_to_short_sptr
+gr_make_float_to_short (size_t vlen=1, float scale=1);
class gr_float_to_short : public gr_sync_block
{
- gr_float_to_short ();
+public:
+ float scale() const;
+ void set_scale(float scale);
};
diff --git a/gnuradio-core/src/lib/general/gr_int_to_float.cc b/gnuradio-core/src/lib/general/gr_int_to_float.cc
index 29ca22add..7ec15b1a8 100644
--- a/gnuradio-core/src/lib/general/gr_int_to_float.cc
+++ b/gnuradio-core/src/lib/general/gr_int_to_float.cc
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2011 Free Software Foundation, Inc.
+ * Copyright 2011,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -26,19 +26,23 @@
#include <gr_int_to_float.h>
#include <gr_io_signature.h>
-#include <gri_int_to_float.h>
+#include <volk/volk.h>
gr_int_to_float_sptr
-gr_make_int_to_float ()
+gr_make_int_to_float (size_t vlen, float scale)
{
- return gnuradio::get_initial_sptr(new gr_int_to_float ());
+ return gnuradio::get_initial_sptr(new gr_int_to_float (vlen, scale));
}
-gr_int_to_float::gr_int_to_float ()
+gr_int_to_float::gr_int_to_float (size_t vlen, float scale)
: gr_sync_block ("gr_int_to_float",
- gr_make_io_signature (1, 1, sizeof (int32_t)),
- gr_make_io_signature (1, 1, sizeof (float)))
+ gr_make_io_signature (1, 1, sizeof (int32_t)*vlen),
+ gr_make_io_signature (1, 1, sizeof (float)*vlen)),
+ d_vlen(vlen), d_scale(scale)
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(alignment_multiple);
}
int
@@ -48,9 +52,14 @@ gr_int_to_float::work (int noutput_items,
{
const int32_t *in = (const int32_t *) input_items[0];
float *out = (float *) output_items[0];
-
- gri_int_to_float(in, out, noutput_items);
+ if(is_unaligned()) {
+ volk_32i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items);
+ }
+ else {
+ volk_32i_s32f_convert_32f_a(out, in, d_scale, d_vlen*noutput_items);
+ }
+
return noutput_items;
}
diff --git a/gnuradio-core/src/lib/general/gr_int_to_float.h b/gnuradio-core/src/lib/general/gr_int_to_float.h
index 9af381ba9..af6488a50 100644
--- a/gnuradio-core/src/lib/general/gr_int_to_float.h
+++ b/gnuradio-core/src/lib/general/gr_int_to_float.h
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2011 Free Software Foundation, Inc.
+ * Copyright 2011,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -30,7 +30,7 @@ class gr_int_to_float;
typedef boost::shared_ptr<gr_int_to_float> gr_int_to_float_sptr;
GR_CORE_API gr_int_to_float_sptr
-gr_make_int_to_float ();
+gr_make_int_to_float (size_t vlen=1, float scale=1);
/*!
* \brief Convert stream of int to a stream of float
@@ -39,10 +39,18 @@ gr_make_int_to_float ();
class GR_CORE_API gr_int_to_float : public gr_sync_block
{
- friend GR_CORE_API gr_int_to_float_sptr gr_make_int_to_float ();
- gr_int_to_float ();
+ private:
+ friend GR_CORE_API gr_int_to_float_sptr
+ gr_make_int_to_float (size_t vlen, float scale);
+ gr_int_to_float (size_t vlen, float scale);
+
+ size_t d_vlen;
+ float d_scale;
public:
+ float scale() const;
+ void set_scale(float scale);
+
virtual int work (int noutput_items,
gr_vector_const_void_star &input_items,
gr_vector_void_star &output_items);
diff --git a/gnuradio-core/src/lib/general/gr_int_to_float.i b/gnuradio-core/src/lib/general/gr_int_to_float.i
index 8cb9e35b5..c1f25e37b 100644
--- a/gnuradio-core/src/lib/general/gr_int_to_float.i
+++ b/gnuradio-core/src/lib/general/gr_int_to_float.i
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2011 Free Software Foundation, Inc.
+ * Copyright 2011,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -22,9 +22,11 @@
GR_SWIG_BLOCK_MAGIC(gr,int_to_float)
-gr_int_to_float_sptr gr_make_int_to_float ();
+gr_int_to_float_sptr
+gr_make_int_to_float (size_t vlen=1, float scale=1);
class gr_int_to_float : public gr_sync_block
{
- gr_int_to_float ();
+ float scale() const;
+ void set_scale(float scale);
};
diff --git a/gnuradio-core/src/lib/general/gr_multiply_cc.cc b/gnuradio-core/src/lib/general/gr_multiply_cc.cc
new file mode 100644
index 000000000..0d20e6257
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_cc.cc
@@ -0,0 +1,69 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_multiply_cc.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_multiply_cc_sptr
+gr_make_multiply_cc (size_t vlen)
+{
+ return gnuradio::get_initial_sptr(new gr_multiply_cc (vlen));
+}
+
+gr_multiply_cc::gr_multiply_cc (size_t vlen)
+ : gr_sync_block ("gr_multiply_cc",
+ gr_make_io_signature (1, -1, sizeof (gr_complex)*vlen),
+ gr_make_io_signature (1, 1, sizeof (gr_complex)*vlen)),
+ d_vlen(vlen)
+{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(gr_complex);
+ set_alignment(alignment_multiple);
+}
+
+int
+gr_multiply_cc::work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items)
+{
+ gr_complex *out = (gr_complex *) output_items[0];
+ int noi = d_vlen*noutput_items;
+
+ memcpy(out, input_items[0], noi*sizeof(gr_complex));
+ if(is_unaligned()) {
+ for(size_t i = 1; i < input_items.size(); i++)
+ volk_32fc_x2_multiply_32fc_u(out, out, (gr_complex*)input_items[i], noi);
+ }
+ else {
+ for(size_t i = 1; i < input_items.size(); i++)
+ volk_32fc_x2_multiply_32fc_a(out, out, (gr_complex*)input_items[i], noi);
+ }
+ return noutput_items;
+}
+
+
+
diff --git a/gnuradio-core/src/lib/general/gr_multiply_cc.h b/gnuradio-core/src/lib/general/gr_multiply_cc.h
new file mode 100644
index 000000000..f80ec8b25
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_cc.h
@@ -0,0 +1,56 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_MULTIPLY_CC_H
+#define INCLUDED_GR_MULTIPLY_CC_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_multiply_cc;
+typedef boost::shared_ptr<gr_multiply_cc> gr_multiply_cc_sptr;
+
+GR_CORE_API gr_multiply_cc_sptr
+gr_make_multiply_cc (size_t vlen=1);
+
+/*!
+ * \brief Multiply streams of complex values
+ * \ingroup math_blk
+ */
+
+class GR_CORE_API gr_multiply_cc : public gr_sync_block
+{
+ private:
+ friend GR_CORE_API gr_multiply_cc_sptr
+ gr_make_multiply_cc (size_t vlen);
+ gr_multiply_cc (size_t vlen);
+
+ size_t d_vlen;
+
+ public:
+ virtual int work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_MULTIPLY_CC_H */
diff --git a/gnuradio-core/src/lib/general/gr_multiply_cc.i b/gnuradio-core/src/lib/general/gr_multiply_cc.i
new file mode 100644
index 000000000..61768c390
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_cc.i
@@ -0,0 +1,32 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,multiply_cc)
+
+gr_multiply_cc_sptr
+gr_make_multiply_cc (size_t vlen=1);
+
+class gr_multiply_cc : public gr_sync_block
+{
+public:
+
+};
diff --git a/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.cc b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.cc
new file mode 100644
index 000000000..103d87b8b
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.cc
@@ -0,0 +1,69 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_multiply_conjugate_cc.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_multiply_conjugate_cc_sptr
+gr_make_multiply_conjugate_cc (size_t vlen)
+{
+ return gnuradio::get_initial_sptr(new gr_multiply_conjugate_cc (vlen));
+}
+
+gr_multiply_conjugate_cc::gr_multiply_conjugate_cc (size_t vlen)
+ : gr_sync_block ("gr_multiply_conjugate_cc",
+ gr_make_io_signature (2, 2, sizeof (gr_complex)*vlen),
+ gr_make_io_signature (1, 1, sizeof (gr_complex)*vlen)),
+ d_vlen(vlen)
+{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(gr_complex);
+ set_alignment(alignment_multiple);
+}
+
+int
+gr_multiply_conjugate_cc::work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items)
+{
+ gr_complex *in0 = (gr_complex *) input_items[0];
+ gr_complex *in1 = (gr_complex *) input_items[1];
+ gr_complex *out = (gr_complex *) output_items[0];
+ int noi = d_vlen*noutput_items;
+
+ if(is_unaligned()) {
+ volk_32fc_x2_multiply_conjugate_32fc_u(out, in0, in1, noi);
+ }
+ else {
+ volk_32fc_x2_multiply_conjugate_32fc_a(out, in0, in1, noi);
+ }
+
+ return noutput_items;
+}
+
+
+
diff --git a/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.h b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.h
new file mode 100644
index 000000000..eb032f31b
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.h
@@ -0,0 +1,57 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_MULTIPLY_CONJUGATE_CC_H
+#define INCLUDED_GR_MULTIPLY_CONJUGATE_CC_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_multiply_conjugate_cc;
+typedef boost::shared_ptr<gr_multiply_conjugate_cc>
+gr_multiply_conjugate_cc_sptr;
+
+GR_CORE_API gr_multiply_conjugate_cc_sptr
+gr_make_multiply_conjugate_cc (size_t vlen=1);
+
+/*!
+ * \brief Multiplies a stream by the conjugate of the second stream
+ * \ingroup math_blk
+ */
+
+class GR_CORE_API gr_multiply_conjugate_cc : public gr_sync_block
+{
+ private:
+ friend GR_CORE_API gr_multiply_conjugate_cc_sptr
+ gr_make_multiply_conjugate_cc (size_t vlen);
+ gr_multiply_conjugate_cc (size_t vlen);
+
+ size_t d_vlen;
+
+ public:
+ virtual int work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_MULTIPLY_CONJUGATE_CC_H */
diff --git a/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.i b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.i
new file mode 100644
index 000000000..023410505
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_conjugate_cc.i
@@ -0,0 +1,32 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,multiply_conjugate_cc)
+
+gr_multiply_conjugate_cc_sptr
+gr_make_multiply_conjugate_cc (size_t vlen=1);
+
+class gr_multiply_conjugate_cc : public gr_sync_block
+{
+public:
+
+};
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc b/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc
new file mode 100644
index 000000000..59521f54a
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_cc.cc
@@ -0,0 +1,80 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_multiply_const_cc.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_multiply_const_cc_sptr
+gr_make_multiply_const_cc (gr_complex k, size_t vlen)
+{
+ return gnuradio::get_initial_sptr(new gr_multiply_const_cc (k, vlen));
+}
+
+gr_multiply_const_cc::gr_multiply_const_cc (gr_complex k, size_t vlen)
+ : gr_sync_block ("gr_multiply_const_cc",
+ gr_make_io_signature (1, 1, sizeof (gr_complex)*vlen),
+ gr_make_io_signature (1, 1, sizeof (gr_complex)*vlen)),
+ d_k(k), d_vlen(vlen)
+{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(gr_complex);
+ set_alignment(alignment_multiple);
+}
+
+gr_complex
+gr_multiply_const_cc::k() const
+{
+ return d_k;
+}
+
+void
+gr_multiply_const_cc::set_k(gr_complex k)
+{
+ d_k = k;
+}
+
+int
+gr_multiply_const_cc::work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items)
+{
+ const gr_complex *in = (const gr_complex *) input_items[0];
+ gr_complex *out = (gr_complex *) output_items[0];
+ int noi = d_vlen*noutput_items;
+
+ if(is_unaligned()) {
+ volk_32fc_s32fc_multiply_32fc_u(out, in, d_k, noi);
+ }
+ else {
+ volk_32fc_s32fc_multiply_32fc_a(out, in, d_k, noi);
+ }
+
+ return noutput_items;
+}
+
+
+
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_cc.h b/gnuradio-core/src/lib/general/gr_multiply_const_cc.h
new file mode 100644
index 000000000..1791d9160
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_cc.h
@@ -0,0 +1,60 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_MULTIPLY_CONST_CC_H
+#define INCLUDED_GR_MULTIPLY_CONST_CC_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_multiply_const_cc;
+typedef boost::shared_ptr<gr_multiply_const_cc> gr_multiply_const_cc_sptr;
+
+GR_CORE_API gr_multiply_const_cc_sptr
+gr_make_multiply_const_cc (gr_complex k, size_t vlen=1);
+
+/*!
+ * \brief Multiply stream of complex values with a constant \p k
+ * \ingroup math_blk
+ */
+
+class GR_CORE_API gr_multiply_const_cc : public gr_sync_block
+{
+ private:
+ friend GR_CORE_API gr_multiply_const_cc_sptr
+ gr_make_multiply_const_cc (gr_complex k, size_t vlen);
+ gr_multiply_const_cc (gr_complex k, size_t vlen);
+
+ gr_complex d_k;
+ size_t d_vlen;
+
+ public:
+ gr_complex k() const;
+ void set_k(gr_complex k);
+
+ virtual int work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_MULTIPLY_CONST_CC_H */
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_cc.i b/gnuradio-core/src/lib/general/gr_multiply_const_cc.i
new file mode 100644
index 000000000..be8d32b31
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_cc.i
@@ -0,0 +1,33 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,multiply_const_cc)
+
+gr_multiply_const_cc_sptr
+gr_make_multiply_const_cc (gr_complex k, size_t vlen=1);
+
+class gr_multiply_const_cc : public gr_sync_block
+{
+public:
+ gr_complex k() const;
+ void set_k(gr_complex k);
+};
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_ff.cc b/gnuradio-core/src/lib/general/gr_multiply_const_ff.cc
new file mode 100644
index 000000000..8354cb27b
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_ff.cc
@@ -0,0 +1,80 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_multiply_const_ff.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_multiply_const_ff_sptr
+gr_make_multiply_const_ff (float k, size_t vlen)
+{
+ return gnuradio::get_initial_sptr(new gr_multiply_const_ff (k, vlen));
+}
+
+gr_multiply_const_ff::gr_multiply_const_ff (float k, size_t vlen)
+ : gr_sync_block ("gr_multiply_const_ff",
+ gr_make_io_signature (1, 1, sizeof (float)*vlen),
+ gr_make_io_signature (1, 1, sizeof (float)*vlen)),
+ d_k(k), d_vlen(vlen)
+{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(alignment_multiple);
+}
+
+float
+gr_multiply_const_ff::k() const
+{
+ return d_k;
+}
+
+void
+gr_multiply_const_ff::set_k(float k)
+{
+ d_k = k;
+}
+
+int
+gr_multiply_const_ff::work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items)
+{
+ const float *in = (const float *) input_items[0];
+ float *out = (float *) output_items[0];
+ int noi = d_vlen*noutput_items;
+
+ if(is_unaligned()) {
+ volk_32f_s32f_multiply_32f_u(out, in, d_k, noi);
+ }
+ else {
+ volk_32f_s32f_multiply_32f_a(out, in, d_k, noi);
+ }
+
+ return noutput_items;
+}
+
+
+
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_ff.h b/gnuradio-core/src/lib/general/gr_multiply_const_ff.h
new file mode 100644
index 000000000..ef42a92f4
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_ff.h
@@ -0,0 +1,60 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_MULTIPLY_CONST_FF_H
+#define INCLUDED_GR_MULTIPLY_CONST_FF_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_multiply_const_ff;
+typedef boost::shared_ptr<gr_multiply_const_ff> gr_multiply_const_ff_sptr;
+
+GR_CORE_API gr_multiply_const_ff_sptr
+gr_make_multiply_const_ff (float k, size_t vlen=1);
+
+/*!
+ * \brief Multiply stream of float values with a constant \p k
+ * \ingroup math_blk
+ */
+
+class GR_CORE_API gr_multiply_const_ff : public gr_sync_block
+{
+ private:
+ friend GR_CORE_API gr_multiply_const_ff_sptr
+ gr_make_multiply_const_ff (float k, size_t vlen);
+ gr_multiply_const_ff (float k, size_t vlen);
+
+ float d_k;
+ size_t d_vlen;
+
+ public:
+ float k() const;
+ void set_k(float k);
+
+ virtual int work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_MULTIPLY_CONST_FF_H */
diff --git a/gnuradio-core/src/lib/general/gr_multiply_const_ff.i b/gnuradio-core/src/lib/general/gr_multiply_const_ff.i
new file mode 100644
index 000000000..0fd3b1225
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_const_ff.i
@@ -0,0 +1,33 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,multiply_const_ff)
+
+gr_multiply_const_ff_sptr
+gr_make_multiply_const_ff (float k, size_t vlen=1);
+
+class gr_multiply_const_ff : public gr_sync_block
+{
+public:
+ float k() const;
+ void set_k(float k);
+};
diff --git a/gnuradio-core/src/lib/general/gr_multiply_ff.cc b/gnuradio-core/src/lib/general/gr_multiply_ff.cc
new file mode 100644
index 000000000..a7d34ce51
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_ff.cc
@@ -0,0 +1,69 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_multiply_ff.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_multiply_ff_sptr
+gr_make_multiply_ff (size_t vlen)
+{
+ return gnuradio::get_initial_sptr(new gr_multiply_ff (vlen));
+}
+
+gr_multiply_ff::gr_multiply_ff (size_t vlen)
+ : gr_sync_block ("gr_multiply_ff",
+ gr_make_io_signature (1, -1, sizeof (float)*vlen),
+ gr_make_io_signature (1, 1, sizeof (float)*vlen)),
+ d_vlen(vlen)
+{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(alignment_multiple);
+}
+
+int
+gr_multiply_ff::work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items)
+{
+ float *out = (float *) output_items[0];
+ int noi = d_vlen*noutput_items;
+
+ memcpy(out, input_items[0], noi*sizeof(float));
+ if(is_unaligned()) {
+ for(size_t i = 1; i < input_items.size(); i++)
+ volk_32f_x2_multiply_32f_u(out, out, (const float*)input_items[i], noi);
+ }
+ else {
+ for(size_t i = 1; i < input_items.size(); i++)
+ volk_32f_x2_multiply_32f_a(out, out, (const float*)input_items[i], noi);
+ }
+ return noutput_items;
+}
+
+
+
diff --git a/gnuradio-core/src/lib/general/gr_multiply_ff.h b/gnuradio-core/src/lib/general/gr_multiply_ff.h
new file mode 100644
index 000000000..ae36cb1e0
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_ff.h
@@ -0,0 +1,56 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_MULTIPLY_FF_H
+#define INCLUDED_GR_MULTIPLY_FF_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_multiply_ff;
+typedef boost::shared_ptr<gr_multiply_ff> gr_multiply_ff_sptr;
+
+GR_CORE_API gr_multiply_ff_sptr
+gr_make_multiply_ff (size_t vlen=1);
+
+/*!
+ * \brief Multiply streams of complex values
+ * \ingroup math_blk
+ */
+
+class GR_CORE_API gr_multiply_ff : public gr_sync_block
+{
+ private:
+ friend GR_CORE_API gr_multiply_ff_sptr
+ gr_make_multiply_ff (size_t vlen);
+ gr_multiply_ff (size_t vlen);
+
+ size_t d_vlen;
+
+ public:
+ virtual int work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_MULTIPLY_FF_H */
diff --git a/gnuradio-core/src/lib/general/gr_multiply_ff.i b/gnuradio-core/src/lib/general/gr_multiply_ff.i
new file mode 100644
index 000000000..0f06301f2
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_multiply_ff.i
@@ -0,0 +1,32 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,multiply_ff)
+
+gr_multiply_ff_sptr
+gr_make_multiply_ff (size_t vlen=1);
+
+class gr_multiply_ff : public gr_sync_block
+{
+public:
+
+};
diff --git a/gnuradio-core/src/lib/general/gr_short_to_char.cc b/gnuradio-core/src/lib/general/gr_short_to_char.cc
new file mode 100644
index 000000000..a3c096e6d
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_short_to_char.cc
@@ -0,0 +1,67 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2011,2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gr_short_to_char.h>
+#include <gr_io_signature.h>
+#include <volk/volk.h>
+
+gr_short_to_char_sptr
+gr_make_short_to_char (size_t vlen)
+{
+ return gnuradio::get_initial_sptr(new gr_short_to_char (vlen));
+}
+
+gr_short_to_char::gr_short_to_char (size_t vlen)
+ : gr_sync_block ("gr_short_to_char",
+ gr_make_io_signature (1, 1, sizeof (short)*vlen),
+ gr_make_io_signature (1, 1, sizeof (char)*vlen)),
+ d_vlen(vlen)
+{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(char);
+ set_alignment(alignment_multiple);
+}
+
+int
+gr_short_to_char::work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items)
+{
+ const int16_t *in = (const int16_t *) input_items[0];
+ int8_t *out = (int8_t *) output_items[0];
+
+ if(is_unaligned()) {
+ volk_16i_convert_8i_u(out, in, d_vlen*noutput_items);
+ }
+ else {
+ volk_16i_convert_8i_a(out, in, d_vlen*noutput_items);
+ }
+
+ return noutput_items;
+}
+
+
+
diff --git a/gnuradio-core/src/lib/general/gr_short_to_char.h b/gnuradio-core/src/lib/general/gr_short_to_char.h
new file mode 100644
index 000000000..9682d86ec
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_short_to_char.h
@@ -0,0 +1,56 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2011,2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_GR_SHORT_TO_CHAR_H
+#define INCLUDED_GR_SHORT_TO_CHAR_H
+
+#include <gr_core_api.h>
+#include <gr_sync_block.h>
+
+class gr_short_to_char;
+typedef boost::shared_ptr<gr_short_to_char> gr_short_to_char_sptr;
+
+GR_CORE_API gr_short_to_char_sptr
+gr_make_short_to_char (size_t vlen=1);
+
+/*!
+ * \brief Convert stream of short to a stream of float
+ * \ingroup converter_blk
+ */
+
+class GR_CORE_API gr_short_to_char : public gr_sync_block
+{
+ private:
+ friend GR_CORE_API gr_short_to_char_sptr
+ gr_make_short_to_char (size_t vlen);
+ gr_short_to_char (size_t vlen);
+
+ size_t d_vlen;
+
+ public:
+ virtual int work (int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items);
+};
+
+
+#endif /* INCLUDED_GR_SHORT_TO_CHAR_H */
diff --git a/gnuradio-core/src/lib/general/gr_short_to_char.i b/gnuradio-core/src/lib/general/gr_short_to_char.i
new file mode 100644
index 000000000..330a4fdda
--- /dev/null
+++ b/gnuradio-core/src/lib/general/gr_short_to_char.i
@@ -0,0 +1,31 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2011,2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+GR_SWIG_BLOCK_MAGIC(gr,short_to_char)
+
+gr_short_to_char_sptr
+gr_make_short_to_char (size_t vlen=1);
+
+class gr_short_to_char : public gr_sync_block
+{
+
+};
diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.cc b/gnuradio-core/src/lib/general/gr_short_to_float.cc
index 7b80953ac..94d376a27 100644
--- a/gnuradio-core/src/lib/general/gr_short_to_float.cc
+++ b/gnuradio-core/src/lib/general/gr_short_to_float.cc
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2004,2010 Free Software Foundation, Inc.
+ * Copyright 2004,2010,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -26,19 +26,35 @@
#include <gr_short_to_float.h>
#include <gr_io_signature.h>
-#include <gri_short_to_float.h>
+#include <volk/volk.h>
gr_short_to_float_sptr
-gr_make_short_to_float ()
+gr_make_short_to_float (size_t vlen, float scale)
{
- return gnuradio::get_initial_sptr(new gr_short_to_float ());
+ return gnuradio::get_initial_sptr(new gr_short_to_float (vlen, scale));
}
-gr_short_to_float::gr_short_to_float ()
+gr_short_to_float::gr_short_to_float (size_t vlen, float scale)
: gr_sync_block ("gr_short_to_float",
- gr_make_io_signature (1, 1, sizeof (short)),
- gr_make_io_signature (1, 1, sizeof (float)))
+ gr_make_io_signature (1, 1, sizeof (short)*vlen),
+ gr_make_io_signature (1, 1, sizeof (float)*vlen)),
+ d_vlen(vlen), d_scale(scale)
{
+ const int alignment_multiple =
+ volk_get_alignment() / sizeof(float);
+ set_alignment(alignment_multiple);
+}
+
+float
+gr_short_to_float::scale() const
+{
+ return d_scale;
+}
+
+void
+gr_short_to_float::set_scale(float scale)
+{
+ d_scale = scale;
}
int
@@ -49,8 +65,12 @@ gr_short_to_float::work (int noutput_items,
const short *in = (const short *) input_items[0];
float *out = (float *) output_items[0];
- gri_short_to_float (in, out, noutput_items);
-
+ if(is_unaligned()) {
+ volk_16i_s32f_convert_32f_u(out, in, d_scale, d_vlen*noutput_items);
+ }
+ else {
+ volk_16i_s32f_convert_32f_a(out, in, d_scale, d_vlen*noutput_items);
+ }
return noutput_items;
}
diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.h b/gnuradio-core/src/lib/general/gr_short_to_float.h
index b40c966ea..efdc81ecd 100644
--- a/gnuradio-core/src/lib/general/gr_short_to_float.h
+++ b/gnuradio-core/src/lib/general/gr_short_to_float.h
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2004 Free Software Foundation, Inc.
+ * Copyright 2004,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -30,7 +30,7 @@ class gr_short_to_float;
typedef boost::shared_ptr<gr_short_to_float> gr_short_to_float_sptr;
GR_CORE_API gr_short_to_float_sptr
-gr_make_short_to_float ();
+gr_make_short_to_float (size_t vlen=1, float scale=1);
/*!
* \brief Convert stream of short to a stream of float
@@ -39,10 +39,18 @@ gr_make_short_to_float ();
class GR_CORE_API gr_short_to_float : public gr_sync_block
{
- friend GR_CORE_API gr_short_to_float_sptr gr_make_short_to_float ();
- gr_short_to_float ();
-
+ private:
+ friend GR_CORE_API gr_short_to_float_sptr
+ gr_make_short_to_float (size_t vlen, float scale);
+ gr_short_to_float (size_t vlen, float scale);
+
+ size_t d_vlen;
+ float d_scale;
+
public:
+ float scale() const;
+ void set_scale(float scale);
+
virtual int work (int noutput_items,
gr_vector_const_void_star &input_items,
gr_vector_void_star &output_items);
diff --git a/gnuradio-core/src/lib/general/gr_short_to_float.i b/gnuradio-core/src/lib/general/gr_short_to_float.i
index 56759df29..229618890 100644
--- a/gnuradio-core/src/lib/general/gr_short_to_float.i
+++ b/gnuradio-core/src/lib/general/gr_short_to_float.i
@@ -1,6 +1,6 @@
/* -*- c++ -*- */
/*
- * Copyright 2004 Free Software Foundation, Inc.
+ * Copyright 2004,2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
@@ -22,9 +22,12 @@
GR_SWIG_BLOCK_MAGIC(gr,short_to_float)
-gr_short_to_float_sptr gr_make_short_to_float ();
+gr_short_to_float_sptr
+gr_make_short_to_float (size_t vlen=1, float scale=1);
class gr_short_to_float : public gr_sync_block
{
- gr_short_to_float ();
+public:
+ float scale() const;
+ void set_scale(float scale);
};
diff --git a/gnuradio-core/src/lib/general/gri_float_to_int.cc b/gnuradio-core/src/lib/general/gri_float_to_int.cc
index 5271e60e2..0b0b10dfe 100644
--- a/gnuradio-core/src/lib/general/gri_float_to_int.cc
+++ b/gnuradio-core/src/lib/general/gri_float_to_int.cc
@@ -34,10 +34,10 @@ static const int64_t MIN_INT = -2147483647; // -(2^31)-1
void
-gri_float_to_int (const float *in, int *out, int nsamples)
+gri_float_to_int (const float *in, int *out, float scale, int nsamples)
{
for (int i = 0; i < nsamples; i++){
- int64_t r = llrintf(in[i]);
+ int64_t r = llrintf(scale * in[i]);
if (r < MIN_INT)
r = MIN_INT;
else if (r > MAX_INT)
diff --git a/gnuradio-core/src/lib/general/gri_float_to_int.h b/gnuradio-core/src/lib/general/gri_float_to_int.h
index a2f6ea877..d8b98efc1 100644
--- a/gnuradio-core/src/lib/general/gri_float_to_int.h
+++ b/gnuradio-core/src/lib/general/gri_float_to_int.h
@@ -28,6 +28,6 @@
/*!
* convert array of floats to int with rounding and saturation.
*/
-GR_CORE_API void gri_float_to_int (const float *in, int *out, int nsamples);
+GR_CORE_API void gri_float_to_int (const float *in, int *out, float scale, int nsamples);
#endif /* INCLUDED_GRI_FLOAT_TO_INT_H */
diff --git a/gnuradio-core/src/lib/gengen/CMakeLists.txt b/gnuradio-core/src/lib/gengen/CMakeLists.txt
index a7292f131..98b149e97 100644
--- a/gnuradio-core/src/lib/gengen/CMakeLists.txt
+++ b/gnuradio-core/src/lib/gengen/CMakeLists.txt
@@ -86,10 +86,10 @@ expand_h_cc_i(gr_noise_source_X s i f c)
expand_h_cc_i(gr_sig_source_X s i f c)
expand_h_cc_i(gr_add_const_XX ss ii ff cc sf)
-expand_h_cc_i(gr_multiply_const_XX ss ii ff cc)
-expand_h_cc_i(gr_add_XX ss ii ff cc)
+expand_h_cc_i(gr_multiply_const_XX ss ii)
+expand_h_cc_i(gr_add_XX ss ii cc)
expand_h_cc_i(gr_sub_XX ss ii ff cc)
-expand_h_cc_i(gr_multiply_XX ss ii ff cc)
+expand_h_cc_i(gr_multiply_XX ss ii)
expand_h_cc_i(gr_divide_XX ss ii ff cc)
expand_h_cc_i(gr_mute_XX ss ii ff cc)
expand_h_cc_i(gr_add_const_vXX ss ii ff cc)
diff --git a/gnuradio-core/src/lib/gengen/Makefile.gen b/gnuradio-core/src/lib/gengen/Makefile.gen
index 1c529803c..db260585f 100644
--- a/gnuradio-core/src/lib/gengen/Makefile.gen
+++ b/gnuradio-core/src/lib/gengen/Makefile.gen
@@ -12,7 +12,6 @@ GENERATED_H = \
gr_add_const_vff.h \
gr_add_const_vii.h \
gr_add_const_vss.h \
- gr_add_ff.h \
gr_add_ii.h \
gr_add_ss.h \
gr_and_bb.h \
@@ -45,16 +44,12 @@ GENERATED_H = \
gr_moving_average_ff.h \
gr_moving_average_ii.h \
gr_moving_average_ss.h \
- gr_multiply_cc.h \
- gr_multiply_const_cc.h \
- gr_multiply_const_ff.h \
gr_multiply_const_ii.h \
gr_multiply_const_ss.h \
gr_multiply_const_vcc.h \
gr_multiply_const_vff.h \
gr_multiply_const_vii.h \
gr_multiply_const_vss.h \
- gr_multiply_ff.h \
gr_multiply_ii.h \
gr_multiply_ss.h \
gr_mute_cc.h \
@@ -117,7 +112,6 @@ GENERATED_I = \
gr_add_const_vff.i \
gr_add_const_vii.i \
gr_add_const_vss.i \
- gr_add_ff.i \
gr_add_ii.i \
gr_add_ss.i \
gr_and_bb.i \
@@ -150,16 +144,12 @@ GENERATED_I = \
gr_moving_average_ff.i \
gr_moving_average_ii.i \
gr_moving_average_ss.i \
- gr_multiply_cc.i \
- gr_multiply_const_cc.i \
- gr_multiply_const_ff.i \
gr_multiply_const_ii.i \
gr_multiply_const_ss.i \
gr_multiply_const_vcc.i \
gr_multiply_const_vff.i \
gr_multiply_const_vii.i \
gr_multiply_const_vss.i \
- gr_multiply_ff.i \
gr_multiply_ii.i \
gr_multiply_ss.i \
gr_mute_cc.i \
@@ -222,7 +212,6 @@ GENERATED_CC = \
gr_add_const_vff.cc \
gr_add_const_vii.cc \
gr_add_const_vss.cc \
- gr_add_ff.cc \
gr_add_ii.cc \
gr_add_ss.cc \
gr_and_bb.cc \
@@ -255,16 +244,12 @@ GENERATED_CC = \
gr_moving_average_ff.cc \
gr_moving_average_ii.cc \
gr_moving_average_ss.cc \
- gr_multiply_cc.cc \
- gr_multiply_const_cc.cc \
- gr_multiply_const_ff.cc \
gr_multiply_const_ii.cc \
gr_multiply_const_ss.cc \
gr_multiply_const_vcc.cc \
gr_multiply_const_vff.cc \
gr_multiply_const_vii.cc \
gr_multiply_const_vss.cc \
- gr_multiply_ff.cc \
gr_multiply_ii.cc \
gr_multiply_ss.cc \
gr_mute_cc.cc \
diff --git a/gnuradio-core/src/lib/gengen/generate_common.py b/gnuradio-core/src/lib/gengen/generate_common.py
index 9bd6bcc9c..6da2044e0 100755
--- a/gnuradio-core/src/lib/gengen/generate_common.py
+++ b/gnuradio-core/src/lib/gengen/generate_common.py
@@ -41,10 +41,7 @@ reg_signatures = ['ss', 'ii', 'ff', 'cc']
reg_roots = [
'gr_add_const_XX',
- 'gr_multiply_const_XX',
- 'gr_add_XX',
'gr_sub_XX',
- 'gr_multiply_XX',
'gr_divide_XX',
'gr_mute_XX',
'gr_add_const_vXX',
@@ -66,7 +63,10 @@ others = (
('gr_sample_and_hold_XX', ('bb','ss','ii','ff')),
('gr_argmax_XX', ('fs','is','ss')),
('gr_max_XX', ('ff','ii','ss')),
- ('gr_peak_detector_XX', ('fb','ib','sb'))
+ ('gr_peak_detector_XX', ('fb','ib','sb')),
+ ('gr_multiply_XX', ('ss','ii')),
+ ('gr_multiply_const_XX', ('ss','ii')),
+ ('gr_add_XX', ('ss','cc','ii'))
)
diff --git a/gnuradio-core/src/lib/runtime/gr_block.cc b/gnuradio-core/src/lib/runtime/gr_block.cc
index 9463869f5..78f77486b 100644
--- a/gnuradio-core/src/lib/runtime/gr_block.cc
+++ b/gnuradio-core/src/lib/runtime/gr_block.cc
@@ -34,6 +34,9 @@ gr_block::gr_block (const std::string &name,
gr_io_signature_sptr output_signature)
: gr_basic_block(name, input_signature, output_signature),
d_output_multiple (1),
+ d_output_multiple_set(false),
+ d_unaligned(0),
+ d_is_unaligned(false),
d_relative_rate (1.0),
d_history(1),
d_fixed_rate(false),
@@ -75,10 +78,37 @@ gr_block::set_output_multiple (int multiple)
if (multiple < 1)
throw std::invalid_argument ("gr_block::set_output_multiple");
+ d_output_multiple_set = true;
d_output_multiple = multiple;
}
void
+gr_block::set_alignment (int multiple)
+{
+ if (multiple < 1)
+ throw std::invalid_argument ("gr_block::set_alignment_multiple");
+
+ d_output_multiple = multiple;
+}
+
+void
+gr_block::set_unaligned (int na)
+{
+ // unaligned value must be less than 0 and it doesn't make sense
+ // that it's larger than the alignment value.
+ if ((na < 0) || (na > d_output_multiple))
+ throw std::invalid_argument ("gr_block::set_unaligned");
+
+ d_unaligned = na;
+}
+
+void
+gr_block::set_is_unaligned (bool u)
+{
+ d_is_unaligned = u;
+}
+
+void
gr_block::set_relative_rate (double relative_rate)
{
if (relative_rate < 0.0)
diff --git a/gnuradio-core/src/lib/runtime/gr_block.h b/gnuradio-core/src/lib/runtime/gr_block.h
index 86e0583e9..9171942e0 100644
--- a/gnuradio-core/src/lib/runtime/gr_block.h
+++ b/gnuradio-core/src/lib/runtime/gr_block.h
@@ -152,8 +152,33 @@ class GR_CORE_API gr_block : public gr_basic_block {
*/
void set_output_multiple (int multiple);
int output_multiple () const { return d_output_multiple; }
+ bool output_multiple_set () const { return d_output_multiple_set; }
/*!
+ * \brief Constrains buffers to work on a set item alignment (for SIMD)
+ *
+ * set_alignment_multiple causes the scheduler to ensure that the noutput_items
+ * argument passed to forecast and general_work will be an integer multiple
+ * of \param multiple The default value is 1.
+ *
+ * This control is similar to the output_multiple setting, except
+ * that if the number of items passed to the block is less than the
+ * output_multiple, this value is ignored and the block can produce
+ * like normal. The d_unaligned value is set to the number of items
+ * the block is off by. In the next call to general_work, the
+ * noutput_items is set to d_unaligned or less until
+ * d_unaligned==0. The buffers are now aligned again and the aligned
+ * calls can be performed again.
+ */
+ void set_alignment (int multiple);
+ int alignment () const { return d_output_multiple; }
+
+ void set_unaligned (int na);
+ int unaligned () const { return d_unaligned; }
+ void set_is_unaligned (bool u);
+ bool is_unaligned () const { return d_is_unaligned; }
+
+ /*!
* \brief Tell the scheduler \p how_many_items of input stream \p which_input were consumed.
*/
void consume (int which_input, int how_many_items);
@@ -231,6 +256,9 @@ class GR_CORE_API gr_block : public gr_basic_block {
private:
int d_output_multiple;
+ bool d_output_multiple_set;
+ int d_unaligned;
+ bool d_is_unaligned;
double d_relative_rate; // approx output_rate / input_rate
gr_block_detail_sptr d_detail; // implementation details
unsigned d_history;
diff --git a/gnuradio-core/src/lib/runtime/gr_block_executor.cc b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
index ef53baf78..86289695a 100644
--- a/gnuradio-core/src/lib/runtime/gr_block_executor.cc
+++ b/gnuradio-core/src/lib/runtime/gr_block_executor.cc
@@ -183,6 +183,8 @@ gr_block_executor::run_one_iteration()
int noutput_items;
int max_items_avail;
int max_noutput_items = d_max_noutput_items;
+ int new_alignment=0;
+ int alignment_state=-1;
gr_block *m = d_block.get();
gr_block_detail *d = m->detail().get();
@@ -307,7 +309,11 @@ gr_block_executor::run_one_iteration()
// try to work it forward starting with max_items_avail.
// We want to try to consume all the input we've got.
int reqd_noutput_items = m->fixed_rate_ninput_to_noutput(max_items_avail);
- reqd_noutput_items = round_up(reqd_noutput_items, m->output_multiple());
+
+ // only test this if we specifically set the output_multiple
+ if(m->output_multiple_set())
+ reqd_noutput_items = round_down(reqd_noutput_items, m->output_multiple());
+
if (reqd_noutput_items > 0 && reqd_noutput_items <= noutput_items)
noutput_items = reqd_noutput_items;
@@ -316,6 +322,41 @@ gr_block_executor::run_one_iteration()
}
noutput_items = std::min(noutput_items, max_noutput_items);
+ // Check if we're still unaligned; use up items until we're
+ // aligned again. Otherwise, make sure we set the alignment
+ // requirement.
+ if(!m->output_multiple_set()) {
+ if(m->is_unaligned()) {
+ // When unaligned, don't just set noutput_items to the remaining
+ // samples to meet alignment; this causes too much overhead in
+ // requiring a premature call back here. Set the maximum amount
+ // of samples to handle unalignment and get us back aligned.
+ if(noutput_items >= m->unaligned()) {
+ noutput_items = round_up(noutput_items, m->alignment()) \
+ - (m->alignment() - m->unaligned());
+ new_alignment = 0;
+ }
+ else {
+ new_alignment = m->unaligned() - noutput_items;
+ }
+ alignment_state = 0;
+ }
+ else if(noutput_items < m->alignment()) {
+ // if we don't have enough for an aligned call, keep track of
+ // misalignment, set unaligned flag, and proceed.
+ new_alignment = m->alignment() - noutput_items;
+ m->set_unaligned(new_alignment);
+ m->set_is_unaligned(true);
+ alignment_state = 1;
+ }
+ else {
+ // enough to round down to the nearest alignment and process.
+ noutput_items = round_down(noutput_items, m->alignment());
+ m->set_is_unaligned(false);
+ alignment_state = 2;
+ }
+ }
+
// ask the block how much input they need to produce noutput_items
m->forecast (noutput_items, d_ninput_items_required);
@@ -354,6 +395,12 @@ gr_block_executor::run_one_iteration()
goto were_done;
}
+ // If we were made unaligned in this round but return here without
+ // processing; reset the unalignment claim before next entry.
+ if(alignment_state == 1) {
+ m->set_unaligned(0);
+ m->set_is_unaligned(false);
+ }
return BLKD_IN;
}
@@ -379,6 +426,12 @@ gr_block_executor::run_one_iteration()
LOG(*d_log << " general_work: noutput_items = " << noutput_items
<< " result = " << n << std::endl);
+ // Adjust number of unaligned items left to process
+ if(m->is_unaligned()) {
+ m->set_unaligned(new_alignment);
+ m->set_is_unaligned(m->unaligned() != 0);
+ }
+
if(!propagate_tags(m->tag_propagation_policy(), d,
d_start_nitems_read, m->relative_rate(),
d_returned_tags))
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py
index 8fb70fb3f..e3b20c3c3 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_add_and_friends.py
@@ -78,6 +78,24 @@ class test_add_and_friends (gr_unittest.TestCase):
op = gr.multiply_const_ii (5)
self.help_ii ((src_data,), expected_result, op)
+ def test_mult_const_ff (self):
+ src_data = (-1, 0, 1, 2, 3)
+ expected_result = (-5, 0, 5, 10, 15)
+ op = gr.multiply_const_cc (5)
+ self.help_cc ((src_data,), expected_result, op)
+
+ def test_mult_const_cc (self):
+ src_data = (-1-1j, 0+0j, 1+1j, 2+2j, 3+3j)
+ expected_result = (-5-5j, 0+0j, 5+5j, 10+10j, 15+15j)
+ op = gr.multiply_const_cc (5)
+ self.help_cc ((src_data,), expected_result, op)
+
+ def test_mult_const_cc2 (self):
+ src_data = (-1-1j, 0+0j, 1+1j, 2+2j, 3+3j)
+ expected_result = (-3-7j, 0+0j, 3+7j, 6+14j, 9+21j)
+ op = gr.multiply_const_cc (5+2j)
+ self.help_cc ((src_data,), expected_result, op)
+
def test_add_ii (self):
src1_data = (1, 2, 3, 4, 5)
src2_data = (8, -3, 4, 8, 2)
@@ -94,6 +112,22 @@ class test_add_and_friends (gr_unittest.TestCase):
self.help_ii ((src1_data, src2_data),
expected_result, op)
+ def test_mult_ff (self):
+ src1_data = (1, 2, 3, 4, 5)
+ src2_data = (8, -3, 4, 8, 2)
+ expected_result = (8, -6, 12, 32, 10)
+ op = gr.multiply_ff ()
+ self.help_ff ((src1_data, src2_data),
+ expected_result, op)
+
+ def test_mult_cc (self):
+ src1_data = (1+1j, 2+2j, 3+3j, 4+4j, 5+5j)
+ src2_data = (8, -3, 4, 8, 2)
+ expected_result = (8+8j, -6-6j, 12+12j, 32+32j, 10+10j)
+ op = gr.multiply_cc ()
+ self.help_cc ((src1_data, src2_data),
+ expected_result, op)
+
def test_sub_ii_1 (self):
src1_data = (1, 2, 3, 4, 5)
expected_result = (-1, -2, -3, -4, -5)
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py b/gnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py
index 76627247b..01679dc05 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_complex_to_xxx.py
@@ -134,7 +134,7 @@ class test_complex_ops (gr_unittest.TestCase):
self.tb.run ()
actual_result = dst.data ()
- self.assertFloatTuplesAlmostEqual (expected_result, actual_result, 5)
+ self.assertFloatTuplesAlmostEqual (expected_result, actual_result, 3)
if __name__ == '__main__':
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_conjugate.py b/gnuradio-core/src/python/gnuradio/gr/qa_conjugate.py
new file mode 100644
index 000000000..c07902a5a
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_conjugate.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+#
+# Copyright 2012 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+from gnuradio import gr, gr_unittest
+
+class test_conjugate (gr_unittest.TestCase):
+
+ def setUp (self):
+ self.tb = gr.top_block ()
+
+ def tearDown (self):
+ self.tb = None
+
+ def test_000 (self):
+ src_data = (-2-2j, -1-1j, -2+2j, -1+1j,
+ 2-2j, 1-1j, 2+2j, 1+1j,
+ 0+0j)
+
+ exp_data = (-2+2j, -1+1j, -2-2j, -1-1j,
+ 2+2j, 1+1j, 2-2j, 1-1j,
+ 0-0j)
+
+ src = gr.vector_source_c(src_data)
+ op = gr.conjugate_cc ()
+ dst = gr.vector_sink_c ()
+
+ self.tb.connect(src, op)
+ self.tb.connect(op, dst)
+ self.tb.run()
+ result_data = dst.data ()
+ self.assertEqual (exp_data, result_data)
+
+if __name__ == '__main__':
+ gr_unittest.run(test_conjugate, "test_conjugate.xml")
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py
new file mode 100755
index 000000000..ecdd36228
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_char.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+#
+# Copyright 2011,2012 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+from gnuradio import gr, gr_unittest
+class test_float_to_char (gr_unittest.TestCase):
+
+ def setUp (self):
+ self.tb = gr.top_block ()
+
+ def tearDown (self):
+ self.tb = None
+
+ def test_001(self):
+
+ src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3)
+ expected_result = [0, 1, 2, 3, 4, 5, 255, 254, 253]
+ src = gr.vector_source_f(src_data)
+ op = gr.float_to_char()
+ dst = gr.vector_sink_b()
+
+ self.tb.connect(src, op, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
+ def test_002(self):
+
+ src_data = ( 126.0, 127.0, 128.0)
+ expected_result = [ 126, 127, 127 ]
+
+ src = gr.vector_source_f(src_data)
+ op = gr.float_to_char()
+ # Note: vector_sink_b returns uchar
+ dst = gr.vector_sink_b()
+
+ self.tb.connect(src, op, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
+ def test_003(self):
+
+ scale = 2
+ vlen = 3
+ src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3)
+ expected_result = [0, 2, 4, 6, 8, 11, 254, 252, 250]
+ src = gr.vector_source_f(src_data)
+ s2v = gr.stream_to_vector(gr.sizeof_float, vlen)
+ op = gr.float_to_char(vlen, scale)
+ v2s = gr.vector_to_stream(gr.sizeof_char, vlen)
+ dst = gr.vector_sink_b()
+
+ self.tb.connect(src, s2v, op, v2s, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
+if __name__ == '__main__':
+ gr_unittest.run(test_float_to_char, "test_float_to_char.xml")
+
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
index 3e0b847a2..977a8518d 100644..100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_int.py
@@ -33,7 +33,8 @@ class test_float_to_int (gr_unittest.TestCase):
def test_001(self):
src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5)
- expected_result = [int(round(s)) for s in src_data]
+ expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -6]
+
src = gr.vector_source_f(src_data)
op = gr.float_to_int()
dst = gr.vector_sink_i()
@@ -60,6 +61,25 @@ class test_float_to_int (gr_unittest.TestCase):
self.assertEqual(expected_result, result_data)
+
+ def test_003(self):
+
+ scale = 2
+ vlen = 3
+ src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3)
+ expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -7,]
+ src = gr.vector_source_f(src_data)
+ s2v = gr.stream_to_vector(gr.sizeof_float, vlen)
+ op = gr.float_to_int(vlen, scale)
+ v2s = gr.vector_to_stream(gr.sizeof_int, vlen)
+ dst = gr.vector_sink_i()
+
+ self.tb.connect(src, s2v, op, v2s, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
if __name__ == '__main__':
gr_unittest.run(test_float_to_int, "test_float_to_int.xml")
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py
new file mode 100755
index 000000000..0d89a149c
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_short.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+#
+# Copyright 2011,2012 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+from gnuradio import gr, gr_unittest
+import ctypes
+
+class test_float_to_short (gr_unittest.TestCase):
+
+ def setUp (self):
+ self.tb = gr.top_block ()
+
+ def tearDown (self):
+ self.tb = None
+
+ def test_001(self):
+
+ src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5)
+ expected_result = [0, 1, 2, 3, 4, 6, -1, -2, -3, -4, -6]
+
+ src = gr.vector_source_f(src_data)
+ op = gr.float_to_short()
+ dst = gr.vector_sink_s()
+
+ self.tb.connect(src, op, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
+ def test_002(self):
+
+ src_data = ( 32766, 32767, 32768,
+ -32767, -32768, -32769)
+ expected_result = [ 32766, 32767, 32767,
+ -32767, -32768, -32768 ]
+
+ src = gr.vector_source_f(src_data)
+ op = gr.float_to_short()
+ dst = gr.vector_sink_s()
+
+ self.tb.connect(src, op, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
+ def test_003(self):
+
+ scale = 2
+ vlen = 3
+ src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3)
+ expected_result = [0, 2, 4, 7, 9, 11, -2, -4, -7]
+ src = gr.vector_source_f(src_data)
+ s2v = gr.stream_to_vector(gr.sizeof_float, vlen)
+ op = gr.float_to_short(vlen, scale)
+ v2s = gr.vector_to_stream(gr.sizeof_short, vlen)
+ dst = gr.vector_sink_s()
+
+ self.tb.connect(src, s2v, op, v2s, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
+if __name__ == '__main__':
+ gr_unittest.run(test_float_to_short, "test_float_to_short.xml")
+
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_float_to_uchar.py b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_uchar.py
new file mode 100755
index 000000000..0d54f45f3
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_float_to_uchar.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+#
+# Copyright 2011 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+from gnuradio import gr, gr_unittest
+import ctypes
+
+class test_float_to_uchar (gr_unittest.TestCase):
+
+ def setUp (self):
+ self.tb = gr.top_block ()
+
+ def tearDown (self):
+ self.tb = None
+
+ def test_001(self):
+
+ src_data = (0.0, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4, -5.5)
+ expected_result = [0, 1, 2, 3, 4, 6, 0, 0, 0, 0, 0]
+ src = gr.vector_source_f(src_data)
+ op = gr.float_to_uchar()
+ dst = gr.vector_sink_b()
+
+ self.tb.connect(src, op, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
+ def test_002(self):
+
+ src_data = ( 254.0, 255.0, 256.0)
+ expected_result = [ 254, 255, 255 ]
+ src = gr.vector_source_f(src_data)
+ op = gr.float_to_uchar()
+ dst = gr.vector_sink_b()
+
+ self.tb.connect(src, op, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
+if __name__ == '__main__':
+ gr_unittest.run(test_float_to_uchar, "test_float_to_uchar.xml")
+
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py b/gnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py
index edfc26409..530b2a5cc 100755
--- a/gnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_int_to_float.py
@@ -44,6 +44,26 @@ class test_int_to_float (gr_unittest.TestCase):
self.assertFloatTuplesAlmostEqual(expected_result, result_data)
+ def test_002(self):
+
+ vlen = 3
+ src_data = ( 65000, 65001, 65002, 65003, 65004, 65005,
+ -65001, -65002, -65003)
+ expected_result = [ 65000.0, 65001.0, 65002.0,
+ 65003.0, 65004.0, 65005.0,
+ -65001.0, -65002.0, -65003.0]
+ src = gr.vector_source_i(src_data)
+ s2v = gr.stream_to_vector(gr.sizeof_int, vlen)
+ op = gr.int_to_float(vlen)
+ v2s = gr.vector_to_stream(gr.sizeof_float, vlen)
+ dst = gr.vector_sink_f()
+
+ self.tb.connect(src, s2v, op, v2s, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
if __name__ == '__main__':
gr_unittest.run(test_int_to_float, "test_int_to_float.xml")
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_multiply_conjugate.py b/gnuradio-core/src/python/gnuradio/gr/qa_multiply_conjugate.py
new file mode 100644
index 000000000..aaf3cc125
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_multiply_conjugate.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+#
+# Copyright 2012 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+from gnuradio import gr, gr_unittest
+
+class test_multiply_conjugate (gr_unittest.TestCase):
+
+ def setUp (self):
+ self.tb = gr.top_block ()
+
+ def tearDown (self):
+ self.tb = None
+
+ def test_000 (self):
+ src_data0 = (-2-2j, -1-1j, -2+2j, -1+1j,
+ 2-2j, 1-1j, 2+2j, 1+1j,
+ 0+0j)
+ src_data1 = (-3-3j, -4-4j, -3+3j, -4+4j,
+ 3-3j, 4-4j, 3+3j, 4+4j,
+ 0+0j)
+
+ exp_data = (12+0j, 8+0j, 12+0j, 8+0j,
+ 12+0j, 8+0j, 12+0j, 8+0j,
+ 0+0j)
+ src0 = gr.vector_source_c(src_data0)
+ src1 = gr.vector_source_c(src_data1)
+ op = gr.multiply_conjugate_cc ()
+ dst = gr.vector_sink_c ()
+
+ self.tb.connect(src0, (op,0))
+ self.tb.connect(src1, (op,1))
+ self.tb.connect(op, dst)
+ self.tb.run()
+ result_data = dst.data ()
+ self.assertEqual (exp_data, result_data)
+
+if __name__ == '__main__':
+ gr_unittest.run(test_multiply_conjugate, "test_multiply_conjugate.xml")
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_short_to_char.py b/gnuradio-core/src/python/gnuradio/gr/qa_short_to_char.py
new file mode 100755
index 000000000..6a95fa01d
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_short_to_char.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+#
+# Copyright 2011,2012 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+from gnuradio import gr, gr_unittest
+import ctypes
+
+class test_short_to_char (gr_unittest.TestCase):
+
+ def setUp (self):
+ self.tb = gr.top_block ()
+
+ def tearDown (self):
+ self.tb = None
+
+ def test_001(self):
+
+ src_data = range(0, 32767, 32767/127)
+ src_data = [int(s) for s in src_data]
+ expected_result = range(0, 128)
+ src = gr.vector_source_s(src_data)
+ op = gr.short_to_char()
+ dst = gr.vector_sink_b()
+
+ self.tb.connect(src, op, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
+ def test_002(self):
+
+ vlen = 3
+ src_data = range(0, 32400, 32767/127)
+ src_data = [int(s) for s in src_data]
+ expected_result = range(0, 126)
+ src = gr.vector_source_s(src_data)
+ s2v = gr.stream_to_vector(gr.sizeof_short, vlen)
+ op = gr.short_to_char(vlen)
+ v2s = gr.vector_to_stream(gr.sizeof_char, vlen)
+ dst = gr.vector_sink_b()
+
+ self.tb.connect(src, s2v, op, v2s, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
+if __name__ == '__main__':
+ gr_unittest.run(test_short_to_char, "test_short_to_char.xml")
+
diff --git a/gnuradio-core/src/python/gnuradio/gr/qa_short_to_float.py b/gnuradio-core/src/python/gnuradio/gr/qa_short_to_float.py
new file mode 100755
index 000000000..8f331b495
--- /dev/null
+++ b/gnuradio-core/src/python/gnuradio/gr/qa_short_to_float.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+#
+# Copyright 2011,2012 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+from gnuradio import gr, gr_unittest
+import ctypes
+
+class test_short_to_float (gr_unittest.TestCase):
+
+ def setUp (self):
+ self.tb = gr.top_block ()
+
+ def tearDown (self):
+ self.tb = None
+
+ def test_001(self):
+
+ src_data = (0, 1, 2, 3, 4, 5, -1, -2, -3, -4, -5)
+ expected_result = [ 0.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+ -1.0, -2.0, -3.0, -4.0, -5.0]
+
+ src = gr.vector_source_s(src_data)
+ op = gr.short_to_float()
+ dst = gr.vector_sink_f()
+
+ self.tb.connect(src, op, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
+ def test_002(self):
+
+ vlen = 3
+ src_data = (0, 1, 2, 3, 4, 5, -1, -2, -3)
+ expected_result = [0.0, 1.0, 2.0, 3.0, 4.0,
+ 5.0, -1.0, -2.0, -3.0]
+ src = gr.vector_source_s(src_data)
+ s2v = gr.stream_to_vector(gr.sizeof_short, vlen)
+ op = gr.short_to_float(vlen)
+ v2s = gr.vector_to_stream(gr.sizeof_float, vlen)
+ dst = gr.vector_sink_f()
+
+ self.tb.connect(src, s2v, op, v2s, dst)
+ self.tb.run()
+ result_data = list(dst.data())
+
+ self.assertEqual(expected_result, result_data)
+
+if __name__ == '__main__':
+ gr_unittest.run(test_short_to_float, "test_short_to_float.xml")
+
diff --git a/gnuradio-examples/python/volk_benchmark/README b/gnuradio-examples/python/volk_benchmark/README
new file mode 100644
index 000000000..516fc15bd
--- /dev/null
+++ b/gnuradio-examples/python/volk_benchmark/README
@@ -0,0 +1,252 @@
+VOLK Benchmarking Scripts
+
+The Python programs in this directory are designed to help benchmark
+and compare Volk enhancements to GNU Radio. There are two kinds of
+scripts here: collecting data and displaying the data.
+
+Data collection is done by running a Volk testing script that will
+populate a SQLite database file (volk_results.db by default). The
+plotting utility provided here reads from the database files and plots
+bar graphs to compare the different installations.
+
+These benchmarks can be used to compare previous versions of GNU
+Radio to using Volk; they can be used to compare different Volk
+proto-kernels, as well, by editing the volk_config file; or they could
+be used to compare performance between different machines and/or
+processors.
+
+
+======================================================================
+Volk Profiling
+
+Before doing any kind of Volk benchmarking, it is important to run the
+volk_profile program. The profiler will build a config file for the
+best SIMD architecture for your processor. Run volk_profile that is
+installed into $PREFIX/bin. This program tests all known Volk kernels
+for each proto-kernel supported by the processor. When finished, it
+will write to $HOME/.volk/volk_config the best architecture for the
+VOLK function. This file is read when using a function to know the
+best version of the function to execute.
+
+The volk_config file contains a line for each kernel, where each line
+looks like:
+
+ volk_<KERNEL_NAME> <ARCHITECTURE>
+
+The architecture will be something like (sse, sse2, sse3, avx, neon,
+etc.), depending on your processor.
+
+
+======================================================================
+Benchmark Tests
+
+There are currently two benchmark scripts defined for collecting
+data. There is one that runs through the type conversions that have
+been converted to Volk (volk_types.py) and the other runs through the
+math operators converted to using Volk (volk_math.py).
+
+Script prototypes
+Both have the same structure for use:
+
+----------------------------------------------------------------------
+./volk_<test>.py [-h] -L LABEL [-D DATABASE] [-N NITEMS] [-I ITERATIONS]
+ [--tests [{0,1,2,3} [{0,1,2,3} ...]]] [--list]
+ [--all]
+
+optional arguments:
+ -h, --help show this help message and exit
+ -L LABEL, --label LABEL
+ Label of database table [default: None]
+ -D DATABASE, --database DATABASE
+ Database file to store data in [default:
+ volk_results.db]
+ -N NITEMS, --nitems NITEMS
+ Number of items per iterations [default: 1000000000.0]
+ -I ITERATIONS, --iterations ITERATIONS
+ Number of iterations [default: 20]
+ --tests [{0,1,2,3} [{0,1,2,3} ...]]
+ A list of tests to run; can be a single test or a
+ space-separated list.
+ --list List the available tests
+ --all Run all tests
+----------------------------------------------------------------------
+
+To run, you specify the tests to run and a label to store along with
+the results. To find out what the available tests are, use the
+'--list' option.
+
+To specify a subset of tests, use the '--tests' with space-separated
+list of tests numbers (e.g., --tests 0 2 4 9).
+
+Use the '--all' to run all tests.
+
+The label specified is used as an identifier for the benchmarking
+currently being done. This is required as it is important in
+organizing the data in the database (each label is its own
+table). Usually, the label will specify the type of run being done,
+such as "volk_aligned" or "v3_5_1". In these cases, the "volk_aligned"
+label says that this is for a benchmarking using the GNU Radio version
+that uses the aligned scheduler and Volk calls in the work
+functions. The "v3_5_1" label is if you were benchmarking an installed
+version 3.5.1 of GNU Radio, which is pre-Volk. These will then be
+plotted against each other to see the timing differences.
+
+The 'database' option will output the results to a new database
+file. This can be useful for separating the output of different runs
+or of different benchmarks, such as the types versus the math scripts,
+say, or to distinguish results from different computers.
+
+If rerun using the same database and label, the entries in the table
+will simply be replaced by the new results.
+
+It is often useful to use the 'sqlitebrowser' program to interrogate
+the database file farther, if you are interested in the structure or
+the raw data.
+
+Other parameters of this script set the number of items to process and
+number of iterations to use when computing the benchmarking
+data. These default to 1 billion samples per iteration over 20
+iterations. Expect a default run to take a long time. Using the '-N'
+and '-I' options can be used to change the runtime of the benchmarks
+but are set high to remove problems of variance between iterations.
+
+======================================================================
+Plotting Results
+
+The volk_plot.py script reads a given database file and plots the
+results. The default behavior is to read all of the labels stored in
+the database and plot them as data sets on a bar graph. This shows the
+average time taken to process the number of items given.
+
+The options for the plotting script are:
+
+usage: volk_plot.py [-h] [-D DATABASE] [-E] [-P {mean,min,max}] [-% table]
+
+Plot Volk performance results from a SQLite database. Run one of the volk
+tests first (e.g, volk_math.py)
+
+----------------------------------------------------------------------
+optional arguments:
+ -h, --help show this help message and exit
+ -D DATABASE, --database DATABASE
+ Database file to read data from [default:
+ volk_results.db]
+ -E, --errorbars Show error bars (1 standard dev.)
+ -P {mean,min,max}, --plot {mean,min,max}
+ Set the type of plot to produce [default: mean]
+ -% table, --percent table
+ Show percent difference to the given type [default:
+ None]
+----------------------------------------------------------------------
+
+This script allows you to specify the database used (-D), but will
+always read all rows from all tables from it and display them. You can
+also turn on plotting error bars (1 standard deviation the mean). Be
+careful, though, as some older versions of Matplotlib might have an
+issue with this option.
+
+The mean time is only one possible statistic that we might be
+interested in when looking at the data. It represents the average user
+experience when running a given block. On the other hand, the minimum
+runtime best represents the actual performance of a block given
+minimal OS interruptions while running. Right now, the data collected
+includes the mean, variance, min, and max over the number of
+iterations given. Using the '-P' option, you can specify the type of
+data to plot (mean, min, or max).
+
+Another useful way of looking at the data is to compare the percent
+improvement of a benchmark compared to another. This is done using the
+'-%' option with the provided table (or label) as the baseline. So if
+we were interested in comparing how much the 'volk_aligned' was over
+'v3_5_1', we would specify '-% v3_5_1' to see this. The plot would
+then only show the percent speedup observed using Volk for each of the
+blocks.
+
+
+======================================================================
+Benchmarking Walkthrough
+
+This will walk through an example of benchmarking the new Volk
+implementation versus the pre-Volk GNU Radio. It also shows how to
+look at the SIMD optimized versions versus the generic
+implementations.
+
+Since we introduced Volk in GNU Radio 3.5.2, we will use the following
+labels for our data:
+
+ 1.) volk_aligned: v3.5.2 with volk_profile results in .volk/volk_config
+ 2.) v3_5_2: v3.5.2 with the generic (non-SIMD) calls to Volk
+ 3.) v3_5_1: an installation of GNU Radio from version v3.5.1
+
+We assume that we have installed two versions of GNU Radio.
+
+ v3.5.2 installed into /opt/gr-3_5_2
+ v3.5.1 installed into /opt/gr-3_5_1
+
+To test cases 1 and 2 above, we have to run GNU Radio from the v3.5.2
+installation, so we set the following environmental variables. Note
+that this is written for Ubuntu 11.10. These commands and directories
+may have to be changed depending on your OS and versions.
+
+ export LD_LIBRARY_PATH=/opt/gr-3_5_2/lib
+ export LD_LIBRARY_PATH=/opt/gr-3_5_2/lib/python2.7/dist-packages
+
+Now we can run the benchmark tests, so we will focus on the math
+operators:
+
+ ./volk_math.py -D volk_results_math.db --all -L volk_aligned
+
+When this finishes, the 'volk_results_math.db' will contain our
+results for this run.
+
+We next want to run the generic, non-SIMD, calls. This can be done by
+changing the Volk kernel settings in $HOME/.volk/volk_config. First,
+make a backup of this file. Then edit it and change all architecture
+calls (sse, sse2, etc.) to 'generic.' Now, Volk will only call the
+generic versions of these functions. So we rerun the benchmark with:
+
+ ./volk_math.py -D volk_results_math.db --all -L v3_5_2
+
+Notice that the only thing changed here was the label to 'v3_5_2'.
+
+Next, we want to collect data for the non-Volk version of GNU
+Radio. This is important because some internals to GNU Radio were made
+when adding support for Volk, so it is nice to know what the
+differences do to our performance. First, we set the environmental
+variables to point to the v3.5.1 installation:
+
+ export LD_LIBRARY_PATH=/opt/gr-3_5_1/lib
+ export LD_LIBRARY_PATH=/opt/gr-3_5_1/lib/python2.7/dist-packages
+
+And when we run the test, we use the same command line, but the GNU
+Radio libraries and Python files used come from v3.5.1. We also change
+the label to indicate the different version to store.
+
+ ./volk_math.py -D volk_results_math.db --all -L v3_5_1
+
+We now have a database populated with three tables for the three
+different labels. We can plot them all together by simply running:
+
+ ./volk_plot.py -D volk_results_math.db
+
+This will show the average run times for each of the three
+configurations for all math functions tested. We might also be
+interested to see the difference in performance from the v3.5.1
+version, so we can run:
+
+ ./volk_plot.py -D volk_results_math.db -% v3_5_1
+
+That will plot both the 'volk_aligned' and 'v3_5_2' as a percentage
+improvement over v3_5_1. A positive value indicates that this version
+runs faster than the v3.5.1 version.
+
+
+----------------------------------------------------------------------
+
+Another interesting test case could be to compare results on different
+processors. So if you have different generation Intels, AMD, or
+whatever, you can simply pass the .db file around and run the Volk
+benchmark script to populate the database with different results. For
+this, you would specify a label like '-L i7_2620M' that indicates the
+processor type to uniquely ID the data.
+
diff --git a/gnuradio-examples/python/volk_benchmark/volk_math.py b/gnuradio-examples/python/volk_benchmark/volk_math.py
new file mode 100755
index 000000000..8b0081387
--- /dev/null
+++ b/gnuradio-examples/python/volk_benchmark/volk_math.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+
+from gnuradio import gr
+import argparse
+from volk_test_funcs import *
+
+def multiply_const_cc(N):
+ k = 3.3
+ op = gr.multiply_const_cc(k)
+ tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 1, 1)
+ return tb
+
+######################################################################
+
+def multiply_const_ff(N):
+ k = 3.3
+ op = gr.multiply_const_ff(k)
+ tb = helper(N, op, gr.sizeof_float, gr.sizeof_float, 1, 1)
+ return tb
+
+######################################################################
+
+def multiply_cc(N):
+ op = gr.multiply_cc()
+ tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1)
+ return tb
+
+######################################################################
+
+def multiply_ff(N):
+ op = gr.multiply_ff()
+ tb = helper(N, op, gr.sizeof_float, gr.sizeof_float, 2, 1)
+ return tb
+
+######################################################################
+
+def add_ff(N):
+ op = gr.add_ff()
+ tb = helper(N, op, gr.sizeof_float, gr.sizeof_float, 2, 1)
+ return tb
+
+######################################################################
+
+def conjugate_cc(N):
+ op = gr.conjugate_cc()
+ tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 1, 1)
+ return tb
+
+######################################################################
+
+def multiply_conjugate_cc(N):
+ try:
+ op = gr.multiply_conjugate_cc()
+ tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1)
+ return tb
+
+ except AttributeError:
+ class s(gr.hier_block2):
+ def __init__(self):
+ gr.hier_block2.__init__(self, "s",
+ gr.io_signature(2, 2, gr.sizeof_gr_complex),
+ gr.io_signature(1, 1, gr.sizeof_gr_complex))
+ conj = gr.conjugate_cc()
+ mult = gr.multiply_cc()
+ self.connect((self,0), (mult,0))
+ self.connect((self,1), conj, (mult,1))
+ self.connect(mult, self)
+
+ op = s()
+ tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_gr_complex, 2, 1)
+ return tb
+
+
+######################################################################
+
+def run_tests(func, N, iters):
+ print("Running Test: {0}".format(func.__name__))
+ try:
+ tb = func(N)
+ t = timeit(tb, iters)
+ res = format_results(func.__name__, t)
+ return res
+ except AttributeError:
+ print "\tCould not run test. Skipping."
+ return None
+
+def main():
+ avail_tests = [multiply_const_cc,
+ multiply_const_ff,
+ multiply_cc,
+ multiply_ff,
+ add_ff,
+ conjugate_cc,
+ multiply_conjugate_cc]
+
+ desc='Time an operation to compare with other implementations. \
+ This program runs a simple GNU Radio flowgraph to test a \
+ particular math function, mostly to compare the \
+ Volk-optimized implementation versus a regular \
+ implementation. The results are stored to an SQLite database \
+ that can then be read by volk_plot.py to plot the differences.'
+ parser = argparse.ArgumentParser(description=desc)
+ parser.add_argument('-L', '--label', type=str,
+ required=True, default=None,
+ help='Label of database table [default: %(default)s]')
+ parser.add_argument('-D', '--database', type=str,
+ default="volk_results.db",
+ help='Database file to store data in [default: %(default)s]')
+ parser.add_argument('-N', '--nitems', type=float,
+ default=1e9,
+ help='Number of items per iterations [default: %(default)s]')
+ parser.add_argument('-I', '--iterations', type=int,
+ default=20,
+ help='Number of iterations [default: %(default)s]')
+ parser.add_argument('--tests', type=int, nargs='*',
+ choices=xrange(len(avail_tests)),
+ help='A list of tests to run; can be a single test or a \
+ space-separated list.')
+ parser.add_argument('--list', action='store_true',
+ help='List the available tests')
+ parser.add_argument('--all', action='store_true',
+ help='Run all tests')
+ args = parser.parse_args()
+
+ if(args.list):
+ print "Available Tests to Run:"
+ print "\n".join(["\t{0}: {1}".format(i,f.__name__) for i,f in enumerate(avail_tests)])
+ sys.exit(0)
+
+ N = int(args.nitems)
+ iters = args.iterations
+ label = args.label
+
+ conn = create_connection(args.database)
+ new_table(conn, label)
+
+ if not args.all:
+ func = avail_tests[args.test]
+ res = run_tests(func, N, iters)
+ if res is not None:
+ replace_results(conn, label, N, iters, res)
+ else:
+ for f in avail_tests:
+ res = run_tests(f, N, iters)
+ if res is not None:
+ replace_results(conn, label, N, iters, res)
+
+if __name__ == "__main__":
+ try:
+ main()
+ except KeyboardInterrupt:
+ pass
diff --git a/gnuradio-examples/python/volk_benchmark/volk_plot.py b/gnuradio-examples/python/volk_benchmark/volk_plot.py
new file mode 100755
index 000000000..823dfbf64
--- /dev/null
+++ b/gnuradio-examples/python/volk_benchmark/volk_plot.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python
+
+import sys, math
+import argparse
+from volk_test_funcs import *
+
+try:
+ import matplotlib
+ import matplotlib.pyplot as plt
+except ImportError:
+ sys.stderr.write("Could not import Matplotlib (http://matplotlib.sourceforge.net/)\n")
+ sys.exit(1)
+
+def main():
+ desc='Plot Volk performance results from a SQLite database. ' + \
+ 'Run one of the volk tests first (e.g, volk_math.py)'
+ parser = argparse.ArgumentParser(description=desc)
+ parser.add_argument('-D', '--database', type=str,
+ default='volk_results.db',
+ help='Database file to read data from [default: %(default)s]')
+ parser.add_argument('-E', '--errorbars',
+ action='store_true', default=False,
+ help='Show error bars (1 standard dev.)')
+ parser.add_argument('-P', '--plot', type=str,
+ choices=['mean', 'min', 'max'],
+ default='mean',
+ help='Set the type of plot to produce [default: %(default)s]')
+ parser.add_argument('-%', '--percent', type=str,
+ default=None, metavar="table",
+ help='Show percent difference to the given type [default: %(default)s]')
+ args = parser.parse_args()
+
+ # Set up global plotting properties
+ matplotlib.rcParams['figure.subplot.bottom'] = 0.2
+ matplotlib.rcParams['figure.subplot.top'] = 0.95
+ matplotlib.rcParams['figure.subplot.right'] = 0.98
+ matplotlib.rcParams['ytick.labelsize'] = 16
+ matplotlib.rcParams['xtick.labelsize'] = 16
+ matplotlib.rcParams['legend.fontsize'] = 18
+
+ # Get list of tables to compare
+ conn = create_connection(args.database)
+ tables = list_tables(conn)
+ M = len(tables)
+
+ # Colors to distinguish each table in the bar graph
+ # More than 5 tables will wrap around to the start.
+ colors = ['b', 'r', 'g', 'm', 'k']
+
+ # Set up figure for plotting
+ f0 = plt.figure(0, facecolor='w', figsize=(14,10))
+ s0 = f0.add_subplot(1,1,1)
+
+ # Create a register of names that exist in all tables
+ tmp_regs = []
+ for table in tables:
+ # Get results from the next table
+ res = get_results(conn, table[0])
+
+ tmp_regs.append(list())
+ for r in res:
+ try:
+ tmp_regs[-1].index(r['kernel'])
+ except ValueError:
+ tmp_regs[-1].append(r['kernel'])
+
+ # Get only those names that are common in all tables
+ name_reg = tmp_regs[0]
+ for t in tmp_regs[1:]:
+ name_reg = list(set(name_reg) & set(t))
+ name_reg.sort()
+
+ # Pull the data out for each table into a dictionary
+ # we can ref the table by it's name and the data associated
+ # with a given kernel in name_reg by it's name.
+ # This ensures there is no sorting issue with the data in the
+ # dictionary, so the kernels are plotted against each other.
+ table_data = dict()
+ for i,table in enumerate(tables):
+ # Get results from the next table
+ res = get_results(conn, table[0])
+
+ data = dict()
+ for r in res:
+ data[r['kernel']] = r
+
+ table_data[table[0]] = data
+
+ if args.percent is not None:
+ for i,t in enumerate(table_data):
+ if args.percent == t:
+ norm_data = []
+ for name in name_reg:
+ if(args.plot == 'max'):
+ norm_data.append(table_data[t][name]['max'])
+ elif(args.plot == 'min'):
+ norm_data.append(table_data[t][name]['min'])
+ elif(args.plot == 'mean'):
+ norm_data.append(table_data[t][name]['avg'])
+
+
+ # Plot the results
+ x0 = xrange(len(name_reg))
+ i = 0
+ for t in (table_data):
+ ydata = []
+ stds = []
+ for name in name_reg:
+ stds.append(math.sqrt(table_data[t][name]['var']))
+ if(args.plot == 'max'):
+ ydata.append(table_data[t][name]['max'])
+ elif(args.plot == 'min'):
+ ydata.append(table_data[t][name]['min'])
+ elif(args.plot == 'mean'):
+ ydata.append(table_data[t][name]['avg'])
+
+ if args.percent is not None:
+ ydata = [-100*(y-n)/y for y,n in zip(ydata,norm_data)]
+ if(args.percent != t):
+ # makes x values for this data set placement
+ # width of bars depends on number of comparisons
+ wdth = 0.80/(M-1)
+ x1 = [x + i*wdth for x in x0]
+ i += 1
+
+ s0.bar(x1, ydata, width=wdth,
+ color=colors[(i-1)%M], label=t,
+ edgecolor='k', linewidth=2)
+
+ else:
+ # makes x values for this data set placement
+ # width of bars depends on number of comparisons
+ wdth = 0.80/M
+ x1 = [x + i*wdth for x in x0]
+ i += 1
+
+ if(args.errorbars is False):
+ s0.bar(x1, ydata, width=wdth,
+ color=colors[(i-1)%M], label=t,
+ edgecolor='k', linewidth=2)
+ else:
+ s0.bar(x1, ydata, width=wdth,
+ yerr=stds,
+ color=colors[i%M], label=t,
+ edgecolor='k', linewidth=2,
+ error_kw={"ecolor": 'k', "capsize":5,
+ "linewidth":2})
+
+ nitems = res[0]['nitems']
+ if args.percent is None:
+ s0.set_ylabel("Processing time (sec) [{0:G} items]".format(nitems),
+ fontsize=22, fontweight='bold',
+ horizontalalignment='center')
+ else:
+ s0.set_ylabel("% Improvement over {0} [{1:G} items]".format(
+ args.percent, nitems),
+ fontsize=22, fontweight='bold')
+
+ s0.legend()
+ s0.set_xticks(x0)
+ s0.set_xticklabels(name_reg)
+ for label in s0.xaxis.get_ticklabels():
+ label.set_rotation(45)
+ label.set_fontsize(16)
+
+ plt.show()
+
+if __name__ == "__main__":
+ main()
diff --git a/gnuradio-examples/python/volk_benchmark/volk_test_funcs.py b/gnuradio-examples/python/volk_benchmark/volk_test_funcs.py
new file mode 100644
index 000000000..4f4e4afd3
--- /dev/null
+++ b/gnuradio-examples/python/volk_benchmark/volk_test_funcs.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+
+from gnuradio import gr
+import math, sys, os, time
+
+try:
+ import scipy
+except ImportError:
+ sys.stderr.write("Unable to import Scipy (www.scipy.org)\n")
+ sys.exit(1)
+
+try:
+ import sqlite3
+except ImportError:
+ sys.stderr.write("Unable to import sqlite3: requires Python 2.5\n")
+ sys.exit(1)
+
+def execute(conn, cmd):
+ '''
+ Executes the command cmd to the database opened in connection conn.
+ '''
+ c = conn.cursor()
+ c.execute(cmd)
+ conn.commit()
+ c.close()
+
+def create_connection(database):
+ '''
+ Returns a connection object to the SQLite database.
+ '''
+ return sqlite3.connect(database)
+
+def new_table(conn, tablename):
+ '''
+ Create a new table for results.
+ All results are in the form: [kernel | nitems | iters | avg. time | variance | max time | min time ]
+ Each table is meant as a different setting (e.g., volk_aligned, volk_unaligned, etc.)
+ '''
+ cols = "kernel text, nitems int, iters int, avg real, var real, max real, min real"
+ cmd = "create table if not exists {0} ({1})".format(
+ tablename, cols)
+ execute(conn, cmd)
+
+def replace_results(conn, tablename, nitems, iters, res):
+ '''
+ Inserts or replaces the results 'res' dictionary values into the table.
+ This deletes all old entries of the kernel in this table.
+ '''
+ cmd = "DELETE FROM {0} where kernel='{1}'".format(tablename, res["kernel"])
+ execute(conn, cmd)
+ insert_results(conn, tablename, nitems, iters, res)
+
+def insert_results(conn, tablename, nitems, iters, res):
+ '''
+ Inserts the results dictionary values into the table.
+ '''
+ cols = "kernel, nitems, iters, avg, var, max, min"
+ cmd = "INSERT INTO {0} ({1}) VALUES ('{2}', {3}, {4}, {5}, {6}, {7}, {8})".format(
+ tablename, cols, res["kernel"], nitems, iters,
+ res["avg"], res["var"], res["max"], res["min"])
+ execute(conn, cmd)
+
+def list_tables(conn):
+ '''
+ Returns a list of all tables in the database.
+ '''
+ cmd = "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
+ c = conn.cursor()
+ c.execute(cmd)
+ t = c.fetchall()
+ c.close()
+
+ return t
+
+def get_results(conn, tablename):
+ '''
+ Gets all results in tablename.
+ '''
+ cmd = "SELECT * FROM {0}".format(tablename)
+ c = conn.cursor()
+ c.execute(cmd)
+ fetched = c.fetchall()
+ c.close()
+
+ res = list()
+ for f in fetched:
+ r = dict()
+ r['kernel'] = f[0]
+ r['nitems'] = f[1]
+ r['iters'] = f[2]
+ r['avg'] = f[3]
+ r['var'] = f[4]
+ r['min'] = f[5]
+ r['max'] = f[6]
+ res.append(r)
+
+ return res
+
+
+class helper(gr.top_block):
+ '''
+ Helper function to run the tests. The parameters are:
+ N: number of items to process (int)
+ op: The GR block/hier_block to test
+ isizeof: the sizeof the input type
+ osizeof: the sizeof the output type
+ nsrcs: number of inputs to the op
+ nsnks: number of outputs of the op
+
+ This function can only handle blocks where all inputs are the same
+ datatype and all outputs are the same data type
+ '''
+ def __init__(self, N, op,
+ isizeof=gr.sizeof_gr_complex,
+ osizeof=gr.sizeof_gr_complex,
+ nsrcs=1, nsnks=1):
+ gr.top_block.__init__(self, "helper")
+
+ self.op = op
+ self.srcs = []
+ self.snks = []
+ self.head = gr.head(isizeof, N)
+
+ for n in xrange(nsrcs):
+ self.srcs.append(gr.null_source(isizeof))
+
+ for n in xrange(nsnks):
+ self.snks.append(gr.null_sink(osizeof))
+
+ self.connect(self.srcs[0], self.head, (self.op,0))
+
+ for n in xrange(1, nsrcs):
+ self.connect(self.srcs[n], (self.op,n))
+
+ for n in xrange(nsnks):
+ self.connect((self.op,n), self.snks[n])
+
+def timeit(tb, iterations):
+ '''
+ Given a top block, this function times it for a number of
+ iterations and stores the time in a list that is returned.
+ '''
+ r = gr.enable_realtime_scheduling()
+ if r != gr.RT_OK:
+ print "Warning: failed to enable realtime scheduling"
+
+ times = []
+ for i in xrange(iterations):
+ start_time = time.time()
+ tb.run()
+ end_time = time.time()
+ tb.head.reset()
+
+ times.append(end_time - start_time)
+
+ return times
+
+def format_results(kernel, times):
+ '''
+ Convinience function to convert the results of the timeit function
+ into a dictionary.
+ '''
+ res = dict()
+ res["kernel"] = kernel
+ res["avg"] = scipy.mean(times)
+ res["var"] = scipy.var(times)
+ res["max"] = max(times)
+ res["min"] = min(times)
+ return res
+
+
diff --git a/gnuradio-examples/python/volk_benchmark/volk_types.py b/gnuradio-examples/python/volk_benchmark/volk_types.py
new file mode 100755
index 000000000..3bc5a22ae
--- /dev/null
+++ b/gnuradio-examples/python/volk_benchmark/volk_types.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python
+
+from gnuradio import gr
+import argparse
+from volk_test_funcs import *
+
+######################################################################
+
+def float_to_char(N):
+ op = gr.float_to_char()
+ tb = helper(N, op, gr.sizeof_float, gr.sizeof_char, 1, 1)
+ return tb
+
+######################################################################
+
+def float_to_int(N):
+ op = gr.float_to_int()
+ tb = helper(N, op, gr.sizeof_float, gr.sizeof_int, 1, 1)
+ return tb
+
+######################################################################
+
+def float_to_short(N):
+ op = gr.float_to_short()
+ tb = helper(N, op, gr.sizeof_float, gr.sizeof_short, 1, 1)
+ return tb
+
+######################################################################
+
+def short_to_float(N):
+ op = gr.short_to_float()
+ tb = helper(N, op, gr.sizeof_short, gr.sizeof_float, 1, 1)
+ return tb
+
+######################################################################
+
+def short_to_char(N):
+ op = gr.short_to_char()
+ tb = helper(N, op, gr.sizeof_short, gr.sizeof_char, 1, 1)
+ return tb
+
+######################################################################
+
+def char_to_short(N):
+ op = gr.char_to_short()
+ tb = helper(N, op, gr.sizeof_char, gr.sizeof_short, 1, 1)
+ return tb
+
+######################################################################
+
+def char_to_float(N):
+ op = gr.char_to_float()
+ tb = helper(N, op, gr.sizeof_char, gr.sizeof_float, 1, 1)
+ return tb
+
+######################################################################
+
+def int_to_float(N):
+ op = gr.int_to_float()
+ tb = helper(N, op, gr.sizeof_int, gr.sizeof_float, 1, 1)
+ return tb
+
+######################################################################
+
+def complex_to_float(N):
+ op = gr.complex_to_float()
+ tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 2)
+ return tb
+
+######################################################################
+
+def complex_to_real(N):
+ op = gr.complex_to_real()
+ tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1)
+ return tb
+
+######################################################################
+
+def complex_to_imag(N):
+ op = gr.complex_to_imag()
+ tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1)
+ return tb
+
+######################################################################
+
+def complex_to_mag(N):
+ op = gr.complex_to_mag()
+ tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1)
+ return tb
+
+######################################################################
+
+def complex_to_mag_squared(N):
+ op = gr.complex_to_mag_squared()
+ tb = helper(N, op, gr.sizeof_gr_complex, gr.sizeof_float, 1, 1)
+ return tb
+
+######################################################################
+
+
+def run_tests(func, N, iters):
+ print("Running Test: {0}".format(func.__name__))
+ try:
+ tb = func(N)
+ t = timeit(tb, iters)
+ res = format_results(func.__name__, t)
+ return res
+ except AttributeError:
+ print "\tCould not run test. Skipping."
+ return None
+
+def main():
+ avail_tests = [float_to_char,
+ float_to_int,
+ float_to_short,
+ short_to_float,
+ short_to_char,
+ char_to_short,
+ char_to_float,
+ int_to_float,
+ complex_to_float,
+ complex_to_real,
+ complex_to_imag,
+ complex_to_mag,
+ complex_to_mag_squared]
+
+ desc='Time an operation to compare with other implementations. \
+ This program runs a simple GNU Radio flowgraph to test a \
+ particular math function, mostly to compare the \
+ Volk-optimized implementation versus a regular \
+ implementation. The results are stored to an SQLite database \
+ that can then be read by volk_plot.py to plot the differences.'
+ parser = argparse.ArgumentParser(description=desc)
+ parser.add_argument('-L', '--label', type=str,
+ required=True, default=None,
+ help='Label of database table [default: %(default)s]')
+ parser.add_argument('-D', '--database', type=str,
+ default="volk_results.db",
+ help='Database file to store data in [default: %(default)s]')
+ parser.add_argument('-N', '--nitems', type=float,
+ default=1e9,
+ help='Number of items per iterations [default: %(default)s]')
+ parser.add_argument('-I', '--iterations', type=int,
+ default=20,
+ help='Number of iterations [default: %(default)s]')
+ parser.add_argument('--tests', type=int, nargs='*',
+ choices=xrange(len(avail_tests)),
+ help='A list of tests to run; can be a single test or a \
+ space-separated list.')
+ parser.add_argument('--list', action='store_true',
+ help='List the available tests')
+ parser.add_argument('--all', action='store_true',
+ help='Run all tests')
+ args = parser.parse_args()
+
+ if(args.list):
+ print "Available Tests to Run:"
+ print "\n".join(["\t{0}: {1}".format(i,f.__name__) for i,f in enumerate(avail_tests)])
+ sys.exit(0)
+
+ N = int(args.nitems)
+ iters = args.iterations
+ label = args.label
+
+ conn = create_connection(args.database)
+ new_table(conn, label)
+
+ if args.all:
+ tests = xrange(len(avail_tests))
+ else:
+ tests = args.tests
+
+ for test in tests:
+ res = run_tests(avail_tests[test], N, iters)
+ if res is not None:
+ replace_results(conn, label, N, iters, res)
+
+if __name__ == "__main__":
+ try:
+ main()
+ except KeyboardInterrupt:
+ pass
diff --git a/volk/apps/CMakeLists.txt b/volk/apps/CMakeLists.txt
index f27bdc126..14291e5e3 100644
--- a/volk/apps/CMakeLists.txt
+++ b/volk/apps/CMakeLists.txt
@@ -42,4 +42,11 @@ add_executable(volk_profile
target_link_libraries(volk_profile volk ${Boost_LIBRARIES})
+install(
+ PROGRAMS
+ ${CMAKE_BINARY_DIR}/apps/volk_profile
+ DESTINATION ${GR_RUNTIME_DIR}
+ COMPONENT "volk"
+)
+
endif(Boost_FOUND AND UNIX)
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 10a699872..bd36d6dc7 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -34,6 +34,7 @@ int main(int argc, char *argv[]) {
VOLK_PROFILE(volk_16u_byteswap_a, 0, 0, 204600, 10000, &results);
VOLK_PROFILE(volk_32f_accumulator_s32f_a, 1e-4, 0, 204600, 10000, &results);
VOLK_PROFILE(volk_32f_x2_add_32f_a, 1e-4, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_x2_add_32f_u, 1e-4, 0, 204600, 10000, &results);
VOLK_PROFILE(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results);
VOLK_PROFILE(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 204600, 50, &results);
VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 204600, 1000, &results);
@@ -43,13 +44,22 @@ int main(int argc, char *argv[]) {
VOLK_PROFILE(volk_32fc_deinterleave_32f_x2_a, 1e-4, 0, 204600, 1000, &results);
VOLK_PROFILE(volk_32fc_deinterleave_64f_x2_a, 1e-4, 0, 204600, 1000, &results);
VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i_a, 0, 32768, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32fc_deinterleave_imag_32f_a, 1e-4, 0, 204600, 5000, &results);
VOLK_PROFILE(volk_32fc_deinterleave_real_32f_a, 1e-4, 0, 204600, 5000, &results);
VOLK_PROFILE(volk_32fc_deinterleave_real_64f_a, 1e-4, 0, 204600, 1000, &results);
VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 204600, 10000, &results);
VOLK_PROFILE(volk_32fc_index_max_16u_a, 3, 0, 204600, 10000, &results);
VOLK_PROFILE(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 204600, 100, &results);
VOLK_PROFILE(volk_32fc_magnitude_32f_a, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_magnitude_32f_u, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_magnitude_squared_32f_a, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_magnitude_squared_32f_u, 1e-4, 0, 204600, 1000, &results);
VOLK_PROFILE(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_conjugate_32fc_a, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_conjugate_32fc_u, 1e-4, 0, 204600, 1000, &results);
VOLK_PROFILE(volk_32f_s32f_convert_16i_a, 1, 32768, 204600, 10000, &results);
VOLK_PROFILE(volk_32f_s32f_convert_16i_u, 1, 32768, 204600, 10000, &results);
VOLK_PROFILE(volk_32f_s32f_convert_32i_a, 1, 2<<31, 204600, 10000, &results);
@@ -72,6 +82,7 @@ int main(int argc, char *argv[]) {
VOLK_PROFILE(volk_32f_x2_max_32f_a, 1e-4, 0, 204600, 2000, &results);
VOLK_PROFILE(volk_32f_x2_min_32f_a, 1e-4, 0, 204600, 2000, &results);
VOLK_PROFILE(volk_32f_x2_multiply_32f_a, 1e-4, 0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_x2_multiply_32f_u, 1e-4, 0, 204600, 10000, &results);
VOLK_PROFILE(volk_32f_s32f_normalize_a, 1e-4, 100, 204600, 10000, &results);
VOLK_PROFILE(volk_32f_s32f_power_32f_a, 1e-4, 4, 204600, 100, &results);
VOLK_PROFILE(volk_32f_sqrt_32f_a, 1e-4, 0, 204600, 100, &results);
@@ -102,8 +113,11 @@ int main(int argc, char *argv[]) {
VOLK_PROFILE(volk_8i_convert_16i_u, 0, 0, 204600, 2000, &results);
VOLK_PROFILE(volk_8i_s32f_convert_32f_a, 1e-4, 100, 204600, 2000, &results);
VOLK_PROFILE(volk_8i_s32f_convert_32f_u, 1e-4, 100, 204600, 2000, &results);
- VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 204600, 1000, &results);
- VOLK_PROFILE(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 204600, 1000, &results);
+ //VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_a, 1e-4, lv_32fc_t(1.0, 0.5), 204600, 1000, &results);
+ VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc_u, 1e-4, 0, 204600, 1000, &results);
+ VOLK_PROFILE(volk_32f_s32f_multiply_32f_a, 1e-4, 1.0, 204600, 10000, &results);
+ VOLK_PROFILE(volk_32f_s32f_multiply_32f_u, 1e-4, 0, 204600, 1000, &results);
+
char path[256];
get_config_path(path);
diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_a.h b/volk/include/volk/volk_32f_s32f_convert_16i_a.h
index 0a2b4f0f2..a24959678 100644
--- a/volk/include/volk/volk_32f_s32f_convert_16i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_a.h
@@ -4,6 +4,7 @@
#include <volk/volk_common.h>
#include <inttypes.h>
#include <stdio.h>
+#include <math.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@@ -21,17 +22,29 @@ static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int16_t* outputVectorPtr = outputVector;
+
+ float min_val = -32768;
+ float max_val = 32767;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 inputVal1, inputVal2;
__m128i intInputVal1, intInputVal2;
+ __m128 ret1, ret2;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
for(;number < eighthPoints; number++){
inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
- intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
- intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar));
-
+ // Scale and clip
+ ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(ret1);
+ intInputVal2 = _mm_cvtps_epi32(ret2);
+
intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
_mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
@@ -40,7 +53,12 @@ static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const
number = eighthPoints * 8;
for(; number < num_points; number++){
- *outputVectorPtr++ = (int16_t)(*inputVectorPtr++ * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int16_t)rintf(r);
}
}
#endif /* LV_HAVE_SSE2 */
@@ -61,8 +79,15 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int16_t* outputVectorPtr = outputVector;
+
+ float min_val = -32768;
+ float max_val = 32767;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 ret;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
@@ -70,18 +95,24 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const
ret = _mm_load_ps(inputVectorPtr);
inputVectorPtr += 4;
- ret = _mm_mul_ps(ret, vScalar);
+ // Scale and clip
+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
_mm_store_ps(outputFloatBuffer, ret);
- *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
- *outputVectorPtr++ = (int16_t)(outputFloatBuffer[1]);
- *outputVectorPtr++ = (int16_t)(outputFloatBuffer[2]);
- *outputVectorPtr++ = (int16_t)(outputFloatBuffer[3]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
}
number = quarterPoints * 4;
for(; number < num_points; number++){
- *outputVectorPtr++ = (int16_t)(*inputVectorPtr++ * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int16_t)rintf(r);
}
}
#endif /* LV_HAVE_SSE */
@@ -98,9 +129,17 @@ static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, co
int16_t* outputVectorPtr = outputVector;
const float* inputVectorPtr = inputVector;
unsigned int number = 0;
+ float min_val = -32768;
+ float max_val = 32767;
+ float r;
for(number = 0; number < num_points; number++){
- *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++ * scalar));
+ r = *inputVectorPtr++ * scalar;
+ if(r < min_val)
+ r = min_val;
+ else if(r > max_val)
+ r = max_val;
+ *outputVectorPtr++ = (int16_t)rintf(r);
}
}
#endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_16i_u.h b/volk/include/volk/volk_32f_s32f_convert_16i_u.h
index dec3f1611..f58158041 100644
--- a/volk/include/volk/volk_32f_s32f_convert_16i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_16i_u.h
@@ -3,6 +3,7 @@
#include <inttypes.h>
#include <stdio.h>
+#include <math.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@@ -21,17 +22,29 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int16_t* outputVectorPtr = outputVector;
+
+ float min_val = -32768;
+ float max_val = 32767;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 inputVal1, inputVal2;
__m128i intInputVal1, intInputVal2;
+ __m128 ret1, ret2;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
for(;number < eighthPoints; number++){
inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
- intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
- intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar));
-
+ // Scale and clip
+ ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(ret1);
+ intInputVal2 = _mm_cvtps_epi32(ret2);
+
intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
_mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
@@ -40,7 +53,12 @@ static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const
number = eighthPoints * 8;
for(; number < num_points; number++){
- outputVector[number] = (int16_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int16_t)rintf(r);
}
}
#endif /* LV_HAVE_SSE2 */
@@ -62,8 +80,15 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int16_t* outputVectorPtr = outputVector;
+
+ float min_val = -32768;
+ float max_val = 32767;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 ret;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
@@ -71,18 +96,24 @@ static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const
ret = _mm_loadu_ps(inputVectorPtr);
inputVectorPtr += 4;
- ret = _mm_mul_ps(ret, vScalar);
+ // Scale and clip
+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
_mm_store_ps(outputFloatBuffer, ret);
- *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
- *outputVectorPtr++ = (int16_t)(outputFloatBuffer[1]);
- *outputVectorPtr++ = (int16_t)(outputFloatBuffer[2]);
- *outputVectorPtr++ = (int16_t)(outputFloatBuffer[3]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
}
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int16_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int16_t)rintf(r);
}
}
#endif /* LV_HAVE_SSE */
@@ -100,9 +131,17 @@ static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, co
int16_t* outputVectorPtr = outputVector;
const float* inputVectorPtr = inputVector;
unsigned int number = 0;
+ float min_val = -32768;
+ float max_val = 32767;
+ float r;
for(number = 0; number < num_points; number++){
- *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++ * scalar));
+ r = *inputVectorPtr++ * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ *outputVectorPtr++ = (int16_t)rintf(r);
}
}
#endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_a.h b/volk/include/volk/volk_32f_s32f_convert_32i_a.h
index aa370e614..8f2fc791e 100644
--- a/volk/include/volk/volk_32f_s32f_convert_32i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_a.h
@@ -21,14 +21,22 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int32_t* outputVectorPtr = outputVector;
+
+ float min_val = -2147483648;
+ float max_val = 2147483647;
+ float r;
+
__m256 vScalar = _mm256_set1_ps(scalar);
__m256 inputVal1;
__m256i intInputVal1;
+ __m256 vmin_val = _mm256_set1_ps(min_val);
+ __m256 vmax_val = _mm256_set1_ps(max_val);
for(;number < eighthPoints; number++){
inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
- intInputVal1 = _mm256_cvtps_epi32(_mm256_mul_ps(inputVal1, vScalar));
+ inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ intInputVal1 = _mm256_cvtps_epi32(inputVal1);
_mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
outputVectorPtr += 8;
@@ -36,7 +44,12 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const
number = eighthPoints * 8;
for(; number < num_points; number++){
- outputVector[number] = (int32_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int32_t)(r);
}
}
#endif /* LV_HAVE_AVX */
@@ -57,14 +70,22 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int32_t* outputVectorPtr = outputVector;
+
+ float min_val = -2147483648;
+ float max_val = 2147483647;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 inputVal1;
__m128i intInputVal1;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
for(;number < quarterPoints; number++){
inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
- intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
+ inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ intInputVal1 = _mm_cvtps_epi32(inputVal1);
_mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
outputVectorPtr += 4;
@@ -72,7 +93,12 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int32_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int32_t)(r);
}
}
#endif /* LV_HAVE_SSE2 */
@@ -93,8 +119,15 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int32_t* outputVectorPtr = outputVector;
+
+ float min_val = -2147483647;
+ float max_val = 2147483647;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 ret;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
@@ -102,7 +135,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const
ret = _mm_load_ps(inputVectorPtr);
inputVectorPtr += 4;
- ret = _mm_mul_ps(ret, vScalar);
+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
_mm_store_ps(outputFloatBuffer, ret);
*outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
@@ -113,7 +146,12 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int32_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int32_t)(r);
}
}
#endif /* LV_HAVE_SSE */
@@ -130,9 +168,17 @@ static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, co
int32_t* outputVectorPtr = outputVector;
const float* inputVectorPtr = inputVector;
unsigned int number = 0;
+ float min_val = -2147483647;
+ float max_val = 2147483647;
+ float r;
for(number = 0; number < num_points; number++){
- *outputVectorPtr++ = ((int32_t)(*inputVectorPtr++ * scalar));
+ r = *inputVectorPtr++ * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ *outputVectorPtr++ = (int32_t)(r);
}
}
#endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_32i_u.h b/volk/include/volk/volk_32f_s32f_convert_32i_u.h
index b4e954dc4..d8493454b 100644
--- a/volk/include/volk/volk_32f_s32f_convert_32i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_32i_u.h
@@ -21,14 +21,24 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int32_t* outputVectorPtr = outputVector;
+
+ //float min_val = -2147483647;
+ //float max_val = 2147483647;
+ float min_val = -2146400000;
+ float max_val = 2146400000;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 inputVal1;
__m128i intInputVal1;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
for(;number < quarterPoints; number++){
inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
- intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
+ inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ intInputVal1 = _mm_cvtps_epi32(inputVal1);
_mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
outputVectorPtr += 4;
@@ -36,7 +46,12 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int32_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int32_t)(r);
}
}
#endif /* LV_HAVE_SSE2 */
@@ -58,8 +73,15 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const
const float* inputVectorPtr = (const float*)inputVector;
int32_t* outputVectorPtr = outputVector;
+
+ float min_val = -2147483647;
+ float max_val = 2147483647;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 ret;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
@@ -67,7 +89,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const
ret = _mm_loadu_ps(inputVectorPtr);
inputVectorPtr += 4;
- ret = _mm_mul_ps(ret, vScalar);
+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
_mm_store_ps(outputFloatBuffer, ret);
*outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
@@ -78,7 +100,12 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int32_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int32_t)(r);
}
}
#endif /* LV_HAVE_SSE */
@@ -96,9 +123,17 @@ static inline void volk_32f_s32f_convert_32i_u_generic(int32_t* outputVector, co
int32_t* outputVectorPtr = outputVector;
const float* inputVectorPtr = inputVector;
unsigned int number = 0;
+ float min_val = -2147483647;
+ float max_val = 2147483647;
+ float r;
for(number = 0; number < num_points; number++){
- *outputVectorPtr++ = ((int32_t)(*inputVectorPtr++ * scalar));
+ r = *inputVectorPtr++ * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ *outputVectorPtr++ = (int32_t)(r);
}
}
#endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_a.h b/volk/include/volk/volk_32f_s32f_convert_8i_a.h
index 8d87a07d7..05172171c 100644
--- a/volk/include/volk/volk_32f_s32f_convert_8i_a.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_a.h
@@ -21,9 +21,16 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f
const float* inputVectorPtr = (const float*)inputVector;
int8_t* outputVectorPtr = outputVector;
+
+ float min_val = -128;
+ float max_val = 127;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 inputVal1, inputVal2, inputVal3, inputVal4;
__m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
for(;number < sixteenthPoints; number++){
inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
@@ -31,10 +38,15 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f
inputVal3 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
inputVal4 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
- intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
- intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar));
- intInputVal3 = _mm_cvtps_epi32(_mm_mul_ps(inputVal3, vScalar));
- intInputVal4 = _mm_cvtps_epi32(_mm_mul_ps(inputVal4, vScalar));
+ inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
+ inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
+ inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(inputVal1);
+ intInputVal2 = _mm_cvtps_epi32(inputVal2);
+ intInputVal3 = _mm_cvtps_epi32(inputVal3);
+ intInputVal4 = _mm_cvtps_epi32(inputVal4);
intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
@@ -47,7 +59,12 @@ static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const f
number = sixteenthPoints * 16;
for(; number < num_points; number++){
- outputVector[number] = (int8_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int8_t)(r);
}
}
#endif /* LV_HAVE_SSE2 */
@@ -67,9 +84,16 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl
const unsigned int quarterPoints = num_points / 4;
const float* inputVectorPtr = (const float*)inputVector;
+
+ float min_val = -128;
+ float max_val = 127;
+ float r;
+
int8_t* outputVectorPtr = outputVector;
__m128 vScalar = _mm_set_ps1(scalar);
__m128 ret;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
@@ -77,7 +101,7 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl
ret = _mm_load_ps(inputVectorPtr);
inputVectorPtr += 4;
- ret = _mm_mul_ps(ret, vScalar);
+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
_mm_store_ps(outputFloatBuffer, ret);
*outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
@@ -88,7 +112,12 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const fl
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int8_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int8_t)(r);
}
}
#endif /* LV_HAVE_SSE */
@@ -105,9 +134,17 @@ static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, cons
int8_t* outputVectorPtr = outputVector;
const float* inputVectorPtr = inputVector;
unsigned int number = 0;
+ float min_val = -128;
+ float max_val = 127;
+ float r;
for(number = 0; number < num_points; number++){
- *outputVectorPtr++ = (int8_t)(*inputVectorPtr++ * scalar);
+ r = *inputVectorPtr++ * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ *outputVectorPtr++ = (int8_t)(r);
}
}
#endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_convert_8i_u.h b/volk/include/volk/volk_32f_s32f_convert_8i_u.h
index 1c6bf87c9..12991e9c1 100644
--- a/volk/include/volk/volk_32f_s32f_convert_8i_u.h
+++ b/volk/include/volk/volk_32f_s32f_convert_8i_u.h
@@ -21,9 +21,16 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f
const float* inputVectorPtr = (const float*)inputVector;
int8_t* outputVectorPtr = outputVector;
+
+ float min_val = -128;
+ float max_val = 127;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 inputVal1, inputVal2, inputVal3, inputVal4;
__m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
for(;number < sixteenthPoints; number++){
inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
@@ -31,10 +38,15 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f
inputVal3 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
inputVal4 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
- intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
- intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar));
- intInputVal3 = _mm_cvtps_epi32(_mm_mul_ps(inputVal3, vScalar));
- intInputVal4 = _mm_cvtps_epi32(_mm_mul_ps(inputVal4, vScalar));
+ inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
+ inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
+ inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(inputVal1);
+ intInputVal2 = _mm_cvtps_epi32(inputVal2);
+ intInputVal3 = _mm_cvtps_epi32(inputVal3);
+ intInputVal4 = _mm_cvtps_epi32(inputVal4);
intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
@@ -47,7 +59,12 @@ static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const f
number = sixteenthPoints * 16;
for(; number < num_points; number++){
- outputVector[number] = (int8_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int16_t)(r);
}
}
#endif /* LV_HAVE_SSE2 */
@@ -69,8 +86,15 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl
const float* inputVectorPtr = (const float*)inputVector;
int8_t* outputVectorPtr = outputVector;
+
+ float min_val = -128;
+ float max_val = 127;
+ float r;
+
__m128 vScalar = _mm_set_ps1(scalar);
__m128 ret;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
__VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
@@ -78,7 +102,7 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl
ret = _mm_loadu_ps(inputVectorPtr);
inputVectorPtr += 4;
- ret = _mm_mul_ps(ret, vScalar);
+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
_mm_store_ps(outputFloatBuffer, ret);
*outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
@@ -89,7 +113,12 @@ static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const fl
number = quarterPoints * 4;
for(; number < num_points; number++){
- outputVector[number] = (int8_t)(inputVector[number] * scalar);
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int16_t)(r);
}
}
#endif /* LV_HAVE_SSE */
@@ -107,9 +136,17 @@ static inline void volk_32f_s32f_convert_8i_u_generic(int8_t* outputVector, cons
int8_t* outputVectorPtr = outputVector;
const float* inputVectorPtr = inputVector;
unsigned int number = 0;
+ float min_val = -128;
+ float max_val = 127;
+ float r;
for(number = 0; number < num_points; number++){
- *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ * scalar));
+ r = *inputVectorPtr++ * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ *outputVectorPtr++ = (int16_t)(r);
}
}
#endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32f_s32f_multiply_32f_a.h b/volk/include/volk/volk_32f_s32f_multiply_32f_a.h
index 37223dc81..d1c6f3f65 100644
--- a/volk/include/volk/volk_32f_s32f_multiply_32f_a.h
+++ b/volk/include/volk/volk_32f_s32f_multiply_32f_a.h
@@ -4,6 +4,81 @@
#include <inttypes.h>
#include <stdio.h>
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Scalar float multiply
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param scalar the scalar value
+ \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+
+ __m128 aVal, bVal, cVal;
+ bVal = _mm_set_ps1(scalar);
+ for(;number < quarterPoints; number++){
+
+ aVal = _mm_load_ps(aPtr);
+
+ cVal = _mm_mul_ps(aVal, bVal);
+
+ _mm_store_ps(cPtr,cVal); // Store the results back into the C container
+
+ aPtr += 4;
+ cPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ *cPtr++ = (*aPtr++) * scalar;
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+ \brief Scalar float multiply
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param scalar the scalar value
+ \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int eighthPoints = num_points / 8;
+
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+
+ __m256 aVal, bVal, cVal;
+ bVal = _mm256_set1_ps(scalar);
+ for(;number < eighthPoints; number++){
+
+ aVal = _mm256_load_ps(aPtr);
+
+ cVal = _mm256_mul_ps(aVal, bVal);
+
+ _mm256_store_ps(cPtr,cVal); // Store the results back into the C container
+
+ aPtr += 8;
+ cPtr += 8;
+ }
+
+ number = eighthPoints * 8;
+ for(;number < num_points; number++){
+ *cPtr++ = (*aPtr++) * scalar;
+ }
+}
+#endif /* LV_HAVE_AVX */
+
+
#ifdef LV_HAVE_GENERIC
/*!
\brief Scalar float multiply
diff --git a/volk/include/volk/volk_32f_s32f_multiply_32f_u.h b/volk/include/volk/volk_32f_s32f_multiply_32f_u.h
new file mode 100644
index 000000000..0e700060f
--- /dev/null
+++ b/volk/include/volk/volk_32f_s32f_multiply_32f_u.h
@@ -0,0 +1,102 @@
+#ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
+#define INCLUDED_volk_32f_s32f_multiply_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Scalar float multiply
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param scalar the scalar value
+ \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+
+ __m128 aVal, bVal, cVal;
+ bVal = _mm_set_ps1(scalar);
+ for(;number < quarterPoints; number++){
+
+ aVal = _mm_loadu_ps(aPtr);
+
+ cVal = _mm_mul_ps(aVal, bVal);
+
+ _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
+
+ aPtr += 4;
+ cPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ *cPtr++ = (*aPtr++) * scalar;
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+ \brief Scalar float multiply
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param scalar the scalar value
+ \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int eighthPoints = num_points / 8;
+
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+
+ __m256 aVal, bVal, cVal;
+ bVal = _mm256_set1_ps(scalar);
+ for(;number < eighthPoints; number++){
+
+ aVal = _mm256_loadu_ps(aPtr);
+
+ cVal = _mm256_mul_ps(aVal, bVal);
+
+ _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container
+
+ aPtr += 8;
+ cPtr += 8;
+ }
+
+ number = eighthPoints * 8;
+ for(;number < num_points; number++){
+ *cPtr++ = (*aPtr++) * scalar;
+ }
+}
+#endif /* LV_HAVE_AVX */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Scalar float multiply
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param scalar the scalar value
+ \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32f_s32f_multiply_32f_u_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const float* inputPtr = aVector;
+ float* outputPtr = cVector;
+ for(number = 0; number < num_points; number++){
+ *outputPtr = (*inputPtr) * scalar;
+ inputPtr++;
+ outputPtr++;
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */
diff --git a/volk/include/volk/volk_32f_x2_add_32f_u.h b/volk/include/volk/volk_32f_x2_add_32f_u.h
new file mode 100644
index 000000000..e360a7958
--- /dev/null
+++ b/volk/include/volk/volk_32f_x2_add_32f_u.h
@@ -0,0 +1,66 @@
+#ifndef INCLUDED_volk_32f_x2_add_32f_u_H
+#define INCLUDED_volk_32f_x2_add_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Adds the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be added
+ \param bVector One of the vectors to be added
+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+*/
+static inline void volk_32f_x2_add_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+ const float* bPtr= bVector;
+
+ __m128 aVal, bVal, cVal;
+ for(;number < quarterPoints; number++){
+
+ aVal = _mm_loadu_ps(aPtr);
+ bVal = _mm_loadu_ps(bPtr);
+
+ cVal = _mm_add_ps(aVal, bVal);
+
+ _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
+
+ aPtr += 4;
+ bPtr += 4;
+ cPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ *cPtr++ = (*aPtr++) + (*bPtr++);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Adds the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be added
+ \param bVector One of the vectors to be added
+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+*/
+static inline void volk_32f_x2_add_32f_u_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+ const float* bPtr= bVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = (*aPtr++) + (*bPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_32f_x2_add_32f_u_H */
diff --git a/volk/include/volk/volk_32f_x2_multiply_32f_u.h b/volk/include/volk/volk_32f_x2_multiply_32f_u.h
new file mode 100644
index 000000000..6c3ce5d83
--- /dev/null
+++ b/volk/include/volk/volk_32f_x2_multiply_32f_u.h
@@ -0,0 +1,106 @@
+#ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
+#define INCLUDED_volk_32f_x2_multiply_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Multiplys the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+ const float* bPtr= bVector;
+
+ __m128 aVal, bVal, cVal;
+ for(;number < quarterPoints; number++){
+
+ aVal = _mm_loadu_ps(aPtr);
+ bVal = _mm_loadu_ps(bPtr);
+
+ cVal = _mm_mul_ps(aVal, bVal);
+
+ _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
+
+ aPtr += 4;
+ bPtr += 4;
+ cPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ *cPtr++ = (*aPtr++) * (*bPtr++);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+ \brief Multiplies the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int eighthPoints = num_points / 8;
+
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+ const float* bPtr= bVector;
+
+ __m256 aVal, bVal, cVal;
+ for(;number < eighthPoints; number++){
+
+ aVal = _mm256_loadu_ps(aPtr);
+ bVal = _mm256_loadu_ps(bPtr);
+
+ cVal = _mm256_mul_ps(aVal, bVal);
+
+ _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container
+
+ aPtr += 8;
+ bPtr += 8;
+ cPtr += 8;
+ }
+
+ number = eighthPoints * 8;
+ for(;number < num_points; number++){
+ *cPtr++ = (*aPtr++) * (*bPtr++);
+ }
+}
+#endif /* LV_HAVE_AVX */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Multiplys the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32f_x2_multiply_32f_u_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+ const float* bPtr= bVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = (*aPtr++) * (*bPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */
diff --git a/volk/include/volk/volk_32fc_conjugate_32fc_a.h b/volk/include/volk/volk_32fc_conjugate_32fc_a.h
new file mode 100644
index 000000000..1518af9be
--- /dev/null
+++ b/volk/include/volk/volk_32fc_conjugate_32fc_a.h
@@ -0,0 +1,64 @@
+#ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H
+#define INCLUDED_volk_32fc_conjugate_32fc_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored
+ \param aVector Vector to be conjugated
+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+
+ __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
+
+ x = _mm_xor_ps(x, conjugator); // conjugate register
+
+ _mm_store_ps((float*)c,x); // Store the results back into the C container
+
+ a += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = lv_conj(*a);
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored
+ \param aVector Vector to be conjugated
+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = lv_conj(*aPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */
diff --git a/volk/include/volk/volk_32fc_conjugate_32fc_u.h b/volk/include/volk/volk_32fc_conjugate_32fc_u.h
new file mode 100644
index 000000000..b26fe0789
--- /dev/null
+++ b/volk/include/volk/volk_32fc_conjugate_32fc_u.h
@@ -0,0 +1,64 @@
+#ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H
+#define INCLUDED_volk_32fc_conjugate_32fc_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored
+ \param aVector Vector to be conjugated
+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+
+ __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
+
+ x = _mm_xor_ps(x, conjugator); // conjugate register
+
+ _mm_storeu_ps((float*)c,x); // Store the results back into the C container
+
+ a += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = lv_conj(*a);
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored
+ \param aVector Vector to be conjugated
+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_32fc_conjugate_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = lv_conj(*aPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_conjugate_32fc_u_H */
diff --git a/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h b/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h
new file mode 100644
index 000000000..adc4112b9
--- /dev/null
+++ b/volk/include/volk/volk_32fc_deinterleave_imag_32f_a.h
@@ -0,0 +1,68 @@
+#ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
+#define INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Deinterleaves the complex vector into Q vector data
+ \param complexVector The complex input vector
+ \param qBuffer The Q buffer output data
+ \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* complexVectorPtr = (const float*)complexVector;
+ float* qBufferPtr = qBuffer;
+
+ __m128 cplxValue1, cplxValue2, iValue;
+ for(;number < quarterPoints; number++){
+
+ cplxValue1 = _mm_load_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue2 = _mm_load_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ // Arrange in q1q2q3q4 format
+ iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
+
+ _mm_store_ps(qBufferPtr, iValue);
+
+ qBufferPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ complexVectorPtr++;
+ *qBufferPtr++ = *complexVectorPtr++;
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Deinterleaves the complex vector into Q vector data
+ \param complexVector The complex input vector
+ \param qBuffer The I buffer output data
+ \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_imag_32f_a_generic(float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ const float* complexVectorPtr = (float*)complexVector;
+ float* qBufferPtr = qBuffer;
+ for(number = 0; number < num_points; number++){
+ complexVectorPtr++;
+ *qBufferPtr++ = *complexVectorPtr++;
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+
+
+#endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_a_H */
diff --git a/volk/include/volk/volk_32fc_magnitude_32f_u.h b/volk/include/volk/volk_32fc_magnitude_32f_u.h
new file mode 100644
index 000000000..ed1cedef9
--- /dev/null
+++ b/volk/include/volk/volk_32fc_magnitude_32f_u.h
@@ -0,0 +1,118 @@
+#ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
+#define INCLUDED_volk_32fc_magnitude_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into cVector
+ */
+static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+
+ __m128 cplxValue1, cplxValue2, result;
+ for(;number < quarterPoints; number++){
+ cplxValue1 = _mm_loadu_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue2 = _mm_loadu_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
+ cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
+
+ result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
+
+ result = _mm_sqrt_ps(result);
+
+ _mm_storeu_ps(magnitudeVectorPtr, result);
+ magnitudeVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ float val1Real = *complexVectorPtr++;
+ float val1Imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+ /*!
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into cVector
+ */
+static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+
+ __m128 cplxValue1, cplxValue2, iValue, qValue, result;
+ for(;number < quarterPoints; number++){
+ cplxValue1 = _mm_loadu_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue2 = _mm_loadu_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ // Arrange in i1i2i3i4 format
+ iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
+ // Arrange in q1q2q3q4 format
+ qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
+
+ iValue = _mm_mul_ps(iValue, iValue); // Square the I values
+ qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
+
+ result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
+
+ result = _mm_sqrt_ps(result);
+
+ _mm_storeu_ps(magnitudeVectorPtr, result);
+ magnitudeVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ float val1Real = *complexVectorPtr++;
+ float val1Imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into cVector
+ */
+static inline void volk_32fc_magnitude_32f_u_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+ unsigned int number = 0;
+ for(number = 0; number < num_points; number++){
+ const float real = *complexVectorPtr++;
+ const float imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
diff --git a/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h b/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h
new file mode 100644
index 000000000..00bdefbb5
--- /dev/null
+++ b/volk/include/volk/volk_32fc_magnitude_squared_32f_a.h
@@ -0,0 +1,114 @@
+#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
+#define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into cVector
+ */
+static inline void volk_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+
+ __m128 cplxValue1, cplxValue2, result;
+ for(;number < quarterPoints; number++){
+ cplxValue1 = _mm_load_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue2 = _mm_load_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
+ cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
+
+ result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
+
+ _mm_store_ps(magnitudeVectorPtr, result);
+ magnitudeVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ float val1Real = *complexVectorPtr++;
+ float val1Imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+ /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into cVector
+ */
+static inline void volk_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+
+ __m128 cplxValue1, cplxValue2, iValue, qValue, result;
+ for(;number < quarterPoints; number++){
+ cplxValue1 = _mm_load_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue2 = _mm_load_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ // Arrange in i1i2i3i4 format
+ iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
+ // Arrange in q1q2q3q4 format
+ qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
+
+ iValue = _mm_mul_ps(iValue, iValue); // Square the I values
+ qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
+
+ result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
+
+ _mm_store_ps(magnitudeVectorPtr, result);
+ magnitudeVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ float val1Real = *complexVectorPtr++;
+ float val1Imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into cVector
+ */
+static inline void volk_32fc_magnitude_squared_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+ unsigned int number = 0;
+ for(number = 0; number < num_points; number++){
+ const float real = *complexVectorPtr++;
+ const float imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = (real*real) + (imag*imag);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
diff --git a/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h b/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h
new file mode 100644
index 000000000..6eb4a523a
--- /dev/null
+++ b/volk/include/volk/volk_32fc_magnitude_squared_32f_u.h
@@ -0,0 +1,114 @@
+#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
+#define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into cVector
+ */
+static inline void volk_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+
+ __m128 cplxValue1, cplxValue2, result;
+ for(;number < quarterPoints; number++){
+ cplxValue1 = _mm_loadu_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue2 = _mm_loadu_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
+ cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
+
+ result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
+
+ _mm_storeu_ps(magnitudeVectorPtr, result);
+ magnitudeVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ float val1Real = *complexVectorPtr++;
+ float val1Imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+ /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into cVector
+ */
+static inline void volk_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+
+ __m128 cplxValue1, cplxValue2, iValue, qValue, result;
+ for(;number < quarterPoints; number++){
+ cplxValue1 = _mm_loadu_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue2 = _mm_loadu_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ // Arrange in i1i2i3i4 format
+ iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
+ // Arrange in q1q2q3q4 format
+ qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
+
+ iValue = _mm_mul_ps(iValue, iValue); // Square the I values
+ qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
+
+ result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
+
+ _mm_storeu_ps(magnitudeVectorPtr, result);
+ magnitudeVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ float val1Real = *complexVectorPtr++;
+ float val1Imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into cVector
+ */
+static inline void volk_32fc_magnitude_squared_32f_u_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+ unsigned int number = 0;
+ for(number = 0; number < num_points; number++){
+ const float real = *complexVectorPtr++;
+ const float imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = (real*real) + (imag*imag);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
index b27a7259f..534dc2a25 100644
--- a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_a.h
@@ -6,7 +6,8 @@
#include <volk/volk_complex.h>
#include <float.h>
-#ifdef LV_HAVE_GENERIC
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
@@ -14,18 +15,44 @@
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
-static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
- lv_32fc_t* cPtr = cVector;
- const lv_32fc_t* aPtr = aVector;
- unsigned int number = 0;
+static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
- for(number = 0; number < num_points; number++){
- *cPtr++ = (*aPtr++) * scalar;
+ __m128 x, yl, yh, z, tmp1, tmp2;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+
+ // Set up constant scalar vector
+ yl = _mm_set_ps1(lv_creal(scalar));
+ yh = _mm_set_ps1(lv_cimag(scalar));
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ _mm_store_ps((float*)c,z); // Store the results back into the C container
+
+ a += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = (*a) * scalar;
}
}
-#endif /* LV_HAVE_GENERIC */
+#endif /* LV_HAVE_SSE */
-#ifdef LV_HAVE_ORC
+
+#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
@@ -33,11 +60,29 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, c
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
-extern void volk_32fc_s32fc_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points);
-static inline void volk_32fc_s32fc_multiply_32fc_a_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
- volk_32fc_s32fc_multiply_32fc_a_orc_impl(cVector, aVector, scalar, num_points);
+static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ unsigned int number = num_points;
+
+ // unwrap loop
+ while (number >= 8){
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ number -= 8;
+ }
+
+ // clean up any remaining
+ while (number-- > 0)
+ *cPtr++ = *aPtr++ * scalar;
}
-#endif /* LV_HAVE_ORC */
+#endif /* LV_HAVE_GENERIC */
diff --git a/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
new file mode 100644
index 000000000..218c450f8
--- /dev/null
+++ b/volk/include/volk/volk_32fc_s32fc_multiply_32fc_u.h
@@ -0,0 +1,87 @@
+#ifndef INCLUDED_volk_32fc_s32fc_multiply_32fc_u_H
+#define INCLUDED_volk_32fc_s32fc_multiply_32fc_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+/*!
+ \brief Multiplies the input vector by a scalar and stores the results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be multiplied
+ \param scalar The complex scalar to multiply aVector
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x, yl, yh, z, tmp1, tmp2;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+
+ // Set up constant scalar vector
+ yl = _mm_set_ps1(lv_creal(scalar));
+ yh = _mm_set_ps1(lv_cimag(scalar));
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ _mm_storeu_ps((float*)c,z); // Store the results back into the C container
+
+ a += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = (*a) * scalar;
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Multiplies the input vector by a scalar and stores the results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be multiplied
+ \param scalar The complex scalar to multiply aVector
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+*/
+static inline void volk_32fc_s32fc_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ unsigned int number = num_points;
+
+ // unwrap loop
+ while (number >= 8){
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ number -= 8;
+ }
+
+ // clean up any remaining
+ while (number-- > 0)
+ *cPtr++ = *aPtr++ * scalar;
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_u_H */
diff --git a/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h
index 18dd092e8..aec8bd716 100644
--- a/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h
+++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_a.h
@@ -23,7 +23,6 @@ static inline void volk_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const l
lv_32fc_t* c = cVector;
const lv_32fc_t* a = aVector;
const lv_32fc_t* b = bVector;
-
for(;number < halfPoints; number++){
x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
diff --git a/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h b/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h
new file mode 100644
index 000000000..729c1a4ad
--- /dev/null
+++ b/volk/include/volk/volk_32fc_x2_multiply_32fc_u.h
@@ -0,0 +1,77 @@
+#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_u_H
+#define INCLUDED_volk_32fc_x2_multiply_32fc_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Multiplies the two input complex vectors and stores their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_32fc_x2_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x, y, yl, yh, z, tmp1, tmp2;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+ const lv_32fc_t* b = bVector;
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ _mm_storeu_ps((float*)c,z); // Store the results back into the C container
+
+ a += 2;
+ b += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = (*a) * (*b);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Multiplies the two input complex vectors and stores their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_32fc_x2_multiply_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ const lv_32fc_t* bPtr= bVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = (*aPtr++) * (*bPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_u_H */
diff --git a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h
new file mode 100644
index 000000000..2a1bcbce0
--- /dev/null
+++ b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_a.h
@@ -0,0 +1,81 @@
+#ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
+#define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector First vector to be multiplied
+ \param bVector Second vector that is conjugated before being multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x, y, yl, yh, z, tmp1, tmp2;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+ const lv_32fc_t* b = bVector;
+
+ __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ y = _mm_xor_ps(y, conjugator); // conjugate y
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ _mm_store_ps((float*)c,z); // Store the results back into the C container
+
+ a += 2;
+ b += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = (*a) * lv_conj(*b);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector First vector to be multiplied
+ \param bVector Second vector that is conjugated before being multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_32fc_x2_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ const lv_32fc_t* bPtr= bVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = (*aPtr++) * lv_conj(*bPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H */
diff --git a/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h
new file mode 100644
index 000000000..92f6a051e
--- /dev/null
+++ b/volk/include/volk/volk_32fc_x2_multiply_conjugate_32fc_u.h
@@ -0,0 +1,81 @@
+#ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H
+#define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk/volk_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector First vector to be multiplied
+ \param bVector Second vector that is conjugated before being multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_32fc_x2_multiply_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x, y, yl, yh, z, tmp1, tmp2;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+ const lv_32fc_t* b = bVector;
+
+ __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ y = _mm_xor_ps(y, conjugator); // conjugate y
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ _mm_storeu_ps((float*)c,z); // Store the results back into the C container
+
+ a += 2;
+ b += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = (*a) * lv_conj(*b);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector First vector to be multiplied
+ \param bVector Second vector that is conjugated before being multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_32fc_x2_multiply_conjugate_32fc_u_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ const lv_32fc_t* bPtr= bVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = (*aPtr++) * lv_conj(*bPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H */
diff --git a/volk/include/volk/volk_64u_popcnt_a.h b/volk/include/volk/volk_64u_popcnt_a.h
index bdaa98643..4683f1e38 100644
--- a/volk/include/volk/volk_64u_popcnt_a.h
+++ b/volk/include/volk/volk_64u_popcnt_a.h
@@ -10,10 +10,11 @@
static inline void volk_64u_popcnt_a_generic(uint64_t* ret, const uint64_t value) {
- const uint32_t* valueVector = (const uint32_t*)&value;
+ //const uint32_t* valueVector = (const uint32_t*)&value;
// This is faster than a lookup table
- uint32_t retVal = valueVector[0];
+ //uint32_t retVal = valueVector[0];
+ uint32_t retVal = (uint32_t)(value && 0x00000000FFFFFFFF);
retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
@@ -22,7 +23,8 @@ static inline void volk_64u_popcnt_a_generic(uint64_t* ret, const uint64_t value
retVal = (retVal + (retVal >> 16)) & 0x0000003F;
uint64_t retVal64 = retVal;
- retVal = valueVector[1];
+ //retVal = valueVector[1];
+ retVal = (uint32_t)((value && 0xFFFFFFFF00000000) >> 31);
retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
index 9bb515e9f..bb37801c9 100644
--- a/volk/lib/qa_utils.cc
+++ b/volk/lib/qa_utils.cc
@@ -198,6 +198,18 @@ inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector<void *> &buf
while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
}
+inline void run_cast_test1_s32fc(volk_fn_1arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+ while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
+}
+
+inline void run_cast_test2_s32fc(volk_fn_2arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+ while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
+}
+
+inline void run_cast_test3_s32fc(volk_fn_3arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
+ while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
+}
+
template <class t>
bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
bool fail = false;
@@ -246,7 +258,7 @@ bool run_volk_tests(struct volk_func_desc desc,
void (*manual_func)(),
std::string name,
float tol,
- float scalar,
+ lv_32fc_t scalar,
int vlen,
int iter,
std::vector<std::string> *best_arch_vector = 0
@@ -316,21 +328,33 @@ bool run_volk_tests(struct volk_func_desc desc,
if(inputsc.size() == 0) {
run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
- run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ if(inputsc[0].is_complex) {
+ run_cast_test1_s32fc((volk_fn_1arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ } else {
+ run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
+ }
} else throw "unsupported 1 arg function >1 scalars";
break;
case 2:
if(inputsc.size() == 0) {
run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
- run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ if(inputsc[0].is_complex) {
+ run_cast_test2_s32fc((volk_fn_2arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ } else {
+ run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
+ }
} else throw "unsupported 2 arg function >1 scalars";
break;
case 3:
if(inputsc.size() == 0) {
run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
- run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ if(inputsc[0].is_complex) {
+ run_cast_test3_s32fc((volk_fn_3arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ } else {
+ run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
+ }
} else throw "unsupported 3 arg function >1 scalars";
break;
case 4:
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
index a1bc1f20c..b998df852 100644
--- a/volk/lib/qa_utils.h
+++ b/volk/lib/qa_utils.h
@@ -21,7 +21,7 @@ volk_type_t volk_type_from_string(std::string);
float uniform(void);
void random_floats(float *buf, unsigned n);
-bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, float, int, int, std::vector<std::string> *);
+bool run_volk_tests(struct volk_func_desc, void(*)(), std::string, float, lv_32fc_t, int, int, std::vector<std::string> *);
#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0), 0); }
#define VOLK_PROFILE(func, tol, scalar, len, iter, results) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results)
@@ -32,5 +32,8 @@ typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, const
typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input
typedef void (*volk_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*);
typedef void (*volk_fn_3arg_s32f)(void *, void *, void *, float, unsigned int, const char*);
+typedef void (*volk_fn_1arg_s32fc)(void *, lv_32fc_t, unsigned int, const char*); //one input vector, one scalar float input
+typedef void (*volk_fn_2arg_s32fc)(void *, void *, lv_32fc_t, unsigned int, const char*);
+typedef void (*volk_fn_3arg_s32fc)(void *, void *, void *, lv_32fc_t, unsigned int, const char*);
#endif //VOLK_QA_UTILS_H
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index fbd4bdea5..593087f85 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -22,6 +22,7 @@ VOLK_RUN_TESTS(volk_16i_convert_8i_u, 0, 0, 20460, 1);
VOLK_RUN_TESTS(volk_16u_byteswap_a, 0, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32f_accumulator_s32f_a, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32f_x2_add_32f_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_add_32f_u, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc_a, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc_a, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f_a, 1e-4, 20.0, 20460, 1);
@@ -37,7 +38,6 @@ VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc_a, 1e-4, 0, 204600, 1);
VOLK_RUN_TESTS(volk_32fc_index_max_16u_a, 3, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i_a, 1, 32768, 20460, 1);
VOLK_RUN_TESTS(volk_32fc_magnitude_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_a, 1, 32768, 20460, 1);
VOLK_RUN_TESTS(volk_32f_s32f_convert_16i_u, 1, 32768, 20460, 1);
VOLK_RUN_TESTS(volk_32f_s32f_convert_32i_a, 1, 2<<31, 20460, 1);
@@ -59,7 +59,6 @@ VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic_a, 1, 32768, 20460, 1);
VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc_a, 0, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32f_x2_max_32f_a, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32f_x2_min_32f_a, 1e-4, 0, 20460, 1);
-VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1);
VOLK_RUN_TESTS(volk_32f_s32f_normalize_a, 1e-4, 100, 20460, 1);
VOLK_RUN_TESTS(volk_32f_s32f_power_32f_a, 1e-4, 4, 20460, 1);
VOLK_RUN_TESTS(volk_32f_sqrt_32f_a, 1e-4, 0, 20460, 1);
@@ -90,3 +89,15 @@ VOLK_RUN_TESTS(volk_8i_convert_16i_a, 0, 0, 20460, 1);
VOLK_RUN_TESTS(volk_8i_convert_16i_u, 0, 0, 20460, 1);
VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_a, 1e-4, 100, 20460, 1);
VOLK_RUN_TESTS(volk_8i_s32f_convert_32f_u, 1e-4, 100, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc_u, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc_u, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_conjugate_32fc_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_conjugate_32fc_u, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_x2_multiply_32f_u, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc_u, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f_a, 1e-4, 0, 20460, 1);
+VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f_u, 1e-4, 0, 20460, 1);